### Libraries

In [56]:
import numpy as np
import pandas as pd
import pickle

### Get Data

In [75]:
with open('./data/processed/carrier_aircraft_combo_capacity_dict.txt', 'rb') as handle:
    carrier_aircraft_combo_capacity = pickle.loads(handle.read()) 
dat = pd.read_pickle('./data/processed/final/all-sched.pkl')
dat.head()

Unnamed: 0,operator,flight,aircraft,frequency,from,from_time,to,to_time,eff_from,eff_to
0,IND,6E 101,A 320,"[1, 2, 3, 4, 5, 6, 7]",MAA,6.25,CJB,7.25,2019-10-27 00:00:00,2020-03-28 00:00:00
1,IND,6E 103,A 320,"[1, 2, 3, 4, 5, 6, 7]",PNQ,22.33,BLR,0.08,2019-10-27 00:00:00,2020-03-28 00:00:00
2,IND,6E 104,A 320,"[1, 2, 3, 4, 5, 6, 7]",HYD,8.17,ATQ,11.0,2019-10-27 00:00:00,2020-03-28 00:00:00
3,IND,6E 105,A 320,"[1, 2, 3, 4, 5, 7]",PNQ,17.33,BLR,18.92,2019-10-27 00:00:00,2020-03-28 00:00:00
4,IND,6E 105,A 320,"[1, 2, 3, 4, 5, 7]",DEL,14.75,PNQ,16.83,2019-10-27 00:00:00,2020-03-27 00:00:00


#### Weekly Flights for Schedule
Adding a column, <i>weekly_flights</i>, to the dataframe that translates the frequency information into weekly number of flights.

In [77]:
dat['weekly_flights'] = dat.apply(lambda row: len(row.frequency), axis = 1)
dat.insert(4, 'weekly_flights', dat.pop('weekly_flights'))
dat.head()

Unnamed: 0,operator,flight,aircraft,frequency,weekly_flights,from,from_time,to,to_time,eff_from,eff_to
0,IND,6E 101,A 320,"[1, 2, 3, 4, 5, 6, 7]",7,MAA,6.25,CJB,7.25,2019-10-27 00:00:00,2020-03-28 00:00:00
1,IND,6E 103,A 320,"[1, 2, 3, 4, 5, 6, 7]",7,PNQ,22.33,BLR,0.08,2019-10-27 00:00:00,2020-03-28 00:00:00
2,IND,6E 104,A 320,"[1, 2, 3, 4, 5, 6, 7]",7,HYD,8.17,ATQ,11.0,2019-10-27 00:00:00,2020-03-28 00:00:00
3,IND,6E 105,A 320,"[1, 2, 3, 4, 5, 7]",6,PNQ,17.33,BLR,18.92,2019-10-27 00:00:00,2020-03-28 00:00:00
4,IND,6E 105,A 320,"[1, 2, 3, 4, 5, 7]",6,DEL,14.75,PNQ,16.83,2019-10-27 00:00:00,2020-03-27 00:00:00


#### Aircraft Capacity
Adding a column, <i>capacity</i>, to the dataframe that captures the capacity of the aircraft flying the schedule.

In [79]:
dat['craft_capacity'] = dat.apply(lambda row: 
                                     carrier_aircraft_combo_capacity[row.operator + '-' + row.aircraft], 
                                     axis = 1)
dat.insert(3, 'craft_capacity', dat.pop('craft_capacity'))

dat.head()

Unnamed: 0,operator,flight,aircraft,craft_capacity,frequency,weekly_flights,from,from_time,to,to_time,eff_from,eff_to
0,IND,6E 101,A 320,183,"[1, 2, 3, 4, 5, 6, 7]",7,MAA,6.25,CJB,7.25,2019-10-27 00:00:00,2020-03-28 00:00:00
1,IND,6E 103,A 320,183,"[1, 2, 3, 4, 5, 6, 7]",7,PNQ,22.33,BLR,0.08,2019-10-27 00:00:00,2020-03-28 00:00:00
2,IND,6E 104,A 320,183,"[1, 2, 3, 4, 5, 6, 7]",7,HYD,8.17,ATQ,11.0,2019-10-27 00:00:00,2020-03-28 00:00:00
3,IND,6E 105,A 320,183,"[1, 2, 3, 4, 5, 7]",6,PNQ,17.33,BLR,18.92,2019-10-27 00:00:00,2020-03-28 00:00:00
4,IND,6E 105,A 320,183,"[1, 2, 3, 4, 5, 7]",6,DEL,14.75,PNQ,16.83,2019-10-27 00:00:00,2020-03-27 00:00:00


#### Add <i>weekly_capacity</i> and <i>weekly_est_pass</i>
For every schedule, adding columns:
<ul>
    <li>weekly_capacity = weekly_flights*capacity
    <li>weekly_est_pass = weekly_capacity*carrier_plf

In [81]:
dat['weekly_capacity'] = dat.apply(lambda row: row.weekly_flights*row.craft_capacity, axis = 1)
dat.insert(6, 'weekly_capacity', dat.pop('weekly_capacity'))

with open('./data/processed/carrier_plf_dict.txt', 'rb') as handle:
    carrier_plf = pickle.loads(handle.read())
dat['weekly_est_pass'] = dat.apply(lambda row: round(row.weekly_capacity*carrier_plf[row.operator], 0), 
                                   axis = 1)
dat.insert(7, 'weekly_est_pass', dat.pop('weekly_est_pass'))

dat.head()

Unnamed: 0,operator,flight,aircraft,craft_capacity,frequency,weekly_flights,weekly_capacity,weekly_est_pass,from,from_time,to,to_time,eff_from,eff_to
0,IND,6E 101,A 320,183,"[1, 2, 3, 4, 5, 6, 7]",7,1281,1125.0,MAA,6.25,CJB,7.25,2019-10-27 00:00:00,2020-03-28 00:00:00
1,IND,6E 103,A 320,183,"[1, 2, 3, 4, 5, 6, 7]",7,1281,1125.0,PNQ,22.33,BLR,0.08,2019-10-27 00:00:00,2020-03-28 00:00:00
2,IND,6E 104,A 320,183,"[1, 2, 3, 4, 5, 6, 7]",7,1281,1125.0,HYD,8.17,ATQ,11.0,2019-10-27 00:00:00,2020-03-28 00:00:00
3,IND,6E 105,A 320,183,"[1, 2, 3, 4, 5, 7]",6,1098,964.0,PNQ,17.33,BLR,18.92,2019-10-27 00:00:00,2020-03-28 00:00:00
4,IND,6E 105,A 320,183,"[1, 2, 3, 4, 5, 7]",6,1098,964.0,DEL,14.75,PNQ,16.83,2019-10-27 00:00:00,2020-03-27 00:00:00


#### Add <i>from_time_slot</i> and <i>to_time_slot</i>
Divide the day (0.0 hrs to 24.0 hrs) into 48 serially numbered time-slots of 0.5 hours each.
For each schedule, insert columns from_time_slot and to_time_slot that identify which serially numbered time slot the schedule's <i>from_time</i> and <i>to_time</i> belong to.

In [83]:
dat.insert(10, 'from_time_slot', 0)
dat.insert(13, 'to_time_slot', 0)

for i in range(dat.shape[0]):
    hr_, min_ = divmod(dat.from_time[i], 1)
    if min_ < 0.5: dat.loc[i, 'from_time_slot'] = 2*hr_ + 1
    else: dat.loc[i, 'from_time_slot'] = 2*hr_ + 2
        
    hr_, min_ = divmod(dat.to_time[i], 1)
    if min_ < 0.5: dat.loc[i, 'to_time_slot'] = 2*hr_ + 1
    else: dat.loc[i, 'to_time_slot'] = 2*hr_ + 2

dat.head()

Unnamed: 0,operator,flight,aircraft,craft_capacity,frequency,weekly_flights,weekly_capacity,weekly_est_pass,from,from_time,from_time_slot,to,to_time,to_time_slot,eff_from,eff_to
0,IND,6E 101,A 320,183,"[1, 2, 3, 4, 5, 6, 7]",7,1281,1125.0,MAA,6.25,13.0,CJB,7.25,15.0,2019-10-27 00:00:00,2020-03-28 00:00:00
1,IND,6E 103,A 320,183,"[1, 2, 3, 4, 5, 6, 7]",7,1281,1125.0,PNQ,22.33,45.0,BLR,0.08,1.0,2019-10-27 00:00:00,2020-03-28 00:00:00
2,IND,6E 104,A 320,183,"[1, 2, 3, 4, 5, 6, 7]",7,1281,1125.0,HYD,8.17,17.0,ATQ,11.0,23.0,2019-10-27 00:00:00,2020-03-28 00:00:00
3,IND,6E 105,A 320,183,"[1, 2, 3, 4, 5, 7]",6,1098,964.0,PNQ,17.33,35.0,BLR,18.92,38.0,2019-10-27 00:00:00,2020-03-28 00:00:00
4,IND,6E 105,A 320,183,"[1, 2, 3, 4, 5, 7]",6,1098,964.0,DEL,14.75,30.0,PNQ,16.83,34.0,2019-10-27 00:00:00,2020-03-27 00:00:00


#### One-Hot-Encode <i>Frequency</i>
Adding 7 new columns, one for each day of the week and if a particular schedule flies on a given day of the week, set value in the appropriate column to True.

In [85]:
for i in range(1, 8, 1):
    col_name = 'day_' + str(i)
    dat[col_name] = False
    
for i in range(dat.shape[0]):
    for j in range(1, 8, 1):
        col_name = 'day_' + str(j)
        if str(j) in dat['frequency'][i]:
            dat.loc[i, col_name] = True
            
dat.head()

Unnamed: 0,operator,flight,aircraft,craft_capacity,frequency,weekly_flights,weekly_capacity,weekly_est_pass,from,from_time,...,to_time_slot,eff_from,eff_to,day_1,day_2,day_3,day_4,day_5,day_6,day_7
0,IND,6E 101,A 320,183,"[1, 2, 3, 4, 5, 6, 7]",7,1281,1125.0,MAA,6.25,...,15.0,2019-10-27 00:00:00,2020-03-28 00:00:00,False,False,False,False,False,False,False
1,IND,6E 103,A 320,183,"[1, 2, 3, 4, 5, 6, 7]",7,1281,1125.0,PNQ,22.33,...,1.0,2019-10-27 00:00:00,2020-03-28 00:00:00,False,False,False,False,False,False,False
2,IND,6E 104,A 320,183,"[1, 2, 3, 4, 5, 6, 7]",7,1281,1125.0,HYD,8.17,...,23.0,2019-10-27 00:00:00,2020-03-28 00:00:00,False,False,False,False,False,False,False
3,IND,6E 105,A 320,183,"[1, 2, 3, 4, 5, 7]",6,1098,964.0,PNQ,17.33,...,38.0,2019-10-27 00:00:00,2020-03-28 00:00:00,False,False,False,False,False,False,False
4,IND,6E 105,A 320,183,"[1, 2, 3, 4, 5, 7]",6,1098,964.0,DEL,14.75,...,34.0,2019-10-27 00:00:00,2020-03-27 00:00:00,False,False,False,False,False,False,False


In [87]:
dat.to_pickle('./data/processed/final/all-sched-enriched.pkl')
dat.to_excel('./data/processed/final/all-sched-enriched.xlsx', index = False)
dat.to_csv('./data/processed/final/all-sched-enriched.csv', index = False)