### Libraries

In [59]:
import numpy as np
import pandas as pd
import pickle

### Get Data

In [53]:
with open('./data/processed/carrier_aircraft_combo_capacity_dict.txt', 'rb') as handle:
    carrier_aircraft_combo_capacity = pickle.loads(handle.read()) 
dat = pd.read_pickle('./data/processed/final/all-sched.pkl')
dat.head()

Unnamed: 0,operator,flight,aircraft,frequency,from,from_time,to,to_time,eff_from,eff_to
0,IND,6E 101,A 320,"[1, 2, 3, 4, 5, 6, 7]",MAA,6.25,CJB,7.25,2019-10-27 00:00:00,2020-03-28 00:00:00
1,IND,6E 103,A 320,"[1, 2, 3, 4, 5, 6, 7]",PNQ,22.33,BLR,0.08,2019-10-27 00:00:00,2020-03-28 00:00:00
2,IND,6E 104,A 320,"[1, 2, 3, 4, 5, 6, 7]",HYD,8.17,ATQ,11.0,2019-10-27 00:00:00,2020-03-28 00:00:00
3,IND,6E 105,A 320,"[1, 2, 3, 4, 5, 7]",PNQ,17.33,BLR,18.92,2019-10-27 00:00:00,2020-03-28 00:00:00
4,IND,6E 105,A 320,"[1, 2, 3, 4, 5, 7]",DEL,14.75,PNQ,16.83,2019-10-27 00:00:00,2020-03-27 00:00:00


#### Weekly Flights for Schedule
Adding a column, <i>weekly_flights</i>, to the dataframe that translates the frequency information into weekly number of flights.

In [55]:
dat['weekly_flights'] = dat.apply(lambda row: len(row.frequency), axis = 1)
dat.head()

Unnamed: 0,operator,flight,aircraft,frequency,from,from_time,to,to_time,eff_from,eff_to,weekly_flights
0,IND,6E 101,A 320,"[1, 2, 3, 4, 5, 6, 7]",MAA,6.25,CJB,7.25,2019-10-27 00:00:00,2020-03-28 00:00:00,7
1,IND,6E 103,A 320,"[1, 2, 3, 4, 5, 6, 7]",PNQ,22.33,BLR,0.08,2019-10-27 00:00:00,2020-03-28 00:00:00,7
2,IND,6E 104,A 320,"[1, 2, 3, 4, 5, 6, 7]",HYD,8.17,ATQ,11.0,2019-10-27 00:00:00,2020-03-28 00:00:00,7
3,IND,6E 105,A 320,"[1, 2, 3, 4, 5, 7]",PNQ,17.33,BLR,18.92,2019-10-27 00:00:00,2020-03-28 00:00:00,6
4,IND,6E 105,A 320,"[1, 2, 3, 4, 5, 7]",DEL,14.75,PNQ,16.83,2019-10-27 00:00:00,2020-03-27 00:00:00,6


#### Aircraft Capacity
Adding a column, <i>capacity</i>, to the dataframe that captures the capacity of the aircraft flying the schedule.

In [56]:
dat['capacity'] = dat.apply(lambda row: carrier_aircraft_combo_capacity[row.operator + '-' + row.aircraft], 
                            axis = 1)
dat.head()

Unnamed: 0,operator,flight,aircraft,frequency,from,from_time,to,to_time,eff_from,eff_to,weekly_flights,capacity
0,IND,6E 101,A 320,"[1, 2, 3, 4, 5, 6, 7]",MAA,6.25,CJB,7.25,2019-10-27 00:00:00,2020-03-28 00:00:00,7,183
1,IND,6E 103,A 320,"[1, 2, 3, 4, 5, 6, 7]",PNQ,22.33,BLR,0.08,2019-10-27 00:00:00,2020-03-28 00:00:00,7,183
2,IND,6E 104,A 320,"[1, 2, 3, 4, 5, 6, 7]",HYD,8.17,ATQ,11.0,2019-10-27 00:00:00,2020-03-28 00:00:00,7,183
3,IND,6E 105,A 320,"[1, 2, 3, 4, 5, 7]",PNQ,17.33,BLR,18.92,2019-10-27 00:00:00,2020-03-28 00:00:00,6,183
4,IND,6E 105,A 320,"[1, 2, 3, 4, 5, 7]",DEL,14.75,PNQ,16.83,2019-10-27 00:00:00,2020-03-27 00:00:00,6,183


#### One-Hot-Encode <i>Frequency</i>
Adding 7 new columns, one for each day of the week and if a particular schedule flies on a given day of the week, set value in the appropriate column to True.

In [57]:
for i in range(1, 8, 1):
    col_name = 'day_' + str(i)
    dat[col_name] = False
    
for i in range(dat.shape[0]):
    for j in range(1, 8, 1):
        col_name = 'day_' + str(j)
        if str(j) in dat['frequency'][i]:
            dat.loc[i, col_name] = True

dat.head()

Unnamed: 0,operator,flight,aircraft,frequency,from,from_time,to,to_time,eff_from,eff_to,weekly_flights,capacity,day_1,day_2,day_3,day_4,day_5,day_6,day_7
0,IND,6E 101,A 320,"[1, 2, 3, 4, 5, 6, 7]",MAA,6.25,CJB,7.25,2019-10-27 00:00:00,2020-03-28 00:00:00,7,183,False,False,False,False,False,False,False
1,IND,6E 103,A 320,"[1, 2, 3, 4, 5, 6, 7]",PNQ,22.33,BLR,0.08,2019-10-27 00:00:00,2020-03-28 00:00:00,7,183,False,False,False,False,False,False,False
2,IND,6E 104,A 320,"[1, 2, 3, 4, 5, 6, 7]",HYD,8.17,ATQ,11.0,2019-10-27 00:00:00,2020-03-28 00:00:00,7,183,False,False,False,False,False,False,False
3,IND,6E 105,A 320,"[1, 2, 3, 4, 5, 7]",PNQ,17.33,BLR,18.92,2019-10-27 00:00:00,2020-03-28 00:00:00,6,183,False,False,False,False,False,False,False
4,IND,6E 105,A 320,"[1, 2, 3, 4, 5, 7]",DEL,14.75,PNQ,16.83,2019-10-27 00:00:00,2020-03-27 00:00:00,6,183,False,False,False,False,False,False,False


In [61]:
dat.to_pickle('./data/processed/final/all-sched-enriched.pkl')
dat.to_excel('./data/processed/final/all-sched-enriched.xlsx', index = False)
dat.to_csv('./data/processed/final/all-sched-enriched.csv', index = False)