In [1]:
import pandas as pd

In [2]:
# IATA code -> ISO country lookup. Rows without an IATA code cannot be joined,
# and exact duplicate (code, country) pairs are dropped so the left merge below
# does not emit the same airport row more than once.
countries = (
    pd.read_csv('data/airport-codes.csv', sep=',', usecols=['iata_code', 'iso_country'])
    .dropna(subset=['iata_code'])
    .drop_duplicates()
)

# Airport coordinates keyed by IATA code, annotated with the country where one
# is known; how='left' keeps airports that have no entry in the lookup.
airports = (
    pd.read_csv('data/airports.dat', sep='\t', usecols=['code', 'latitude', 'longitude'])
    .merge(countries, how='left', left_on='code', right_on='iata_code')
    .drop(columns='iata_code')
)


def save_df(airlines, name, coloring, random_state=0):
    """Build the US-domestic route table for ``airlines`` and write it to CSV.

    Routes are read from ``data/routes.dat``, joined twice against the
    module-level ``airports`` frame (once per endpoint), restricted to routes
    whose both endpoints are in the US, colored, shuffled and saved to
    ``saumik_data/{name}.csv``.

    Parameters
    ----------
    airlines : list-like
        Airline codes to keep (matched against the ``airline`` column).
    name : str
        Base name of the output CSV file.
    coloring : callable
        Called once per route row (a Series holding ``airline``, ``A``, ``B``
        and the ``latitude_*`` / ``longitude_*`` / ``iso_country_*`` columns);
        its return value is stored in the ``color`` column.
    random_state : int or None, default 0
        Seed for the row shuffle so repeated runs produce the same file.
        Pass ``None`` for a different order on every call.

    Returns
    -------
    pandas.DataFrame
        The shuffled route table that was written to disk.
    """
    routes = pd.read_csv('data/routes.dat', sep='\t', usecols=['airline', 'A', 'B']).drop_duplicates()
    routes = routes[routes.airline.isin(airlines)]

    # Attach coordinates and country for both endpoints; the second merge
    # disambiguates the overlapping airport columns with _A / _B suffixes.
    routes = (
        routes
        .merge(airports, how='left', left_on='A', right_on='code')
        .merge(airports, how='left', left_on='B', right_on='code', suffixes=('_A', '_B'))
        .drop(columns=['code_A', 'code_B'])
    )

    # Filter first so the row-wise coloring only runs on rows that are kept.
    # .copy() gives an independent frame that is safe to add columns to.
    domestic = (routes.iso_country_A == 'US') & (routes.iso_country_B == 'US')
    routes = routes[domestic].copy()

    if routes.empty:
        # apply(axis=1) on an empty frame hands back a DataFrame, which cannot
        # be assigned to a single column; use an empty column instead.
        routes['color'] = pd.Series(dtype=object)
    else:
        routes['color'] = routes.apply(coloring, axis=1)
    routes['AB'] = routes['A'] + '_' + routes['B']

    routes = routes.rename(columns={
        'latitude_A': 'lat_A', 'longitude_A': 'lon_A', 'iso_country_A': 'country_A',
        'latitude_B': 'lat_B', 'longitude_B': 'lon_B', 'iso_country_B': 'country_B',
    })

    # Shuffle the row order; seeded so the exported file is reproducible.
    routes = routes.sample(frac=1, random_state=random_state)
    routes.to_csv(f'saumik_data/{name}.csv', index=False)
    return routes

# All

In [3]:
%%time
def coloring(row):
    """Return the line color for a route based on its operating airline.

    AA, DL and UA each map to a fixed hex color; any other airline code
    yields ``None``.
    """
    airline_colors = {
        'AA': '#00ffff',
        'DL': '#ff0000',
        'UA': '#426ae1',
    }
    return airline_colors.get(row.airline)

# Build the combined AA/DL/UA route table, write it to saumik_data/all.csv
# and display the returned frame as the cell output.
save_df(['AA', 'DL', 'UA'], 'all', coloring)

CPU times: user 119 ms, sys: 8.19 ms, total: 127 ms
Wall time: 126 ms


Unnamed: 0,airline,A,B,lat_A,lon_A,country_A,lat_B,lon_B,country_B,color,AB
782,AA,ORD,SUX,41.9786,-87.9048,US,42.4026,-96.3844,US,#00ffff,ORD_SUX
767,AA,ORD,RSW,41.9786,-87.9048,US,26.5362,-81.7552,US,#00ffff,ORD_RSW
1521,DL,LGA,CHA,40.7773,-73.8727,US,35.0353,-85.2038,US,#ff0000,LGA_CHA
678,AA,MIA,TYS,25.7933,-80.2906,US,35.8109,-83.9940,US,#00ffff,MIA_TYS
1037,DL,ATL,EWR,33.6367,-84.4281,US,40.6925,-74.1687,US,#ff0000,ATL_EWR
...,...,...,...,...,...,...,...,...,...,...,...
1806,DL,SLC,TUS,40.7884,-111.9778,US,32.1161,-110.9410,US,#ff0000,SLC_TUS
1031,DL,ATL,DSM,33.6367,-84.4281,US,41.5341,-93.6506,US,#ff0000,ATL_DSM
1127,DL,ATL,PIT,33.6367,-84.4281,US,40.4915,-80.2329,US,#ff0000,ATL_PIT
1990,UA,EWR,AUS,40.6925,-74.1687,US,30.1950,-97.6700,US,#426ae1,EWR_AUS


# American

In [4]:
%%time
def coloring(row):
    """Return the line color for an American Airlines route.

    * both endpoints are hubs  -> ``'white'``
    * exactly one endpoint is a hub -> that hub's color
    * neither endpoint is a hub -> ``'gray'``

    Only the ``A`` and ``B`` columns of ``row`` are inspected, so a hub code
    appearing in any other column can never trigger a match.
    """
    # NOTE(review): LGA/PHX share '#426ae1' and MIA/DCA share '#6cff7d', so
    # those hub pairs are indistinguishable by color - confirm this is intended.
    hub_colors = {
        'CLT': '#ff0000',
        'ORD': '#ffff00',
        'DFW': '#ffa500',
        'LAX': '#00ffff',
        'MIA': '#6cff7d',
        'LGA': '#426ae1',
        'PHL': '#fd68b3',
        'PHX': '#426ae1',
        'DCA': '#6cff7d',
    }

    origin, destination = row['A'], row['B']
    if origin in hub_colors and destination in hub_colors:
        return 'white'
    # At most one endpoint is a hub here, so the check order does not matter.
    for endpoint in (origin, destination):
        if endpoint in hub_colors:
            return hub_colors[endpoint]
    return 'gray'

# Build the AA-only route table, write it to saumik_data/american.csv
# and display the returned frame as the cell output.
save_df(['AA'], 'american', coloring)

CPU times: user 103 ms, sys: 6.28 ms, total: 109 ms
Wall time: 110 ms


Unnamed: 0,airline,A,B,lat_A,lon_A,country_A,lat_B,lon_B,country_B,color,AB
873,AA,PHL,ROC,39.8719,-75.2411,US,43.1189,-77.6724,US,#fd68b3,PHL_ROC
369,AA,DFW,MSO,32.8968,-97.0380,US,46.9163,-114.0906,US,#ffa500,DFW_MSO
909,AA,PHX,ELP,33.4342,-112.0115,US,31.8067,-106.3778,US,#426ae1,PHX_ELP
384,AA,DFW,PDX,32.8968,-97.0380,US,45.5887,-122.5975,US,#ffa500,DFW_PDX
388,AA,DFW,PIA,32.8968,-97.0380,US,40.6642,-89.6933,US,#ffa500,DFW_PIA
...,...,...,...,...,...,...,...,...,...,...,...
60,AA,CLT,FWA,35.2140,-80.9431,US,40.9785,-85.1951,US,#ff0000,CLT_FWA
265,AA,DFW,BWI,32.8968,-97.0380,US,39.1754,-76.6683,US,#ffa500,DFW_BWI
465,AA,JFK,IND,40.6397,-73.7789,US,39.7173,-86.2944,US,gray,JFK_IND
219,AA,DCA,RDU,38.8521,-77.0377,US,35.8777,-78.7875,US,#6cff7d,DCA_RDU


# Delta

In [5]:

def coloring(row):
    """Return the line color for a Delta route.

    * both endpoints are hubs  -> ``'white'``
    * exactly one endpoint is a hub -> that hub's color
    * neither endpoint is a hub -> ``'gray'``

    Only the ``A`` and ``B`` columns of ``row`` are inspected, so a hub code
    appearing in any other column can never trigger a match.
    """
    hub_colors = {
        'ATL': '#ff0000',
        'BOS': '#ffff00',
        'DTW': '#ffa500',
        'JFK': '#00ffff',
        'LAX': '#007f00',
        'LGA': '#ffddad',
        'MSP': '#fd68b3',
        'SEA': '#426ae1',
        'SLC': '#6cff7d',
    }

    origin, destination = row['A'], row['B']
    if origin in hub_colors and destination in hub_colors:
        return 'white'
    # At most one endpoint is a hub here, so the check order does not matter.
    for endpoint in (origin, destination):
        if endpoint in hub_colors:
            return hub_colors[endpoint]
    return 'gray'

# Build the DL-only route table, write it to saumik_data/delta.csv
# and display the returned frame as the cell output.
save_df(['DL'], 'delta', coloring)

Unnamed: 0,airline,A,B,lat_A,lon_A,country_A,lat_B,lon_B,country_B,color,AB
86,DL,ATL,GSO,33.6367,-84.4281,US,36.0977,-79.9373,US,#ff0000,ATL_GSO
743,DL,SEA,OGG,47.4490,-122.3093,US,20.8987,-156.4305,US,#426ae1,SEA_OGG
768,DL,SLC,BUR,40.7884,-111.9778,US,34.2006,-118.3587,US,#6cff7d,SLC_BUR
774,DL,SLC,CPR,40.7884,-111.9778,US,42.9079,-106.4643,US,#6cff7d,SLC_CPR
602,DL,MSP,ATW,44.8820,-93.2218,US,44.2581,-88.5191,US,#fd68b3,MSP_ATW
...,...,...,...,...,...,...,...,...,...,...,...
816,DL,SLC,PSP,40.7884,-111.9778,US,33.8296,-116.5067,US,#6cff7d,SLC_PSP
518,DL,LAX,LIH,33.9425,-118.4081,US,21.9760,-159.3390,US,#007f00,LAX_LIH
408,DL,DTW,TPA,42.2124,-83.3534,US,27.9755,-82.5332,US,#ffa500,DTW_TPA
561,DL,LGA,GSO,40.7773,-73.8727,US,36.0977,-79.9373,US,#ffddad,LGA_GSO


# United

In [6]:

def coloring(row):
    """Return the line color for a United route.

    * both endpoints are hubs  -> ``'white'``
    * exactly one endpoint is a hub -> that hub's color
    * neither endpoint is a hub -> ``'gray'``

    Only the ``A`` and ``B`` columns of ``row`` are inspected, so a hub code
    appearing in any other column can never trigger a match.
    """
    # One color per hub; the two surplus palette entries that had no hub to
    # pair with (and were therefore never used) have been removed.
    hub_colors = {
        'ORD': '#ff0000',
        'DEN': '#ffff00',
        'IAH': '#ffa500',
        'LAX': '#00ffff',
        'EWR': '#6cff7d',
        'SFO': '#426ae1',
        'IAD': '#fd68b3',
    }

    origin, destination = row['A'], row['B']
    if origin in hub_colors and destination in hub_colors:
        return 'white'
    # At most one endpoint is a hub here, so the check order does not matter.
    for endpoint in (origin, destination):
        if endpoint in hub_colors:
            return hub_colors[endpoint]
    return 'gray'

# Build the UA-only route table, write it to saumik_data/united.csv
# and display the returned frame as the cell output.
save_df(['UA'], 'united', coloring)

Unnamed: 0,airline,A,B,lat_A,lon_A,country_A,lat_B,lon_B,country_B,color,AB
281,UA,EWR,TPA,40.6925,-74.1687,US,27.9755,-82.5332,US,#6cff7d,EWR_TPA
581,UA,LAX,PHX,33.9425,-118.4081,US,33.4342,-112.0115,US,#00ffff,LAX_PHX
167,UA,DEN,XWA,39.8617,-104.6732,US,48.1535,-103.4502,US,#ffff00,DEN_XWA
244,UA,EWR,OMA,40.6925,-74.1687,US,41.3032,-95.8941,US,#6cff7d,EWR_OMA
4,UA,CLE,IAH,41.4117,-81.8498,US,29.9844,-95.3414,US,#ffa500,CLE_IAH
...,...,...,...,...,...,...,...,...,...,...,...
25,UA,DEN,BNA,39.8617,-104.6732,US,36.1245,-86.6782,US,#ffff00,DEN_BNA
717,UA,ORD,RDU,41.9786,-87.9048,US,35.8777,-78.7875,US,#ff0000,ORD_RDU
283,UA,EWR,TYS,40.6925,-74.1687,US,35.8109,-83.9940,US,#6cff7d,EWR_TYS
324,UA,IAD,CHS,38.9445,-77.4558,US,32.8986,-80.0405,US,#fd68b3,IAD_CHS
