In [208]:
from tqdm import tqdm 
import pandas as pd 

# --- Airports ---------------------------------------------------------------
# Keep only rows typed 'large_airport' and take the first 35 distinct IATA
# codes in file order. (Slicing a `set` picked an arbitrary 35 that could
# differ between kernel sessions, making the node set non-reproducible.)
airports     = pd.read_csv('../../data/us_large_airports.csv')
airports     = airports[airports['type'] == 'large_airport']
big_airports = airports['iata_code'].dropna().drop_duplicates().head(35).tolist()

# --- Flights ----------------------------------------------------------------
flights = pd.read_csv(
    '../../data/Airports2.csv',
    usecols=['Origin_airport', 'Destination_airport', 'Origin_city', 'Destination_city', 'Fly_date'],
)

# Keep flights whose origin AND destination are both selected airports.
# Vectorised `isin` replaces a row-by-row `apply(axis=1)` membership test.
both_endpoints_selected = (
    flights['Origin_airport'].isin(big_airports)
    & flights['Destination_airport'].isin(big_airports)
)

flights = (
    flights[both_endpoints_selected]
    .drop_duplicates()  # de-duplicate while the city columns are still present
    .assign(Fly_date=lambda df: pd.to_datetime(df['Fly_date']))
    .sort_values(by=['Fly_date'])
    .drop(columns=['Origin_city', 'Destination_city'])
    .reset_index(drop=True)
)

# --- Encodings --------------------------------------------------------------
# `flights` is sorted by date, so each distinct date maps to its chronological
# rank (0, 1, 2, ...). Airports map to their position in `big_airports`.
T = flights['Fly_date'].unique()
date_encoding = {v: i for i, v in enumerate(T)}
airport_encoding = {airport: i for i, airport in enumerate(big_airports)}

In [209]:
# Number of encoded date steps to export: flights whose encoded `Fly_date` lies in
# range(time_steps) are written out, and the grid is built for time_steps + 1 columns.
time_steps = 12

In [210]:
# Swap each flight date for its integer step index from `date_encoding`.
# The result is assigned back explicitly: `.replace(..., inplace=True)` on the
# `flights['Fly_date']` selection is a chained in-place update, which pandas
# warns about and which leaves `flights` unmodified under Copy-on-Write.
flights['Fly_date'] = flights['Fly_date'].replace(date_encoding)

In [211]:
# Tag both endpoints with a time step so each (airport, step) pair is its own
# node id: a flight leaves "<origin>_<t>" and arrives at "<destination>_<t+1>".
# Vectorised string concatenation replaces two row-wise `apply(axis=1)` passes.
# NOTE: the airport columns of `flights` are rewritten in place, so re-running
# this cell without rebuilding `flights` appends the suffix a second time.
step = flights['Fly_date']
flights['Origin_airport']      = flights['Origin_airport'] + '_' + step.astype(str)
flights['Destination_airport'] = flights['Destination_airport'] + '_' + (step + 1).astype(str)

# Export only the first `time_steps` steps. Columns are selected and renamed by
# name, so the output no longer depends on the column order of the source CSV.
# NOTE(review): the exported `count` column carries the time-step index, not a
# flight count -- confirm that is what the consumer of flights.csv expects.
in_window = flights['Fly_date'].isin(range(time_steps))
filtered = (
    flights.loc[in_window, ['Origin_airport', 'Destination_airport', 'Fly_date']]
    .rename(columns={
        'Origin_airport': 'origin',
        'Destination_airport': 'destination',
        'Fly_date': 'count',
    })
)
filtered.to_csv('flights.csv', index=False)

## Grid Positioning 

Each node is placed at $(t \cdot c_x,\ i \cdot c_y)$, where $i \in [0, N]$ and $t \in [0, T]$, such that $N$ is the number of nodes (e.g. airports) and $T$ is the time horizon (e.g. days).

$c_x$ and $c_y$ are constants that define the distance from the origin $(0, 0)$ as well as the spacing between neighboring nodes.

In [212]:
# Grid spacing: c_x separates consecutive time steps, c_y separates airports.
c_x = 100
c_y = 20

# One row per (airport, time step) node, with ids in the "<airport>_<t>" format.
# Steps run through `time_steps` inclusive because destination node ids are
# tagged with t + 1. Both indices are shifted by one, so the first node sits at
# (c_x, c_y) rather than at the origin.
locs = [
    ["%s_%s" % (code, t), (t + 1) * c_x, (row + 1) * c_y]
    for code, row in airport_encoding.items()
    for t in range(time_steps + 1)
]

# NOTE(review): the time-axis coordinate is stored under 'latitude' and the
# airport-axis coordinate under 'longitude' -- confirm the consumer of
# grid_locs.csv expects that mapping.
# Passing the column names to the constructor also handles an empty `locs`.
locs_df = pd.DataFrame(locs, columns=['iata', 'latitude', 'longitude'])
locs_df.to_csv('grid_locs.csv', index=False)