In [9]:
import folium
import numpy as np
import pandas as pd
import os
import json

In [2]:
#### run this line to download the data
!echo "Getting airports data"
!curl https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat 2&>/dev/null > airports.dat
!echo "Getting routes data"
!curl https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat 2&>/dev/null > routes.dat

Getting airports data
Getting routes data


In [3]:
# Column names for the two downloaded files; both are read with header=None,
# so the names have to be supplied here.
AIRPORT_COLUMNS = [
    "AirportID", "Name", "City", "Country", "IATA", "ICAO",
    "Latitude", "Longitude", "Altitude", "Timezone", "DST", "TzDatabaseTimeZone",
    "Type", "Source",
]
ROUTE_COLUMNS = [
    'Airline', 'AirlineID', 'SourceAirport', 'SourceAirportID', 'DestinationAirport',
    'DestinationAirportID', 'Codeshare', 'Stops', 'Equipment',
]

# The literal string "\N" is parsed as a missing value in both tables.
airports = pd.read_csv('airports.dat', header=None, names=AIRPORT_COLUMNS, na_values=["\\N"])
all_routes = pd.read_csv('routes.dat', header=None, names=ROUTE_COLUMNS, na_values=["\\N"])

# Keep only direct flights and renumber the rows 0..n-1. Chaining the two steps
# avoids calling an in-place method on a filtered slice of all_routes.
routes = all_routes[all_routes.Stops == 0].reset_index(drop=True)

# Airports that have an AirportID but no IATA code.
no_IATA_airports = airports[(airports.IATA.isna() & ~airports.AirportID.isna())]

# Only airports with an IATA code can be matched against the route table.
airports = airports[~airports.IATA.isna()].reset_index(drop=True)
inter = airports.IATA.unique()  # IATA codes present in the airport dataset

# Keep routes whose two endpoints are both known airports. isin() is a
# vectorised membership test, replacing a row-by-row scan of `inter`.
routes = routes[routes.SourceAirport.isin(inter) & routes.DestinationAirport.isin(inter)]

# Every IATA code that appears as an endpoint of a remaining route.
route_ids = list(set(routes.DestinationAirport.unique()).union(set(routes.SourceAirport.unique())))

# Drop airports that no remaining route touches, then drop self-loops
# (routes whose source and destination are the same airport).
airports = airports[airports.IATA.isin(route_ids)].reset_index(drop=True)
routes = routes[routes.SourceAirport != routes.DestinationAirport]

features = airports

# Node table: the airports plus a 'node' column holding each row's position.
nodes = features.reset_index().rename(columns={'index': 'node'})

# Lookup table from IATA code to node id.
uid2idx = nodes.set_index('IATA')[['node']]

# Edge list: translate both endpoints of every route from IATA code to node id
# and keep only the two resulting id columns.
source_lookup = uid2idx.rename(columns={'node': 'node_source'})
dest_lookup = uid2idx.rename(columns={'node': 'node_dest'})
edges = (
    routes[['SourceAirport', 'DestinationAirport']]
    .join(source_lookup, on='SourceAirport')
    .join(dest_lookup, on='DestinationAirport')
    .drop(columns=['SourceAirport', 'DestinationAirport'])
)

n_nodes = len(nodes)
adjacency = np.zeros((n_nodes, n_nodes), dtype=int)

# Edges with a missing endpoint cannot be placed in the matrix and are skipped.
matched_edges = edges.dropna(subset=['node_source', 'node_dest'])
source_ids = matched_edges['node_source'].to_numpy(dtype=int)
dest_ids = matched_edges['node_dest'].to_numpy(dtype=int)

# We add weights to the adjacency matrix corresponding to the number of flights
# between i and j. Our graph is directed: an edge is added only when there is a
# flight from source to dest. np.add.at accumulates repeated (i, j) pairs,
# unlike `adjacency[source_ids, dest_ids] += 1`, which would count each pair once.
np.add.at(adjacency, (source_ids, dest_ids), 1)

In [4]:
# Number of edges in the network. The adjacency matrix is directed (entry [i, j]
# is only incremented for flights from i to j), so every positive entry is one
# edge. Halving the count would only be valid for a symmetric matrix.
n_edges = np.count_nonzero(adjacency > 0)

In [5]:
# Preview the node table, the edge list and the node coordinates.
display(
    nodes.head(),
    edges.head(),
    nodes[['Latitude', 'Longitude']].head(),
)

Unnamed: 0,node,AirportID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,TzDatabaseTimeZone,Type,Source
0,0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.08169,145.391998,5282,10.0,U,Pacific/Port_Moresby,airport,OurAirports
1,1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.20708,145.789001,20,10.0,U,Pacific/Port_Moresby,airport,OurAirports
2,2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.82679,144.296005,5388,10.0,U,Pacific/Port_Moresby,airport,OurAirports
3,3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10.0,U,Pacific/Port_Moresby,airport,OurAirports
4,4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.44338,147.220001,146,10.0,U,Pacific/Port_Moresby,airport,OurAirports


Unnamed: 0,node_source,node_dest
0,1421,1439
1,1422,1439
2,1422,1418
3,1424,1439
4,1424,2052


Unnamed: 0,Latitude,Longitude
0,-6.08169,145.391998
1,-5.20708,145.789001
2,-5.82679,144.296005
3,-6.569803,146.725977
4,-9.44338,147.220001


In [6]:
# Attach the coordinates of both endpoints to every edge; the node ids stored in
# `edges` are looked up in the index of `nodes`.
coordinates = nodes[['Latitude', 'Longitude']]

# The suffixes on the first join are never applied because the two frames share
# no column names, so the source coordinates stay 'Latitude' / 'Longitude'.
with_source = edges.join(coordinates, on='node_source', lsuffix ='_source', rsuffix='_source')

# On the second join the names collide, so the destination coordinates become
# 'Latitude_dest' / 'Longitude_dest'.
node_edges = with_source.join(coordinates, on='node_dest', rsuffix="_dest")

In [7]:
# First five edges with the coordinates of both endpoints.
node_edges.head(n=5)

Unnamed: 0,node_source,node_dest,Latitude,Longitude,Latitude_dest,Longitude_dest
0,1421,1439,43.449902,39.9566,55.606201,49.278702
1,1422,1439,46.283298,48.006302,55.606201,49.278702
2,1422,1418,46.283298,48.006302,44.225101,43.081902
3,1424,1439,55.305801,61.5033,55.606201,49.278702
4,1424,2052,55.305801,61.5033,55.0126,82.650703


In [8]:
# Map centred on (0, 0): one small circle per airport and one thin,
# semi-transparent line per route. The result is saved to index.html.
m = folium.Map(location=(0, 0), zoom_start=2)

for airport in nodes.itertuples(index=False):
    marker = folium.CircleMarker(
        location=[airport.Latitude, airport.Longitude],
        popup=folium.Popup(airport.Name, parse_html=True),
        radius=1,
        fill=True,
    )
    marker.add_to(m)

for route in node_edges.itertuples(index=False):
    start = [route.Latitude, route.Longitude]
    end = [route.Latitude_dest, route.Longitude_dest]
    folium.PolyLine(locations=[start, end], opacity=0.3, weight=1).add_to(m)

m.save('index.html')

In [16]:
# Directory that receives the exported JSON files.
DATA_DIR = os.path.join('datavis', 'data')

def dump_json(df, file_path):
    """Write a DataFrame to ``file_path`` as a JSON array of row records.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to export; each row becomes one JSON object keyed by column name.
    file_path : str
        Destination file. Missing parent directories are created, and an
        existing file is overwritten.
    """
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Serialise before opening the file, so that a failure here does not leave
    # a truncated file behind.
    records = json.loads(df.to_json(orient='records'))
    with open(file_path, 'w') as outfile:
        json.dump(records, outfile)

# Export the node table and the edge list as JSON files under DATA_DIR.
for frame, filename in ((nodes, 'nodes.json'), (edges, 'edges.json')):
    dump_json(frame, os.path.join(DATA_DIR, filename))