# create a network graph of flight data

In [2]:
import sqlalchemy as sa
import pandas as pd
import pprint
from datetime import datetime
import time
from datetime import timedelta
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import networkx as nx
import math
from networkx.drawing.nx_agraph import graphviz_layout

# SQLAlchemy URL of the PostgreSQL database named "FlightData" on localhost.
CONNECTION_STRING_SQLALCHEMY = 'postgresql://localhost/FlightData'
# Module-level engine built from the URL above.
# NOTE(review): runQuery() creates its own engine instead of using `con` — confirm whether `con` is still needed.
con = sa.create_engine(CONNECTION_STRING_SQLALCHEMY)

In [3]:
# helper functions 
def runQuery(sql, params=None):
    """Run a SQL statement against the flight database and return the rows.

    Parameters
    ----------
    sql : str
        The SQL text to execute.
    params : list, tuple or dict, optional
        Bind parameters forwarded to ``pandas.read_sql``. Defaults to
        ``None`` (no parameters), which matches the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        The result set produced by ``pandas.read_sql``.
    """
    engine = sa.create_engine(CONNECTION_STRING_SQLALCHEMY)
    try:
        return pd.read_sql(sql, engine, params=params)
    finally:
        # Each call builds its own engine; dispose it so its connection pool
        # is released instead of accumulating one pool per query.
        engine.dispose()

def setInEdges(collection, source):
    """Append one ``(source, item.text)`` tuple to ``edgeList`` per item.

    ``collection`` is any iterable whose items expose a ``.text`` attribute.
    Nothing is returned; the module-level ``edgeList`` is mutated in place.

    NOTE(review): ``edgeList`` must already exist as a global. The graph
    initialisation cell in this notebook defines ``edgelist`` (lower-case
    "l"), which is a different name — confirm which one is intended.
    """
    for item in collection:
        # edge direction: source -> item.text
        edgeList.append((source, item.text))

def setOutEdges(collection, target):
    """Append one ``(item.text, target)`` tuple to ``edgeList`` per item.

    ``collection`` is any iterable whose items expose a ``.text`` attribute.
    Nothing is returned; the module-level ``edgeList`` is mutated in place.

    NOTE(review): ``edgeList`` must already exist as a global. The graph
    initialisation cell in this notebook defines ``edgelist`` (lower-case
    "l"), which is a different name — confirm which one is intended.
    """
    for item in collection:
        # edge direction: item.text -> target
        edgeList.append((item.text, target))

In [114]:
# Pull every row and column of the airports table into a DataFrame.
query = "select * from airports"
airports = runQuery(query)
# Preview the first rows as the cell output.
airports.head()

Unnamed: 0,index,iata,airport,city,state,country,lat,long
0,0,00M,Thigpen,Bay Springs,MS,USA,31.953765,-89.234505
1,1,00R,Livingston Municipal,Livingston,TX,USA,30.685861,-95.017928
2,2,00V,Meadow Lake,Colorado Springs,CO,USA,38.945749,-104.569893
3,3,01G,Perry-Warsaw,Perry,NY,USA,42.741347,-78.052081
4,4,01J,Hilliard Airpark,Hilliard,FL,USA,30.688012,-81.905944


In [115]:
# Set up the NetworkX graph and the (initially empty) bookkeeping lists.
# NOTE(review): setInEdges/setOutEdges append to `edgeList`, not the
# `edgelist` created here — confirm which name is intended.

G = nx.MultiDiGraph()
nodes, sources, targets, edgelist = [], [], [], []

In [116]:
# Add one graph node per airport row, keyed by IATA code, carrying the
# airport's name, coordinates and location as node attributes.
def _or_na(value):
    """Return 'N/A' when ``value`` is missing (None or NaN), else ``value``."""
    # pd.isna catches both None and NaN, so a missing state/country/city is
    # always replaced by the same 'N/A' placeholder.
    return 'N/A' if pd.isna(value) else value


for index, airport in airports.iterrows():
    G.add_node(
        airport['iata'],
        name=airport['airport'],
        long=airport['long'],
        lat=airport['lat'],
        state=_or_na(airport['state']),
        country=_or_na(airport['country']),
        city=_or_na(airport['city']),
    )
    

## load the edges 
### possible combinations 
- load edges from flights that are delayed only?
- load edges from unique flights by flight number and replace the repeat flights with an edge weight.
- load edges by carrier
- load edges while simply ignoring the unique flights: all flights between two airports will be reflected by the edge weight.

In [117]:
# One row per (Origin, Dest) pair, with the number of flights on that route.
query = 'select "Origin", "Dest", count(*) as number_of_flights from flightdetails group by "Origin", "Dest";'
allflights = runQuery(query)

# Add one directed edge per route; the flight count becomes the edge weight.
route_columns = zip(
    allflights["Origin"],
    allflights["Dest"],
    allflights["number_of_flights"],
)
for origin, dest, flight_count in route_columns:
    G.add_edge(origin, dest, weight=flight_count)



#print("loaded edges including duplicates:", len(edgeList))
#edgeSet = set(edgeList) # dedup with sets
#print("edge number after dedup:", len(edgeSet))

# load the edges 
#for edge in edgeSet: 
#    citeSources.append(edge[0])
#    citeTargets.append(edge[1])
    
#zipped = zip(citeSources, citeTargets)
#edges = list(zipped)
#GC.add_edges_from(edges)

In [118]:
# Network summary: print size and density, drop airports that have no
# edges, then print the same figures again for the reduced graph.

print("number of nodes:", len(G))
print("number of edges:", nx.number_of_edges(G))
print("Graph Density:", nx.density(G))

# nx.isolates() returns an iterator on networkx >= 2.0, so it has no len()
# and must not be consumed while nodes are being removed from G.
# Materialising it as a list works on both old and new networkx versions.
isolatedAirports = list(nx.isolates(G))  # airports with no flight data?
print("Total isolated airports found:", len(isolatedAirports))

G.remove_nodes_from(isolatedAirports)

print("number of nodes post isolated:", len(G))
print("number of edges post isolated:", nx.number_of_edges(G))
print("Graph Density post isolated:", nx.density(G))


number of nodes: 3376
number of edges: 3345
Graph Density: 0.00029357556608741443
Total isolated airports found: 3170
number of nodes post isolated: 206
number of edges post isolated: 3345
Graph Density post isolated: 0.0792090930618044


In [119]:
# Export the graph to a GraphML file (path is relative to the working directory).
graphml_path = "flightsOverview.graphml"
nx.write_graphml(G, graphml_path)