# create a network graph of flight data

In [10]:
import sqlalchemy as sa
import pandas as pd
import pprint
from datetime import datetime
import time
from datetime import timedelta
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import networkx as nx
import math
from networkx.drawing.nx_agraph import graphviz_layout

CONNECTION_STRING_SQLALCHEMY = 'postgresql://localhost/FlightData'
con = sa.create_engine(CONNECTION_STRING_SQLALCHEMY)

In [11]:
# helper functions 
def runQuery(sql): 
    conn = sa.create_engine(CONNECTION_STRING_SQLALCHEMY)
    retVal = None
    result = pd.read_sql(sql, conn,params=None)
    retVal = result
    return retVal

def setInEdges(collection, source):
    for edge in collection: 
        # create a tuple of (source, target)
        edgeTup = (source, edge.text)
        edgeList.append(edgeTup)
        #citeSources.append(source)
        #citeTargets.append(edge.text)

def setOutEdges(collection, target):
    for edge in collection:
        edgeTup = (edge.text, target)
        edgeList.append(edgeTup)
        #citeSources.append(edge.text)
        #citeTargets.append(target)

## helpers related to data load

In [12]:
def loadNodes(G, airports):
    # create a node list from airports 
    for index, airport in airports.iterrows():
        airportcode = airport['iata']
        name = airport['airport']
        long = airport['long']
        lat=airport['lat']
        state = airport['state'] if airport['state'] != None else 'N/A'
        country = airport['country'] if airport['country'] != None else 'N/A'
        city = airport['city'] if airport['city'] != None else 'N/A'
        G.add_node(airportcode, name=name, long=long,lat=lat,state=state, country=country, city=city)
        return G
        
def loadFlightOverviewEdges(G):
    # get all flight combinations with edge weight reflective of number of flights
    query = "select \"Origin\", \"Dest\", count(*) as number_of_flights from flightdetails group by \"Origin\", \"Dest\";"
    allflights = runQuery(query) 
    # load the edges 
    for index, flights in allflights.iterrows():
        G.add_edge(flights["Origin"], flights["Dest"], weight=flights["number_of_flights"])
    return G

def loadAnnualFlightWithDelaySummary(G):
    # get all flight combinations with edge weight reflective of number of flights
    query = "select \"Origin\", \"Dest\", count(*) as number_of_flights from flightdetails group by \"Origin\", \"Dest\";"
    allflights = runQuery(query) 

    # load the edges 
    for index, flights in allflights.iterrows():
        #G.add_edge(flights["Origin"], flights["Dest"], weight=flights["number_of_flights"], capacity=15, length=342.7)
        #print(flights["Origin"],flights["Dest"], flights["number_of_flights"])
        G.add_edge(flights["Origin"], flights["Dest"], weight=flights["number_of_flights"])
    #print("loaded edges including duplicates:", len(edgeList))
    #edgeSet = set(edgeList) # dedup with sets
    #print("edge number after dedup:", len(edgeSet))

    # load the edges 
    #for edge in edgeSet: 
    #    citeSources.append(edge[0])
    #    citeTargets.append(edge[1])

    #zipped = zip(citeSources, citeTargets)
    #edges = list(zipped)
    #GC.add_edges_from(edges)
    return G

def loadAirports():
    query = """select * from airports"""
    airports = runQuery(query)
    airports.head()
    return airports
    
def getOverviewGraph():
    
    print("start graph load")
    
    # initiate NX objects 
    G=nx.MultiDiGraph()
    nodes = []
    sources = []
    targets = []
    edgelist = []
    
    print("loading airports")
    airports = loadAirports()
    print("creating airport nodes")
    G = loadNodes(G, airports)
    print("Loading edges")
    G = loadFlightOverviewEdges(G)
    
    # network summary 
    print("number of nodes:", len(G))
    print("number of edges:", nx.number_of_edges(G))
    print("Graph Density:", nx.density(G))

    isolatedAirports = nx.isolates(G) # airports with no flight data?
    print("Total isolated airports found:", len(isolatedAirports))

    G.remove_nodes_from(isolatedAirports)

    print("number of nodes post isolated:", len(G))
    print("number of edges post isolated:", nx.number_of_edges(G))
    print("Graph Density post isolated:", nx.density(G))
    
    print("end graph load")
    
    return G

def getOverviewGraphWithDelaySummary():
    
    print("start graph load")
    
    # initiate NX objects 
    G=nx.MultiDiGraph()
    nodes = []
    sources = []
    targets = []
    edgelist = []
    
    print("loading airports")
    airports = loadAirports()
    print("creating airport nodes")
    G = loadNodes(G, airports)
    print("Loading edges")
    G = loadAnnualFlightWithDelaySummary(G) # new method to get summary?
    
    # network summary 
    print("number of nodes:", len(G))
    print("number of edges:", nx.number_of_edges(G))
    print("Graph Density:", nx.density(G))

    isolatedAirports = nx.isolates(G) # airports with no flight data?
    print("Total isolated airports found:", len(isolatedAirports))

    G.remove_nodes_from(isolatedAirports)

    print("number of nodes post isolated:", len(G))
    print("number of edges post isolated:", nx.number_of_edges(G))
    print("Graph Density post isolated:", nx.density(G))
    
    print("end graph load")
    
    return G
    

## load the edges 
### possible combinations 
* load edges from flights that are delayed only?
- load by flight number (capture the unique flights between airports). Repeat flighnts reflected by edge weight.
- load edges by carrier (e.g. AA, Delta) 
- load by all flights (capture any flights between two airports). Repeat flights reflected by edge weight. 
- Load all flights and save aggregated delay in the nodes (which airport has the most amount of delay?) 
- load all flights and capture the total number of delays as opposed to delay in minutes. 
- filter by month. 
- filter daily? 
- filter by year? 

In [13]:
G = getOverviewGraph()

start graph load
loading airports
creating airport nodes
Loading nodes
number of nodes: 207
number of edges: 3345
Graph Density: 0.07844378781483045
Total isolated airports found: 1
number of nodes post isolated: 206
number of edges post isolated: 3345
Graph Density post isolated: 0.0792090930618044
end graph load


In [14]:
# write to a graphml file
nx.write_graphml(G, "flightsOverview.graphml")
G = None