# Flightly

Modelling queries over 2015 US commercial airline flight data using SQL and Neo4j.

In [1]:
# NOTE(review): scratch expression, presumably a kernel sanity check — confirm or remove.
3+3

6

In [30]:
from neo4j import GraphDatabase

class Neo4jGDB(object):
    """Load the flight CSV files into Neo4j and link the resulting nodes.

    The instance owns one driver. Either call :meth:`close` when finished or
    use the instance as a context manager so the driver is always closed::

        with Neo4jGDB(uri, user, password) as gdb:
            gdb.delete_all()
            gdb.load_csv()
            gdb.match_relationships()
    """

    def __init__(self, uri, user, password):
        """Open a driver for ``uri`` authenticated with ``user``/``password``."""
        self._driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        """Return the instance itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; never suppress errors."""
        self.close()

    def close(self):
        """Close the underlying driver."""
        self._driver.close()

    def load_csv(self):
        """Create the Airline, Airport and Flight nodes from the CSV files."""
        with self._driver.session() as session:
            session.write_transaction(self._load_airlines)
            session.write_transaction(self._load_airports)
            # The flights query is sent through session.run rather than a
            # write transaction to allow for periodic commit.
            session.run(self._load_flights())

    def match_relationships(self):
        """Create the relationships between flights, airports and airlines."""
        with self._driver.session() as session:
            session.write_transaction(self._match_origin_airport)
            session.write_transaction(self._match_destination_airport)
            session.write_transaction(self._match_airline)

    def delete_all(self):
        """Delete every node in the database together with its relationships."""
        with self._driver.session() as session:
            session.write_transaction(self._delete_all)

    @staticmethod
    def _delete_all(tx):
        """Transaction function: detach-delete all nodes."""
        tx.run("""
               MATCH (n)
               DETACH DELETE n
               """)

    @staticmethod
    def _load_airlines(tx):
        """Transaction function: create one Airline node per airlines.csv row.

        Column 0 becomes ``iata`` and column 1 becomes ``name``.
        """
        tx.run("""
            LOAD CSV FROM 'file:///flightly/airlines.csv' AS line
            CREATE (:Airline { name: line[1], iata: line[0]})
            """)

    @staticmethod
    def _load_airports(tx):
        """Transaction function: create one Airport node per airports.csv row.

        Columns 0-6 map to ``iata``, ``name``, ``city``, ``state``,
        ``country``, ``latitude`` and ``longitude`` respectively.
        """
        tx.run("""
            LOAD CSV FROM 'file:///flightly/airports.csv' AS line
            CREATE (:Airport { name: line[1], iata: line[0], city: line[2], state: line[3], country: line[4], latitude: line[5], longitude: line[6]})
            """)

    @staticmethod
    def _match_origin_airport(tx):
        """Transaction function: link each airport to the flights departing it.

        Creates ``(Airport)-[:HAS_DEPARTURE]->(Flight)`` wherever the flight's
        ``origin_airport`` equals the airport's ``iata``. The query's
        ``RETURN`` value is not used by this method.
        """
        tx.run("""
            MATCH (fl:Flight),(ap:Airport)
            WHERE fl.origin_airport = ap.iata
            CREATE (ap)-[r:HAS_DEPARTURE]->(fl)
            RETURN type(r)
            """)

    @staticmethod
    def _match_destination_airport(tx):
        """Transaction function: link each flight to its destination airport.

        Creates ``(Flight)-[:FLIES_TO]->(Airport)`` wherever the flight's
        ``destination_airport`` equals the airport's ``iata``.
        """
        tx.run("""
            MATCH (fl:Flight),(ap:Airport)
            WHERE fl.destination_airport = ap.iata
            CREATE (fl)-[:FLIES_TO]->(ap)
            """)

    @staticmethod
    def _match_airline(tx):
        """Transaction function: link each flight to the airline operating it.

        Creates ``(Flight)-[:OPERATED_BY]->(Airline)`` wherever the flight's
        ``airline`` equals the airline's ``iata``.
        """
        tx.run("""
            MATCH (fl:Flight),(al:Airline)
            WHERE fl.airline = al.iata
            CREATE (fl)-[:OPERATED_BY]->(al)
            """)

    @staticmethod
    def _load_flights():
        """Return the Cypher text that creates Flight nodes from flights.csv.

        Unlike the other loaders this returns the query instead of running
        it, so that ``load_csv`` can send it with ``session.run``. The query
        commits every 1000 rows and only reads the first 10000 rows of the
        file; the 31 CSV columns are copied onto the node as they are read.
        """
        return """
            USING PERIODIC COMMIT 1000
            LOAD CSV FROM 'file:///flightly/flights.csv' AS line
            WITH line LIMIT 10000
            CREATE (:Flight { 
                year: line[0],
                month: line[1],
                day: line[2],
                day_of_week: line[3],
                airline: line[4],
                flight_number: line[5],
                tail_number: line[6],
                origin_airport: line[7],
                destination_airport: line[8],
                scheduled_departure: line[9],
                departure_time: line[10],
                departure_delay: line[11],
                taxi_out: line[12],
                wheels_off: line[13],
                scheduled_time: line[14],
                elapsed_time: line[15],
                air_time: line[16],
                distance: line[17],
                wheels_on: line[18],
                taxi_in: line[19],
                scheduled_arrival: line[20],
                arrival_time: line[21],
                arrival_delay: line[22],
                diverted: line[23],
                cancelled: line[24],
                cancellation_reason: line[25],
                air_system_delay: line[26],
                security_delay: line[27],
                airline_delay: line[28],
                late_aircraft_delay: line[29],
                weather_delay: line[30]
            })
            """

In [23]:
# Loader bound to the Neo4j instance listening on localhost.
setup = Neo4jGDB(
    uri='bolt://localhost:7687',
    user='neo4j',
    password='password',
)

In [25]:
# Rebuild the graph from scratch. The order matters: nodes must exist before
# relationships between them can be matched and created.
setup.delete_all()  # remove every existing node and its relationships
setup.load_csv()  # create the Airline, Airport and Flight nodes from CSV
setup.match_relationships()  # link flights to their airports and airline

### Timing different queries

In [83]:
from neo4j import GraphDatabase
import timeit

# Connection settings as (url, user, password); unpacked by NeoQuery.__init__.
conn = ('bolt://localhost:7687', 'neo4j', 'password')

class NeoQuery(object):
    """Run a single Cypher query against the database and time it.

    Connection settings are read from the module-level ``conn`` tuple.
    :meth:`query` closes the driver when it finishes, so an instance can
    only be used for one ``query`` call.
    """

    def __init__(self, query):
        """Open a driver from ``conn`` and remember the Cypher text ``query``."""
        url, user, password = conn
        self._driver = GraphDatabase.driver(url, auth=(user, password))
        self._query = query

    def close(self):
        """Close the underlying driver."""
        self._driver.close()

    def query(self, times=1):
        """Run the stored query and measure how long ``times`` runs take.

        The query is executed once to obtain its records and then ``times``
        more times under ``timeit``. The driver is closed afterwards, even
        when the query raises.

        Returns:
            A ``(records, elapsed)`` tuple: the list of records from the
            first run and the total time reported by ``timeit.timeit``.
        """
        try:
            results = self._run_cypher()
            elapsed = timeit.timeit(self._run_cypher, number=times)
        finally:
            # Release the connection whether or not the query succeeded.
            self.close()
        return results, elapsed

    def _run_cypher(self):
        """Execute the stored query and return its records as a list.

        The records are collected while the session is still open, so the
        caller never receives a result object tied to a closed session.
        """
        with self._driver.session() as session:
            return list(session.run(self._query))

    def ord_departures(self):
        """Return the Flight nodes departing ATL that are operated by AA.

        Despite the method name, the airport matched by the query is 'ATL'.
        Uses its own hard-coded Cypher rather than the stored query.
        """
        with self._driver.session() as session:
            results = session.run("""
                MATCH (ord:Airport {iata: 'ATL'})-[:HAS_DEPARTURE]->(fl:Flight)-[:OPERATED_BY]->(al:Airline {iata: 'AA'})
                RETURN ord, fl
                """)
            return [record['fl'] for record in results]

In [87]:
# Run the query once for its records, then time 200 further executions.
results, time = NeoQuery("""
    MATCH (ord:Airport {iata: 'ATL'})-[:HAS_DEPARTURE]->(fl:Flight)-[:OPERATED_BY]->(al:Airline {iata: 'AA'}) RETURN ord, fl
""").query(200)

# Display the Flight node of every returned record.
[record['fl'] for record in results]
# time  # evaluate this instead to display the total reported by timeit for the 200 runs

[<Node id=295957 labels={'Flight'} properties={'wheels_off': '1417', 'taxi_in': '12', 'distance': '731', 'year': '2015', 'airline_delay': '3', 'scheduled_time': '150', 'scheduled_arrival': '1515', 'diverted': '0', 'taxi_out': '17', 'tail_number': 'N575AA', 'elapsed_time': '151', 'air_time': '122', 'weather_delay': '0', 'airline': 'AA', 'day': '1', 'departure_time': '1400', 'departure_delay': '15', 'day_of_week': '4', 'arrival_time': '1531', 'wheels_on': '1519', 'flight_number': '1473', 'origin_airport': 'ATL', 'destination_airport': 'DFW', 'late_aircraft_delay': '12', 'month': '1', 'cancelled': '0', 'scheduled_departure': '1345', 'security_delay': '0', 'arrival_delay': '16', 'air_system_delay': '1'}>,
 <Node id=290969 labels={'Flight'} properties={'wheels_off': '0817', 'taxi_in': '5', 'distance': '594', 'year': '2015', 'scheduled_time': '117', 'scheduled_arrival': '1012', 'diverted': '0', 'taxi_out': '11', 'tail_number': 'N015AA', 'elapsed_time': '97', 'air_time': '81', 'airline': 'AA'