# Flightly

Modelling 2015 US commercial airline flight queries using SQL and Neo4j

### Setup Neo4j Query Object Wrapper
This wrapper object returns the result of a Cypher query and also measures how long a given number of repeated executions of that query takes.

In [85]:
# Maximum number of rows of flights.csv that the load_flights query imports.
LINE_LIMIT = 10_000

In [86]:
import os
import timeit

from neo4j import GraphDatabase

# Connection settings as (bolt URL, user, password). Each value can be
# overridden through an environment variable so that real credentials do not
# have to be written into the notebook; the defaults target a local instance.
conn = (
    os.environ.get('NEO4J_URI', 'bolt://localhost:7687'),
    os.environ.get('NEO4J_USER', 'neo4j'),
    os.environ.get('NEO4J_PASSWORD', 'password'),
)


class NeoQuery(object):
    """Run a single Cypher statement against Neo4j and time repeated executions.

    A new driver is opened for every instance (using the module-level ``conn``
    settings) and is closed again by :meth:`query`, so an instance is meant to
    be used for one ``query()`` call.
    """

    def __init__(self, query):
        """Open a driver for ``conn`` and remember the Cypher text to run.

        Args:
            query: Cypher statement as a string.
        """
        url, user, password = conn
        self._driver = GraphDatabase.driver(url, auth=(user, password))
        self._query = query

    def close(self):
        """Close the underlying driver."""
        self._driver.close()

    def query(self, times=1):
        """Execute the statement and measure ``times`` further executions.

        The statement is executed once to collect its records and then
        ``times`` more times under ``timeit``; it therefore runs
        ``times + 1`` times in total. ``query(0)`` runs it exactly once with
        no timed repetitions, which is what write statements should use.

        Args:
            times: Number of timed executions passed to ``timeit.timeit``.

        Returns:
            A ``(records, seconds)`` tuple: the list of records from the first
            execution and the total time reported by ``timeit`` for the timed
            executions.

        Raises:
            Whatever the driver raises while running the statement; the driver
            is closed before the exception propagates.
        """
        try:
            results = self._run_cypher()
            elapsed = timeit.timeit(self._run_cypher, number=times)
        finally:
            # Always release the driver, even when the statement fails.
            self.close()
        return results, elapsed

    def _run_cypher(self):
        """Run the statement in a fresh session and return its records as a list."""
        with self._driver.session() as session:
            # Materialise the records while the session is still open: the
            # result object must not be iterated after the session is closed.
            return list(session.run(self._query))

### Setup queries
Delete all of the existing nodes and relationships to start with a clean slate.
Then, load all of the CSV files into the database.

In [89]:
# Cypher statements that rebuild the graph from the CSV files. They are run in
# insertion order, so 'delete_all' wipes the database before anything is loaded.
init_queries = {
    'delete_all': """
                   MATCH (n)
                   DETACH DELETE n
                   """,
    'load_airlines': """
            LOAD CSV FROM 'file:///flightly/airlines.csv' AS line
            CREATE (:Airline { name: line[1], iata: line[0]})
            """,
    'load_airports': """
            LOAD CSV FROM 'file:///flightly/airports.csv' AS line
            CREATE (:Airport { name: line[1], iata: line[0], city: line[2], state: line[3], country: line[4], latitude: line[5], longitude: line[6]})
            """,
    # f-string: {LINE_LIMIT} is interpolated, literal Cypher braces are doubled.
    'load_flights': f"""
            USING PERIODIC COMMIT 1000
            LOAD CSV FROM 'file:///flightly/flights.csv' AS line
            WITH line LIMIT {LINE_LIMIT}
            CREATE (:Flight {{
                year: line[0],
                month: line[1],
                day: line[2],
                day_of_week: line[3],
                airline: line[4],
                flight_number: line[5],
                tail_number: line[6],
                origin_airport: line[7],
                destination_airport: line[8],
                scheduled_departure: line[9],
                departure_time: line[10],
                departure_delay: line[11],
                taxi_out: line[12],
                wheels_off: line[13],
                scheduled_time: line[14],
                elapsed_time: line[15],
                air_time: line[16],
                distance: line[17],
                wheels_on: line[18],
                taxi_in: line[19],
                scheduled_arrival: line[20],
                arrival_time: line[21],
                arrival_delay: line[22],
                diverted: line[23],
                cancelled: line[24],
                cancellation_reason: line[25],
                air_system_delay: line[26],
                security_delay: line[27],
                airline_delay: line[28],
                late_aircraft_delay: line[29],
                weather_delay: line[30]
            }})
            FOREACH (ignored IN CASE WHEN line[6] IS NULL THEN [] ELSE [1] END |
                MERGE (:Tail {{ number: line[6] }})
            )
            """
}
# The Tail node is MERGEd (one node per distinct tail number) rather than
# CREATEd once per flight row; duplicate Tail nodes would multiply the
# relationships built from them later. MERGE rejects null property values, so
# rows without a tail number are skipped via the FOREACH/CASE guard.

# query(0) executes each statement exactly once, with no timed repetitions.
for cypher in init_queries.values():
    NeoQuery(cypher).query(0)

### Match queries
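Create the relationships that link each flight to its origin airport, its destination airport, its operating airline, and its aircraft tail number.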

In [90]:
# Cypher statements that connect Flight nodes to Airport, Airline and Tail
# nodes by comparing the codes stored on the flight with the other node's key.
# MERGE (instead of CREATE) makes this cell idempotent: running it again does
# not add a second copy of a relationship that already exists.
match_queries = {
    'match_origin_airport': """
            MATCH (fl:Flight),(ap:Airport)
            WHERE fl.origin_airport = ap.iata
            MERGE (ap)-[:HAS_DEPARTURE]->(fl)
            """,

    'match_destination_airport': """
            MATCH (fl:Flight),(ap:Airport)
            WHERE fl.destination_airport = ap.iata
            MERGE (fl)-[:FLIES_TO]->(ap)
            """,
    'match_airline': """
            MATCH (fl:Flight),(al:Airline)
            WHERE fl.airline = al.iata
            MERGE (fl)-[:OPERATED_BY]->(al)
            """,
    'match_tail': """
            MATCH (fl:Flight),(tl:Tail)
            WHERE fl.tail_number = tl.number
            MERGE (fl)-[:USES]->(tl)
            """
}

# query(0) executes each statement exactly once, with no timed repetitions.
for cypher in match_queries.values():
    NeoQuery(cypher).query(0)

In [91]:
# Flights departing from airport 'ATL' that are operated by airline 'AA',
# with 200 timed executions.
atl_aa_departures = NeoQuery(
    """
    MATCH (ord:Airport {iata: 'ATL'})-[:HAS_DEPARTURE]->(fl:Flight)-[:OPERATED_BY]->(al:Airline {iata: 'AA'})
    RETURN ord, fl
    """
)
results, time = atl_aa_departures.query(200)

print(time)
# Show only the Flight node of every returned record.
[row['fl'] for row in results]

1.231178969001121


[<Node id=469042 labels={'Flight'} properties={'wheels_off': '1548', 'taxi_in': '12', 'distance': '731', 'year': '2015', 'scheduled_time': '145', 'scheduled_arrival': '1650', 'diverted': '0', 'taxi_out': '14', 'tail_number': 'N025AA', 'elapsed_time': '149', 'air_time': '123', 'airline': 'AA', 'departure_time': '1534', 'day': '1', 'departure_delay': '9', 'day_of_week': '4', 'arrival_time': '1703', 'wheels_on': '1651', 'flight_number': '194', 'origin_airport': 'ATL', 'destination_airport': 'DFW', 'month': '1', 'cancelled': '0', 'scheduled_departure': '1525', 'arrival_delay': '13'}>,
 <Node id=462442 labels={'Flight'} properties={'wheels_off': '1226', 'taxi_in': '16', 'distance': '731', 'year': '2015', 'scheduled_time': '150', 'scheduled_arrival': '1325', 'diverted': '0', 'taxi_out': '16', 'tail_number': 'N585AA', 'elapsed_time': '149', 'air_time': '117', 'airline': 'AA', 'departure_time': '1210', 'day': '1', 'departure_delay': '15', 'day_of_week': '4', 'arrival_time': '1339', 'wheels_on'

In [92]:
# Flights with day '1' going from airport 'ATL' to airport 'CLT',
# with the default single timed execution.
atl_clt_day1_flights = NeoQuery(
    """
    MATCH (:Airport {iata: 'ATL'})-[:HAS_DEPARTURE]->(fl:Flight {day: '1'})-[:FLIES_TO]->(ar:Airport {iata: 'CLT'})
    RETURN fl
    """
)
results, time = atl_clt_day1_flights.query()

print(time)
[row for row in results]

0.0035252979996585054


[<Record fl=<Node id=470873 labels={'Flight'} properties={'wheels_off': '1636', 'taxi_in': '6', 'distance': '226', 'year': '2015', 'scheduled_time': '74', 'scheduled_arrival': '1734', 'diverted': '0', 'taxi_out': '16', 'tail_number': 'N355NB', 'elapsed_time': '61', 'air_time': '39', 'airline': 'DL', 'departure_time': '1620', 'day': '1', 'departure_delay': '0', 'day_of_week': '4', 'arrival_time': '1721', 'wheels_on': '1715', 'flight_number': '1441', 'origin_airport': 'ATL', 'destination_airport': 'CLT', 'month': '1', 'cancelled': '0', 'scheduled_departure': '1620', 'arrival_delay': '-13'}>>,
 <Record fl=<Node id=460014 labels={'Flight'} properties={'wheels_off': '1049', 'taxi_in': '13', 'distance': '226', 'year': '2015', 'scheduled_time': '75', 'scheduled_arrival': '1155', 'diverted': '0', 'taxi_out': '13', 'tail_number': 'N549UW', 'elapsed_time': '66', 'air_time': '40', 'airline': 'US', 'departure_time': '1036', 'day': '1', 'departure_delay': '-4', 'day_of_week': '4', 'arrival_time': '

Query for all flights with a specific tail number, found by traversing the `USES` relationship to the matching `Tail` node.

In [93]:
# Flights linked through USES to the Tail node numbered 'N129DL',
# with 200 timed executions.
flights_by_tail_node = NeoQuery(
    """
    MATCH (ar:Airport)-[:HAS_DEPARTURE]->(fl:Flight)-[:USES]->(tl:Tail {number: 'N129DL'})
    RETURN distinct fl
    """
)
results, time = flights_by_tail_node.query(200)

print(time)
[row for row in results]

1.1943601919992943


[<Record fl=<Node id=466784 labels={'Flight'} properties={'wheels_off': '1422', 'taxi_in': '7', 'distance': '2139', 'year': '2015', 'scheduled_time': '269', 'scheduled_arrival': '2139', 'diverted': '0', 'taxi_out': '12', 'tail_number': 'N129DL', 'elapsed_time': '258', 'air_time': '239', 'airline': 'DL', 'departure_time': '1410', 'day': '1', 'departure_delay': '0', 'day_of_week': '4', 'arrival_time': '2128', 'wheels_on': '2121', 'flight_number': '61', 'origin_airport': 'SFO', 'destination_airport': 'ATL', 'month': '1', 'cancelled': '0', 'scheduled_departure': '1410', 'arrival_delay': '-11'}>>,
 <Record fl=<Node id=460084 labels={'Flight'} properties={'wheels_off': '1100', 'taxi_in': '8', 'distance': '2139', 'year': '2015', 'scheduled_time': '330', 'scheduled_arrival': '1310', 'diverted': '0', 'taxi_out': '17', 'tail_number': 'N129DL', 'elapsed_time': '302', 'air_time': '277', 'airline': 'DL', 'departure_time': '1043', 'day': '1', 'departure_delay': '3', 'day_of_week': '4', 'arrival_time

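Query for all flights with a specific tail number by filtering on the `tail_number` property of the `Flight` nodes instead of traversing to a `Tail` node; this query is slower.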

In [94]:
# Flights whose own tail_number property equals 'N129DL',
# with 200 timed executions.
flights_by_tail_property = NeoQuery(
    """
    MATCH (ar:Airport)-[:HAS_DEPARTURE]->(fl:Flight {tail_number: 'N129DL'})
    RETURN fl
    """
)
results, time = flights_by_tail_property.query(200)

print(time)
[row for row in results]

3.127421900000627


[<Record fl=<Node id=460084 labels={'Flight'} properties={'wheels_off': '1100', 'taxi_in': '8', 'distance': '2139', 'year': '2015', 'scheduled_time': '330', 'scheduled_arrival': '1310', 'diverted': '0', 'taxi_out': '17', 'tail_number': 'N129DL', 'elapsed_time': '302', 'air_time': '277', 'airline': 'DL', 'departure_time': '1043', 'day': '1', 'departure_delay': '3', 'day_of_week': '4', 'arrival_time': '1245', 'wheels_on': '1237', 'flight_number': '1680', 'origin_airport': 'ATL', 'destination_airport': 'SFO', 'month': '1', 'cancelled': '0', 'scheduled_departure': '1040', 'arrival_delay': '-25'}>>,
 <Record fl=<Node id=466784 labels={'Flight'} properties={'wheels_off': '1422', 'taxi_in': '7', 'distance': '2139', 'year': '2015', 'scheduled_time': '269', 'scheduled_arrival': '2139', 'diverted': '0', 'taxi_out': '12', 'tail_number': 'N129DL', 'elapsed_time': '258', 'air_time': '239', 'airline': 'DL', 'departure_time': '1410', 'day': '1', 'departure_delay': '0', 'day_of_week': '4', 'arrival_ti