### Database creation and loading

##### Initiate driver and connect to local database

In [1]:
import neo4j
from neo4j import GraphDatabase

# Establishing connection with remote db
URI = "bolt://localhost:7687"
AUTH = ("dbms_user", "dbms1234")

driver = GraphDatabase.driver(URI, auth=AUTH)
driver.verify_connectivity()

##### Create database named flights

In [2]:
query = """
CREATE OR REPLACE DATABASE flights
"""

driver.execute_query(query)

EagerResult(records=[], summary=<neo4j._work.summary.ResultSummary object at 0x0000028F955EE650>, keys=[])

##### To make ingestion faster, create index on airport code for airport nodes (two or modes nodes cannot have same aiport code)

In [3]:
query = """
CREATE INDEX airport_code FOR (n:airport) ON (n.airportID)
"""

driver.execute_query(query,database_='flights')

EagerResult(records=[], summary=<neo4j._work.summary.ResultSummary object at 0x0000028F955EDF90>, keys=[])

##### Load CSV file (flights_neo4j.csv) into database flights

In [4]:
query = """
LOAD CSV WITH HEADERS FROM 'file:///flights_neo4j.csv' AS row  FIELDTERMINATOR ','
MERGE (a:airport {airportID: row.Dest})
MERGE (b:airport {airportID: row.Origin})
WITH a,b,row
CREATE (a)-[f:CONNECTS_TO]->(b)
SET f.CRSDepTime = toInteger(row.CRSDepTime), 
f.Marketing_Airline_Network = row.Marketing_Airline_Network,
f.CRSElapsedTime = toInteger(row.CRSElapsedTime)
"""

driver.execute_query(query,database_='flights')

EagerResult(records=[], summary=<neo4j._work.summary.ResultSummary object at 0x0000028F955FB3D0>, keys=[])

### Run Dijkstra Shortest Path Algorithm

##### Create a graph projection

In [12]:
query = """

CALL gds.graph.project(
    'myGraph',
    'airport',
    'CONNECTS_TO',
    {
        relationshipProperties: 'CRSElapsedTime'
    }
)


"""

driver.execute_query(query, result_transformer_=neo4j.Result.to_df, database_='flights')

Unnamed: 0,nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
0,"{'airport': {'label': 'airport', 'properties':...","{'CONNECTS_TO': {'aggregation': 'DEFAULT', 'or...",myGraph,330,20019,37


##### Run algorithm to find the shortest path between Trenton (New Jersey) and Anchorage (Alaska) airports, weighted on flight time duration

In [13]:
query = """

MATCH (start:airport{airportID:"TTN"}), (end:airport{airportID:"ANC"})
CALL gds.shortestPath.dijkstra.stream('myGraph', {
    sourceNode: start,
    targetNode: end,
    relationshipWeightProperty: 'CRSElapsedTime'
})
YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path
RETURN
    index,
    gds.util.asNode(sourceNode).airportID AS sourceNodeName,
    gds.util.asNode(targetNode).airportID AS targetNodeName,
    totalCost,
    [nodeId IN nodeIds | gds.util.asNode(nodeId).airportID] AS nodeNames,
    costs,
    nodes(path) as path
ORDER BY index


"""

driver.execute_query(query,  result_transformer_=neo4j.Result.to_df, database_='flights')


Unnamed: 0,index,sourceNodeName,targetNodeName,totalCost,nodeNames,costs,path
0,0,TTN,ANC,548.0,"[TTN, ATL, ANC]","[0.0, 132.0, 548.0]","[(airportID), (airportID), (airportID)]"
