In [1]:
!pip install neo4j

Collecting neo4j
  Downloading neo4j-5.7.0.tar.gz (176 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.3/176.3 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: neo4j
  Building wheel for neo4j (pyproject.toml) ... [?25ldone
[?25h  Created wheel for neo4j: filename=neo4j-5.7.0-py3-none-any.whl size=243940 sha256=4cf16ad0b5758f1399abe0f66e09787047f8eefc3d23aa3e00ba51431f027087
  Stored in directory: /Users/dbabichenko/Library/Caches/pip/wheels/6d/be/1e/749b84bba4b7429145ea09ccb46114fab4748625eb319f286a
Successfully built neo4j
Installing collected packages: neo4j
Successfully installed neo4j-5.7.0


In [69]:
from neo4j import GraphDatabase
from uuid import uuid4
import pandas as pd

In [70]:
# The following class is based on the "Create a graph database in Neo4j using Python" 
# tutorial by CJ Sullivan, Feb 10, 2021. 
# https://towardsdatascience.com/create-a-graph-database-in-neo4j-using-python-4172d40f89c4


class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, parameters=None, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response
    
    def query_to_dataframe(self, query, parameters=None, db=None):
        df = pd.DataFrame([dict(_) for _ in self.query(query, parameters, db)])
        return df


In [71]:
conn = Neo4jConnection(uri="bolt://localhost:7687", 
                       user="neo4j",              
                       pwd="admin123")

In [73]:
for i in range(0, 10):
    qry = "CREATE (f:Flight) SET f.flight_id = '" + str(uuid4()) + "' RETURN f;"
    #print(qry)
    result = conn.query(qry)
    #print(result)

In [74]:
qry = "MATCH (f:Flight) RETURN f.flight_id;"
result = conn.query(qry)
print(result)

[<Record f.flight_id='c359e4e5-3c76-49e3-8cf2-2071d95862f7'>, <Record f.flight_id='6b1533b0-5255-4e03-bf10-850ac63c05a7'>, <Record f.flight_id='c88dc0a8-dbdf-4088-bb12-484b9dc676cb'>, <Record f.flight_id='926b243e-c13b-4307-8f56-89fa7a0cf891'>, <Record f.flight_id='9a9f726e-ac43-481a-80a4-a23f891a68cc'>, <Record f.flight_id='52801f83-9fcd-46e9-82d6-8406c88f67d2'>, <Record f.flight_id='563c1049-485e-4091-a491-a06d746306b1'>, <Record f.flight_id='e92402ff-497f-4a00-aa0c-577db6ff6665'>, <Record f.flight_id='f7ceabd1-6fd6-439d-9a12-4b9cdd21b88b'>, <Record f.flight_id='defe8857-09cf-4432-8461-0df3ecfa279b'>, <Record f.flight_id='42b8f142-f6b0-4cdc-b2da-078b18dde2a4'>]


In [75]:
for node in result:
    print(node[0])

c359e4e5-3c76-49e3-8cf2-2071d95862f7
6b1533b0-5255-4e03-bf10-850ac63c05a7
c88dc0a8-dbdf-4088-bb12-484b9dc676cb
926b243e-c13b-4307-8f56-89fa7a0cf891
9a9f726e-ac43-481a-80a4-a23f891a68cc
52801f83-9fcd-46e9-82d6-8406c88f67d2
563c1049-485e-4091-a491-a06d746306b1
e92402ff-497f-4a00-aa0c-577db6ff6665
f7ceabd1-6fd6-439d-9a12-4b9cdd21b88b
defe8857-09cf-4432-8461-0df3ecfa279b
42b8f142-f6b0-4cdc-b2da-078b18dde2a4


In [76]:
df = conn.query_to_dataframe(qry)
df.head()

Unnamed: 0,f.flight_id
0,c359e4e5-3c76-49e3-8cf2-2071d95862f7
1,6b1533b0-5255-4e03-bf10-850ac63c05a7
2,c88dc0a8-dbdf-4088-bb12-484b9dc676cb
3,926b243e-c13b-4307-8f56-89fa7a0cf891
4,9a9f726e-ac43-481a-80a4-a23f891a68cc


In [78]:
df = pd.read_csv('flights.csv')
df.head()

Unnamed: 0,dep_time,sched_dep_time,dep_delay,arr_time,sched_arr_time,arr_delay,carrier,flight,tailnum,origin,dest,air_time,distance,std,sta,atd,ata,date
0,1807.0,1630,97.0,1956.0,1837,79.0,EV,4411,N13566,EWR,MEM,144.0,946,16:30:00,18:37:00,18:07:00,19:56:00,2/26/13
1,1459.0,1445,14.0,1801.0,1747,14.0,B6,1171,N661JB,LGA,FLL,147.0,1076,14:45:00,17:47:00,14:59:00,18:01:00,8/17/13
2,1812.0,1815,-3.0,2055.0,2125,-30.0,AS,7,N403AS,EWR,SEA,315.0,2402,18:15:00,21:25:00,18:12:00,20:55:00,2/13/13
3,2122.0,2115,7.0,2339.0,2353,-14.0,B6,97,N656JB,JFK,DEN,221.0,1626,21:15:00,23:53:00,21:22:00,23:39:00,4/11/13
4,1832.0,1835,-3.0,2145.0,2155,-10.0,AA,269,N3EYAA,JFK,SEA,358.0,2422,18:35:00,21:55:00,18:32:00,21:45:00,8/5/13


In [90]:
for airport in set(list(df['origin'].unique()) + list(df['dest'].unique())):
    qry = "CREATE (a:Airport {id: '" + airport + "'}) RETURN a;"
    print(qry)
    conn.query(qry)

CREATE (a:Airport {id: 'DEN'}) RETURN a;
CREATE (a:Airport {id: 'ATL'}) RETURN a;
CREATE (a:Airport {id: 'DCA'}) RETURN a;
CREATE (a:Airport {id: 'IAH'}) RETURN a;
CREATE (a:Airport {id: 'SFO'}) RETURN a;
CREATE (a:Airport {id: 'STL'}) RETURN a;
CREATE (a:Airport {id: 'CLE'}) RETURN a;
CREATE (a:Airport {id: 'MIA'}) RETURN a;
CREATE (a:Airport {id: 'EWR'}) RETURN a;
CREATE (a:Airport {id: 'IAD'}) RETURN a;
CREATE (a:Airport {id: 'LAX'}) RETURN a;
CREATE (a:Airport {id: 'IND'}) RETURN a;
CREATE (a:Airport {id: 'ORD'}) RETURN a;
CREATE (a:Airport {id: 'MDW'}) RETURN a;
CREATE (a:Airport {id: 'LGA'}) RETURN a;
CREATE (a:Airport {id: 'FLL'}) RETURN a;
CREATE (a:Airport {id: 'JFK'}) RETURN a;
CREATE (a:Airport {id: 'CLT'}) RETURN a;
CREATE (a:Airport {id: 'DFW'}) RETURN a;
CREATE (a:Airport {id: 'SEA'}) RETURN a;
CREATE (a:Airport {id: 'MSY'}) RETURN a;
CREATE (a:Airport {id: 'TPA'}) RETURN a;
CREATE (a:Airport {id: 'JAX'}) RETURN a;
CREATE (a:Airport {id: 'SRQ'}) RETURN a;
CREATE (a:Airpor

In [92]:
for idx, row in df.iterrows():
    qry = "MATCH (a1:Airport),(a2:Airport) WHERE a1.id = '" + row['origin'] + "' "
    qry += " AND a2.id = '" + row['dest'] + "' "
    qry += " CREATE (a1)-[f1:FLIES_TO]->(a2) "
    qry += " CREATE (a1)<-[f2:FLIES_TO]-(a2) "
    qry += " RETURN a1, a2, f1, f2;"
    #print(qry)
    conn.query(qry)

In [93]:
qry = "MATCH (a:Airport) RETURN a.id;"
data = conn.query_to_dataframe(qry)
data

Unnamed: 0,a.id
0,DEN
1,ATL
2,DCA
3,IAH
4,SFO
5,STL
6,CLE
7,MIA
8,EWR
9,IAD
