In [None]:
%pip install neo4j-driver

##### Dataset
The original dataset can be found at <https://github.com/krlawrence/graph>.

In [43]:
#import libraries
import os
from getpass import getpass

import pandas as pd
from neo4j import GraphDatabase, basic_auth

In [44]:
# Define sandbox credentials.
# Secrets must not be stored in the notebook: the password is taken from the
# NEO4J_PASSWORD environment variable, with an interactive prompt as fallback.
# The Bolt URI can be overridden through NEO4J_URI.
uri = os.environ.get("NEO4J_URI", "bolt://xx.xx.xx.xx:7687")
pwd = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

In [45]:
# Establish the connection to the graph database.
driver = GraphDatabase.driver(uri, auth=basic_auth("neo4j", pwd))

# Creating the driver does not contact the server yet; verify now so that a
# wrong URI or password fails here instead of at the first query.
driver.verify_connectivity()

In [46]:
# Bare expression: echoes the driver object's repr as the cell output.
driver

<neo4j._sync.driver.BoltDriver at 0x2d496d9c2d0>

In [47]:
#Get Node Labels and relationship types

def get_node_labels(tx):
    """Return the node labels reported by ``CALL db.labels()``.

    Args:
        tx: Object exposing ``run(query)``; the value it returns is iterated
            and each item is indexed by the ``"label"`` key.

    Returns:
        list: One entry per returned record, in the order received.
    """
    labels = []
    for row in tx.run("CALL db.labels()"):
        labels.append(row["label"])
    return labels

def get_relationship_types(tx):
    """Return the relationship types reported by ``CALL db.relationshipTypes()``.

    Args:
        tx: Object exposing ``run(query)``; the value it returns is iterated
            and each item is indexed by the ``"relationshipType"`` key.

    Returns:
        list: One entry per returned record, in the order received.
    """
    rows = tx.run("CALL db.relationshipTypes()")
    return list(row["relationshipType"] for row in rows)

# Run both schema lookups as read transactions inside a single session.
with driver.session() as session:
    node_labels, relationship_types = (
        session.execute_read(get_node_labels),
        session.execute_read(get_relationship_types),
    )

# Show what the graph schema contains
print("Node Labels:", node_labels)
print("Relationship Types:", relationship_types)

Node Labels: ['Airport', 'City', 'Region', 'Country', 'Continent']
Relationship Types: ['IN_CITY', 'IN_COUNTRY', 'IN_REGION', 'ON_CONTINENT', 'HAS_ROUTE']


In [48]:
# Accumulator for the per-entity counts; each entry is a dict with
# "Entity" and "Count" keys that later becomes one DataFrame row.
data = list()

In [49]:
# Cypher: count every node in the graph, whatever its label.
query = (
    "MATCH (n) "
    "RETURN COUNT(n)"
)

In [50]:
# Look at the raw Record object returned for the current count query.
with driver.session() as session:
    result = session.run(query)
    node_record = result.single()
    print(node_record)

<Record COUNT(n)=8627>


In [51]:
# Same query again, this time pulling the bare number out of the Record.
with driver.session() as session:
    result = session.run(query)
    node_total = result.single()["COUNT(n)"]
    print(node_total)

8627


In [52]:
# Run the current count query and keep the single returned value.
with driver.session() as session:
    result = session.run(query)
    count = result.single()["COUNT(n)"]

# Drop any row left by a previous run of this cell, so re-running it does not
# add a duplicate "Nodes" entry to the summary list.
data[:] = [row for row in data if row["Entity"] != "Nodes"]
data.append({"Entity": "Nodes", "Count": count})
print(f'Number of Nodes: {count}')

Number of Nodes: 8627


In [53]:
# Cypher: count the nodes labelled Country.
query = (
    "MATCH (n:Country) "
    "RETURN COUNT(n)"
)

In [54]:
# Look at the raw Record object returned for the current count query.
with driver.session() as session:
    result = session.run(query)
    country_record = result.single()  # the single result record
    print(country_record)

<Record COUNT(n)=232>


In [55]:
# Same query again, this time pulling the bare number out of the Record.
with driver.session() as session:
    result = session.run(query)
    country_total = result.single()["COUNT(n)"]
    print(country_total)

232


In [56]:
# Run the current count query and keep the single returned value.
with driver.session() as session:
    result = session.run(query)
    count = result.single()["COUNT(n)"]

# Drop any row left by a previous run of this cell, so re-running it does not
# add a duplicate "Countries" entry to the summary list.
data[:] = [row for row in data if row["Entity"] != "Countries"]
data.append({"Entity": "Countries", "Count": count})
print(f'Number of countries: {count}')

Number of countries: 232


In [57]:
# Cypher: count the nodes labelled Airport.
query = (
    "MATCH (n:Airport) "
    "RETURN COUNT(n)"
)

In [58]:
# Run the current count query and keep the single returned value.
with driver.session() as session:
    result = session.run(query)
    count = result.single()["COUNT(n)"]

# Drop any row left by a previous run of this cell, so re-running it does not
# add a duplicate "Airports" entry to the summary list.
data[:] = [row for row in data if row["Entity"] != "Airports"]
data.append({"Entity": "Airports", "Count": count})
print(f'Number of Airports: {count}')

Number of Airports: 3503


In [59]:
# Cypher: count the nodes labelled City.
query = (
    "MATCH (n:City) "
    "RETURN COUNT(n)"
)

In [60]:
# Run the current count query and keep the single returned value.
with driver.session() as session:
    result = session.run(query)
    count = result.single()["COUNT(n)"]

# Drop any row left by a previous run of this cell, so re-running it does not
# add a duplicate "Cities" entry to the summary list.
data[:] = [row for row in data if row["Entity"] != "Cities"]
data.append({"Entity": "Cities", "Count": count})
print(f'Number of Cities: {count}')

Number of Cities: 3359


In [61]:
# Cypher: count the nodes labelled Region.
query = (
    "MATCH (n:Region) "
    "RETURN COUNT(n)"
)

In [62]:
# Run the current count query and keep the single returned value.
with driver.session() as session:
    result = session.run(query)
    count = result.single()["COUNT(n)"]

# Drop any row left by a previous run of this cell, so re-running it does not
# add a duplicate "Regions" entry to the summary list.
data[:] = [row for row in data if row["Entity"] != "Regions"]
data.append({"Entity": "Regions", "Count": count})
print(f'Number of Regions: {count}')

Number of Regions: 1527


In [63]:
# Cypher: count the nodes labelled Continent.
query = (
    "MATCH (n:Continent) "
    "RETURN COUNT(n)"
)

In [64]:
# Run the current count query and keep the single returned value.
with driver.session() as session:
    result = session.run(query)
    count = result.single()["COUNT(n)"]

# Drop any row left by a previous run of this cell, so re-running it does not
# add a duplicate "Continents" entry to the summary list.
data[:] = [row for row in data if row["Entity"] != "Continents"]
data.append({"Entity": "Continents", "Count": count})
print(f'Number of Continents: {count}')

Number of Continents: 6


In [65]:
# Build the summary table: one row per collected {"Entity", "Count"} dict.
df = pd.DataFrame(data=data)

# Plain-text rendering of the summary table
print(df)

       Entity  Count
0       Nodes   8627
1   Countries    232
2    Airports   3503
3      Cities   3359
4     Regions   1527
5  Continents      6


In [66]:
# Distribution of the number of airports per continent

In [38]:
# Cypher: number of airports per continent, largest first.
# NOTE(review): get_airports_per_continent embeds its own copy of this query,
# so this module-level `query` looks unused by that function. Confirm whether
# this cell is still needed.
query = (
    "MATCH (:Airport)-[:ON_CONTINENT]->(c:Continent) "
    "RETURN c.name AS continentName, count(*) AS numAirports "
    "ORDER BY numAirports DESC"
)

In [67]:
def get_airports_per_continent(tx):
    """Return the airport count for each continent, largest count first.

    Args:
        tx: Object exposing ``run(query)``; the value it returns is iterated
            and each item is indexed by ``"continentName"`` and
            ``"numAirports"``.

    Returns:
        list[dict]: One ``{"continentName": ..., "numAirports": ...}`` dict
        per returned record, in the order received (the query sorts by
        ``numAirports`` descending).
    """
    cypher = """
    MATCH (:Airport)-[:ON_CONTINENT]->(c:Continent)
    RETURN c.name AS continentName, count(*) AS numAirports
    ORDER BY numAirports DESC
    """
    rows = []
    for record in tx.run(cypher):
        # Copy only the two projected columns into a plain dict.
        rows.append(
            {
                "continentName": record["continentName"],
                "numAirports": record["numAirports"],
            }
        )
    return rows

# Fetch the per-continent airport counts through a read transaction.
with driver.session() as session:
    airports_per_continent = session.execute_read(
        get_airports_per_continent
    )

In [68]:
# Build a table with one row per continent from the list of result dicts.
airports_df = pd.DataFrame(data=airports_per_continent)

# Plain-text rendering of the table
print(airports_df)

  continentName  numAirports
0            NA          989
1            AS          971
2            EU          605
3            AF          321
4            SA          313
5            OC          304
