In [25]:
import time

from neo4j import GraphDatabase

In [27]:
ITERATIONS = 100

In [19]:
URI = "bolt://localhost:7687"
driver = GraphDatabase.driver(URI, auth=("neo4j", "password"))

In [20]:
def check_connection(driver):
    with driver.session() as session:
        result = session.run("CALL dbms.components()")
        for record in result:
            print(record)


check_connection(driver)

<Record name='Neo4j Kernel' versions=['5.26.0'] edition='community'>


## Które popularne lotniska (minimum 30 połączeń) nie są ze sobą połączone?


In [22]:
query = """
MATCH (t1:Terminal)-[r:FLIGHT_TO]->(t2:Terminal)
WITH t1, count(r) as routes_count
WHERE routes_count > 30
WITH collect(t1) as major_airports
UNWIND major_airports as airport1
UNWIND major_airports as airport2
WITH airport1, airport2
WHERE airport1 <> airport2
AND NOT (airport1)-[:FLIGHT_TO]->(airport2)
RETURN
    airport1.name as Airport1,
    airport1.iata as IATA1,
    airport2.name as Airport2,
    airport2.iata as IATA2,
    point.distance(airport1.location, airport2.location)/1000 as Distance_KM
ORDER BY Distance_KM
LIMIT 10
"""

In [31]:
with driver.session() as session:
    # get timestamp with microsecond
    start = time.time()
    for _ in range(ITERATIONS):
        result = session.run(query)
    end = time.time()

print(f"Query took {(end - start)/ITERATIONS} seconds on average")

Query took 0.00301501989364624 seconds


In [33]:
# let's create indices
with driver.session() as session:
    session.run("""
    CREATE INDEX terminal_details IF NOT EXISTS
    FOR (t:Terminal)
    ON (t.name, t.iata, t.location)
    """)
    # create index on flight_to relationship to speed up the query
    session.run("""
    CREATE INDEX flight_source IF NOT EXISTS
    FOR ()-[r:FLIGHT_TO]->()
    ON (r.source)
    """)
    session.run("""
    CREATE INDEX flight_destination IF NOT EXISTS
    FOR ()-[r:FLIGHT_TO]->()
    ON (r.destination)
    """)


In [35]:
with driver.session() as session:
    # get timestamp with microsecond
    start = time.time()
    for _ in range(ITERATIONS):
        result = session.run(query)
    end = time.time()

print(f"Query took {(end - start)/ITERATIONS} seconds on average")

Query took 0.0018349123001098634 seconds on average


## Które linie lotnicze mają najbardziej zróżnicowane floty?

In [36]:
query = """
MATCH (a:Airline)-[op:OPERATES]->(p:PlaneType)
WITH
    a.name as airline_name,
    a.iata as airline_code,
    COUNT(DISTINCT p) as plane_type_count,
    COLLECT(DISTINCT p.name) as fleet
RETURN
    airline_name as Airline,
    airline_code as IATA,
    plane_type_count as Number_of_Plane_Types,
    fleet as Fleet_List
ORDER BY plane_type_count DESC
"""

In [37]:
with driver.session() as session:
    # get timestamp with microsecond
    start = time.time()
    for _ in range(ITERATIONS):
        result = session.run(query)
    end = time.time()

print(f"Query took {(end - start)/ITERATIONS} seconds on average")

Query took 0.005170118808746338 seconds on average


In [38]:
with driver.session() as session:
    session.run("""
CREATE INDEX airline_details IF NOT EXISTS
FOR (a:Airline)
ON (a.name, a.iata)
""")
    session.run("""
CREATE INDEX plane_type_name IF NOT EXISTS
FOR (p:PlaneType)
ON (p.name)
""")

In [39]:
with driver.session() as session:
    # get timestamp with microsecond
    start = time.time()
    for _ in range(ITERATIONS):
        result = session.run(query)
    end = time.time()


print(f"Query took {(end - start)/ITERATIONS} seconds on average")

Query took 0.0023398399353027344 seconds on average
