In [1]:
import neo4j
import pandas as pd
import geopy.distance

### Search for good osmid pairs

## Neo4j Import

In [2]:
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "krakow123"
NEO4J_DATABASE = "neo4j"

driver = neo4j.GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD), database=NEO4J_DATABASE)

In [3]:
clear_data_query = """
    MATCH (n) 
    CALL {
        WITH n
        DETACH DELETE n
    } IN TRANSACTIONS OF 100000 ROWS
"""

clear_indexes_and_constrains = "CALL apoc.schema.assert({}, {}, true) YIELD label, key RETURN *"

node_constraint_query = "CREATE CONSTRAINT IF NOT EXISTS FOR (i:Intersection) REQUIRE i.osmid IS UNIQUE"

rel_index_query = "CREATE INDEX IF NOT EXISTS FOR ()-[r:ROAD_SEGMENT]-() ON r.osmid"
distance_index_query = "CREATE INDEX IF NOT EXISTS FOR (i:Intersection) ON i.distance"

# TODO: remember ro copy files to import folder in neo4j and change filenames in query

nodes_csv_load = """
    LOAD CSV WITH HEADERS FROM "file:///nodes_neo4j.csv" AS row
    CALL {
        WITH row
        MERGE (i:Intersection {osmid: toInteger(row.osmid)})
        SET i.latitude = toFloat(row.y), 
            i.longitude = toFloat(row.x),
            i.distance = toInteger(row.distance)
    } IN TRANSACTIONS OF 50000 ROWS
    RETURN COUNT(*) as total
    """
    
relationships_csv_load = """
    LOAD CSV WITH HEADERS FROM "file:///relationships_neo4j.csv" AS row
    CALL {
        WITH row
        MATCH (from:Intersection {osmid: toInteger(row.source)})
        MATCH (to:Intersection {osmid: toInteger(row.target)})
        MERGE (from)-[r:ROAD_SEGMENT {osmid: toInteger(row.osmid)}]->(to)
        SET r.length = toFloat(row.length),
            r.name = row.name,
            r.highway = row.highway
    } IN TRANSACTIONS OF 50000 ROWS
    RETURN COUNT(*) AS total
    """


In [4]:
def print_result(results):
    result = [dict(i) for i in results]
    print(result)
    
    result_all = results.consume()
    print(f"result_consumed_after: {result_all.result_consumed_after} ms")
    print(f"result_available_after: {result_all.result_available_after} ms\n")

In [5]:
def clear_indexes(tx):
    results = tx.run(clear_indexes_and_constrains)
    print_result(results)

In [6]:
def create_constraints(tx):
    results = tx.run(node_constraint_query)
    print_result(results)

    results = tx.run(rel_index_query)
    print_result(results)
    
    results = tx.run(distance_index_query)
    print_result(results)

### Clear database

In [7]:
with driver.session() as session:
    results = session.run(clear_data_query)
    print_result(results)
    
with driver.session() as session:
    session.execute_write(clear_indexes)

[]
result_consumed_after: 0 ms
result_available_after: 222 ms

[]
result_consumed_after: 15 ms
result_available_after: 57 ms



### Create constraints

In [8]:
with driver.session() as session:
    session.execute_write(create_constraints)

[]
result_consumed_after: 0 ms
result_available_after: 333 ms

[]
result_consumed_after: 0 ms
result_available_after: 31 ms

[]
result_consumed_after: 0 ms
result_available_after: 16 ms



### Load data

In [9]:
with driver.session() as session:
    results = session.run(nodes_csv_load)
    print_result(results)

[{'total': 13811811}]
result_consumed_after: 372132 ms
result_available_after: 258 ms



In [10]:
with driver.session() as session:
    results = session.run(relationships_csv_load)
    print_result(results)

[{'total': 28776032}]
result_consumed_after: 1655348 ms
result_available_after: 346 ms



### Search queries

```
EXPLAIN PROFILE
```

In [11]:
def dijkstra_query(tx, osmid_1, osmid_2):
    dijkstra_query_string = f"""
        MATCH (source:Intersection {{osmid: {osmid_1}}}) 
        MATCH (target:Intersection {{osmid: {osmid_2}}})
        CALL apoc.algo.dijkstra(source, target, "", "length")
        YIELD path, weight
        RETURN path, weight
        """
    print(dijkstra_query_string)
    results = tx.run(dijkstra_query_string)
    print_result(results)

In [12]:
def astar_query(tx, osmid_1, osmid_2):
    astar_query_string = f"""
        MATCH (source:Intersection {{osmid: {osmid_1}}}) 
        MATCH (target:Intersection {{osmid: {osmid_2}}})
        CALL apoc.algo.aStar(source, target, "", "length", "latitude", "longitude")
        YIELD path, weight
        RETURN path, weight
        """
    print(astar_query_string)
    results = tx.run(astar_query_string)
    print_result(results)

In [14]:
gdf_nodes = pd.read_csv("d:/AGH/NOVA/dm/data/krakow_30/nodes_neo4j.csv")

In [15]:
# two random osmid
distance = 0

while distance < 25:
    start_and_finish = gdf_nodes.sample(2)

    osmid_1 = int(start_and_finish.iloc[0]["osmid"])
    osmid_2 = int(start_and_finish.iloc[1]["osmid"])

    coords_1 = start_and_finish.iloc[0]["y"], start_and_finish.iloc[0]["x"]
    coords_2 = start_and_finish.iloc[1]["y"], start_and_finish.iloc[1]["x"]
    
    distance = geopy.distance.distance(coords_1, coords_2).km

print(f"osmid_1: {osmid_1}, osmid_2: {osmid_2}, distance: {distance} km")

osmid_1: 2271235782, osmid_2: 7036947138, distance: 44.44996557714789 km


In [35]:
# # zakamycze, betel
# node_nr_1 = 356926768
# node_nr_2 = 2104495834

# random 25km apart
node_nr_1 = 357538738
node_nr_2 = 9460794927

#long distance
node_nr_1 = 4538716538
node_nr_2 = 432648629

# random 300km apart
# node_nr_1 = 7576736742
# node_nr_2 = 3041989068

In [36]:
with driver.session() as session:
    session.execute_write(dijkstra_query, node_nr_1, node_nr_2)


        MATCH (source:Intersection {osmid: 4538716538}) 
        MATCH (target:Intersection {osmid: 432648629})
        CALL apoc.algo.dijkstra(source, target, "", "length")
        YIELD path, weight
        RETURN path, weight
        
[{'path': <Path start=<Node element_id='4:13cda273-def0-4740-9b9a-bb3ab19f712a:3002880' labels=frozenset({'Intersection'}) properties={'distance': 2, 'osmid': 4538716538, 'latitude': 50.8102762, 'longitude': 19.1043495}> end=<Node element_id='4:13cda273-def0-4740-9b9a-bb3ab19f712a:7016244' labels=frozenset({'Intersection'}) properties={'distance': 1, 'osmid': 432648629, 'latitude': 49.6197581, 'longitude': 20.6938179}> size=6479>, 'weight': 197753.47900000022}]
result_consumed_after: 592225 ms
result_available_after: 90 ms



In [34]:
with driver.session() as session:
    session.execute_write(astar_query, node_nr_1, node_nr_2)


        MATCH (source:Intersection {osmid: 4538716538}) 
        MATCH (target:Intersection {osmid: 432648629})
        CALL apoc.algo.aStar(source, target, "", "length", "latitude", "longitude")
        YIELD path, weight
        RETURN path, weight
        
[{'path': <Path start=<Node element_id='4:13cda273-def0-4740-9b9a-bb3ab19f712a:3002880' labels=frozenset({'Intersection'}) properties={'distance': 2, 'osmid': 4538716538, 'latitude': 50.8102762, 'longitude': 19.1043495}> end=<Node element_id='4:13cda273-def0-4740-9b9a-bb3ab19f712a:7016244' labels=frozenset({'Intersection'}) properties={'distance': 1, 'osmid': 432648629, 'latitude': 49.6197581, 'longitude': 20.6938179}> size=6479>, 'weight': 197753.47900000022}]
result_consumed_after: 11104 ms
result_available_after: 28 ms

