In [2]:
import osmium
import sys
from neo4j import GraphDatabase, basic_auth, unit_of_work
from copy import deepcopy
from math import radians, cos, sin, asin, sqrt
import os
from dotenv import load_dotenv
from time import time
from utils import *

In [None]:
from logging import getLogger, StreamHandler, DEBUG
handler = StreamHandler()
handler.setLevel(DEBUG)
getLogger("neo4j").addHandler(handler)

In [12]:
# url = 'http://download.geofabrik.de/europe/monaco-latest.osm.pbf'
url = 'sofia_bulgaria.osm.pbf'

In [3]:
load_dotenv()
driver = GraphDatabase.driver(os.getenv('CON_STRING'),
                              auth=basic_auth(os.getenv('CON_USER'), os.getenv('CON_PASS')))

In [14]:
class FileHandler(osmium.SimpleHandler):
    def __init__(self, neoHandler):
        super(FileHandler, self).__init__()
        self.neoHandler = neoHandler

    def node(self, n):
        node = GraphNode(n.id, n.location)
        self.neoHandler.add_node(node)

    def way(self, w):
        if w.nodes[0].ref == w.nodes[-1].ref:
            return
        for i in range(len(w.nodes)-1):
            start = GraphNode(w.nodes[i].ref, w.nodes[i].location)
            end = GraphNode(w.nodes[i+1].ref, w.nodes[i+1].location)
            distance = haversine(start, end)
            edge = GraphEdge(start.node_id, end.node_id, distance, 4)
            self.neoHandler.add_edge(edge)

In [28]:
def batch_nodes(session, nodes, batch_size=7500):
    for i, batch in enumerate(split_list_to_batches(nodes, batch_size)):
        if (i*batch_size >= 500000):
            return
        start_time = time()
        print(f'Start node transaction {i:>3}')
        session.run(f'''
            FOREACH (
                node IN {str(batch)} |
                CREATE (
                    :Node{{
                        node_id:node.node_id,
                        lat:node.lat,
                        long:node.long
                    }}
                )
            )
        ''')
        print(f'Close node transaction {i:>3} Elapsed time: {time()  - start_time:.2f} seconds')

In [29]:
def batch_edges(session, edges, batch_size=200):
    return
    for i, batch in enumerate(split_list_to_batches(edges, batch_size)):
        start_time = time()
        print(f'Start edge transaction {i:>3}')
        session.run(f'''
            UNWIND {str(batch)} as edge
            MATCH
                (a: Node{{
                    node_id: edge.start_node_id
                }}),
                (b: Node{{
                    node_id: edge.end_node_id
                }})
            CREATE (a)-[r:Route {{
                distance: edge.distance,
                rating: edge.rating,
                cost: edge.cost
            }}]->(b)
        ''')
        print(f'Close edge transaction {i:>3} Elapsed time: {time() - start_time:.2f} seconds')

In [37]:
class Neo4jHandler():
    def __init__(self, driver):
        self.driver = driver
        self.nodes = []
        self.edges = []

    def add_node(self, node):
        self.nodes.append(node)

    def add_edge(self, edge):
        self.edges.append(edge)

    def create_objects(self):
        with self.driver.session() as session:
            batch_nodes(session, self.nodes, 7500)
            batch_edges(session, self.edges)
            self.close()

    def close(self):
        self.driver.close()

In [18]:
with driver.session() as session:
    session.run('MATCH (n) DETACH DELETE n')

In [38]:
%%time
neo_handler = Neo4jHandler(driver)
h = FileHandler(neo_handler)

h.apply_file(url, locations=True)
print(f'Nodes: {len(neo_handler.nodes)}')
print(f'Edges: {len(neo_handler.edges)}')

neo_handler.create_objects()
print('finished')

RuntimeError: Way callback keeps reference to OSM object. This is not allowed.