In [None]:
import osmium
import sys
from neo4j import GraphDatabase, basic_auth, unit_of_work
from copy import deepcopy
from math import radians, cos, sin, asin, sqrt
import os
from dotenv import load_dotenv

In [None]:
# OSM extract (.osm.pbf) that is later passed to FileHandler.apply_file().
# Despite the variable name, the active value is a relative file path, not a URL.
# Alternative source kept for reference (a remote Geofabrik extract):
# url = 'http://download.geofabrik.de/europe/monaco-latest.osm.pbf'
url = 'sofia_bulgaria.osm.pbf'

In [None]:
# Connection settings are read from the environment variables CON_STRING,
# CON_USER and CON_PASS; load_dotenv() runs first so that (presumably) values
# from a local .env file are available too. A missing variable yields None here.
load_dotenv()
driver = GraphDatabase.driver(
    os.environ.get('CON_STRING'),
    auth=basic_auth(
        os.environ.get('CON_USER'),
        os.environ.get('CON_PASS'),
    ),
)

In [None]:
class GraphNode:
    """Graph vertex: an identifier plus a longitude/latitude pair.

    ``location`` is rendered with ``str()`` and must look like
    ``"<longitude>/<latitude>"``; both halves are parsed as floats, so any
    other rendering raises ``ValueError``.
    """

    def __init__(self, node_id, location):
        # Copies are stored rather than the objects that were passed in.
        self.node_id = deepcopy(node_id)
        rendered = deepcopy(str(location))
        self.long, self.lat = (float(part) for part in rendered.split('/'))

    def __repr__(self):
        # Brace-delimited "key: value" listing; the surrounding notebook
        # interpolates this text, so the exact format matters.
        return '{{node_id: {}, lat: {}, long: {}}}'.format(self.node_id, self.lat, self.long)

In [None]:
class GraphEdge:
    """Directed edge between two node ids, with a cost derived from its inputs.

    ``cost`` is ``distance / rating``, so a higher rating makes the edge
    cheaper and a rating of 0 raises ``ZeroDivisionError``.
    """

    # Attribute names in the order they appear in the textual representation.
    _FIELDS = ('start_node_id', 'end_node_id', 'distance', 'rating', 'cost')

    def __init__(self, start_node_id, end_node_id, distance, rating):
        self.start_node_id, self.end_node_id = start_node_id, end_node_id
        self.distance, self.rating = distance, rating
        self.cost = self.distance / self.rating

    def __repr__(self):
        # Brace-delimited "key: value" listing; the surrounding notebook
        # interpolates this text, so the exact format matters.
        listing = ', '.join(f'{name}: {getattr(self, name)}' for name in self._FIELDS)
        return '{' + listing + '}'

In [None]:
def split_list_to_batches(initial_list, batch_size = 4000):
    """Lazily yield consecutive slices of ``initial_list``.

    Every slice holds ``batch_size`` items except possibly the last one,
    which holds whatever remains. An empty input yields nothing.
    """
    offsets = range(0, len(initial_list), batch_size)
    yield from (initial_list[lo:lo + batch_size] for lo in offsets)

In [None]:

def haversine(node_a, node_b):
    """
    Calculate the great circle distance in kilometers between two points
    on the earth (specified in decimal degrees).

    Both arguments must expose ``lat`` and ``long`` attributes.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [node_a.long, node_a.lat, node_b.long, node_b.lat])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Floating-point rounding can push `a` marginally above 1 for (near-)antipodal
    # points, which would make asin() raise "math domain error"; clamp to 1.
    c = 2 * asin(min(1.0, sqrt(a)))
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r

In [None]:
class FileHandler(osmium.SimpleHandler):
    """osmium handler that converts OSM nodes and ways into graph objects.

    Each OSM node becomes a ``GraphNode`` and each pair of consecutive nodes
    in a way becomes a ``GraphEdge``; both are handed to ``neoHandler`` through
    its ``add_node`` / ``add_edge`` methods.
    """

    # Rating given to every generated edge (the file has no per-way rating source).
    DEFAULT_RATING = 4

    def __init__(self, neoHandler):
        super().__init__()
        self.neoHandler = neoHandler

    def node(self, n):
        """Handle one OSM node by storing it as a GraphNode."""
        self.neoHandler.add_node(GraphNode(n.id, n.location))

    def way(self, w):
        """Handle one OSM way by emitting an edge per consecutive node pair.

        Ways with fewer than two nodes produce no edge. Closed ways (first
        and last node reference are equal) are skipped entirely.
        """
        way_nodes = w.nodes
        node_count = len(way_nodes)
        # Guard the empty case before indexing [0] / [-1].
        if node_count < 2 or way_nodes[0].ref == way_nodes[-1].ref:
            return

        # NOTE(review): GraphNode parses str(location); a way node without a
        # usable location presumably raises ValueError here - confirm.
        start = GraphNode(way_nodes[0].ref, way_nodes[0].location)
        for i in range(1, node_count):
            end = GraphNode(way_nodes[i].ref, way_nodes[i].location)
            distance = haversine(start, end)
            edge = GraphEdge(start.node_id, end.node_id, distance, self.DEFAULT_RATING)
            self.neoHandler.add_edge(edge)
            # The end of this segment is the start of the next one, so each
            # way node is converted only once.
            start = end

In [None]:
def batch_nodes(session, nodes, batch_size=10000):
    """Create one ``:Node`` in Neo4j per element of ``nodes``.

    The nodes are written in batches of ``batch_size``, each batch in its own
    explicitly committed transaction opened on ``session``.

    Every element of ``nodes`` must expose ``node_id``, ``lat`` and ``long``
    attributes. The values are sent as a query parameter instead of being
    pasted into the Cypher text, so the query text is identical for every
    batch and does not depend on the objects' ``repr``.
    """
    query = '''
        UNWIND $batch AS node
        CREATE (
            :Node{
                node_id:node.node_id,
                lat:node.lat,
                long:node.long
            }
        )
    '''
    for i, batch in enumerate(split_list_to_batches(nodes, batch_size)):
        # Plain dicts are what the driver can serialise as Cypher maps.
        rows = [
            {'node_id': node.node_id, 'lat': node.lat, 'long': node.long}
            for node in batch
        ]
        with session.begin_transaction() as tx:
            print(f'Start node transaction {i}')
            tx.run(query, batch=rows)
            tx.commit()
            print(f'Close node transaction {i}')

In [None]:
def batch_edges(session, edges, batch_size=100):
    """Create one ``:Route`` relationship in Neo4j per element of ``edges``.

    The edges are written in batches of ``batch_size``, each batch in its own
    explicitly committed transaction opened on ``session``. For every edge the
    two ``:Node`` endpoints are matched by ``node_id``; an edge whose endpoints
    are not both found produces no relationship.

    Every element of ``edges`` must expose ``start_node_id``, ``end_node_id``,
    ``distance``, ``rating`` and ``cost`` attributes. The values are sent as a
    query parameter instead of being pasted into the Cypher text, so the query
    text is identical for every batch and does not depend on the objects' ``repr``.
    """
    # NOTE(review): each row performs two lookups on :Node(node_id); no index
    # on that property is created in the visible code - adding one would
    # presumably speed this up considerably.
    query = '''
        UNWIND $batch AS edge
        MATCH
            (a: Node{
                node_id: edge.start_node_id
            }),
            (b: Node{
                node_id: edge.end_node_id
            })
        CREATE (a)-[r:Route {
            distance: edge.distance,
            rating: edge.rating,
            cost: edge.cost
        }]->(b)
    '''
    for i, batch in enumerate(split_list_to_batches(edges, batch_size)):
        # Plain dicts are what the driver can serialise as Cypher maps.
        rows = [
            {
                'start_node_id': edge.start_node_id,
                'end_node_id': edge.end_node_id,
                'distance': edge.distance,
                'rating': edge.rating,
                'cost': edge.cost,
            }
            for edge in batch
        ]
        with session.begin_transaction() as tx:
            print(f'Start edge transaction {i}')
            tx.run(query, batch=rows)
            tx.commit()
            print(f'Close edge transaction {i}')

In [None]:
class Neo4jHandler:
    """Collects graph nodes and edges in memory and writes them to Neo4j.

    Args:
        neo4j_driver: driver used for the upload. When omitted, the
            module-level ``driver`` is used, as before.
    """

    def __init__(self, neo4j_driver=None):
        # Fall back to the notebook-wide driver only when none is injected,
        # so the handler can be used (and tested) with its own driver.
        self.driver = driver if neo4j_driver is None else neo4j_driver
        self.nodes = []
        self.edges = []

    def add_node(self, node):
        """Queue ``node`` for the next ``create_objects`` call."""
        self.nodes.append(node)

    def add_edge(self, edge):
        """Queue ``edge`` for the next ``create_objects`` call."""
        self.edges.append(edge)

    def create_objects(self):
        """Write all queued nodes, then all queued edges, in one session."""
        with self.driver.session() as session:
            # Nodes go first: edge creation matches on nodes already stored.
            batch_nodes(session, self.nodes)
            batch_edges(session, self.edges)

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

In [None]:
# WARNING: destructive. The query matches every node in the database and
# DETACH DELETEs it, i.e. removes all nodes together with their relationships,
# so the import below starts from an empty graph.
with driver.session() as session:
    session.run('MATCH (n) DETACH DELETE n')

In [None]:
%%time
# Parse the OSM file into in-memory node/edge lists, then upload them to Neo4j.
neo_handler = Neo4jHandler()
h = FileHandler(neo_handler)

# NOTE(review): locations=True presumably makes osmium attach coordinates to
# way nodes, which FileHandler.way() reads - confirm against the pyosmium docs.
h.apply_file(url, locations=True)
print(f'Nodes: {len(neo_handler.nodes)}')
print(f'Edges: {len(neo_handler.edges)}')

neo_handler.create_objects()
# close() closes the driver held by the handler, which is the module-level
# `driver`. NOTE(review): the driver cell likely has to be re-run before
# `driver` can be used again (e.g. when re-running this notebook's cells).
neo_handler.close()
print('finished')