In [26]:
import numpy as np
import pandas as pd
import regex as re
import xml.dom.minidom as xml
import xml.etree.ElementTree as ET
import csv
import codecs
from math import radians, cos, sin, asin, sqrt

In [68]:
def haversine_distance(lon1, lat1, lon2, lat2, unit_m=True):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)
    default unit : km
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of the Earth in kilometers. Use 3956 for miles
    if unit_m:
        r *= 1000
    return c * r

def add_vertex(v):
    global graph
    global vertices_no
    if v in graph:
        print("")
    else:
        vertices_no = vertices_no + 1
        graph[v] = []

# Add an edge between vertex v1 and v2 with edge weight e
def add_edge(v1, v2, e):
    global graph
  # Check if vertex v1 is a valid vertex
    if v1 not in graph:
        print('')
  # Check if vertex v2 is a valid vertex
    
    elif v2 not in graph:
        print('')
    else:
    # Since this code is not restricted to a directed or 
    # an undirected graph, an edge between v1 v2 does not
    # imply that an edge exists between v2 and v1
        temp = [v2, e]
        graph[v1].append(temp)

# Print the graph
def print_graph():
    global graph
    for vertex in graph:
        for edges in graph[vertex]:
            print(vertex, " -> ", edges[0], " edge weight: ", edges[1])



In [69]:
file = open('josm.txt')
lines = file.readlines()

nodes = {}
way = []
ways = {}
checkpoints = {}

way_flag = 0 ## to understand <way is closed by </way>
save_flag = 0 ## to save

for line in lines: ## iterate over the osm file 

    if (way_flag == 1):

        if(line.find("k=\'highway\'") != -1): ## if highway
            if(line.find("v=\'footway\'") == -1): ## if not footway
                save_flag = 1 ## save it when find /way
                continue
            continue
        
        elif(line.find("</way>")) != -1:

            way_flag = 0
            if(save_flag == 1): ## if we found way closing tag and its highway not footway save
                ways[way[0]] = way[1:]
                save_flag = 0
            way = []

        elif(line.find("<nd ref")) != -1:
            
            x = re.findall(r'\d+.\d+',line) ## extract digits from the string
            way.append(x[0])
            if x[0] in nodes.keys():
                node = nodes[x[0]]
                node[2] = node[2]+1
                node.append(way[0])

        
    ## NODE ADDING ##
    elif (line.find("<node id=") != -1): ## if the line starts with <node id

        node = [] 
        x = re.findall(r'\d+.\d+',line) ## extract digits from the string
        node.append(x[-2]) ## lat
        node.append(x[-1]) ## lon
        node.append(0)
        nodes[x[0]] = node ## add to nodes list
    
    ## WAY ID ADDING ##
    elif (line.find("<way id=") != -1 and way_flag == 0): ## if the line starts with <way id    
        x = re.findall(r'\d+.\d+',line)
        way.append(x[0])
        way_flag = 1
        continue


In [70]:
for key, value in nodes.items():
    if value[2] > 1:
        checkpoints[key] = value
    

### This List Contains nodes with their lat lon, how many way they are in, and id's of the ways

for key, value in nodes.items():
    print(key,value)

### This list contains the nodes and their adjacent nodes

In [72]:
adj_list = {}

for key, value in ways.items():
    length = len(value)
    i = 0
    while i < length-1:
        temp_list = []
        if value[i] in adj_list.keys():
            temp_list = adj_list[value[i]]
        
        temp_list.append(value[i+1])
        adj_list[value[i]] = temp_list
        i+=1
    

### This list contains the nodes and their adjacent nodes

for key, value in adj_list.items():
    print(key,':',value)

In [75]:
graph = {}
vertices_no = 0

for key, val in adj_list.items():
    length = len(value)
    lat1 = float(nodes[key][0])
    lon1 = float(nodes[key][1])
    print(val)
    add_vertex(key)
    for node in val:
        lat2 = float(nodes[node][0])
        lon2 = float(nodes[node][1])
        distance = haversine_distance(lon1,lat1,lon2,lat2)
        add_edge(key,node,distance)
        
print_graph()

['29515666']

['29515667']

['2024879435']

['29515669']

['29515670']

['29515671']

['4022352264']

['29515672']

['3155432540']

['29515673']

['29515674']

['29515675']

['29515677']

['29515678']

['29515679']

['3155432538']

['29515680']

['29515681']

['3155432537']

['29515682']

['29515683']

['29515685']

['3155432535']

['29515686']

['29515687']

['3155425773']

['29515688']

['2024879288', '3155425775']


['29515690']

['29515691']

['29515692']

['29515693']

['29515695', '2024879370']


['29515696']

['3155425767']

['29515698']

['29515699']

['29515700']

['29515703']

['29515704']

['29515705']

['29515707']

['29515708']

['29515709', '2024879404']


['29515710']

['29515711']

['29515712', '2024879436']


['29515713']

['3155425764']

['29515715']

['2024879574']

['29515717', '4088504253']


['29515718']

['2024879744']

['2024879788', '2024879755']


['2024879831', '2024879807']


['8982649766', '2024879881']


['8982649764', '8982649767']


['29515721', '8982649


['3742407774']

['3742407786']

['3742407789']

['3742407784']

['3742407778']

['3742407779']

['3742407791']

['3742407802']

['3742407821']

['3742407941']

['3742407946']

['3742407948']

['3742407952']

['3742407956']

['3742407962']

['3742407969']

['3742407972']

['3742407975']

['3742407978']

['3742407986']

['3742408003']

['3742408029']

['3742408040']

['3742406908']

['3742406917']

['3742406913']

['3742408147']
['3742407982', '2024879058']

['3742407980']

['3742407939', '3742407957']


['3742407943']

['3742407332']

['3742407367']

['3742407384', '3742407380']


['3742407409']
['3742407520']

['3742407727']

['3742407732']

['3742407738']

['3742407650']

['3742407662']

['3742407675']

['3742407683']

['3742407702']

['3742407720']

['3742407826']

['3742407828']

['3742407723']

['3742407832']

['3742407746']

['3742407761']

['3742407808']

['3742407936']

['3742407949']

['3742407966']

['3742408001']

['3742408013']
['3742406914']

['4584956977']

['3742407355']

['3155432542']

['2024879416']

['3742408370']

['3742408360']

['3742408336']

['3742408223']

['3742408217']

['3742408210']

['8420201498']
['8420201492']

['8420201499']

['8420201500', '8420201499']

['2024878941']
['8979447131']

['8979447132']

['8979447133']

['8979447134']

['8979447135']

['8979447136']
803423908  ->  2024880304  edge weight:  11.183040834380149
2024879792  ->  2024879740  edge weight:  59.65252017252645
2024879805  ->  2024879792  edge weight:  83.32672851992898
2024879786  ->  2024879788  edge weight:  22.40266592588447
2024879723  ->  2024879752  edge weight:  47.897541899780016
2024879916  ->  2024880123  edge weight:  120.54788033724607
2024879645  ->  2024879651  edge weight:  63.90298817611831
2024879657  ->  2024879746  edge weight:  58.04447121845044
2024879674  ->  2024879676  edge weight:  28.42546030930742
2024879471  ->  2024879469  edge weight:  10.505957066846403
2024879386  ->  2024879395  edge weight:  26.693988147605626
2024880216  ->  20248