# 0. Configuration

We use the ``Euroroads`` dataset, a graph representing the international E-road network, a road network located mostly in Europe.

Vertices represent cities and an edge between two vertices denotes that they are connected by an E-road.

It has 1,174 vertices and 1,417 edges.

It is undirected and unweighted (we are going to add edge weights to it by computing the distances between connected cities; see below).

The dataset is publicly available [here](http://konect.cc/networks/subelj_euroroad/) (from the [``KONECT``](http://konect.cc/) network data repository).

In [1]:
# Directory that holds all the Euroroads dataset files used in this notebook.
data_dir = 'data/euroroads'

# Graph file of the road network, plus the options passed to load_graph_nx when parsing it:
# `sep` is the token delimiter, `directed` selects DiGraph vs Graph,
# and `header` tells the loader to skip the leading header lines.
graph_filename = 'out.subelj_euroroad_euroroad'
graph_path = f'{data_dir}/{graph_filename}'
sep = ' '
directed = False
header = True

# City-name file: one name per line; load_metadata uses the 1-based line number as the vertex id.
metadata_filename = 'ent.subelj_euroroad_euroroad.city.name'
metadata_path = f'{data_dir}/{metadata_filename}'

In [2]:
import time
from collections import defaultdict
import networkx as nx

# 1. Case study on shortest path discovery

### 1.1 Data loading

In [3]:
def load_metadata(metadata_path):
    """Read the city-name file and build the two vertex-id <-> city-name lookup tables.

    The file is expected to contain one city name per line. The 1-based line
    number is used as the vertex id, and names are stripped and lower-cased.

    Parameters
    ----------
    metadata_path : str
        Path of the UTF-8 encoded city-name file.

    Returns
    -------
    (id2cityname, cityname2id) : tuple of defaultdict
        ``id2cityname`` maps int id -> str name (missing ids yield ``''``);
        ``cityname2id`` maps str name -> int id (missing names yield ``0``,
        which is never a valid id because ids start at 1).
        If a name occurs more than once, ``cityname2id`` keeps the last id.
    """
    # The default factories match the value type of each table.
    id2cityname = defaultdict(str)
    cityname2id = defaultdict(int)
    # The `with` block closes the file; no explicit close() is needed.
    with open(metadata_path, encoding='utf8') as f:
        for city_id, line in enumerate(f, start=1):
            cityname = line.strip().lower()
            id2cityname[city_id] = cityname
            cityname2id[cityname] = city_id
    return id2cityname, cityname2id

def load_graph_nx(graph_path,sep,directed,header,header_lines=2):
    """Load a graph from a delimited text file into a NetworkX graph.

    Parameters
    ----------
    graph_path : str
        Path of the graph file.
    sep : str
        Delimiter between the tokens of a line.
    directed : bool
        If true an ``nx.DiGraph`` is built, otherwise an ``nx.Graph``.
    header : bool
        If true, the leading header lines are skipped before parsing.
    header_lines : int, optional
        Number of header lines to skip when ``header`` is true (default 2).

    Returns
    -------
    nx.Graph or nx.DiGraph
        Graph with integer node labels.

    Notes
    -----
    The file is parsed with ``nx.read_adjlist``: the first token of a line is a
    node and the remaining tokens are its neighbours. For a two-column file
    this yields one edge per line.
    """
    graph_type = nx.DiGraph if directed else nx.Graph
    # The `with` block closes the file; no explicit close() is needed.
    with open(graph_path, 'rb') as f:
        if header:
            for _ in range(header_lines):
                # the None default avoids StopIteration on a file shorter than the header
                next(f, None)
        return nx.read_adjlist(f, delimiter=sep, create_using=graph_type, nodetype=int)

In [4]:
# Build the vertex-id -> city-name and city-name -> vertex-id lookup tables.
id2cityname, cityname2id = load_metadata(metadata_path)

### 1.2 Getting latitude-longitude coordinates for cities
We use the [``GeoPy``](https://geopy.readthedocs.io/) Python package for this (latest version as of Mar 2024: 2.4.1).

In particular, we use the open-source geocoding API provided by the [``Nominatim``](https://nominatim.org/) tool, which uses ``OpenStreetMap`` data to find locations on Earth by name and address.

In [5]:
#!pip install geopy #installing geopy package (latest version as of Mar 2025: 2.4.1)

In [6]:
import geopy
from geopy.geocoders import Nominatim

import certifi, ssl

In [7]:
def compute_latitude_longitude(citynames, user_agent='MyApp', timeout=1000, min_delay_seconds=1.0):
    """Geocode each city name with the Nominatim API.

    Requests are throttled so that a long list of cities does not hammer the
    public geocoding service. Transient service errors are retried by the
    rate limiter; if they persist, the exception propagates to the caller.

    Parameters
    ----------
    citynames : iterable of str
        Names of the cities to geocode.
    user_agent : str, optional
        User-agent string sent to Nominatim (default ``'MyApp'``).
    timeout : int or float, optional
        Per-request timeout in seconds (default 1000).
    min_delay_seconds : float, optional
        Minimum delay between two consecutive requests (default 1.0).

    Returns
    -------
    dict
        Maps each city name to ``(latitude, longitude)``, or to
        ``(None, None)`` when no location was found (a message is printed).
    """
    # Local import: RateLimiter ships with geopy, which this notebook already depends on.
    from geopy.extra.rate_limiter import RateLimiter

    # Verify HTTPS connections against the certifi CA bundle.
    ctx = ssl.create_default_context(cafile=certifi.where())
    geopy.geocoders.options.default_ssl_context = ctx
    geolocator = Nominatim(user_agent=user_agent) #initializing the Nominatim API
    # swallow_exceptions=False: after the retries are exhausted the error is raised, not hidden.
    geocode = RateLimiter(geolocator.geocode, min_delay_seconds=min_delay_seconds, swallow_exceptions=False)
    city2latlong = {} #output dictionary storing latitude-longitude coordinates of cities
    for city in citynames:
        city_location = geocode(city, timeout=timeout)
        if city_location:
            city2latlong[city] = (city_location.latitude, city_location.longitude)
        else:
            print('No geolocation found for city ' + city)
            city2latlong[city] = (None, None)
    return city2latlong

def dump_geolocation(city2latlong, geolocation_path):
    """Write the city -> (latitude, longitude) table to a tab-separated text file.

    The first line is the header ``CITY<TAB>LATITUDE<TAB>LONGITUDE``. Every
    following line holds the lower-cased city name and the string form of its
    two coordinates. The file does not end with a newline.

    Parameters
    ----------
    city2latlong : dict
        Maps a city name to a ``(latitude, longitude)`` pair.
    geolocation_path : str
        Path of the output file (overwritten if it exists).
    """
    with open(geolocation_path, 'w', encoding='utf8') as output:
        output.write('CITY\tLATITUDE\tLONGITUDE')
        for name, coords in city2latlong.items():
            row = '\t'.join((name.lower(), str(coords[0]), str(coords[1])))
            output.write('\n' + row)

def load_latitude_longitude(geolocation_path):
    """Read the tab-separated geolocation file written by ``dump_geolocation``.

    The first line (header) is skipped and blank lines are ignored. Each
    remaining line must hold ``city<TAB>latitude<TAB>longitude``.

    Parameters
    ----------
    geolocation_path : str
        Path of the UTF-8 encoded geolocation file.

    Returns
    -------
    dict
        Maps the lower-cased city name to ``(float latitude, float longitude)``,
        or to ``(None, None)`` if either coordinate is the string ``'None'``.
    """
    city2latlong = {}
    # The `with` block closes the file; no explicit close() is needed.
    with open(geolocation_path, encoding='utf8') as f:
        next(f, None)  # skip the header row
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline at end of file
            tokens = line.rstrip('\r\n').split('\t')
            # Strip the coordinate tokens so that the comparison with 'None' is not
            # defeated by the line terminator or stray whitespace.
            city, latitude, longitude = tokens[0], tokens[1].strip(), tokens[2].strip()
            if latitude != 'None' and longitude != 'None':
                city2latlong[city.lower()] = (float(latitude), float(longitude))
            else:
                city2latlong[city.lower()] = (None, None)
    return city2latlong

In [9]:
# city2latlong = compute_latitude_longitude(cityname2id.keys())

No geolocation found for city gotthard pass
No geolocation found for city zhetybai
No geolocation found for city burubaytal
No geolocation found for city maikapshagai
No geolocation found for city jirgatal
No geolocation found for city kristiansund mainland connection
No geolocation found for city karavanke tunnel
No geolocation found for city tolpaki
No geolocation found for city kristalopigi
No geolocation found for city mehgri
No geolocation found for city djulfa
No geolocation found for city kzylorda
No geolocation found for city kalaikhumb
No geolocation found for city chundzha
No geolocation found for city bakhty
No geolocation found for city glukhkov
No geolocation found for city djoubga
No geolocation found for city torniyiszentmiklós


In [8]:
# File used to cache the geocoded coordinates (written by dump_geolocation, read by load_latitude_longitude).
geolocation_filename = 'city.name.geolocation'
geolocation_path = f'{data_dir}/{geolocation_filename}'

In [9]:
# dump_geolocation(city2latlong, geolocation_path)

In [10]:
# Read the city coordinates from the cached geolocation file.
city2latlong = load_latitude_longitude(geolocation_path)

### 1.3 Loading road network and computing distances among cities

In [11]:
from geopy.distance import geodesic as GD

In [12]:
# Load the road network with the parsing options set in the configuration cell.
graph = load_graph_nx(graph_path,sep,directed,header)
# Cell output: (number of vertices, number of edges).
graph.number_of_nodes(), graph.number_of_edges()

(1174, 1417)

In [13]:
#getting subgraph induced by cities with latitude-longitude coordinates assigned
# Coordinates are tested with `is not None` rather than by truthiness, so a city whose
# latitude or longitude is exactly 0.0 is not mistaken for a city without coordinates.
valid_cities = [
    cityname2id[city]
    for city, (latitude, longitude) in city2latlong.items()
    if latitude is not None and longitude is not None
]

subgraph = graph.subgraph(valid_cities)
# Cell output: (number of vertices, number of edges) of the induced subgraph.
subgraph.number_of_nodes(), subgraph.number_of_edges()

(1156, 1386)

In [14]:
#computing distances among cities and adding them as edge weights
edge2weight = {}
for u, v in subgraph.edges():
    # geodesic distance, in km, between the coordinates of the two endpoint cities
    coords_u = city2latlong[id2cityname[u]]
    coords_v = city2latlong[id2cityname[v]]
    edge2weight[(u, v)] = GD(coords_u, coords_v).km

# store every distance on its edge under the 'weight' attribute
nx.set_edge_attributes(subgraph, name='weight', values=edge2weight)

In [15]:
#sanity check: the weight stored on the rome-genoa edge must equal the recomputed distance
rome_id, genoa_id = cityname2id['rome'], cityname2id['genoa']
d1 = GD(city2latlong['rome'], city2latlong['genoa']).km
d2 = subgraph[rome_id][genoa_id]['weight']
d1, d2

(401.59859649457826, 401.59859649457826)

### 1.4 Computing shortest paths

See [here](https://networkx.org/documentation/stable/reference/algorithms/shortest_paths.html), [here](https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.generic.shortest_path.html#networkx.algorithms.shortest_paths.generic.shortest_path) and [here](https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.generic.shortest_path_length.html#networkx.algorithms.shortest_paths.generic.shortest_path_length) for documentation.

In [16]:
# auxiliary function to convert a path into a string
def path2str(path,id2cityname):
    """Render a path as ``'city_a -> city_b -> ...'``.

    Parameters
    ----------
    path : sequence
        Vertex ids, in visiting order.
    id2cityname : mapping
        Maps a vertex id to its city name (a string).

    Returns
    -------
    str
        City names joined by ``' -> '``; the empty string for an empty path.
    """
    return ' -> '.join(id2cityname[node] for node in path)

#### 1.4.1 Single-Pair Shortest Path (SPSP)

In [17]:
# (source city, target city) queries, in the order in which they are reported below
spsp_queries = [
    ('rome', 'genoa'),
    ('rome', 'barcelona'),
    ('rome', 'bari'),
    ('barcelona', 'belgrade'),
    ('barcelona', 'nantes'),
]

def _weighted_shortest_path(source_city, target_city, method):
    """Return (path, total distance) of the distance-weighted shortest path found by `method`."""
    path = nx.shortest_path(subgraph, source=cityname2id[source_city], target=cityname2id[target_city], weight='weight', method=method)
    return path, nx.path_weight(subgraph, path, weight='weight')

def _unweighted_shortest_path(source_city, target_city):
    """Return the shortest path in number of hops (edge weights ignored)."""
    return nx.shortest_path(subgraph, source=cityname2id[source_city], target=cityname2id[target_city], weight=None, method='dijkstra')

# distance-weighted shortest paths computed with Dijkstra
dijkstra_results = [_weighted_shortest_path(src, dst, 'dijkstra') for src, dst in spsp_queries]
(sp1, sp1_weight), (sp2, sp2_weight), (sp3, sp3_weight), (sp4, sp4_weight), (sp5, sp5_weight) = dijkstra_results

# same queries with Bellman-Ford
bellmanford_results = [_weighted_shortest_path(src, dst, 'bellman-ford') for src, dst in spsp_queries]
(sp1_bf, sp1_bf_weight), (sp2_bf, sp2_bf_weight), (sp3_bf, sp3_bf_weight), (sp4_bf, sp4_bf_weight), (sp5_bf, sp5_bf_weight) = bellmanford_results

# same queries ignoring the edge weights (shortest in number of hops)
noweight_results = [_unweighted_shortest_path(src, dst) for src, dst in spsp_queries]
sp1_noweight, sp2_noweight, sp3_noweight, sp4_noweight, sp5_noweight = noweight_results

print()
for path, total_distance in dijkstra_results:
    print(path2str(path,id2cityname) + ', TOTAL DISTANCE: ' + str(total_distance))

print()
for path, total_distance in bellmanford_results:
    print(path2str(path,id2cityname) + ', TOTAL DISTANCE: ' + str(total_distance))

print()
for path in noweight_results:
    # a path with n vertices has n-1 edges
    print(path2str(path,id2cityname) + ', LENGTH: ' + str(len(path)-1))


rome -> genoa, TOTAL DISTANCE: 401.59859649457826
rome -> genoa -> nice -> toulouse -> barcelona, TOTAL DISTANCE: 1280.322572768997
rome -> pescara -> canosa di puglia -> bari, TOTAL DISTANCE: 411.15154776726297
barcelona -> toulouse -> nice -> cuneo -> asti -> alessandria -> tortona -> brescia -> verona -> mestre -> palmanova -> trieste -> ljubljana -> zagreb -> slavonski brod -> belgrade, TOTAL DISTANCE: 1945.3350500534987
barcelona -> toulouse -> orléans -> tours -> angers -> nantes, TOTAL DISTANCE: 1015.9958226507174

rome -> genoa, TOTAL DISTANCE: 401.59859649457826
rome -> genoa -> nice -> toulouse -> barcelona, TOTAL DISTANCE: 1280.322572768997
rome -> pescara -> canosa di puglia -> bari, TOTAL DISTANCE: 411.15154776726297
barcelona -> toulouse -> nice -> cuneo -> asti -> alessandria -> tortona -> brescia -> verona -> mestre -> palmanova -> trieste -> ljubljana -> zagreb -> slavonski brod -> belgrade, TOTAL DISTANCE: 1945.3350500534987
barcelona -> toulouse -> orléans -> tours 

In [18]:
# Total distance of the hop-minimal (unweighted) barcelona -> belgrade path, for comparison with the weighted shortest path.
print(path2str(sp4_noweight,id2cityname) + ', TOTAL DISTANCE: ' + str(nx.path_weight(subgraph, sp4_noweight, weight='weight')))

barcelona -> toulouse -> nice -> genoa -> tortona -> brescia -> verona -> mestre -> palmanova -> trieste -> ljubljana -> zagreb -> slavonski brod -> belgrade, TOTAL DISTANCE: 1947.53303969286


#### 1.4.2 All-Pairs Shortest Paths (APSP)

In [19]:
# Radius, in hops, of the neighbourhood extracted around 'barcelona'; the APSP experiments run on this smaller graph.
k = 5
subgraph_small = nx.ego_graph(subgraph, cityname2id['barcelona'], radius=k) #subgraph induced by k-hop neighborhood of 'barcelona' 
# Cell output: (number of vertices, number of edges).
subgraph_small.number_of_nodes(), subgraph_small.number_of_edges()

(89, 111)

In [20]:
# Names of the cities contained in the k-hop neighbourhood.
[id2cityname[i] for i in subgraph_small.nodes()]

['le havre',
 'paris',
 'orléans',
 'bordeaux',
 'san sebastián',
 'burgos',
 'madrid',
 'seville',
 'algeciras',
 'calais',
 'a coruña',
 'lyon',
 'orange',
 'chambéry',
 'narbonne',
 'girona',
 'barcelona',
 'tarragona',
 'castellón de la plana',
 'valencia',
 'alicante',
 'clermont-ferrand',
 'málaga',
 'murcia',
 'tortona',
 'almería',
 'brescia',
 'metz',
 'mulhouse',
 'lisbon',
 'valladolid',
 'toulouse',
 'nice',
 'angoulême',
 'saintes',
 'limoges',
 "pont-d'ain",
 'vercelli',
 'alessandria',
 'genoa',
 'amsterdam',
 'pontevedra',
 'valença',
 'porto',
 'albufeira',
 'castro marim',
 'huelva',
 'grenoble',
 'marseille',
 'rome',
 'la rochelle',
 'florence',
 'milan',
 'viseu',
 'chaves',
 'zaragoza',
 'vierzon',
 'montluçon',
 'montpellier',
 'salamanca',
 'mérida',
 'logroño',
 'san cesareo',
 'granada',
 'bilbao',
 'pescara',
 'turin',
 'charleroi',
 'amiens',
 'charleville-mézières',
 'rouen',
 'chaumont',
 'cuneo',
 'asti',
 'vila real',
 'bragança',
 'bruges',
 'ghent',
 '

In [21]:
#APSP - Dijkstra
# dict(...) keeps `apsp_dijkstra` indexable as apsp_dijkstra[source][target] on every NetworkX
# version: from 3.5 on, shortest_path with no source/target yields (node, paths) pairs lazily.
# Materialising inside the timed region also ensures the paths are actually computed there.
# perf_counter is a monotonic, high-resolution clock meant for measuring intervals.
start = time.perf_counter()
apsp_dijkstra = dict(nx.shortest_path(subgraph_small, source=None, target=None, weight='weight', method='dijkstra'))
end = time.perf_counter()
runtime = int(round((end-start)*1000))
print("Runtime for Dijkstra-based APSP: " + str(runtime) + " ms")

Runtime for Dijkstra-based APSP: 23 ms



shortest_path will return an iterator that yields
(node, path) pairs instead of a dictionary when source
and target are unspecified beginning in version 3.5

To keep the current behavior, use:

	dict(nx.shortest_path(G))


In [22]:
#APSP - Bellman-Ford
# dict(...) keeps `apsp_bellmanford` indexable as apsp_bellmanford[source][target] on every
# NetworkX version: from 3.5 on, shortest_path with no source/target yields (node, paths) pairs lazily.
# Materialising inside the timed region also ensures the paths are actually computed there.
# perf_counter is a monotonic, high-resolution clock meant for measuring intervals.
start = time.perf_counter()
apsp_bellmanford = dict(nx.shortest_path(subgraph_small, source=None, target=None, weight='weight', method='bellman-ford'))
end = time.perf_counter()
runtime = int(round((end-start)*1000))
print("Runtime for Bellman-Ford-based APSP: " + str(runtime) + " ms")

Runtime for Bellman-Ford-based APSP: 96 ms


In [23]:
#APSP - Floyd-Warshall
# floyd_warshall returns shortest-path distances (not the paths themselves) for all pairs.
# perf_counter is a monotonic, high-resolution clock meant for measuring intervals.
start = time.perf_counter()
apsp_floydwarshall = nx.floyd_warshall(subgraph_small, weight='weight')
end = time.perf_counter()
runtime = int(round((end-start)*1000))
print("Runtime for Floyd-Warshall-based APSP: " + str(runtime) + " ms")

Runtime for Floyd-Warshall-based APSP: 77 ms


In [24]:
#SANITY CHECK: both APSP results must report the same barcelona -> nantes path and distance
barcelona_id, nantes_id = cityname2id['barcelona'], cityname2id['nantes']
for label, apsp_paths in (('DIJKSTRA', apsp_dijkstra), ('BELLMAN-FORD', apsp_bellmanford)):
    path = apsp_paths[barcelona_id][nantes_id]
    total_distance = nx.path_weight(subgraph_small, path, weight='weight')
    print(label + ': ' + path2str(path,id2cityname) + ', TOTAL DISTANCE: ' + str(total_distance))

DIJKSTRA: barcelona -> toulouse -> orléans -> tours -> angers -> nantes, TOTAL DISTANCE: 1015.9958226507174
BELLMAN-FORD: barcelona -> toulouse -> orléans -> tours -> angers -> nantes, TOTAL DISTANCE: 1015.9958226507174
