In [1]:
import folium
import pickle
import pandas as pd
import networkx as nx
import numpy as np
from utils import linear_scaler

# Graph Analysis

Now that the graph is partially constructed, let's perform some analysis on it.

In [2]:
# Start by loading pickled data.
# NOTE: pickle can execute arbitrary code on load; only open files produced by
# this project's own processing pipeline.
# Context managers make sure the file handles are closed right after loading.
with open('data/processed/railgraph.pickle', 'rb') as graph_file:
    G = pickle.load(graph_file)
with open('data/processed/stop_id_to_name', 'rb') as names_file:
    stop_id_to_name = pickle.load(names_file)
edges_counts = pd.read_pickle('data/processed/edges_counts.pickle')
railway_stops = pd.read_pickle('data/processed/railway_stops.pickle')

## Betweenness Centrality

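The betweenness centrality of a node $v$ measures how often $v$ lies on the shortest paths between other pairs of nodes: $c_B(v) = \sum_{s \neq v \neq t} \frac{\sigma_{st}(v)}{\sigma_{st}}$, where $\sigma_{st}$ is the number of shortest paths from $s$ to $t$ and $\sigma_{st}(v)$ is the number of those paths that pass through $v$. The values are normalised to lie between 0 and 1. A stop with a high betweenness centrality acts as a bridge in the network: many of the shortest connections between other stops have to go through it.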

In [3]:
# Compute betweenness centrality of the graph, weighted by the 'daily_count' edge attribute
# (passing `weight` makes this a weighted computation, not an unweighted one).
# NOTE(review): networkx uses edge weights as distances when finding shortest paths, so
# edges with a high daily_count are treated as "longer" -- confirm this is the intended meaning.
btwn_centrality = nx.betweenness_centrality(G, weight = 'daily_count')

In [4]:
# Rank stops by centrality (highest first) and keep the ids of the first 20.
# A dict keys view is kept so that later membership checks stay O(1)
# while iteration still follows the ranking order.
ranked_stops = sorted(btwn_centrality.items(), key=lambda pair: pair[1], reverse=True)
btwn_centrality_20 = dict(ranked_stops[:20]).keys()

print('Top 20 stops with the highest betweenness centrality:')
for stop in btwn_centrality_20:
    stop_name, stop_score = stop_id_to_name[stop], btwn_centrality[stop]
    print('{:.<20} {:.4f}'.format(stop_name, stop_score))

Top 20 stops with the highest betweenness centrality:
Zürich HB........... 0.4122
Bern................ 0.2373
Olten............... 0.2013
Brig................ 0.1676
Basel SBB........... 0.1379
Sargans............. 0.1360
Lausanne............ 0.1340
Montreux............ 0.1276
Fribourg/Freiburg... 0.1162
Landquart........... 0.1066
Winterthur.......... 0.0971
Biel/Bienne......... 0.0909
St. Gallen.......... 0.0826
Luzern.............. 0.0790
Chur................ 0.0787
Solothurn........... 0.0687
Aigle............... 0.0659
Thun................ 0.0647
Amriswil............ 0.0647
Aarau............... 0.0590


We can see that the 20 most central nodes according to betweenness centrality are mostly major Swiss agglomerations. However, some smaller towns also have a fairly high centrality, e.g. Landquart, Sargans and Amriswil. These are probably smaller towns that many routes have to pass through because of their geographic location: for example, the stop might lie at the intersection of multiple lines.

In [5]:
# Store each stop's betweenness score on the graph as the 'centrality' node attribute
nx.set_node_attributes(G, values=btwn_centrality, name='centrality')

In [6]:
def scaler(min_, max_, lower, upper):
    """Build a function that linearly rescales values from [min_, max_] to [lower, upper].

    Parameters
    ----------
    min_, max_ : number
        Bounds of the input range; `min_` maps to `lower` and `max_` to `upper`.
    lower, upper : number
        Bounds of the output range.

    Returns
    -------
    callable
        A one-argument function mapping a value `x` to its rescaled value.
        If the input range is degenerate (`max_ == min_`), the returned
        function always yields `lower` instead of dividing by zero.
    """
    span = max_ - min_
    if span == 0:
        # Every input equals min_, which maps to the bottom of the output range.
        return lambda x: lower
    return lambda x: (((x - min_) * (upper - lower)) / span) + lower

# Rescale centrality scores from their observed range onto [1, 10]
centrality_scores = btwn_centrality.values()
btwn_scaler = scaler(min(centrality_scores), max(centrality_scores), 1, 10)

In [7]:
# Base map on which the stops are drawn
m = folium.Map(
    location=[46.771413, 8.471689],
    zoom_start=8,
    tiles='CartoDB Positron',
    height='80%',
)

# One circle per stop: the radius follows the scaled centrality and the
# top-20 stops are drawn in opaque green instead of faint blue.
for node, attrs in G.nodes(data=True):
    lat, lon = attrs['lat'], attrs['lon']
    is_top = node in btwn_centrality_20
    marker = folium.CircleMarker(
        location=[lat, lon],
        popup='{}\nCentrality: {:.2f}'.format(attrs['name'], attrs['centrality']),
        radius=btwn_scaler(attrs['centrality']),
        fill=True,
        color='green' if is_top else '#3388ff',
        opacity=1 if is_top else 0.3,
    )
    marker.add_to(m)

# Write a standalone HTML copy, then display the map inline
m.save("network_betweenness.html")
m