# Imports and Loading the Network

In [2]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

The graph is loaded from the `sbb.edgelist` file.
Make sure to generate this edge list first by running the `create_edgelist.py` script.

In [3]:
# Build an undirected graph from the semicolon-delimited edge list on disk.
G = nx.read_edgelist(
    "sbb.edgelist",
    delimiter=";",
    create_using=nx.Graph,
)

# 1. Network Exploration

In [4]:
# Report the overall size of the network.
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print(f"number of nodes: {node_count}")
print(f"number of edges: {edge_count}")

number of nodes: 3473
number of edges: 8610


## Degrees: Minimum and Maximum

In [5]:
# Map every node to its degree, then rank the (node, degree) pairs
# in ascending order of degree.
degrees = {node: degree for node, degree in G.degree()}

sorted_degrees = sorted(degrees.items(), key=lambda pair: pair[1])

In [7]:
# Nodes at the two ends of the ascending degree ranking.
min_degree = sorted_degrees[0]
max_degree = sorted_degrees[-1]
print(f"Minimum degree: {min_degree[0]} with degree {min_degree[1]}")
print(f"Maximum degree: {max_degree[0]} with degree {max_degree[1]}")

# Ten lowest-degree and ten highest-degree nodes (both listed in ascending order).
bottom10_min_degree = sorted_degrees[:10]
top10_max_degree = sorted_degrees[-10:]

print(f"Top 10 degree: {top10_max_degree}")
print(f"Bottom 10 degrees: {bottom10_min_degree}")

# Every node that has exactly one neighbour.
min_degree_nodes = [station for station, degree in sorted_degrees if degree == 1]
print("Number of stations with degree of 1:", len(min_degree_nodes))

Minimum degree: Uetliberg with degree 1
Maximum degree: Zürich HB with degree 102
Top 10 degree: [('Dijon', 52), ('Lausanne', 54), ('St. Gallen', 55), ('Winterthur', 64), ('Luzern', 65), ('Bern', 67), ('Olten', 71), ('Strasbourg', 73), ('Lyon Part Dieu', 75), ('Zürich HB', 102)]
Bottom 10 degrees: [('Uetliberg', 1), ('Orbe', 1), ('Ste-Croix', 1), ('St. Urban Ziegelei', 1), ('Arosa', 1), ('Broc-Chocolaterie', 1), ('Bludenz', 1), ('Balsthal', 1), ('Corcelles-Peseux', 1), ('Altstätten Stadt', 1)]
Number of stations with degree of 1: 83


## Centrality Measures

### Betweenness Centrality

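Betweenness centrality measures how often a node lies on the shortest paths between other nodes. A node with a high value acts as a bridge, broker or gatekeeper.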

In [12]:
# Normalized betweenness centrality of every node, ranked from highest to lowest.
betweenness_centrality = nx.betweenness_centrality(G, normalized=True)
sorted_betweeness_centrality = sorted(
    betweenness_centrality.items(),
    key=lambda entry: entry[1],
    reverse=True,
)

In [13]:
# How many entries to show from each end of the ranking.
num = 10

top_betweenness_centrality = sorted_betweeness_centrality[:num]
bottom_betweenness_centrality = sorted_betweeness_centrality[-num:]

print(f"Top {num} betweeness centrality score: {top_betweenness_centrality}")
print(f"Bottom {num} betweness centrality score: {bottom_betweenness_centrality}")

# Count the nodes whose betweenness centrality is exactly zero.
min_centrality_nodes = [station for station, score in sorted_betweeness_centrality if score == 0]
print("Rural stations:", len(min_centrality_nodes))

Top 10 betweeness centrality score: [('Basel SBB', 0.17604824035206496), ('Mulhouse', 0.147309900748999), ('Genève', 0.1354017345065535), ('Bellegarde-sur-Valserine', 0.11802205338580672), ('Lyon Part Dieu', 0.11680840037900542), ('Zürich HB', 0.11246807473512789), ('Basel Bad Bf', 0.06891126259224004), ('Strasbourg', 0.06623164365791197), ('St. Gallen', 0.057286264003561946), ('Bern', 0.05443407847191186)]
Bottom 10 betweness centrality score: [('Gersau Förstli (Fähre)', 0.0), ('Beckenried Niederdorf (Fähre)', 0.0), ('Walenstadt (See)', 0.0), ('Weesen (See)', 0.0), ('Mols (See)', 0.0), ('Mühlehorn (See)', 0.0), ('Fällanden (See)', 0.0), ('Mönchaltorf (See)', 0.0), ('Solothurn (Schiff)', 0.0), ('Rheinau Kraftwerk', 0.0)]
Rural stations: 346


### Closeness Centrality

A central node is one that is close, on average, to every other node.

In [24]:
# Closeness centrality of every node, ranked from highest to lowest.
closeness_centrality = nx.closeness_centrality(G)
sorted_closeness_centrality = sorted(
    closeness_centrality.items(),
    key=lambda entry: entry[1],
    reverse=True,
)

In [23]:
# How many entries to show from each end of the ranking.
num = 10

top_closeness_centrality = sorted_closeness_centrality[:num]
bottom_closeness_centrality = sorted_closeness_centrality[-num:]

print(f"Top {num} closeness centrality score: {top_closeness_centrality}")
print(f"Bottom {num} closeness centrality score: {bottom_closeness_centrality}")

# Count the nodes whose closeness centrality is below 0.001
# (a near-zero threshold, not an exact-zero test).
min_closeness_nodes = [station for station, score in sorted_closeness_centrality if score < 0.001]
print("Rural stations:", len(min_closeness_nodes))

Top 10 closeness centrality score: [('Zürich HB', 0.22813387329516363), ('Genève', 0.2271055305930703), ('Basel SBB', 0.22559970730402645), ('Olten', 0.21814233146724887), ('Lausanne', 0.21707593014513704), ('Luzern', 0.21499174470061208), ('Brig', 0.21499174470061208), ('St. Gallen', 0.2129645099107185), ('Bellegarde-sur-Valserine', 0.21152021644773605), ('Mulhouse', 0.2112473311499483)]
Bottom 10 closeness centrality score: [('Pied du barrage', 0.0002880184331797235), ('Les Montuires', 0.0002880184331797235), ('Friedrichshafen Fähre', 0.0002880184331797235), ('Romanshorn Autoquai', 0.0002880184331797235), ('Meilen Autoquai', 0.0002880184331797235), ('Horgen Autoquai', 0.0002880184331797235), ('Les Brenets (Lac)', 0.0002880184331797235), ('Saut-du-Doubs', 0.0002880184331797235), ('Gersau Förstli (Fähre)', 0.0002880184331797235), ('Beckenried Niederdorf (Fähre)', 0.0002880184331797235)]
Rural stations: 64


### Adamic-Adar 

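This measure refines simple common-neighbor counting as a notion of vertex similarity by weighting less-connected common neighbors more heavily: $AA(u, v) = \sum_{w \in N(u) \cap N(v)} \frac{1}{\log |N(w)|}$, where $N(x)$ denotes the set of neighbors of $x$.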

For example, two users on a social media platform who are both friends with a famous person are (probably) less similar than two users who are both friends with a less famous person.

In [10]:
from itertools import combinations

# Score every unordered pair of distinct nodes with the Adamic-Adar index.
# Note: despite its name, `perms` holds combinations (each pair appears once).
perms = list(combinations(G.nodes, 2))
preds = nx.adamic_adar_index(G, perms)

# Running extremes. Both start at an infinity so that the very first pair
# initialises the minimum AND the maximum.
min_sim = np.inf
min_tuple = []
max_sim = -np.inf
max_tuple = []

for u, v, p in preds:
    # Two independent checks on purpose: with `elif`, any score that sets a new
    # minimum (always true for the first pair) would never be considered for
    # the maximum, so the reported maximum could be too small.
    if p < min_sim:
        min_sim = p
        min_tuple = [u, v]
    if p > max_sim:
        max_sim = p
        max_tuple = [u, v]

print(f"Minimum similarity:{min_tuple} with {min_sim}")
print(f"maximum similarity:{max_tuple} with {max_sim}")

Minimum similarity:['Luzern', 'Laufenburg'] with 0
maximum similarity:['Zürich HB', 'Rapperswil SG'] with 9.16882848421458


# Graph Visualization

Visualization of the graph with networkx and geopandas on top of a map of Switzerland.

In [11]:
# TODO: implement plot — draw G with networkx on top of a geopandas map of Switzerland.