# Imports and Loading the Network

In [1]:
from itertools import combinations

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd

The graph is loaded from the `sbb.edgelist` file.
Generate this file first by running the `create_edgelist.py` script.

In [2]:
# Parse the ";"-delimited edge list into an undirected graph (nx.Graph).
G = nx.read_edgelist(
    "sbb.edgelist",
    delimiter=";",
    create_using=nx.Graph,
)

# 1. Network Exploration

In [3]:
# Report the overall size of the loaded network.
print(f"number of nodes: {G.number_of_nodes()}")
print(f"number of edges: {G.number_of_edges()}")

number of nodes: 3473
number of edges: 8610


## Degrees: Minimum and Maximum

In [4]:
# Map every node of the graph to its degree.
degrees = {node: degree for node, degree in G.degree()}

# (node, degree) pairs ordered from the lowest to the highest degree.
sorted_degrees = list(degrees.items())
sorted_degrees.sort(key=lambda pair: pair[1])

In [5]:
# sorted_degrees is in ascending order, so its first entry has the lowest
# degree. Only one node is reported even if several share that degree.
min_degree = sorted_degrees[0]
min_node, min_value = min_degree
print(f"Minimum degree: {min_node} with degree {min_value}")

# The last entry of the ascending list has the highest degree.
max_degree = sorted_degrees[-1]
max_node, max_value = max_degree
print(f"Maximum degree: {max_node} with degree {max_value}")

Minimum degree: Uetliberg with degree 1
Maximum degree: Zürich HB with degree 102


## Centrality Measures

### Betweenness Centrality

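Betweenness centrality measures how often a node lies on the shortest paths between other pairs of nodes. A node with a high score acts as a bridge, broker or gatekeeper.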

In [6]:
# Normalized betweenness centrality score for every node.
betweenness_centrality = nx.betweenness_centrality(G, normalized=True)

# (node, score) pairs ranked from the highest to the lowest score.
sorted_betweeness_centrality = sorted(
    betweenness_centrality.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

In [7]:
# The ranking is in descending order: the head holds the highest scores and
# the tail the lowest ones.
top5_betweenness_centrality = sorted_betweeness_centrality[:5]
bottom5_betweenness_centrality = sorted_betweeness_centrality[-5:]

print(f"Top 5 betweeness centrality score: {top5_betweenness_centrality}")
print(f"Bottom 5 betweness centrality score: {bottom5_betweenness_centrality}")

# Collect every node whose betweenness centrality is exactly 0.
# NOTE(review): the "Rural stations" label below is an interpretation of a
# zero score, not something the graph data states - confirm it is intended.
min_centrality_nodes = [
    node for node, score in sorted_betweeness_centrality if score == 0
]
print("Rural stations:", len(min_centrality_nodes))

Top 5 betweeness centrality score: [('Basel SBB', 0.17604824035206496), ('Mulhouse', 0.147309900748999), ('Genève', 0.1354017345065535), ('Bellegarde-sur-Valserine', 0.11802205338580672), ('Lyon Part Dieu', 0.11680840037900542)]
Bottom 5 betweness centrality score: [('Mühlehorn (See)', 0.0), ('Fällanden (See)', 0.0), ('Mönchaltorf (See)', 0.0), ('Solothurn (Schiff)', 0.0), ('Rheinau Kraftwerk', 0.0)]
Rural stations: 346


### Closeness Centrality

A node has a high closeness centrality when it is, on average, close to every other node.

In [8]:
# Closeness centrality score for every node.
closeness_centrality = nx.closeness_centrality(G)

# Node names only (without their scores), ranked from the highest to the
# lowest closeness centrality.
sorted_closeness_centrality = sorted(
    closeness_centrality,
    key=lambda node: closeness_centrality[node],
    reverse=True,
)

In [9]:
# The ranking is in descending order: head = most central nodes,
# tail = least central nodes. Entries are node names, not scores.
top5_closeness_centrality = sorted_closeness_centrality[:5]
bottom5_closeness_centrality = sorted_closeness_centrality[-5:]

print(f"Top 5 closeness centrality score: {top5_closeness_centrality}")
print(f"Bottom 5 closeness centrality score: {bottom5_closeness_centrality}")

Top 5 closeness centrality score: ['Zürich HB', 'Genève', 'Basel SBB', 'Olten', 'Lausanne']
Bottom 5 closeness centrality score: ['Horgen Autoquai', 'Les Brenets (Lac)', 'Saut-du-Doubs', 'Gersau Förstli (Fähre)', 'Beckenried Niederdorf (Fähre)']


### Adamic-Adar Index

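This measure refines the simple counting of common neighbors by weighting less-connected common neighbors more heavily. For two nodes $u$ and $v$ it is defined as $\sum_{w \in N(u) \cap N(v)} \frac{1}{\log |N(w)|}$, where $N(x)$ denotes the set of neighbors of $x$.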

For example, two users on a social media platform who are both friends with a famous person are (probably) less similar than two users who are both friends with a less famous person.

In [10]:
# Score every unordered pair of distinct nodes with the Adamic-Adar index.
# The pairs are kept as a list (not a lazy iterator) so that the collection
# can safely be traversed more than once by the scoring function.
perms = list(combinations(G.nodes, 2))
preds = nx.adamic_adar_index(G, perms)

# Running extremes. Both start at an infinity so that the very first pair
# initializes the minimum AND the maximum.
min_sim = np.inf
min_tuple = []
max_sim = -np.inf
max_tuple = []

for u, v, p in preds:
    # The two checks are deliberately independent: with an `elif`, a pair
    # that lowers the minimum (always true for the first pair) would never be
    # considered as a candidate for the maximum.
    if p < min_sim:
        min_sim = p
        min_tuple = [u, v]
    if p > max_sim:
        max_sim = p
        max_tuple = [u, v]

print(f"Minimum similarity:{min_tuple} with {min_sim}")
print(f"maximum similarity:{max_tuple} with {max_sim}")

Minimum similarity:['Luzern', 'Laufenburg'] with 0
maximum similarity:['Zürich HB', 'Rapperswil SG'] with 9.16882848421458


# Graph Visualization

Visualization of the graph with networkx and geopandas on top of a map of Switzerland.

In [11]:
# TODO: implement plot (draw G with networkx and geopandas on top of a map
# of Switzerland, as described in the section text above)