# Imports

In [1]:
import random
import time
from itertools import combinations, permutations

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd

# 1. Network Exploration 

The graph is loaded from the `sbb.edgelist` file.
Create this edge list beforehand by running the `create_edgelist.py` script.

In [2]:
# Build the undirected network from the ";"-separated edge list on disk.
G = nx.read_edgelist(
    "sbb.edgelist",
    delimiter=";",
    create_using=nx.Graph,
)

In [3]:
# Report the overall size of the network.
print(f"number of nodes: {G.number_of_nodes()}")
print(f"number of edges: {G.number_of_edges()}")

number of nodes: 3473
number of edges: 8610


## Degrees

In [4]:
# Degree of every node, keyed by node name.
degrees = dict(G.degree())

# --- minimum degree ---
# Smallest degree, one node attaining it, and every node sharing it.
min_deg = min(degrees.values())
min_deg_node = min(degrees, key=degrees.get)
l_min_deg = {node: min_deg for node in degrees if degrees[node] == min_deg}
print(f'min degree: {min_deg}')
print(f'number of nodes with degree of {min_deg}: {len(l_min_deg)}')

# --- maximum degree ---
# Largest degree, one node attaining it, and every node sharing it.
max_deg = max(degrees.values())
max_deg_node = max(degrees, key=degrees.get)
l_max_deg = {node: max_deg for node in degrees if degrees[node] == max_deg}
print(f'max degree: {max_deg}')
print(f'number of nodes with degree of {max_deg}: {len(l_max_deg)}')
print(f'max degree node: {max_deg_node}')

# Number of direct neighbours of the node 'Lyon Part Dieu'.
print(sum(1 for _ in G.neighbors('Lyon Part Dieu')))

min degree: 1
number of nodes with degree of 1: 83
max degree: 102
number of nodes with degree of 102: 1
max degree node: Zürich HB
75


## Centrality Measures

In [5]:
# Betweenness centrality: the central node is the one that acts as a bridge,
# broker or gatekeeper.
betweenness_centrality = nx.betweenness_centrality(G, normalized=True)

# Node names ordered from the highest to the lowest betweenness centrality.
sorted_centrality_node = [
    node
    for node, _score in sorted(
        betweenness_centrality.items(), key=lambda item: item[1], reverse=True
    )
]

# The five most central and the five least central nodes.
top5_betweenness_centrality = sorted_centrality_node[:5]
bottom5_betweenness_centrality = sorted_centrality_node[-5:]

print(top5_betweenness_centrality)
print(bottom5_betweenness_centrality)

# TODO: find all nodes with a betweenness centrality of 0

['Basel SBB', 'Mulhouse', 'Genève', 'Bellegarde-sur-Valserine', 'Lyon Part Dieu']
['Mühlehorn (See)', 'Fällanden (See)', 'Mönchaltorf (See)', 'Solothurn (Schiff)', 'Rheinau Kraftwerk']


In [6]:
# Closeness centrality: the central node is the one that is close, on average,
# to the other nodes.
closeness_centrality = nx.closeness_centrality(G)

# Rank (node, score) pairs from the highest to the lowest score, then keep
# only the node names.
ranked_closeness = sorted(
    closeness_centrality.items(), key=lambda pair: pair[1], reverse=True
)
sorted_closeness_node = [node for node, _score in ranked_closeness]

# The five closest and the five most remote nodes.
top5_closeness_centrality = sorted_closeness_node[:5]
bottom5_closeness_centrality = sorted_closeness_node[-5:]

print(top5_closeness_centrality)
print(bottom5_closeness_centrality)

['Zürich HB', 'Genève', 'Basel SBB', 'Olten', 'Lausanne']
['Horgen Autoquai', 'Les Brenets (Lac)', 'Saut-du-Doubs', 'Gersau Förstli (Fähre)', 'Beckenried Niederdorf (Fähre)']


In [7]:
# Similarity measures
# The Adamic-Adar index is symmetric in (u, v), so every unordered pair of
# nodes is scored exactly once. The pairs are produced lazily instead of being
# materialised as a list of all ordered pairs, which halves the work and avoids
# holding millions of tuples in memory.
node_pairs = combinations(G.nodes, 2)

# This measure refines the simple counting of vertex similarity by weighting less-connected neighbors more heavily
# Two FB persons who are friends with a famous person are (probably) less similar than those who are friends with a less famous person.
aa = nx.adamic_adar_index(G, node_pairs)

# Running extremes and the node pairs that attain them. The maximum starts at
# -inf so that a pair is always reported, even if every score is 0.
min_sim = np.inf
min_sim_u_v = []
max_sim = -np.inf
max_sim_u_v = []

for u, v, p in aa:
    # The two checks are independent: a pair that lowers the minimum (always
    # true for the first pair) may also be the maximum, so no `elif` here.
    if p < min_sim:
        min_sim = p
        min_sim_u_v = [u, v]
    if p > max_sim:
        max_sim = p
        max_sim_u_v = [u, v]

print(f'min sim {min_sim_u_v}: {min_sim}, max sim {max_sim_u_v}: {max_sim}')

min sim ['Luzern', 'Laufenburg']: 0, max sim ['Zürich HB', 'Rapperswil SG']: 9.168828484214584


# 2. Community Detection

## Louvain

In [None]:
# Louvain community detection on the full graph (seeded for reproducibility).
louvain = nx.community.louvain_communities(G, seed=42, resolution=0.5)
print(len(louvain))

louvain_len = [len(s) for s in louvain]
print("Louvain: size of each community:", louvain_len)


def getcolors(n, seed=42):
    """Return `n` random hex colour strings, reproducible for a given `seed`.

    A private `random.Random` instance is used, so the colours are the same on
    every run and the global random state is left untouched.
    """
    rng = random.Random(seed)
    return ["#%06x" % rng.randint(0, 0xFFFFFF) for _ in range(n)]


colors = getcolors(len(louvain))

plt.figure(figsize=(12, 10))
pos = nx.spring_layout(G, seed=42)
# Draw one community at a time so that each gets its own colour.
for idx, community in enumerate(louvain):
    nx.draw_networkx_nodes(
        G, pos=pos, nodelist=community, node_color=colors[idx], node_size=10, alpha=0.75
    )

nx.draw_networkx_edges(G, pos=pos, width=0.20, alpha=0.33)
plt.title("Louvain Community Detection")
plt.show()

# todo: use edge weights to help visualise the graph


In [None]:
# Louvain restricted to the largest connected component.
# `key=len` is required: a bare max() over sets compares them with the subset
# relation rather than by size, so it does not reliably pick the biggest one.
connected_comp = nx.connected_components(G)
max_connected_comp = max(connected_comp, key=len)
print("Number of nodes in largest connected component: ", len(max_connected_comp))

sub_G = G.subgraph(max_connected_comp)

louvain = nx.community.louvain_communities(sub_G, seed=42, resolution=0.5)
print(len(louvain))

louvain_len = [len(s) for s in louvain]
print("Louvain: size of each community:", louvain_len)


def getcolors(n, seed=42):
    """Return `n` random hex colour strings, reproducible for a given `seed`.

    A private `random.Random` instance is used, so the colours are the same on
    every run and the global random state is left untouched.
    """
    rng = random.Random(seed)
    return ["#%06x" % rng.randint(0, 0xFFFFFF) for _ in range(n)]


colors = getcolors(len(louvain))

plt.figure(figsize=(12, 10))
pos = nx.spring_layout(sub_G, seed=42)
# Draw one community at a time so that each gets its own colour.
for idx, community in enumerate(louvain):
    nx.draw_networkx_nodes(
        sub_G, pos=pos, nodelist=community, node_color=colors[idx], node_size=10, alpha=0.75
    )

nx.draw_networkx_edges(sub_G, pos=pos, width=0.20, alpha=0.33)
plt.title("Louvain Community Detection")
plt.show()

## Girvan Newman

In [None]:
# Girvan Newman

# TODO: Apply the Girvan-Newman method from the networkx library iteratively for 30 minutes.

# Time budget in seconds. The deadline is only checked between iterations, so
# the cell overruns by however long the iteration in progress takes: a budget
# of 30 minutes ran for over 1.5 hours, a budget of 25 minutes ran for 31m.
GN_TIME_BUDGET_S = 60 * 25

gn_communities = nx.community.girvan_newman(sub_G)
it = 0   # number of partitions actually produced
i = []   # most recent partition (tuple of node sets); read by the plotting cell
timeout = time.time() + GN_TIME_BUDGET_S
while time.time() <= timeout:
    partition = next(gn_communities, None)
    if partition is None:
        # Generator exhausted: keep the last real partition in `i` instead of
        # overwriting it with a sentinel, and stop rather than spin until the
        # deadline.
        break
    i = partition
    it += 1

print(f'number of iterations: {it}')

In [None]:
# TODO: Report the size of each community and draw the graph. Color the nodes according to their community.

# `i` holds the last partition produced by the Girvan-Newman loop. That loop
# may leave a string sentinel in `i` when the generator runs dry; fail with a
# clear message instead of an obscure lookup error while drawing.
if isinstance(i, str):
    raise ValueError(
        "No Girvan-Newman partition available in `i`; re-run the Girvan-Newman cell."
    )


def getcolors(n, seed=42):
    """Return `n` random hex colour strings, reproducible for a given `seed`.

    A private `random.Random` instance is used, so the colours are the same on
    every run and the global random state is left untouched.
    """
    rng = random.Random(seed)
    return ["#%06x" % rng.randint(0, 0xFFFFFF) for _ in range(n)]


colors = getcolors(len(i))

plt.figure(figsize=(12, 10))
pos = nx.spring_layout(sub_G, seed=42)
# Report each community's size and draw it in its own colour.
for idx, c in enumerate(i):
    print(f'Size of community {idx+1}: {len(c)}')
    nx.draw_networkx_nodes(
        sub_G, pos=pos, nodelist=c, node_color=colors[idx], node_size=10, alpha=0.75
    )

nx.draw_networkx_edges(sub_G, pos=pos, width=0.20, alpha=0.33)
plt.title("Girvan Newman Community Detection")
plt.show()