In [1]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
from auxiliaries import *

In [2]:
berlin_df, berlin_nodes = read_in_network("berlin", "combined")
helsinki_df, helsinki_nodes = read_in_network("helsinki", "combined")

In [13]:
berlin_nodes

Unnamed: 0,stop_I,lat,lon,name
0,105,52.528318,13.320260,Wiebestr./Huttenstr. (Berlin)
1,106,52.527903,13.323637,Reuchlinstr. (Berlin)
2,107,52.529103,13.315981,Neues Ufer (Berlin)
3,108,52.525756,13.309840,Ilsenburger Str. (Berlin)
4,109,52.525797,13.314261,Goslarer Platz (Berlin)
...,...,...,...,...
4596,10938,52.769962,13.454593,"Stolzenhagen, Stolzenfels"
4597,10939,52.611806,13.594948,"Blumberg (BAR), Gutshof"
4598,10940,52.606930,13.601930,"Blumberg (BAR), Liebigstr."
4599,10946,52.734171,13.666572,"Danewitz, Kirche"


In [3]:
berlin = convert_to_graph(berlin_df)
helsinki = convert_to_graph(helsinki_df)

## Connectivity

In [4]:
# For undirected graph
print(nx.is_connected(berlin))
print(nx.is_connected(helsinki))

False
False


In [5]:
print(nx.number_connected_components(berlin))
print(nx.number_connected_components(helsinki))

4
9


### Seperating connected components

In [6]:
def get_components(city):
    components = [city.subgraph(c).copy() for c in nx.connected_components(city)]
    for idx, g in enumerate(components, start=1):
        print(f"Component {idx} | Number of nodes: {len(g.nodes)} | Number of edges: {len(g.edges)}\n----------------------------------------------------------------")
        
    return components[0]

In [7]:
berlin_1 = get_components(berlin)

Component 1 | Number of nodes: 4593 | Number of edges: 12070
----------------------------------------------------------------
Component 2 | Number of nodes: 4 | Number of edges: 6
----------------------------------------------------------------
Component 3 | Number of nodes: 2 | Number of edges: 2
----------------------------------------------------------------
Component 4 | Number of nodes: 2 | Number of edges: 1
----------------------------------------------------------------


Since the majority of nodes in the Berlin network are in component 1, we will consider that and drop the rest for further analysis.

In [8]:
helsinki_1 = get_components(helsinki)

Component 1 | Number of nodes: 6879 | Number of edges: 8911
----------------------------------------------------------------
Component 2 | Number of nodes: 75 | Number of edges: 119
----------------------------------------------------------------
Component 3 | Number of nodes: 19 | Number of edges: 32
----------------------------------------------------------------
Component 4 | Number of nodes: 2 | Number of edges: 2
----------------------------------------------------------------
Component 5 | Number of nodes: 2 | Number of edges: 2
----------------------------------------------------------------
Component 6 | Number of nodes: 2 | Number of edges: 1
----------------------------------------------------------------
Component 7 | Number of nodes: 2 | Number of edges: 2
----------------------------------------------------------------
Component 8 | Number of nodes: 2 | Number of edges: 1
----------------------------------------------------------------
Component 9 | Number of nodes: 3 | Nu

Since the majority of nodes in the Helsinki network are in component 1, we will consider that and drop the rest for further analysis.

In [9]:
print(nx.is_connected(berlin_1))
print(nx.is_connected(helsinki_1))

True
True


## Robustness
The ability of a network to maintain its general structural properties (like connectivity) when it faces disruptions or attacks (loses nodes or edges).

In [10]:
# Minimum number of nodes that need to be removed to disconnect the graph
print(nx.node_connectivity(berlin_1))
print(nx.node_connectivity(helsinki_1))

1
1


In [14]:
# Which nodes?
berlin_cut_node = nx.minimum_node_cut(berlin_1)
print(berlin_cut_node)

For Berlin:

Remove Node: {7334} | Station: Series([], Name: name, dtype: object)


In [31]:
berlin_cut_node_name = berlin_nodes.loc[berlin_nodes['stop_I'] == 7334]['name'].item()
print("For Berlin:\n")
print(f"Remove Node: {berlin_cut_node} | Station: {berlin_cut_node_name}")

For Berlin:

Remove Node: {7334} | Station: Schönefeld (bei Berlin), Wehrmathen


In [34]:
# Which nodes?
helsinki_cut_node = nx.minimum_node_cut(helsinki_1)
print(helsinki_cut_node)

{7523}


In [35]:
helsinki_cut_node_name = helsinki_nodes.loc[helsinki_nodes['stop_I'] == 7334]['name'].item()
print("For helsinki:\n")
print(f"Remove Node: {helsinki_cut_node} | Station: {helsinki_cut_node_name}")

For helsinki:

Remove Node: {7523} | Station: Talman koulu


In [11]:
# Minimum number of edges that need to be removed to disconnect the graph
print(nx.edge_connectivity(berlin_1))
print(nx.edge_connectivity(helsinki_1))

1
1


In [36]:
# Which edges?
berlin_cut_edge = nx.minimum_edge_cut(berlin_1)
print(berlin_cut_edge)
helsinki_cut_edge = nx.minimum_edge_cut(helsinki_1)
print(helsinki_cut_edge)

{(8147, 8151)}
{(7441, 7523)}


In [40]:
berlin_cut_edge_name = (berlin_nodes.loc[berlin_nodes['stop_I'] == 8147, 'name'].item(),
                        berlin_nodes.loc[berlin_nodes['stop_I'] == 8151, 'name'].item())
print("For Berlin:\n")
print(f"Remove edge: {berlin_cut_edge} | Stations: {berlin_cut_edge_name}")

For Berlin:

Remove edge: {(8147, 8151)} | Stations: ('Erkner, Friedhof', 'Erkner, Jägerstr.')


In [42]:
helsinki_cut_edge_name = (helsinki_nodes.loc[helsinki_nodes['stop_I'] == 7441, 'name'].item(), 
                          helsinki_nodes.loc[helsinki_nodes['stop_I'] == 7523, 'name'].item())
print("For Helsinki:\n")
print(f"Remove edge: {helsinki_cut_edge} | Stations: {helsinki_cut_edge_name}")

For Helsinki:

Remove edge: {(7441, 7523)} | Stations: ('Mehuasema', 'Sommarnäsintie')


Robust networks have large minimum node and edge connectivity.

## Global clustering coefficient

### Approach 1: 
Average the local clustering coefficient over all nodes in a graph

In [49]:
# nx.average_clustering(berlin_1) # does not work for multigraphs

### Approach 2: 
Percentage of ‘open triads’ that are triangles in a network.

Transitivity = (3*number of triangles)/number of open triads

In [50]:
# nx.transitivity(berlin_1) # does not work for multigraphs

## Distance Measures

### 1. Average distance
between all pairs of nodes in the graph

In [51]:
print(nx.average_shortest_path_length(berlin_1))

13.607588354039741


In [52]:
print(nx.average_shortest_path_length(helsinki_1))

25.305112664683058


### 2. Diameter
Maximum distance between any pair of nodes

In [53]:
print(nx.diameter(berlin_1))

46


In [54]:
print(nx.diameter(helsinki_1))

74


### 3. Eccentricity of a node
Largest distance between a node and all the pther nodes

In [57]:
print(nx.eccentricity(berlin_1))

{10924: 32, 10920: 33, 10794: 33, 10435: 31, 10436: 33, 10925: 29, 10492: 30, 10471: 28, 10926: 31, 10490: 30, 10927: 29, 2736: 27, 2738: 26, 10695: 34, 10697: 33, 10701: 35, 10930: 33, 10931: 32, 10390: 32, 10893: 31, 2741: 28, 2761: 29, 2762: 29, 2763: 29, 10934: 30, 10469: 29, 10935: 30, 10470: 29, 10495: 30, 6839: 27, 7081: 27, 7082: 26, 7100: 27, 6841: 28, 6844: 29, 6860: 27, 10938: 33, 10688: 32, 10817: 34, 10838: 32, 10939: 29, 10940: 28, 10798: 30, 10511: 30, 105: 30, 106: 29, 107: 31, 117: 28, 109: 30, 108: 29, 345: 28, 110: 29, 226: 30, 115: 28, 111: 28, 120: 29, 217: 29, 220: 27, 357: 28, 112: 29, 212: 28, 119: 30, 113: 27, 146: 26, 114: 27, 128: 28, 156: 27, 8307: 39, 8308: 40, 8069: 38, 118: 29, 137: 29, 139: 28, 143: 27, 122: 30, 130: 30, 135: 29, 142: 30, 121: 29, 124: 28, 125: 29, 127: 30, 131: 29, 123: 31, 141: 27, 148: 28, 126: 28, 8320: 31, 8322: 30, 129: 29, 132: 28, 138: 29, 389: 27, 158: 29, 133: 27, 134: 28, 136: 29, 162: 28, 368: 30, 144: 30, 140: 29, 116: 29, 1

In [58]:
print(nx.eccentricity(helsinki_1))

{1: 56, 2: 55, 11: 54, 3: 55, 9: 56, 5: 55, 4: 54, 8: 53, 258: 54, 6: 55, 7: 54, 15: 55, 24: 52, 292: 54, 293: 54, 10: 57, 12: 55, 20: 56, 68: 54, 13: 55, 14: 54, 16: 55, 17: 55, 239: 56, 92: 56, 93: 55, 21: 53, 280: 54, 285: 52, 22: 52, 43: 53, 23: 53, 286: 53, 25: 53, 50: 53, 46: 51, 26: 54, 27: 54, 28: 55, 29: 54, 30: 55, 31: 53, 32: 53, 33: 54, 34: 54, 36: 48, 398: 47, 37: 48, 38: 48, 380: 47, 39: 48, 41: 48, 42: 51, 282: 53, 44: 51, 106: 50, 45: 50, 386: 49, 47: 50, 48: 51, 85: 52, 49: 55, 51: 54, 53: 48, 54: 49, 384: 48, 55: 49, 56: 49, 57: 52, 67: 51, 58: 52, 63: 53, 59: 53, 74: 52, 61: 54, 60: 53, 399: 54, 62: 52, 66: 51, 65: 54, 64: 53, 69: 52, 71: 53, 70: 52, 72: 53, 73: 52, 162: 53, 98: 53, 180: 53, 87: 53, 88: 52, 97: 53, 90: 51, 89: 59, 101: 58, 95: 56, 94: 55, 255: 57, 96: 56, 208: 54, 100: 57, 119: 56, 103: 49, 110: 50, 104: 49, 113: 49, 105: 50, 154: 51, 117: 49, 150: 51, 111: 51, 107: 51, 109: 50, 108: 51, 169: 52, 122: 51, 114: 51, 112: 50, 618: 50, 115: 49, 383: 48, 

### 4. Radius
It is the minimum eccentricity

In [59]:
print(nx.radius(berlin_1))

23


In [60]:
print(nx.radius(helsinki_1))

39


### 5. Periphery
The set of nodes that have eccentricity equal to diameter (nodes that are on the outskirts of a graph)

In [61]:
print(nx.periphery(berlin_1))

[1714, 7482]


In [62]:
print(nx.periphery(berlin_1))

[1714, 7482]


### 6. Center
The set of nodes that have eccentricity equal to the radius (nodes that are in the center of the graph)

In [63]:
print(nx.center(berlin_1))

[2933]


In [64]:
print(nx.center(helsinki_1))

[1011, 963]
