In [1]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import random

In [68]:
# NOTE(review): wildcard import hides where names come from; read_in_network,
# convert_to_graph and add_positions used below presumably come from here —
# confirm and import them explicitly.
from auxiliaries import *

# seed the stdlib RNG so that random sampling and percolation are repeatable
random.seed(34)

In [47]:
# route_type code -> human readable transport mode
# (codes are consecutive: 0 tram, 1 subway, 2 rail, 3 bus, 4 ferry, 5 walking)
types = dict(enumerate(("tram", "subway", "rail", "bus", "ferry", "walking")))

In [48]:
# public transport network of Berlin (all modes combined)
berlin, berlin_nodes = read_in_network("berlin", "combined")

# walking network, adjusted before it is stacked onto the transit table:
#   - d_walk is renamed to duration_avg
#   - n_vehicles and route_I_counts are added and set to 0
#   - route_type is added and set to 5 (walking)
berlin_walking, nodes_t = read_in_network("berlin", "walk")
berlin_walking = berlin_walking.rename(columns={"d_walk": "duration_avg"}).assign(
    n_vehicles=0,
    route_I_counts=0,
    route_type=5,
)

# one edge table holding both transit and walking edges
berlin_full = pd.concat([berlin, berlin_walking], ignore_index=True)

In [49]:
# number of rows (edges) per route_type code in berlin_full
berlin_full["route_type"].value_counts()

route_type
5    18125
3    10208
0      990
2      507
1      366
4        8
Name: count, dtype: int64

In [50]:
# keep only the rows whose route_type is not 4 (ferry)
berlin_full = berlin_full.loc[berlin_full["route_type"] != 4]

In [51]:
# re-check the counts: route_type 4 should no longer appear
berlin_full["route_type"].value_counts()

route_type
5    18125
3    10208
0      990
2      507
1      366
Name: count, dtype: int64

In [52]:
# convert the edge table to a graph
# (the edge lookup further down returns parallel edges keyed 0 and 1,
#  so G holds several edges per node pair)
G = convert_to_graph(berlin_full)
# NOTE(review): add_positions presumably derives node coordinates from
# berlin_nodes — confirm in auxiliaries
pos = add_positions(G, berlin_nodes)

In [53]:
# inspect all parallel edges between two stops (one entry per edge key)
G.get_edge_data(10924, 10920)

{0: {'duration_avg': 120.0, 'route_type': 3},
 1: {'duration_avg': 873.0, 'route_type': 5}}

In [54]:
# number of edges in G before any nodes are removed
len(G.edges())

30196

## Connected component

The Berlin network is not connected, but as we see below, its largest connected component contains almost all nodes (only 2 nodes, 10661 and 10662, are not part of it). Therefore we can simply drop those few nodes and work only with the largest connected component.

In [55]:
# smallest connected component of G (by number of nodes)
min(nx.connected_components(G), key=len)

{10661, 10662}

In [56]:
# keep only the largest connected component: drop every node outside it
# (for the data shown above these are exactly nodes 10661 and 10662).
# The node list is materialised first because G is modified in place.
largest_component = max(nx.connected_components(G), key=len)
G.remove_nodes_from([node for node in G.nodes if node not in largest_component])

In [57]:
# print the node count of every connected component still in G
# (a single printed number means G is now connected)
for component in nx.connected_components(G):
    print(len(component))

4598


## Functions for experiments

In [58]:
def travel_time(a, b, graph=None):
    """
    Rough estimate of travel time between two nodes.

    The estimate is the length of the shortest path from `a` to `b`, using
    the edge attribute "duration_avg" as weight.

    :param a: source node
    :param b: target node
    :param graph: graph to route on; defaults to the notebook-level graph G,
                  so existing two-argument calls behave as before
    :return: weighted shortest path length between a and b
    """
    if graph is None:
        # fall back to the module-level graph (looked up at call time)
        graph = G
    return nx.shortest_path_length(graph, a, b, weight="duration_avg")

In [61]:
# sanity check: travel time between two stops via the helper defined above
travel_time(10924, 345)

1776.245733788396

## Measures for robustness

In [99]:
def average_travel_time(G):
    """
    Mean shortest-path travel time over all node pairs of G.

    Edges are weighted by their "duration_avg" attribute. The computation is
    delegated to networkx, so G has to satisfy whatever
    nx.average_shortest_path_length requires of its input.
    """
    mean_duration = nx.average_shortest_path_length(G, weight="duration_avg")
    return mean_duration


def full_average_travel_time(G):
    """
    Node-weighted mean of the per-component average travel times of G.

    Each connected component contributes its own average travel time
    (average_travel_time on the component's subgraph) multiplied by its
    number of nodes; the sum is divided by the total number of nodes in G.
    """
    weighted_sum = sum(
        len(nodes) * average_travel_time(G.subgraph(nodes))
        for nodes in nx.connected_components(G)
    )
    return weighted_sum / len(G)


def random_sample(nodes, size):
    """
    Draw `size` random pairs of nodes.

    :param nodes: iterable of nodes to draw from
    :param size: number of pairs to draw
    :return: list with `size` entries, each a two-element list holding two
             different elements of `nodes`; pairs are drawn independently,
             so the same pair can occur more than once
    :raises ValueError: if a pair is requested but `nodes` has fewer than
                        two elements (raised by random.sample)
    """
    # materialise once: avoids rebuilding the list for every pair and lets
    # one-shot iterables (e.g. generators) be passed in
    population = list(nodes)
    return [random.sample(population, 2) for _ in range(size)]


def sample_average_travel_time(sample, G):
    """
    Average travel time over the node pairs in `sample`, measured on `G`.

    :param sample: sequence of node pairs (pair[0] is the source, pair[1] the target)
    :param G: graph on which the shortest paths are computed
    :return: mean of the "duration_avg"-weighted shortest path lengths
    :raises ZeroDivisionError: if `sample` is empty
    """
    # Route on the graph that was passed in. Going through the global helper
    # travel_time would always measure the notebook-level graph instead and
    # silently ignore the G argument (e.g. a percolated copy).
    total = sum(
        nx.shortest_path_length(G, pair[0], pair[1], weight="duration_avg")
        for pair in sample
    )
    return total / len(sample)

In [178]:
# average travel time over 100 random node pairs of G
# (G was already reduced to its largest connected component above)
sample_average_travel_time(random_sample(G.nodes(), 100), G)

2042.4944781632191

## Percolation functions

In [174]:
# function to perform random percolation on a graph
def random_percolation(G, p):
    """
    Random edge percolation that spares walking edges.

    Works on a copy of G; the input graph is left untouched.

    :param G: graph whose edges carry a "route_type" attribute and are
              addressed by (source, target, key)
    :param p: probability with which each non-walking edge is removed
    :return: the percolated copy of G
    """
    percolated = G.copy()
    doomed = []
    for u, v, k in percolated.edges(keys=True):
        attrs = percolated.get_edge_data(u, v, k)
        # walking edges (route_type 5) always survive and consume no random draw
        if attrs["route_type"] == 5:
            continue
        if random.random() < p:
            doomed.append((u, v, k))
    # remove only after the scan so the edge view is not modified while iterating
    for u, v, k in doomed:
        percolated.remove_edge(u, v, key=k)
    return percolated

In [171]:
# example run: remove each non-walking edge of G with probability 0.5
G_perc = random_percolation(G, 0.5)

In [172]:
# edges left after the percolation
len(G_perc.edges())

24186

In [169]:
# edges in the unpercolated graph, for comparison
len(G.edges())

30194

#### TODO: Make function for targeted percolation

In [None]:
# Targeted percolation

#### TODO: Make function that does repeated percolation and makes graph

In [188]:
# Repeated experiments
import numpy as np

def percolation_experiments(G, p_min, p_max, steps, n_tests, n_percolations, verbose=False, sample_size=100):
    """
    Repeated percolation experiments.

    For each of `steps` evenly spaced removal probabilities between `p_min`
    and `p_max` (both included), G is percolated `n_percolations` times with
    random_percolation. On every percolated graph `n_tests` tests are run;
    each test draws `sample_size` random node pairs and records their average
    travel time.

    :param G: graph
    :param p_min: minimum probability
    :param p_max: maximum probability
    :param steps: number of probabilities between p_min and p_max
    :param n_tests: number of sampled tests per percolated graph
    :param n_percolations: number of percolations per probability
    :param verbose: if True, print every single result
    :param sample_size: number of node pairs per test (defaults to 100, the
                        value that used to be hard-coded)
    :return: DataFrame with one row per (probability, percolation, test) and
             the columns 'p', 'perc', 'test', 'result'
    """
    columns = ['p', 'perc', 'test', 'result']
    # Collect rows in a list and build the frame once. Writing each result to
    # results.loc[experiment] with a counter that is never advanced kept
    # overwriting row 0, leaving a single row at the end.
    records = []

    for p in np.linspace(p_min, p_max, steps):
        for i in range(n_percolations):
            G_perc = random_percolation(G, p)
            for test in range(n_tests):
                sample = random_sample(G_perc.nodes, sample_size)
                avg_time = sample_average_travel_time(sample, G_perc)

                records.append((p, i, test, avg_time))
                if verbose:
                    print("p: {}, test: {}, time: {}".format(p, test, avg_time))

    return pd.DataFrame(records, columns=columns)

In [189]:
# NOTE(review): p_max=8 is outside [0, 1]. random_percolation removes an edge
# when random.random() < p, so every step with p >= 1 removes all non-walking
# edges — was 0.8 or 1 intended?
# NOTE(review): the returned DataFrame is not assigned to a variable.
percolation_experiments(G, 0, 8, 100, 10, 10, verbose=True)

p: 0.0, test: 0, time: 1951.3172603014336
p: 0.0, test: 1, time: 2093.5478700138897
p: 0.0, test: 2, time: 2120.3230493422225
p: 0.0, test: 3, time: 1999.9594894858064
p: 0.0, test: 4, time: 2092.053224117001
p: 0.0, test: 5, time: 2134.267779022565
p: 0.0, test: 6, time: 2031.0423815189515
p: 0.0, test: 7, time: 2019.99903301867
p: 0.0, test: 8, time: 2067.771717669003
