# Step 10 - Calculations of car-constricted network metrics
## Project: Growing Urban Bicycle Networks

This notebook supplements the main analysis with calculations on the car networks constricted by the bicycle network growth. It was run in a separate environment from all other notebooks; as a result, it does not follow the repository's folder structure and defines its own helper functions.

Contact: Sayat Mimar (smimar@ur.rochester.edu)

In [1]:
import igraph as ig

In [2]:
import itertools

In [3]:
import random

In [4]:
import pandas as pd

In [5]:
import numpy as np

In [6]:
import networkx as nx

In [7]:
from haversine import haversine_vector

In [8]:
%matplotlib inline
import matplotlib.pyplot as plt

In [9]:
import gzip

In [10]:
import pickle5 as pickle

# Small cities

In [27]:
# Names of the "small" cities. Each name doubles as the city's folder name and
# file-name prefix when the graph files are opened in the loop below.
city_names = """
    amsterdam barcelona bath bern birmingham boston bradford budapest
    buenosaires chicago cologne copenhagen delft detroit edinburgh glasgow
    hongkong kathmandu leeds luanda malmo manhattan marrakesh milan moscow
    mumbai munich oslo paris philadelphia rabat sanfrancisco santiago
    shahalam sheffield singapore stuttgart tashkent telaviv turin
    ulaanbaatar vienna zurich
""".split()

In [16]:
# Snapshot labels '0.025', '0.050', ..., '1.000' (40 values in steps of 0.025).
# They are generated from integer thousandths so the three-decimal strings are
# exact and match the file-name suffixes without any float rounding.
snapshots = ['%d.%03d' % divmod(thousandths, 1000)
             for thousandths in range(25, 1001, 25)]

In [17]:
# Same labels as the snapshot list, preceded by 'all':
# 'all', '0.025', '0.050', ..., '1.000' (41 values).
snapshots_1 = ['all'] + ['%d.%03d' % divmod(thousandths, 1000)
                         for thousandths in range(25, 1001, 25)]

In [None]:
# One label per entry of `graphs` below: the unconstricted car network ('all')
# comes first, followed by one constricted network per snapshot. Keeping the
# labels the same length as `graphs` is what keeps the betweenness columns,
# the 'betw' vertex attribute and the metric rows aligned with their graphs.
graph_labels = ['all'] + list(snapshots)

# (column name, function of one graph) pairs. For every graph they are
# evaluated in exactly this order.
metric_functions = [
    ('eff_global', lambda g: calculate_efficiency_global(g)),
    ('eff_local', lambda g: calculate_efficiency_local(g)),
    ('clustering_10', lambda g: center_drift_weighted(g, 90)),
    ('clustering_5', lambda g: center_drift_weighted(g, 95)),
    ('clustering_3', lambda g: center_drift_weighted(g, 97)),
    ('anisotropy_10', lambda g: bet_anisotropy_weighted(g, 90)),
    ('anisotropy_5', lambda g: bet_anisotropy_weighted(g, 95)),
    ('anisotropy_3', lambda g: bet_anisotropy_weighted(g, 97)),
    ('directness', lambda g: calculate_directness(g)),
]

for city in city_names:
    city_dir = 'all_cities_3_1/' + city + '/'

    ##### the _carall file first, then all snapshot files (same order as graph_labels)
    graph_files = [city_dir + city + '_carall.picklez']
    graph_files += [city_dir + city + '_carconstrictedbike_poi_railwaystation_Cq' + s + '.picklez'
                    for s in snapshots]

    graphs = []
    for graph_file in graph_files:
        with gzip.open(graph_file, 'rb') as f:
            graph = pickle.load(f)
        # Rescale edge weights by 1/1000 (presumably metres -> kilometres, to
        # match the haversine distances used by the metrics -- TODO confirm).
        graph.es['weight'] = np.array(graph.es['weight']) / 1000
        graphs.append(graph)

    # Weighted betweenness of every graph; progress is printed every 10 graphs.
    btwnness = []
    for count, graph in enumerate(graphs, start=1):
        btwnness.append(graph.betweenness(weights='weight', nobigint=False))
        if count % 10 == 0:
            print(count)

    # One betweenness column per graph. The values are also stored on the
    # vertices as 'betw', which center_drift_weighted and
    # bet_anisotropy_weighted read.
    pd_btw = pd.DataFrame()
    pd_btw['ids'] = graphs[0].vs['id']
    for label, graph, betw in zip(graph_labels, graphs, btwnness):
        pd_btw[label] = betw
        graph.vs['betw'] = betw
    # NOTE(review): the resulting file name ends in '.csv_betwnns.csv'; it is
    # kept unchanged so existing outputs and readers still match.
    pd_btw.to_csv('results_3/betweenness_rail_c/'+city+'_carconstrictedbike_poi_railwaystation_closeness.csv'+'_betwnns.csv')  ### location of the csv file, write betweenness into csv file

    # One row of metric values per graph (graphs outer, metrics inner).
    metric_rows = [[metric(graph) for _, metric in metric_functions] for graph in graphs]

    pd_metrics = pd.DataFrame()
    pd_metrics['snapshots'] = graph_labels
    for column, (metric_name, _) in enumerate(metric_functions):
        pd_metrics[metric_name] = [row[column] for row in metric_rows]

    pd_metrics.to_csv('results_3/metrics_rail_c/'+city+'_carconstrictedbike_poi_railwaystation_closeness.csv') ##write metric results into csv file

    print(city)
        

    

# Big cities

In [19]:
# Names of the "big" cities, processed separately with a reduced set of
# snapshots. Each name doubles as the city's folder name and file-name prefix.
big_cities = [
    'tokyo',
    'helsinki',
    'berlin',
    'hamburg',
    'manchestergreater',
    'london',
    'losangeles',
    'mexico',
    'rome',
    'houston',
    'jakarta',
    'karachi',
    'phoenix',
    'saopaulo',
]

In [20]:
# Reduced snapshot set used for the big cities: only the 0.500 and 1.000
# snapshots, plus the variant with the leading 'all' label.
snapshots_1 = ['all', '0.500', '1.000']
snapshots = snapshots_1[1:]

In [None]:
# One label per entry of `graphs` below: the unconstricted car network ('all')
# comes first, followed by one constricted network per snapshot. Keeping the
# labels the same length as `graphs` is what keeps the betweenness columns,
# the 'betw' vertex attribute and the metric rows aligned with their graphs.
graph_labels = ['all'] + list(snapshots)

# (column name, function of one graph) pairs. For every graph they are
# evaluated in exactly this order.
metric_functions = [
    ('eff_global', lambda g: calculate_efficiency_global(g)),
    ('eff_local', lambda g: calculate_efficiency_local(g)),
    ('clustering_10', lambda g: center_drift_weighted(g, 90)),
    ('clustering_5', lambda g: center_drift_weighted(g, 95)),
    ('clustering_3', lambda g: center_drift_weighted(g, 97)),
    ('anisotropy_10', lambda g: bet_anisotropy_weighted(g, 90)),
    ('anisotropy_5', lambda g: bet_anisotropy_weighted(g, 95)),
    ('anisotropy_3', lambda g: bet_anisotropy_weighted(g, 97)),
    ('directness', lambda g: calculate_directness(g)),
]

for city in big_cities:
    city_dir = 'all_cities_3_1/' + city + '/'

    # The _carall file first, then all snapshot files (same order as graph_labels).
    graph_files = [city_dir + city + '_carall.picklez']
    graph_files += [city_dir + city + '_carconstrictedbike_poi_railwaystation_Rq' + s + '.picklez'
                    for s in snapshots]

    graphs = []
    for graph_file in graph_files:
        with gzip.open(graph_file, 'rb') as f:
            graph = pickle.load(f)
        # Rescale edge weights by 1/1000 (presumably metres -> kilometres, to
        # match the haversine distances used by the metrics -- TODO confirm).
        graph.es['weight'] = np.array(graph.es['weight']) / 1000
        graphs.append(graph)

    # Weighted betweenness of every graph; progress is printed every 10 graphs.
    btwnness = []
    for count, graph in enumerate(graphs, start=1):
        btwnness.append(graph.betweenness(weights='weight', nobigint=False))
        if count % 10 == 0:
            print(count)

    # One betweenness column per graph. The values are also stored on the
    # vertices as 'betw', which center_drift_weighted and
    # bet_anisotropy_weighted read.
    pd_btw = pd.DataFrame()
    pd_btw['ids'] = graphs[0].vs['id']
    for label, graph, betw in zip(graph_labels, graphs, btwnness):
        pd_btw[label] = betw
        graph.vs['betw'] = betw
    pd_btw.to_csv('results_3/betweenness_rail_r/'+city+'_carconstrictedbike_poi_railwaystation_random'+'_betwnns.csv')

    # One row of metric values per graph (graphs outer, metrics inner).
    metric_rows = [[metric(graph) for _, metric in metric_functions] for graph in graphs]

    pd_metrics = pd.DataFrame()
    pd_metrics['snapshots'] = graph_labels
    for column, (metric_name, _) in enumerate(metric_functions):
        pd_metrics[metric_name] = [row[column] for row in metric_rows]

    pd_metrics.to_csv('results_3/metrics_rail_r/'+city+'_carconstrictedbike_poi_railwaystation_random.csv')

    print(city)
        

    

In [21]:
def calculate_efficiency_global(G, numnodepairs = 500, normalized = True):
    """Return the global efficiency of graph G.

    The efficiency is the sum of inverse shortest-path distances (edge
    attribute "weight") over all ordered pairs of the considered nodes. If G
    has more than numnodepairs nodes, a random sample of numnodepairs nodes is
    used instead of all nodes.

    With normalized=True the sum is divided by the same sum computed on
    haversine distances between the nodes' ("y", "x") attributes; with
    normalized=False the raw sum is returned. Returns 0 if G is None or if
    there is no node pair to normalize over.
    """
    if G is None:
        return 0

    # Work on all nodes, or on a random sample if the graph is too large
    all_indices = list(G.vs.indices)
    if G.vcount() > numnodepairs:
        sampled = random.sample(all_indices, numnodepairs)
    else:
        sampled = all_indices

    # Network distances between all sampled nodes; zero entries are skipped
    distance_rows = G.shortest_paths(source = sampled, target = sampled, weights = "weight")
    EG = sum([1/d for row in distance_rows for d in row if d != 0])
    if not normalized:
        return EG

    # Ideal case: the same pairs, connected by haversine distance
    node_pairs = list(itertools.permutations(sampled, 2))
    if len(node_pairs) < 1:
        return 0
    origins = [(G.vs[a]["y"], G.vs[a]["x"]) for a, _ in node_pairs]
    destinations = [(G.vs[b]["y"], G.vs[b]["x"]) for _, b in node_pairs]
    l_ij = haversine_vector(origins, destinations)
    EG_id = sum([1/l for l in l_ij if l != 0])
    return EG / EG_id

In [22]:
### clustering
def center_drift_weighted(g,percentile):
    """Return how tightly the high-betweenness vertices of g cluster.

    Vertices whose 'betw' attribute is strictly above the given percentile of
    all 'betw' values are selected. The result is the mean distance of the
    selected vertices from their own centroid, divided by the mean distance of
    all vertices from that same centroid. Distances are plain Euclidean
    distances on the 'x' and 'y' vertex attributes.

    g must carry the vertex attributes 'betw', 'x' and 'y'. percentile is
    passed to np.percentile (0-100). Returns NaN if no vertex lies above the
    threshold (e.g. all betweenness values are equal).
    """
    betw = np.asarray(g.vs['betw'], dtype=float)
    xs = np.asarray(g.vs['x'], dtype=float)
    ys = np.asarray(g.vs['y'], dtype=float)

    is_high = betw > np.percentile(betw, percentile)
    if not is_high.any():
        # No vertex selected: the centroid, and hence the ratio, is undefined.
        return float('nan')

    center_x = np.mean(xs[is_high])
    center_y = np.mean(ys[is_high])
    radii = np.sqrt((xs[is_high] - center_x)**2 + (ys[is_high] - center_y)**2)
    all_radii = np.sqrt((xs - center_x)**2 + (ys - center_y)**2)
    return np.mean(radii)/np.mean(all_radii)

In [23]:
def bet_anisotropy_weighted(g,percentile):
    """Return the anisotropy of the high-betweenness vertices of g.

    Vertices whose 'betw' attribute is strictly above the given percentile of
    all 'betw' values are selected. The result is sqrt(min_eig / max_eig) of
    the 2x2 covariance matrix of their 'x' and 'y' attributes: close to 1 when
    the points spread equally in all directions, close to 0 when they lie
    along a line.

    g must carry the vertex attributes 'betw', 'x' and 'y'. percentile is
    passed to np.percentile (0-100). Returns NaN when the ratio is undefined:
    fewer than two selected vertices, or all selected vertices at the same
    position.
    """
    betw = np.asarray(g.vs['betw'], dtype=float)
    is_high = betw > np.percentile(betw, percentile)
    high_locs_x = np.asarray(g.vs['x'], dtype=float)[is_high]
    high_locs_y = np.asarray(g.vs['y'], dtype=float)[is_high]

    if high_locs_x.size < 2:
        # A covariance needs at least two observations.
        return float('nan')

    # The covariance matrix is symmetric, so use the symmetric eigen-solver:
    # it always returns real eigenvalues, sorted in ascending order.
    eigs = np.linalg.eigvalsh(np.cov(high_locs_x, high_locs_y))
    # The smallest eigenvalue of a (near-)singular covariance matrix can come
    # out slightly negative through rounding; clamp it so sqrt stays defined.
    min_eig = max(eigs[0], 0.0)
    max_eig = eigs[-1]
    if max_eig <= 0:
        return float('nan')
    return np.sqrt(min_eig/max_eig)

In [24]:
def calculate_efficiency_local(G, numnodepairs = 500, normalized = True):
    """Calculates local network efficiency.

    For every considered node with more than one neighbor, the global
    efficiency of the subgraph induced by its neighbors is computed with
    calculate_efficiency_global (numnodepairs and normalized are passed
    through). The result is the mean of these values, as returned by listmean.

    If there are more than numnodepairs nodes, only a random sample of
    numnodepairs nodes is considered. Returns 0 if G is None.
    """

    if G is None: return 0
    if G.vcount() > numnodepairs:
        nodeindices = random.sample(list(G.vs.indices), numnodepairs)
    else:
        nodeindices = list(G.vs.indices)

    EGi = []
    for i in nodeindices:
        neighbors = G.neighbors(i)  # looked up once, reused below
        if len(neighbors) > 1: # If we have a nontrivial neighborhood
            G_induced = G.induced_subgraph(neighbors)
            EGi.append(calculate_efficiency_global(G_induced, numnodepairs, normalized))
    return listmean(EGi)

In [25]:
def listmean(lst):
    """Return the arithmetic mean of lst, or 0 if it cannot be computed.

    The fallback value 0 is returned for an empty sequence (division by zero)
    and for input that cannot be summed or measured (TypeError, e.g. None).
    Any other exception propagates to the caller.
    """
    try:
        return sum(lst) / len(lst)
    except (TypeError, ZeroDivisionError):
        return 0

In [26]:
def calculate_directness(G, numnodepairs = 500):
    """Calculate directness on G over all connected pairs of sampled nodes.

    Up to numnodepairs vertices are sampled at random. For every unordered
    pair of sampled vertices joined by a path, the haversine distance between
    their ("y", "x") attributes and the length of the shortest path (sum of
    the "weight" edge attribute) are accumulated. The result is the total
    haversine distance divided by the total network distance.

    Returns 0 if no sampled pair is connected (total network distance is 0),
    instead of dividing by zero.
    """

    indices = random.sample(list(G.vs), min(numnodepairs, len(G.vs)))

    total_distance_haversine = 0
    total_distance_network = 0
    for c, v in enumerate(indices):
        # Only pair v with itself and later vertices so each pair is counted once
        targets = indices[c:]
        paths_e = G.get_shortest_paths(v, targets, weights = "weight", output = "epath")

        # An empty edge path means the target is v itself or is unreachable;
        # neither contributes any distance, so only non-empty paths are kept.
        reached = [t for t, path_e in zip(targets, paths_e) if len(path_e) != 0]
        if reached:
            origin = (v["y"], v["x"])
            total_distance_haversine += sum(haversine_vector([origin] * len(reached),
                                                             [(t["y"], t["x"]) for t in reached]))

        for path_e in paths_e:
            # Sum up distances of path segments from first to last node
            total_distance_network += sum([G.es[e]['weight'] for e in path_e])

    if total_distance_network == 0:
        return 0
    return total_distance_haversine / total_distance_network