In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [2]:
# Integer code -> human-readable transport mode.
# NOTE(review): presumably these codes correspond to the `route_type` column of
# the network_temporal_day.csv files -- confirm against the dataset description.
transport_type = {
    -1: 'walking',
    0: 'tram',
    1: 'subway',
    2: 'rail',
    3: 'bus',
    4: 'ferry',
    5: 'cablecar',
    6: 'gondola',
    7: 'funicular',
}

In [3]:
def get_list_cities_names():
    '''
    Return the names of the cities covered by the analysis.

    A fresh list is built on every call, so callers may modify the result
    without affecting later calls.
    '''
    return [
        'adelaide', 'antofagasta', 'athens', 'belfast', 'berlin', 'bordeaux',
        'brisbane', 'canberra', 'detroit', 'dublin', 'grenoble', 'helsinki',
        'kuopio', 'lisbon', 'luxembourg', 'melbourne', 'nantes', 'palermo',
        'paris', 'prague', 'rennes', 'rome', 'sydney', 'toulouse', 'turku',
        'venice', 'winnipeg',
    ]

In [4]:
def load_nodes(net,path):
    '''
    Add every stop listed in a network_nodes.csv file to ``net`` as a node.

    The file is read as a ';'-separated CSV. Each row becomes one node keyed
    by its ``stop_I`` value, with two attributes attached:
    ``coords`` as (lat, lon) and ``pos`` as (lon, lat).

    net  -- graph-like object providing ``add_node(node, **attributes)``
    path -- CSV location, in any form ``pandas.read_csv`` accepts

    Nothing is returned; ``net`` is modified in place.
    '''
    raw_stops = pd.read_csv(path, sep=";")
    stops = pd.DataFrame(raw_stops, columns=['stop_I', 'lat', 'lon', 'name'])
    for _, stop in stops.iterrows():
        latitude = stop['lat']
        longitude = stop['lon']
        net.add_node(stop['stop_I'],
                     coords=(latitude, longitude),
                     pos=(longitude, latitude))

In [16]:
def load_edges_pspace(net, path):
    '''
    Add P-space edges to ``net`` from a network_temporal_day.csv file.

    The file is read as a ';'-separated CSV of consecutive stop-to-stop
    events. Rows are grouped, in file order, into paths: a row extends the
    current path when it departs from the stop the previous row arrived at,
    departs at the previous row's arrival time, and its ``seq`` is the
    previous ``seq`` plus one; otherwise it starts a new path. Identical
    stop sequences are kept only once. For every remaining path, each stop
    is linked to every later stop on that path.

    net  -- graph-like object providing ``add_edge(u, v)``
    path -- CSV location, in any form ``pandas.read_csv`` accepts

    Nothing is returned; ``net`` is modified in place. A file without data
    rows adds no edges.
    '''
    data = pd.read_csv(path, delimiter=";")
    data_df = pd.DataFrame(data, columns=['from_stop_I', 'to_stop_I', 'dep_time_ut','arr_time_ut',
                                          'route_type','trip_I','seq','route_I'])
    if data_df.empty:
        return

    # Pull the columns out once; per-element Series lookups inside the loop
    # are far slower than plain list indexing.
    from_stops = data_df['from_stop_I'].tolist()
    to_stops = data_df['to_stop_I'].tolist()
    departures = data_df['dep_time_ut'].tolist()
    arrivals = data_df['arr_time_ut'].tolist()
    sequence = data_df['seq'].tolist()

    stop_paths = []
    current_path = [from_stops[0], to_stops[0]]
    for i in range(1, len(from_stops)):
        continues_previous = (from_stops[i] == to_stops[i - 1] and
                              departures[i] == arrivals[i - 1] and
                              sequence[i] == sequence[i - 1] + 1)
        if continues_previous:
            current_path.append(to_stops[i])
        else:
            stop_paths.append(current_path)
            current_path = [from_stops[i], to_stops[i]]
    stop_paths.append(current_path)

    # Keep the first occurrence of each distinct stop sequence. A set of
    # tuples makes this linear instead of comparing against every kept path.
    already_linked = set()
    for stops in stop_paths:
        signature = tuple(stops)
        if signature in already_linked:
            continue
        already_linked.add(signature)
        for first in range(len(stops)):
            for second in range(first + 1, len(stops)):
                net.add_edge(stops[first], stops[second])

In [14]:
def create_network(city):
    '''
    Build and return the P-space network of one city.

    In P-space, stops are nodes and two stops are linked when one can be
    reached from the other without changing means of transport.

    city -- name of the city; its files are read from
            data/<city>/network_nodes.csv and
            data/<city>/network_temporal_day.csv

    Returns a new undirected ``networkx.Graph``.
    '''
    city_dir = 'data/' + city
    net = nx.Graph()
    load_nodes(net, city_dir + '/network_nodes.csv')
    load_edges_pspace(net, city_dir + '/network_temporal_day.csv')
    return net

    

In [6]:
def get_component_size_dist(net):
    '''
    Count how many connected components of ``net`` have each size.

    net -- graph accepted by ``networkx.connected_components``

    Returns a dict mapping component size (number of nodes) to the number
    of components of that size. Keys are inserted in ascending size order.
    An empty graph yields an empty dict.
    '''
    dist = {}
    # The node set of a component already gives its size, so no subgraph is
    # needed; counting in one pass avoids rescanning the size list per entry.
    for size in sorted(len(component) for component in nx.connected_components(net)):
        dist[size] = dist.get(size, 0) + 1

    return dist


In [7]:
def compute_measures(net, city):
    '''
    Print a set of summary statistics for a network.

    Diameter and average shortest path length are computed on the largest
    connected component; every other measure is computed on the whole graph.

    net  -- networkx graph to describe; must contain at least one node
    city -- name of the city the network belongs to (currently unused)

    Nothing is returned; the measures are written to standard output.
    '''
    # List the components once: the same list supplies both the largest
    # component and the component count.
    components = list(nx.connected_components(net))
    GCC = net.subgraph(max(components, key=len))
    print( 'Number of nodes: ',  nx.number_of_nodes(net))
    print('Number of edges: ', nx.number_of_edges(net))
    print('Density: ',nx.density(net))
    print('Network diameter: ', nx.diameter(GCC))
    print('Average shortest path length: ', nx.average_shortest_path_length(GCC))
    print('Average clustering coefficient',  nx.average_clustering(net, count_zeros=True))
    print('Average degree: ', 2*net.number_of_edges() / float(net.number_of_nodes()))
    print('Number of component in the networek: ', len(components))
    print( 'Assortativity: ', nx.degree_assortativity_coefficient(net))
    print( 'Component distribution', get_component_size_dist(net))
    print('Average degree conectivity: ', nx.average_degree_connectivity(net))


In [9]:
def ccdf(input_list):
    '''
    Empirical complementary cumulative distribution of a sample.

    For each distinct value x in ``input_list`` this computes the fraction
    of samples that are greater than or equal to x.

    input_list -- sequence of numbers

    Returns a tuple ``(x_points, fractions)`` of numpy arrays: the distinct
    values in ascending order and the matching fractions.
    '''
    samples = np.array(input_list)
    distinct_values = np.unique(samples)
    total = float(samples.size)

    fractions = [np.count_nonzero(samples >= value) / total
                 for value in distinct_values]

    return (distinct_values, np.array(fractions))

In [10]:
def plot_ccdfs(x,datavecs, markers, labels):
    '''
    Draw several distributions on one log-log figure and save it as a PDF.

    The four arguments are parallel sequences, one entry per curve; curves
    are drawn up to the length of the shortest sequence.

    x        -- x values of each curve
    datavecs -- y values of each curve
    markers  -- matplotlib format string of each curve
    labels   -- legend label of each curve

    The figure is written to "Degree_distribution_P_space.pdf", shown, and
    returned.
    '''
    fig, ax = plt.subplots(figsize=(15,10))

    for curve_x, curve_y, fmt, legend_name in zip(x, datavecs, markers, labels):
        ax.loglog(curve_x, curve_y, fmt, label=legend_name)

    ax.set_xlabel('Degree')
    ax.set_ylabel('1-CDF degree')
    ax.legend(loc=0)

    plt.savefig("Degree_distribution_P_space.pdf", dpi=150)
    plt.show()
    return fig

In [12]:
def _scatter_marker(style, fallback='o'):
    '''Reduce a plot-style format string to a marker usable by ``scatter``.'''
    if not isinstance(style, str):
        return style
    # scatter draws no connecting line, so line-style tokens carry no meaning
    # here; longer tokens are removed first so '--' and '-.' go as a unit.
    for line_token in ('--', '-.', ':', '-'):
        style = style.replace(line_token, '')
    return style if style else fallback


def plot_degree_clustering(degrees,clusteringvec, markers, labels):
    '''
    Scatter clustering coefficient against node degree and save it as a PDF.

    The four arguments are parallel sequences, one entry per series; series
    are drawn up to the length of the shortest sequence.

    degrees       -- node degrees of each series (x values)
    clusteringvec -- clustering coefficients of each series (y values)
    markers       -- marker of each series. Plot-style format strings such
                     as '-o' or '--' are accepted as well: their line-style
                     part is dropped, and 'o' is used when no marker symbol
                     remains. Colour letters are not handled.
    labels        -- legend label of each series

    The figure is written to "Clustering_coefficient_P_space.pdf", shown,
    and returned.
    '''
    fig = plt.figure(figsize=(15,10))
    ax = fig.add_subplot(111)
    for degree, clustering, marker, label in zip(degrees, clusteringvec, markers, labels):
        # The marker must go by keyword: the third positional parameter of
        # scatter is the point size, not the marker.
        ax.scatter(degree, clustering, marker=_scatter_marker(marker), label=label)
    ax.set_xlabel('Nodes Degree (ki)')
    ax.set_ylabel('Clustering Coefficient (ci)')
    ax.legend(loc=0)
    plt.savefig("Clustering_coefficient_P_space.pdf", dpi=150)

    plt.show()
    return fig

In [13]:
def get_centrality_measures(network):
    '''
    Compute four centrality measures for every node of ``network``.

    network -- networkx graph

    Returns a list of four numpy arrays:
    ``[degree, betweenness, closeness, eigenvector_centrality]``.
    The degree centralities follow the order of ``network.nodes()``; the
    other three arrays hold their values sorted in descending order, so
    their positions do not correspond to the same node across arrays.
    '''
    # Convergence tolerance passed to the eigenvector centrality computation.
    tol = 10**-1
    nodes = network.nodes()
    degrees = nx.degree_centrality(network)
    betweenness = nx.betweenness_centrality(network, normalized=True)
    closeness = nx.closeness_centrality(network)
    eigenvector_centrality = nx.eigenvector_centrality(network, tol = tol)

    # Look node positions up in a map built once, rather than rebuilding and
    # scanning the node list for every item being sorted.
    position = {node: index for index, node in enumerate(nodes)}
    degree = np.array([value for _, value in
                       sorted(degrees.items(), key=lambda pair: position[pair[0]])])
    betweenness = np.array(sorted(betweenness.values(), reverse=True))
    closeness = np.array(sorted(closeness.values(), reverse=True))
    eigenvector_centrality = np.array(sorted(eigenvector_centrality.values(), reverse=True))

    return [degree, betweenness, closeness, eigenvector_centrality]

In [None]:
# Build the P-space network of every city, print its summary measures, and
# collect the per-city series used by the plotting cells below.
cities = get_list_cities_names()

# Results are keyed by the city's position in `cities`.
cities_ccdf_degree = {}    # fraction of nodes with degree >= each distinct degree
cities_degree = {}         # distinct degree values (x axis of the CCDF)
cities_clustering = {}     # clustering coefficient of every node
cities_total_degree = {}   # degree of every node

for i, city in enumerate(cities):
    print(30*'-', city, 30*'-')

    # create_network loads both the nodes and the P-space edges, so the
    # graph is built exactly once per city.
    net = create_network(city)

    compute_measures(net, city)

    degrees = [net.degree(node) for node in nx.nodes(net)]
    cities_degree[i], cities_ccdf_degree[i] = ccdf(degrees)
    clustering = list(nx.clustering(net).values())
    cities_clustering[i] = clustering
    cities_total_degree[i] = degrees
    

------------------------------ adelaide ------------------------------
Number of nodes:  7548
Number of edges:  432817
Density:  0.015195957303845908
Network diameter:  6


In [14]:
# Plot the degree CCDF of every city on one log-log figure.
labels = cities
# One matplotlib format string per city: an 11-style cycle used twice,
# followed by five more styles (27 in total).
markers = (['-', '-.', '.', '--', '-o', '-+', '-*', '+', '_', '_-', '*'] * 2
           + ['--', '-o', '-+', '-*', '+'])
x = [list(degree_values) for degree_values in cities_degree.values()]
datavecs = [list(fractions) for fractions in cities_ccdf_degree.values()]
plot_ccdfs(x, datavecs, markers, labels);

NameError: name 'cities' is not defined

In [13]:
# Plot clustering coefficient against node degree for every city.
labels = cities
# One style string per city: an 11-style cycle used twice, followed by five
# more styles (27 in total).
markers = (['-', '-.', '.', '--', '-o', '-+', '-*', '+', '_', '_-', '*'] * 2
           + ['--', '-o', '-+', '-*', '+'])
x = [list(node_degrees) for node_degrees in cities_total_degree.values()]
clusteringvec = [list(coefficients) for coefficients in cities_clustering.values()]
plot_degree_clustering(x, clusteringvec, markers, labels);

NameError: name 'cities' is not defined