In [1]:
import networkx as nx
import csv
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

Reading graph edges and nodes:

In [3]:
with open('../csv_files/metro_edges_no_duplicated_edges_networkx.csv') as f:
    f.readline()        
    g = nx.parse_edgelist(f, delimiter=',', nodetype=int, data=(('Weight', float), ('edge_name', str), ('edge_color', str), ('travel_seconds', int) ), create_using = nx.DiGraph())
    
with open('../csv_files/metro_gephi_nodes_coordinates.csv') as f:
    reader = csv.DictReader(f)

    node_latitudes = {}
    node_longitudes = {}
    node_names = {}
    
    for row in reader:
        node_latitudes[ int(row['Id']) ] = float(row['latitude'])
        node_longitudes[ int(row['Id']) ] = float(row['longitude'])
        node_names[ int(row['Id']) ] = row['Label']
        
nx.set_node_attributes(g, name = 'latitude', values = node_latitudes)
nx.set_node_attributes(g, name = 'longitude', values = node_longitudes)
nx.set_node_attributes(g, name = 'name', values = node_names)

In [4]:
def top_n_stations_by_attribute(graph, attr_name, n):
    return pd.DataFrame.from_records(map(lambda x: x[1],  list(graph.nodes(data=True)) ))[['name', attr_name]].sort_values(attr_name, ascending = False)[:n].reset_index(drop=True).shift()[1:]

Top 35 stations with more neighbour stations 

In [11]:
nx.set_node_attributes(g, name = 'degree', values = dict(g.degree))

top_n_stations_by_attribute(g, 'degree', 35)

Unnamed: 0,name,degree
1,AVENIDA DE AMERICA,8.0
2,CUATRO CAMINOS,7.0
3,PRINCIPE PIO,7.0
4,NUEVOS MINISTERIOS,7.0
5,LEGAZPI,6.0
6,PACIFICO,6.0
7,ALONSO MARTINEZ,6.0
8,DIEGO DE LEON,6.0
9,MANUEL BECERRA,6.0
10,OPORTO,6.0


Calculating stations importance using Closeness Centrality: This metric indicates how long it will take for information from a node u will take to reach other nodes in the network.

In [6]:
nx.set_node_attributes(g, name = 'closeness_centrality', values = nx.closeness_centrality(g, distance = 'travel_seconds'))

Top 20 most important (according to Closeness Centrality algorithm) Metro stations are shown

In [7]:
top_n_stations_by_attribute(g, 'closeness_centrality', 20)

Unnamed: 0,name,closeness_centrality
1,GREGORIO MARAÑON,0.000138
2,CUATRO CAMINOS,0.000123
3,NUEVOS MINISTERIOS,0.000121
4,GUZMAN EL BUENO,0.000121
5,METROPOLITANO,0.00012
6,CIUDAD UNIVERSITARIA,0.000118
7,MONCLOA,0.000117
8,REPUBLICA ARGENTINA,0.000114
9,AVENIDA DE AMERICA,0.000108
10,FRANCOS RODRIGUEZ,0.000107


Another metric to have in mind: Betweenness Centrality. This metric indicates how ofthen a node is found on a shortest path between two nodes in the network.

In [8]:
nx.set_node_attributes(g, name = 'betweenness_centrality', values = nx.betweenness_centrality(g, normalized = True, weight = 'Weight'))

Top 20 most important (according to Betweeness Centrality algorithm) Metro stations are shown

In [9]:
top_n_stations_by_attribute(g, 'betweenness_centrality', 20)

Unnamed: 0,name,betweenness_centrality
1,PRINCIPE PIO,0.014627
2,LEGAZPI,0.013459
3,PLAZA ELIPTICA,0.013267
4,OPORTO,0.013023
5,USERA,0.012901
6,OPAÑEL,0.012552
7,CARPETANA,0.012413
8,LAGUNA,0.012291
9,LUCERO,0.012099
10,PACIFICO,0.011925
