In [17]:
import networkx as nx
import csv
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

Reading graph edges and nodes:

In [3]:
with open('../csv_files/metro_edges_no_duplicated_edges_networkx.csv') as f:
    f.readline()
        
    g = nx.parse_edgelist(f, delimiter=',', nodetype=int, data=(('Weight', float), ('edge_name', str), ('edge_color', str), ('travel_seconds', int) ))
    
with open('../csv_files/metro_gephi_nodes_coordinates.csv') as f:
    reader = csv.DictReader(f)

    node_latitudes = {}
    node_longitudes = {}
    node_names = {}
    
    for row in reader:
        node_latitudes[ int(row['Id']) ] = float(row['latitude'])
        node_longitudes[ int(row['Id']) ] = float(row['longitude'])
        node_names[ int(row['Id']) ] = row['Label']
        
nx.set_node_attributes(g, name = 'latitude', values = node_latitudes)
nx.set_node_attributes(g, name = 'longitude', values = node_longitudes)
nx.set_node_attributes(g, name = 'name', values = node_names)

In [59]:
def top_n_stations_by_attribute(graph, attr_name, n):
    return pd.DataFrame.from_records(map(lambda x: x[1],  list(graph.nodes(data=True)) ))[['name', attr_name]].sort_values(attr_name, ascending = False)[:n].reset_index(drop=True).shift()[1:]

Top 35 stations with more connections to other Metro stations

In [60]:
nx.set_node_attributes(g, name = 'degree', values = dict(g.degree))

top_n_stations_by_attribute(g, 'degree', 35)

Unnamed: 0,name,degree
1,AVENIDA DE AMERICA,7.0
2,SOL,6.0
3,ALONSO MARTINEZ,6.0
4,PLAZA DE CASTILLA,5.0
5,OPERA,5.0
6,PRINCIPE PIO,5.0
7,DIEGO DE LEON,5.0
8,NUEVOS MINISTERIOS,5.0
9,CUATRO CAMINOS,5.0
10,MAR DE CRISTAL,4.0


Calculating stations importance using Closeness Centrality: This metric indicates how long it will take for information from a node u will take to reach other nodes in the network.

In [None]:
nx.set_node_attributes(g, name = 'closeness_centrality', values = nx.closeness_centrality(g, distance = 'travel_seconds'))

Top 20 most important (according to Closeness Centrality algorithm) Metro stations are shown

In [61]:
top_n_stations_by_attribute(g, 'closeness_centrality', 20)

Unnamed: 0,name,closeness_centrality
1,GREGORIO MARAÑON,0.000383
2,ALONSO MARTINEZ,0.000382
3,TRIBUNAL,0.000376
4,AVENIDA DE AMERICA,0.000373
5,PLAZA DE ESPAÑA,0.000369
6,NUÑEZ DE BALBOA,0.000366
7,NUEVOS MINISTERIOS,0.000366
8,RUBEN DARIO,0.000366
9,BILBAO,0.000364
10,DIEGO DE LEON,0.000363


Another metric to have in mind: Betweenness Centrality. This metric indicates how ofthen a node is found on a shortest path between two nodes in the network.

In [63]:
nx.set_node_attributes(g, name = 'betweenness_centrality', values = nx.betweenness_centrality(g, normalized = True, weight = 'Weight'))

Top 20 most important (according to Betweeness Centrality algorithm) Metro stations are shown

In [64]:
top_n_stations_by_attribute(g, 'betweenness_centrality', 20)

Unnamed: 0,name,betweenness_centrality
1,PRINCIPE PIO,0.347908
2,AVENIDA DE AMERICA,0.315934
3,NUEVOS MINISTERIOS,0.310146
4,ALONSO MARTINEZ,0.289435
5,CASA DE CAMPO,0.253766
6,GREGORIO MARAÑON,0.253208
7,LAGO,0.245746
8,BATAN,0.240098
9,TRIBUNAL,0.229812
10,COLONIA JARDIN,0.225907
