In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import sparse
from collections import Counter
import networkx as nx
from scipy import stats

### Reading already parsed datasets

In [2]:
NY_df = pd.read_csv('data/newyork_dataset', sep=',',low_memory=False, header=0, encoding='utf8')
CHI_df = pd.read_csv('data/chicago_dataset', sep=',',low_memory=False, header=0, encoding='utf8')
BOS_df = pd.read_csv('data/boston_dataset', sep=',',low_memory=False, header=0, encoding='utf8')

Number of station - nodes for each dataset

In [3]:
print('NY: # start stations =',(NY_df['start station name'].nunique()), '; # end stations =',(NY_df['end station name'].nunique()))
print('CHI: # start stations =',(CHI_df['start station name'].nunique()), '; # end stations =',(CHI_df['end station name'].nunique()))
print('BOS: # start stations =',(BOS_df['start station name'].nunique()), '; # end stations =',(BOS_df['end station name'].nunique()))

NY: # start stations = 784 ; # end stations = 791
CHI: # start stations = 571 ; # end stations = 569
BOS: # start stations = 268 ; # end stations = 268


## Network creation

Create directed networks such that the nodes correspond to the stations and the edges to the trips starting from one stations and ending in another one. (Actually this may require dataset cleaning since some trips are made from and to the same station). 

In [4]:
def network_creation(df):
    nodes = np.array(list(set.union(set(df.loc[:, 'start station name']), set(df.loc[:, 'end station name']))))
    edges = zip(df['start station name'], df['end station name'])
    G = nx.DiGraph()
    G.add_nodes_from(nodes)
    G.add_edges_from(edges)
    print(nx.info(G))
    return G

NY_graph = network_creation(NY_df)
CHI_graph = network_creation(CHI_df)
BOS_graph = network_creation(BOS_df)

DiGraph with 791 nodes and 187520 edges
DiGraph with 573 nodes and 43057 edges
DiGraph with 268 nodes and 23679 edges


## Nodes Degrees

Let's see which are the stations having the highest number of incoming and outgoing bikes for each BSS

In [5]:
import operator

def nodes_degrees(G):
    in_degrees = dict(G.in_degree(G.nodes()))
    out_degrees = dict(G.out_degree(G.nodes()))
    degrees = dict(G.degree(G.nodes()))
    nx.set_node_attributes(G, in_degrees, 'in degree')
    nx.set_node_attributes(G, out_degrees, 'out degree')
    nx.set_node_attributes(G, degrees, 'degree')
    df = pd.DataFrame.from_dict({'Nodes':G.nodes(), 'degree':list(degrees.values()), 'in degree':list(in_degrees.values()),
                                'out degree':list(out_degrees.values())})
    print('Top 5 stations by degree')
    display(df[['Nodes', 'degree']].sort_values('degree', ascending=False).head(5))
    print('\n Top 5 stations by in degree')
    display(df[['Nodes', 'in degree']].sort_values('in degree', ascending=False).head(5))
    print('\n Top 5 stations by out degree')
    display(df[['Nodes', 'out degree']].sort_values('out degree', ascending=False).head(5))
    return G, df

In [6]:
NY_graph, NY_nodes = nodes_degrees(NY_graph)

Top 5 stations by degree


Unnamed: 0,Nodes,degree
504,E 17 St & Broadway,985
158,Pershing Square North,979
248,Broadway & E 22 St,968
358,Cleveland Pl & Spring St,881
733,Grand St & Elizabeth St,877



 Top 5 stations by in degree


Unnamed: 0,Nodes,in degree
504,E 17 St & Broadway,491
248,Broadway & E 22 St,478
158,Pershing Square North,466
103,1 Ave & E 68 St,456
358,Cleveland Pl & Spring St,450



 Top 5 stations by out degree


Unnamed: 0,Nodes,out degree
158,Pershing Square North,513
504,E 17 St & Broadway,494
248,Broadway & E 22 St,490
733,Grand St & Elizabeth St,444
181,Broadway & E 14 St,436


In [7]:
CHI_graph, CHI_nodes = nodes_degrees(CHI_graph)

Top 5 stations by degree


Unnamed: 0,Nodes,degree
412,Daley Center Plaza,491
203,Clinton St & Madison St,473
506,Streeter Dr & Grand Ave,457
534,Clinton St & Washington Blvd,450
210,Franklin St & Monroe St,445



 Top 5 stations by in degree


Unnamed: 0,Nodes,in degree
506,Streeter Dr & Grand Ave,238
68,Damen Ave & Pierce Ave,228
412,Daley Center Plaza,227
203,Clinton St & Madison St,224
246,Michigan Ave & Oak St,214



 Top 5 stations by out degree


Unnamed: 0,Nodes,out degree
412,Daley Center Plaza,264
203,Clinton St & Madison St,249
113,Canal St & Adams St,246
534,Clinton St & Washington Blvd,245
210,Franklin St & Monroe St,242


In [8]:
BOS_graph, BOS_nodes = nodes_degrees(BOS_graph)

Top 5 stations by degree


Unnamed: 0,Nodes,degree
118,Christian Science Plaza - Massachusetts Ave at...,375
8,Harvard Square at Mass Ave/ Dunster,367
161,Back Bay T Stop - Dartmouth St at Stuart St,366
33,MIT at Mass Ave / Amherst St,365
176,Dartmouth St at Newbury St,359



 Top 5 stations by in degree


Unnamed: 0,Nodes,in degree
8,Harvard Square at Mass Ave/ Dunster,190
118,Christian Science Plaza - Massachusetts Ave at...,187
246,South Station - 700 Atlantic Ave,183
161,Back Bay T Stop - Dartmouth St at Stuart St,180
183,Boylston St at Massachusetts Ave,179



 Top 5 stations by out degree


Unnamed: 0,Nodes,out degree
118,Christian Science Plaza - Massachusetts Ave at...,188
33,MIT at Mass Ave / Amherst St,187
161,Back Bay T Stop - Dartmouth St at Stuart St,186
3,Central Square at Mass Ave / Essex St,182
176,Dartmouth St at Newbury St,181


## Centralitiy measures

### PageRank centrality

In [9]:
def pagerank(G, df):
    adj = nx.linalg.graphmatrix.adjacency_matrix(G)
    authorities = nx.algorithms.link_analysis.pagerank_alg.pagerank(nx.DiGraph(adj.T), tol = (1e-4)/len(nx.DiGraph(adj.T)))
    hubs = nx.algorithms.link_analysis.pagerank_alg.pagerank(nx.DiGraph(adj), tol = (1e-4)/len(nx.DiGraph(adj.T)))
    df['PageRank authority'] = list(authorities.values())
    df['PageRank hub'] = list(hubs.values())
    display(df.sort_values('PageRank authority', ascending=False).head(5))
    display(df.sort_values('PageRank hub', ascending=False).head(5))
    return G, df

In [10]:
NY_graph, NY_nodes = pagerank(NY_graph, NY_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub
158,Pershing Square North,979,466,513,0.002504,0.00223
504,E 17 St & Broadway,985,491,494,0.002383,0.002364
248,Broadway & E 22 St,968,478,490,0.002378,0.002287
755,1 Ave & E 62 St,865,434,431,0.00221,0.002165
733,Grand St & Elizabeth St,877,433,444,0.002151,0.002096


Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub
504,E 17 St & Broadway,985,491,494,0.002383,0.002364
248,Broadway & E 22 St,968,478,490,0.002378,0.002287
585,Kent Ave & N 7 St,841,432,409,0.002099,0.002283
103,1 Ave & E 68 St,871,456,415,0.002107,0.002268
522,S 4 St & Wythe Ave,859,431,428,0.002133,0.002245


In [11]:
CHI_graph, CHI_nodes = pagerank(CHI_graph, CHI_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub
412,Daley Center Plaza,491,227,264,0.004999,0.004045
203,Clinton St & Madison St,473,224,249,0.00499,0.004062
534,Clinton St & Washington Blvd,450,205,245,0.00467,0.003617
113,Canal St & Adams St,435,189,246,0.004624,0.003321
210,Franklin St & Monroe St,445,203,242,0.004526,0.003541


Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub
506,Streeter Dr & Grand Ave,457,238,219,0.004288,0.004955
68,Damen Ave & Pierce Ave,434,228,206,0.003846,0.004313
271,Lake Shore Dr & Monroe St,390,205,185,0.003739,0.004282
203,Clinton St & Madison St,473,224,249,0.00499,0.004062
412,Daley Center Plaza,491,227,264,0.004999,0.004045


In [12]:
BOS_graph, BOS_nodes = pagerank(BOS_graph, BOS_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub
118,Christian Science Plaza - Massachusetts Ave at...,375,187,188,0.007205,0.007261
161,Back Bay T Stop - Dartmouth St at Stuart St,366,180,186,0.007117,0.006798
33,MIT at Mass Ave / Amherst St,365,178,187,0.006979,0.006555
178,Cambridge St at Joy St,344,172,172,0.006839,0.006334
3,Central Square at Mass Ave / Essex St,358,176,182,0.006837,0.006723


Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub
8,Harvard Square at Mass Ave/ Dunster,367,190,177,0.006684,0.007322
118,Christian Science Plaza - Massachusetts Ave at...,375,187,188,0.007205,0.007261
246,South Station - 700 Atlantic Ave,358,183,175,0.006769,0.006992
161,Back Bay T Stop - Dartmouth St at Stuart St,366,180,186,0.007117,0.006798
56,Nashua Street at Red Auerbach Way,330,170,160,0.006147,0.006753


### Betweenness

Betweenness centrality, which is also expressed on a scale of 0 to 1, is fairly good at finding nodes that connect two otherwise disparate parts of a network.

In [13]:
def betweenness(G, df):
    betweenness_dict = nx.betweenness_centrality(G)
    nx.set_node_attributes(G, betweenness_dict, 'betweenness')
    df['betweenness'] = list(betweenness_dict.values())
    display(df.sort_values('betweenness', ascending=False).head(5))
    return G, df

In [14]:
NY_graph, NY_nodes = betweenness(NY_graph, NY_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub,betweenness
585,Kent Ave & N 7 St,841,432,409,0.002099,0.002283,0.006729
504,E 17 St & Broadway,985,491,494,0.002383,0.002364,0.005772
136,Queens Plaza North & Crescent St,741,379,362,0.001968,0.002011,0.005692
679,Broadway & Battery Pl,832,443,389,0.002092,0.002157,0.005535
158,Pershing Square North,979,466,513,0.002504,0.00223,0.005378


In [15]:
CHI_graph, CHI_nodes = betweenness(CHI_graph, CHI_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub,betweenness
506,Streeter Dr & Grand Ave,457,238,219,0.004288,0.004955,0.026654
8,Ellis Ave & 60th St,77,40,37,0.002126,0.002047,0.020376
271,Lake Shore Dr & Monroe St,390,205,185,0.003739,0.004282,0.020016
285,Burnham Harbor,238,116,122,0.003155,0.003122,0.019647
71,Halsted St & 63rd St,19,10,9,0.001524,0.002709,0.015357


In [16]:
BOS_graph, BOS_nodes = betweenness(BOS_graph, BOS_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub,betweenness
56,Nashua Street at Red Auerbach Way,330,170,160,0.006147,0.006753,0.026659
178,Cambridge St at Joy St,344,172,172,0.006839,0.006334,0.016168
235,Tremont St at West St,323,165,158,0.006266,0.006426,0.015237
246,South Station - 700 Atlantic Ave,358,183,175,0.006769,0.006992,0.014996
118,Christian Science Plaza - Massachusetts Ave at...,375,187,188,0.007205,0.007261,0.013817


## Community detection with modularity

In [17]:
def communities(G, df):
    communities = nx.algorithms.community.modularity_max.greedy_modularity_communities(G)
    node_comm = pd.DataFrame({'Nodes':G.nodes()})
    node_comm['Community']=np.zeros(len(node_comm.index),dtype='int8')
    for i, set in enumerate(communities):
        for element in set:
            node_comm.iloc[node_comm['Nodes'] == element, 1]=i
    for i, set in enumerate(communities):
        display(node_comm[node_comm['Community']==i])
    
    df['Community'] = node_comm['Community']
    return G, df

In [18]:
NY_graph, NY_nodes = communities(NY_graph, NY_nodes)

Unnamed: 0,Nodes,Community
1,1 Ave & E 110 St,0
2,E 33 St & 5 Ave,0
3,Avenue D & E 3 St,0
4,Broadway & W 49 St,0
5,27 Ave & 9 St,0
...,...,...
783,1 Ave & E 44 St,0
784,E 55 St & 3 Ave,0
785,Avenue C & E 18 St,0
787,E 115 St & Lexington Ave,0


Unnamed: 0,Nodes,Community
0,Berkeley Pl & 6 Ave,1
7,S Portland Ave & Hanson Pl,1
8,Carroll St & Washington Ave,1
13,Park Ave & St Edwards St,1
14,S 3 St & Bedford Ave,1
...,...,...
777,Stagg St & Morgan Ave,1
778,Putnam Ave & Nostrand Ave,1
779,Bayard St & Leonard St,1
786,Willoughby Ave & Walworth St,1


Unnamed: 0,Nodes,Community
11,Soissons Landing,2
15,E 2 St & 2 Ave,2
32,William St & Pine St,2
42,6 Ave & Canal St,2
56,Stanton St & Norfolk St,2
140,Suffolk St & Stanton St,2
146,MacDougal St & Prince St,2
197,Pitt St & Stanton St,2
202,Centre St & Worth St,2
236,Old Slip & Front St,2


In [19]:
CHI_graph, CHI_nodes = communities(CHI_graph, CHI_nodes)

Unnamed: 0,Nodes,Community
1,Kimbark Ave & 53rd St,0
2,Ogden Ave & Chicago Ave,0
5,Racine Ave & 15th St,0
6,Wabash Ave & Roosevelt Rd,0
8,Ellis Ave & 60th St,0
...,...,...
560,Western Ave & 28th St,0
561,Halsted St & Roosevelt Rd,0
564,Ellis Ave & 58th St,0
568,Eckhart Park,0


Unnamed: 0,Nodes,Community
0,Wells St & Evergreen Ave,1
3,Keystone Ave & Montrose Ave,1
4,Milwaukee Ave & Cuyler Ave,1
9,Clark St & Elmdale Ave,1
10,Southport Ave & Irving Park Rd,1
...,...,...
558,Lincoln Ave & Winona St,1
562,Leavitt St & North Ave,1
567,Western Ave & Howard St,1
570,Southport Ave & Roscoe St,1


Unnamed: 0,Nodes,Community
23,South Shore Dr & 71st St,2
70,Stony Island Ave & 67th St,2
115,Cottage Grove Ave & 71st St,2
129,Rhodes Ave & 71st St,2
130,Jeffery Blvd & 76th St,2
138,Bennett Ave & 79th St,2
141,South Shore Dr & 74th St,2
199,Wabash Ave & 87th St,2
211,Calumet Ave & 71st St,2
234,Stony Island Ave & 71st St,2


Unnamed: 0,Nodes,Community
7,Ashland Ave & Pershing Rd,3
54,Morgan St & Pershing Rd,3
201,Throop St & 52nd St,3
399,Damen Ave & 51st St,3
461,Western Blvd & 48th Pl,3
476,Damen Ave & Pershing Rd,3
482,Damen Ave & 59th St,3
485,Marshfield Ave & 44th St,3
509,Ashland Ave & McDowell Ave,3
520,Seeley Ave & Garfield Blvd,3


Unnamed: 0,Nodes,Community
101,Kedzie Ave & 21st St,4
295,Central Park Ave & 24th St,4
450,Central Park Ave & Ogden Ave,4
492,Millard Ave & 26th St,4


Unnamed: 0,Nodes,Community
184,Laramie Ave & Kinzie St,5
498,Laramie Ave & Madison St,5


Unnamed: 0,Nodes,Community
33,Racine Ave & 61st St,6


Unnamed: 0,Nodes,Community
103,Racine Ave & Garfield Blvd,7


Unnamed: 0,Nodes,Community
474,Central Ave & Harrison St,8


Unnamed: 0,Nodes,Community
529,Central Ave & Chicago Ave,9


Unnamed: 0,Nodes,Community
552,Exchange Ave & 79th St,10


Unnamed: 0,Nodes,Community
571,State St & 76th St,11


In [20]:
BOS_graph, BOS_nodes = communities(BOS_graph, BOS_nodes)

Unnamed: 0,Nodes,Community
1,Roslindale Village - Washington St,0
2,Williams St at Washington St,0
4,Walnut Ave at Warren St,0
7,Morton St T,0
9,Ruggles T Stop - Columbus Ave at Melnea Cass Blvd,0
...,...,...
256,Grove Hall Library - 41 Geneva Ave,0
259,NCAAA - Walnut Ave at Crawford St,0
263,Boston Convention and Exhibition Center - Summ...,0
265,New Balance - 20 Guest St,0


Unnamed: 0,Nodes,Community
0,Foss Park,1
3,Central Square at Mass Ave / Essex St,1
8,Harvard Square at Mass Ave/ Dunster,1
13,Main St at Thompson Sq,1
14,Somerville City Hall,1
...,...,...
258,Kendall Street,1
260,Verizon Innovation Hub 10 Ware Street,1
262,700 Huron Ave,1
264,Charlestown Navy Yard,1


Unnamed: 0,Nodes,Community
5,Piers Park,2
6,The Eddy - New St at Sumner St,2
19,East Boston Neighborhood Health Center - 20 Ma...,2
102,Orient Heights T Stop - Bennington St at Sarat...,2
104,Boston East - 126 Border St,2
133,Airport T Stop - Bremen St at Brooks St,2
148,Maverick Square - Lewis Mall,2
180,Chelsea St at Saratoga St,2
182,Bennington St at Byron St,2
200,Bennington St at Constitution Beach,2


Unnamed: 0,Nodes,Community
68,Faneuil St at Arlington St,3
127,Clarendon St at Commonwealth Ave,3


Unnamed: 0,Nodes,Community
99,BCBS Hingham,4


Unnamed: 0,Nodes,Community
126,BCBS Quincy,5


## Visualization

These are the resulting dataframes obtained throught all the previously done measures.

In [21]:
display(NY_nodes)
display(CHI_nodes)
display(BOS_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub,betweenness,Community
0,Berkeley Pl & 6 Ave,363,189,174,0.000989,0.001108,0.000345,1
1,1 Ave & E 110 St,331,173,158,0.000937,0.000956,0.000323,0
2,E 33 St & 5 Ave,730,373,357,0.001756,0.001784,0.001416,0
3,Avenue D & E 3 St,482,248,234,0.001195,0.001237,0.000322,0
4,Broadway & W 49 St,718,353,365,0.001807,0.001683,0.001298,0
...,...,...,...,...,...,...,...,...
786,Willoughby Ave & Walworth St,400,199,201,0.001111,0.001157,0.000472,1
787,E 115 St & Lexington Ave,277,149,128,0.000804,0.000853,0.000163,0
788,John St & William St,579,292,287,0.001427,0.001453,0.000743,2
789,West Thames St,575,300,275,0.001373,0.001463,0.000549,0


Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub,betweenness,Community
0,Wells St & Evergreen Ave,329,165,164,0.003003,0.002918,0.001969,1
1,Kimbark Ave & 53rd St,83,40,43,0.001831,0.001953,0.007518,0
2,Ogden Ave & Chicago Ave,279,132,147,0.002833,0.002308,0.001390,0
3,Keystone Ave & Montrose Ave,38,21,17,0.000677,0.000757,0.000190,1
4,Milwaukee Ave & Cuyler Ave,33,15,18,0.000705,0.000695,0.000123,1
...,...,...,...,...,...,...,...,...
568,Eckhart Park,255,131,124,0.002427,0.002287,0.001192,0
569,Halsted St & Polk St,184,94,90,0.002008,0.002069,0.002138,0
570,Southport Ave & Roscoe St,314,158,156,0.002777,0.003094,0.003028,1
571,State St & 76th St,3,2,1,0.000461,0.002148,0.000000,11


Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub,betweenness,Community
0,Foss Park,75,38,37,0.001816,0.001875,0.000163,1
1,Roslindale Village - Washington St,30,12,18,0.001392,0.001083,0.000101,0
2,Williams St at Washington St,92,46,46,0.002236,0.002507,0.001761,0
3,Central Square at Mass Ave / Essex St,358,176,182,0.006837,0.006723,0.008802,1
4,Walnut Ave at Warren St,41,23,18,0.001241,0.001458,0.000220,0
...,...,...,...,...,...,...,...,...
263,Boston Convention and Exhibition Center - Summ...,130,63,67,0.002835,0.002695,0.000470,0
264,Charlestown Navy Yard,191,94,97,0.003801,0.003941,0.006936,1
265,New Balance - 20 Guest St,140,68,72,0.003067,0.002792,0.000813,0
266,Wilson Square,132,64,68,0.002842,0.002869,0.000535,1
