In [96]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import sparse
from collections import Counter
import networkx as nx
from scipy import stats
import math

### Reading already parsed datasets

In [2]:
NY_df = pd.read_csv('data/newyork_dataset', sep=',',low_memory=False, header=0, encoding='utf8')
CHI_df = pd.read_csv('data/chicago_dataset', sep=',',low_memory=False, header=0, encoding='utf8')
BOS_df = pd.read_csv('data/boston_dataset', sep=',',low_memory=False, header=0, encoding='utf8')

Number of station - nodes for each dataset

In [3]:
print('NY: # start stations =',(NY_df['start station name'].nunique()), '; # end stations =',(NY_df['end station name'].nunique()))
print('CHI: # start stations =',(CHI_df['start station name'].nunique()), '; # end stations =',(CHI_df['end station name'].nunique()))
print('BOS: # start stations =',(BOS_df['start station name'].nunique()), '; # end stations =',(BOS_df['end station name'].nunique()))

NY: # start stations = 784 ; # end stations = 791
CHI: # start stations = 571 ; # end stations = 569
BOS: # start stations = 268 ; # end stations = 268


## Network creation

Create directed networks such that the nodes correspond to the stations and the edges to the trips starting from one stations and ending in another one. (Actually this may require dataset cleaning since some trips are made from and to the same station). 

In [4]:
def network_creation(df):
    nodes = np.array(list(set.union(set(df.loc[:, 'start station name']), set(df.loc[:, 'end station name']))))
    edges = zip(df['start station name'], df['end station name'])
    G = nx.DiGraph()
    G.add_nodes_from(nodes)
    G.add_edges_from(edges)
    print(nx.info(G))
    return G

NY_graph = network_creation(NY_df)
CHI_graph = network_creation(CHI_df)
BOS_graph = network_creation(BOS_df)

DiGraph with 791 nodes and 187520 edges
DiGraph with 573 nodes and 43057 edges
DiGraph with 268 nodes and 23679 edges


## Nodes Degrees

Let's see which are the stations having the highest number of incoming and outgoing bikes for each BSS

In [5]:
import operator

def nodes_degrees(G):
    in_degrees = dict(G.in_degree(G.nodes()))
    out_degrees = dict(G.out_degree(G.nodes()))
    degrees = dict(G.degree(G.nodes()))
    nx.set_node_attributes(G, in_degrees, 'in degree')
    nx.set_node_attributes(G, out_degrees, 'out degree')
    nx.set_node_attributes(G, degrees, 'degree')
    df = pd.DataFrame.from_dict({'Nodes':G.nodes(), 'degree':list(degrees.values()), 'in degree':list(in_degrees.values()),
                                'out degree':list(out_degrees.values())})
    print('Top 5 stations by degree')
    display(df[['Nodes', 'degree']].sort_values('degree', ascending=False).head(5))
    print('\n Top 5 stations by in degree')
    display(df[['Nodes', 'in degree']].sort_values('in degree', ascending=False).head(5))
    print('\n Top 5 stations by out degree')
    display(df[['Nodes', 'out degree']].sort_values('out degree', ascending=False).head(5))
    return G, df

In [6]:
NY_graph, NY_nodes = nodes_degrees(NY_graph)

Top 5 stations by degree


Unnamed: 0,Nodes,degree
12,E 17 St & Broadway,985
243,Pershing Square North,979
619,Broadway & E 22 St,968
503,Cleveland Pl & Spring St,881
212,Grand St & Elizabeth St,877



 Top 5 stations by in degree


Unnamed: 0,Nodes,in degree
12,E 17 St & Broadway,491
619,Broadway & E 22 St,478
243,Pershing Square North,466
41,1 Ave & E 68 St,456
503,Cleveland Pl & Spring St,450



 Top 5 stations by out degree


Unnamed: 0,Nodes,out degree
243,Pershing Square North,513
12,E 17 St & Broadway,494
619,Broadway & E 22 St,490
212,Grand St & Elizabeth St,444
156,Broadway & E 14 St,436


In [7]:
CHI_graph, CHI_nodes = nodes_degrees(CHI_graph)

Top 5 stations by degree


Unnamed: 0,Nodes,degree
553,Daley Center Plaza,491
32,Clinton St & Madison St,473
361,Streeter Dr & Grand Ave,457
58,Clinton St & Washington Blvd,450
50,Franklin St & Monroe St,445



 Top 5 stations by in degree


Unnamed: 0,Nodes,in degree
361,Streeter Dr & Grand Ave,238
206,Damen Ave & Pierce Ave,228
553,Daley Center Plaza,227
32,Clinton St & Madison St,224
453,Michigan Ave & Oak St,214



 Top 5 stations by out degree


Unnamed: 0,Nodes,out degree
553,Daley Center Plaza,264
32,Clinton St & Madison St,249
26,Canal St & Adams St,246
58,Clinton St & Washington Blvd,245
50,Franklin St & Monroe St,242


In [8]:
BOS_graph, BOS_nodes = nodes_degrees(BOS_graph)

Top 5 stations by degree


Unnamed: 0,Nodes,degree
73,Christian Science Plaza - Massachusetts Ave at...,375
182,Harvard Square at Mass Ave/ Dunster,367
80,Back Bay T Stop - Dartmouth St at Stuart St,366
258,MIT at Mass Ave / Amherst St,365
31,Dartmouth St at Newbury St,359



 Top 5 stations by in degree


Unnamed: 0,Nodes,in degree
182,Harvard Square at Mass Ave/ Dunster,190
73,Christian Science Plaza - Massachusetts Ave at...,187
46,South Station - 700 Atlantic Ave,183
80,Back Bay T Stop - Dartmouth St at Stuart St,180
178,Copley Square - Dartmouth St at Boylston St,179



 Top 5 stations by out degree


Unnamed: 0,Nodes,out degree
73,Christian Science Plaza - Massachusetts Ave at...,188
258,MIT at Mass Ave / Amherst St,187
80,Back Bay T Stop - Dartmouth St at Stuart St,186
229,Central Square at Mass Ave / Essex St,182
31,Dartmouth St at Newbury St,181


## Centralitiy measures

### PageRank centrality

In [9]:
def pagerank(G, df):
    adj = nx.linalg.graphmatrix.adjacency_matrix(G)
    authorities = nx.algorithms.link_analysis.pagerank_alg.pagerank(nx.DiGraph(adj.T), tol = (1e-4)/len(nx.DiGraph(adj.T)))
    hubs = nx.algorithms.link_analysis.pagerank_alg.pagerank(nx.DiGraph(adj), tol = (1e-4)/len(nx.DiGraph(adj.T)))
    df['PageRank authority'] = list(authorities.values())
    df['PageRank hub'] = list(hubs.values())
    display(df.sort_values('PageRank authority', ascending=False).head(5))
    display(df.sort_values('PageRank hub', ascending=False).head(5))
    return G, df

In [10]:
NY_graph, NY_nodes = pagerank(NY_graph, NY_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub
243,Pershing Square North,979,466,513,0.002504,0.00223
12,E 17 St & Broadway,985,491,494,0.002383,0.002364
619,Broadway & E 22 St,968,478,490,0.002378,0.002287
240,1 Ave & E 62 St,865,434,431,0.00221,0.002165
212,Grand St & Elizabeth St,877,433,444,0.002151,0.002096


Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub
12,E 17 St & Broadway,985,491,494,0.002383,0.002364
619,Broadway & E 22 St,968,478,490,0.002378,0.002287
38,Kent Ave & N 7 St,841,432,409,0.002099,0.002283
41,1 Ave & E 68 St,871,456,415,0.002107,0.002268
466,S 4 St & Wythe Ave,859,431,428,0.002133,0.002245


In [11]:
CHI_graph, CHI_nodes = pagerank(CHI_graph, CHI_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub
553,Daley Center Plaza,491,227,264,0.004999,0.004045
32,Clinton St & Madison St,473,224,249,0.00499,0.004062
58,Clinton St & Washington Blvd,450,205,245,0.00467,0.003617
26,Canal St & Adams St,435,189,246,0.004624,0.003321
50,Franklin St & Monroe St,445,203,242,0.004526,0.003541


Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub
361,Streeter Dr & Grand Ave,457,238,219,0.004288,0.004955
206,Damen Ave & Pierce Ave,434,228,206,0.003846,0.004313
140,Lake Shore Dr & Monroe St,390,205,185,0.003739,0.004282
32,Clinton St & Madison St,473,224,249,0.00499,0.004062
553,Daley Center Plaza,491,227,264,0.004999,0.004045


In [12]:
BOS_graph, BOS_nodes = pagerank(BOS_graph, BOS_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub
73,Christian Science Plaza - Massachusetts Ave at...,375,187,188,0.007205,0.007261
80,Back Bay T Stop - Dartmouth St at Stuart St,366,180,186,0.007117,0.006798
258,MIT at Mass Ave / Amherst St,365,178,187,0.006979,0.006555
251,Cambridge St at Joy St,344,172,172,0.006839,0.006334
229,Central Square at Mass Ave / Essex St,358,176,182,0.006837,0.006723


Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub
182,Harvard Square at Mass Ave/ Dunster,367,190,177,0.006684,0.007322
73,Christian Science Plaza - Massachusetts Ave at...,375,187,188,0.007205,0.007261
46,South Station - 700 Atlantic Ave,358,183,175,0.006769,0.006992
80,Back Bay T Stop - Dartmouth St at Stuart St,366,180,186,0.007117,0.006798
47,Nashua Street at Red Auerbach Way,330,170,160,0.006147,0.006753


### Betweenness

Betweenness centrality, which is also expressed on a scale of 0 to 1, is fairly good at finding nodes that connect two otherwise disparate parts of a network.

In [13]:
def betweenness(G, df):
    betweenness_dict = nx.betweenness_centrality(G)
    nx.set_node_attributes(G, betweenness_dict, 'betweenness')
    df['betweenness'] = list(betweenness_dict.values())
    display(df.sort_values('betweenness', ascending=False).head(5))
    return G, df

In [14]:
NY_graph, NY_nodes = betweenness(NY_graph, NY_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub,betweenness
38,Kent Ave & N 7 St,841,432,409,0.002099,0.002283,0.006729
12,E 17 St & Broadway,985,491,494,0.002383,0.002364,0.005772
772,Queens Plaza North & Crescent St,741,379,362,0.001968,0.002011,0.005692
437,Broadway & Battery Pl,832,443,389,0.002092,0.002157,0.005535
243,Pershing Square North,979,466,513,0.002504,0.00223,0.005378


In [15]:
CHI_graph, CHI_nodes = betweenness(CHI_graph, CHI_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub,betweenness
361,Streeter Dr & Grand Ave,457,238,219,0.004288,0.004955,0.026654
230,Ellis Ave & 60th St,77,40,37,0.002126,0.002047,0.020376
140,Lake Shore Dr & Monroe St,390,205,185,0.003739,0.004282,0.020016
196,Burnham Harbor,238,116,122,0.003155,0.003122,0.019647
74,Halsted St & 63rd St,19,10,9,0.001524,0.002709,0.015357


In [16]:
BOS_graph, BOS_nodes = betweenness(BOS_graph, BOS_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub,betweenness
47,Nashua Street at Red Auerbach Way,330,170,160,0.006147,0.006753,0.026659
251,Cambridge St at Joy St,344,172,172,0.006839,0.006334,0.016168
160,Tremont St at West St,323,165,158,0.006266,0.006426,0.015237
46,South Station - 700 Atlantic Ave,358,183,175,0.006769,0.006992,0.014996
73,Christian Science Plaza - Massachusetts Ave at...,375,187,188,0.007205,0.007261,0.013817


## Community detection with modularity

In [17]:
def communities(G, df):
    communities = nx.algorithms.community.modularity_max.greedy_modularity_communities(G)
    node_comm = pd.DataFrame({'Nodes':G.nodes()})
    node_comm['Community']=np.zeros(len(node_comm.index),dtype='int8')
    for i, set in enumerate(communities):
        for element in set:
            node_comm.iloc[node_comm['Nodes'] == element, 1]=i
    for i, set in enumerate(communities):
        display(node_comm[node_comm['Community']==i])
    
    df['Community'] = node_comm['Community']
    return G, df

In [18]:
NY_graph, NY_nodes = communities(NY_graph, NY_nodes)

Unnamed: 0,Nodes,Community
0,1 Ave & E 16 St,0
3,E 106 St & Madison Ave,0
4,44 Dr & 21 St,0
5,W 4 St & 7 Ave S,0
6,E 85 St & York Ave,0
...,...,...
781,W 113 St & Broadway,0
782,Lexington Ave & E 120 St,0
783,E 91 St & Park Ave,0
786,W 52 St & 5 Ave,0


Unnamed: 0,Nodes,Community
2,Willoughby Ave & Wyckoff Ave,1
8,Carroll St & Smith St,1
11,6 St & 7 Ave,1
15,Leonard St & Nassau Ave,1
16,Johnson St & Gold St,1
...,...,...
780,Berkeley Pl & 6 Ave,1
784,Broadway & Berry St,1
785,Park Pl & Vanderbilt Ave,1
787,Dean St & Hoyt St,1


Unnamed: 0,Nodes,Community
1,Madison St & Montgomery St,2
7,St James Pl & Oliver St,2
73,Leonard St & Church St,2
75,Lafayette St & Jersey St,2
97,Reade St & Broadway,2
105,Yankee Ferry Terminal,2
125,E 2 St & 2 Ave,2
180,Front St & Maiden Ln,2
186,Bayard St & Baxter St,2
209,Peck Slip & Front St,2


In [19]:
CHI_graph, CHI_nodes = communities(CHI_graph, CHI_nodes)

Unnamed: 0,Nodes,Community
1,Morgan St & Polk St,0
3,California Ave & 21st St,0
4,Fort Dearborn Dr & 31st St,0
6,State St & Pearson St,0
8,Artesian Ave & Hubbard St,0
...,...,...
564,Halsted St & Maxwell St,0
567,Jeffery Blvd & 67th St,0
568,Wood St & 35th St,0
569,Commercial Ave & 83rd St,0


Unnamed: 0,Nodes,Community
0,Kedzie Ave & Milwaukee Ave,1
2,Drake Ave & Addison St,1
5,Central St & Girard Ave,1
7,Austin Blvd & Lake St,1
9,Greenview Ave & Jarvis Ave,1
...,...,...
562,Latrobe Ave & Chicago Ave,1
563,Troy St & North Ave,1
566,Ravenswood Ave & Berteau Ave,1
570,Humboldt Blvd & Armitage Ave,1


Unnamed: 0,Nodes,Community
14,Wabash Ave & 83rd St,2
62,South Shore Dr & 71st St,2
84,Rhodes Ave & 71st St,2
101,Phillips Ave & 83rd St,2
126,Wabash Ave & 87th St,2
168,Yates Blvd & 75th St,2
173,Jeffery Blvd & 76th St,2
176,Cottage Grove Ave & 83rd St,2
204,Stony Island Ave & South Chicago Ave,2
212,Ashland Ave & 69th St,2


Unnamed: 0,Nodes,Community
17,Ashland Ave & Pershing Rd,3
148,Marshfield Ave & 44th St,3
153,Damen Ave & 59th St,3
155,Seeley Ave & Garfield Blvd,3
217,Throop St & 52nd St,3
261,Morgan St & Pershing Rd,3
299,Ashland Ave & McDowell Ave,3
325,Western Blvd & 48th Pl,3
332,Damen Ave & 51st St,3
390,Damen Ave & Pershing Rd,3


Unnamed: 0,Nodes,Community
152,Central Park Ave & Ogden Ave,4
164,Central Park Ave & 24th St,4
474,Kedzie Ave & 21st St,4
552,Millard Ave & 26th St,4


Unnamed: 0,Nodes,Community
284,Laramie Ave & Madison St,5
565,Laramie Ave & Kinzie St,5


Unnamed: 0,Nodes,Community
35,Central Ave & Harrison St,6


Unnamed: 0,Nodes,Community
48,State St & 76th St,7


Unnamed: 0,Nodes,Community
162,Racine Ave & 61st St,8


Unnamed: 0,Nodes,Community
233,Racine Ave & Garfield Blvd,9


Unnamed: 0,Nodes,Community
333,Central Ave & Chicago Ave,10


Unnamed: 0,Nodes,Community
438,Exchange Ave & 79th St,11


In [20]:
BOS_graph, BOS_nodes = communities(BOS_graph, BOS_nodes)

Unnamed: 0,Nodes,Community
1,Tremont St at Northampton St,0
3,Franklin Park Zoo - Franklin Park Rd at Blue H...,0
6,Commonwealth Ave At Babcock St,0
8,Huntington Ave at Mass Art,0
9,University of Massachusetts Boston - Integrate...,0
...,...,...
253,Roxbury Crossing T Stop - Columbus Ave at Trem...,0
255,Dudley Square - Dudley St at Warren St,0
257,Oak Square - 615 Washington St,0
264,Savin Hill T Stop - S Sydney St at Bay St,0


Unnamed: 0,Nodes,Community
2,Harvard St at Greene-Rose Heritage Park,1
4,Alewife Station at Russell Field,1
5,Kendall Street,1
7,175 N Harvard St,1
10,Harvard Law School at Mass Ave / Jarvis St,1
...,...,...
259,Porter Square Station,1
262,Verizon Innovation Hub 10 Ware Street,1
263,Powder House Circle - Nathan Tufts Park,1
266,Mugar Way at Beacon St,1


Unnamed: 0,Nodes,Community
0,Bennington St at Byron St,2
37,18 Dorrance Warehouse,2
68,The Eddy - New St at Sumner St,2
104,Airport T Stop - Bremen St at Brooks St,2
117,Maverick Square - Lewis Mall,2
137,Chelsea St at Saratoga St,2
144,East Boston Neighborhood Health Center - 20 Ma...,2
181,Piers Park,2
212,Boston East - 126 Border St,2
234,Glendon St at Condor St,2


Unnamed: 0,Nodes,Community
21,Clarendon St at Commonwealth Ave,3
106,Faneuil St at Arlington St,3


Unnamed: 0,Nodes,Community
58,BCBS Hingham,4


Unnamed: 0,Nodes,Community
261,BCBS Quincy,5


## Visualization

These are the resulting dataframes obtained throught all the previously done measures.

In [21]:
display(NY_nodes)
display(CHI_nodes)
display(BOS_nodes)

Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub,betweenness,Community
0,1 Ave & E 16 St,743,370,373,0.001825,0.001800,0.001787,0
1,Madison St & Montgomery St,436,214,222,0.001141,0.001083,0.000210,2
2,Willoughby Ave & Wyckoff Ave,224,115,109,0.000707,0.000759,0.000281,1
3,E 106 St & Madison Ave,300,146,154,0.000919,0.000844,0.000178,0
4,44 Dr & 21 St,407,202,205,0.001238,0.001196,0.001591,0
...,...,...,...,...,...,...,...,...
786,W 52 St & 5 Ave,765,368,397,0.001968,0.001756,0.001826,0
787,Dean St & Hoyt St,459,248,211,0.001152,0.001370,0.000666,1
788,E 74 St & 1 Ave,629,318,311,0.001612,0.001566,0.001321,0
789,Columbia St & Kane St,311,167,144,0.000838,0.001020,0.000281,1


Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub,betweenness,Community
0,Kedzie Ave & Milwaukee Ave,290,148,142,0.002847,0.003046,0.008479,1
1,Morgan St & Polk St,205,102,103,0.002224,0.002133,0.002028,0
2,Drake Ave & Addison St,47,24,23,0.000705,0.000925,0.000192,1
3,California Ave & 21st St,35,19,16,0.000732,0.001020,0.000952,0
4,Fort Dearborn Dr & 31st St,147,72,75,0.002286,0.002157,0.007424,0
...,...,...,...,...,...,...,...,...
568,Wood St & 35th St,29,16,13,0.000609,0.000774,0.000178,0
569,Commercial Ave & 83rd St,1,1,0,0.000265,0.000323,0.000000,0
570,Humboldt Blvd & Armitage Ave,178,100,78,0.001611,0.002115,0.002470,1
571,Chicago Ave & Sheridan Rd,49,26,23,0.000955,0.001175,0.000779,1


Unnamed: 0,Nodes,degree,in degree,out degree,PageRank authority,PageRank hub,betweenness,Community
0,Bennington St at Byron St,15,8,7,0.001990,0.002947,0.000037,2
1,Tremont St at Northampton St,207,109,98,0.004066,0.004228,0.002589,0
2,Harvard St at Greene-Rose Heritage Park,264,134,130,0.004943,0.004988,0.002225,1
3,Franklin Park Zoo - Franklin Park Rd at Blue H...,42,19,23,0.001879,0.001456,0.000664,0
4,Alewife Station at Russell Field,108,58,50,0.002271,0.002517,0.000425,1
...,...,...,...,...,...,...,...,...
263,Powder House Circle - Nathan Tufts Park,122,64,58,0.002564,0.002734,0.000668,1
264,Savin Hill T Stop - S Sydney St at Bay St,65,31,34,0.001768,0.001967,0.001035,0
265,Mattapan T Stop,26,15,11,0.001488,0.001743,0.000894,0
266,Mugar Way at Beacon St,336,169,167,0.006302,0.006252,0.006699,1


### Geographic plots

First of all latitude and longitude data have to be incorporated in the dataframes.

In [22]:
print(np.array(list(set.union(set(NY_df.loc[:, 'start station name']), set(NY_df.loc[:, 'end station name'])))))

['1 Ave & E 16 St' 'Madison St & Montgomery St'
 'Willoughby Ave & Wyckoff Ave' 'E 106 St & Madison Ave' '44 Dr & 21 St'
 'W 4 St & 7 Ave S' 'E 85 St & York Ave' 'St James Pl & Oliver St'
 'Carroll St & Smith St' 'W 26 St & 8 Ave' 'Amsterdam Ave & W 82 St'
 '6 St & 7 Ave' 'E 17 St & Broadway' 'Riverside Blvd & W 67 St'
 'E 41 St & Madison Ave' 'Leonard St & Nassau Ave' 'Johnson St & Gold St'
 'Nassau Ave & Newell St' '1 Ave & E 44 St' 'Coffey St & Conover St'
 'Columbia St & W 9 St' 'Murray St & West St' 'Cathedral Pkwy & Broadway'
 'Central Park West & W 100 St' 'Cadman Plaza E & Johnson St'
 'Willoughby Ave & Hall St' 'Fulton St & Adams St'
 'Albany Ave & Fulton St' 'Eckford St & Engert Ave'
 'W 120 St & Claremont Ave' 'E 102 St & Park Ave' 'Vernon Blvd & 10 St'
 'W 104 St & Amsterdam Ave' '4 Ave & 9 St' 'Butler St & Court St'
 'Lispenard St & Broadway' 'Devoe St & Lorimer St'
 'Riverside Dr & W 91 St' 'Kent Ave & N 7 St' 'NYCBS DEPOT - DELANCEY'
 'Powers St & Olive St' '1 Ave & E 68

In [23]:
def lat_long(df, n_df):
    temp = df[['start station name', 'start station latitude', 'start station longitude']].copy()
    temp.set_axis(['end station name', 'end station latitude', 'end station longitude'], axis=1, inplace=True)
    temp = temp.append(df[['end station name', 'end station latitude', 'end station longitude']])
    temp.drop_duplicates(inplace=True, ignore_index=True)
    cols = list(n_df.columns)
    n_df = pd.merge(left=n_df, right=temp, how='left', left_on='Nodes', right_on='end station name')
    n_df.rename(columns={'end station latitude' : 'latitude', 'end station longitude' : 'longitude'}, inplace = True)
    return n_df

NY_nodes = lat_long(NY_df, NY_nodes)
CHI_nodes = lat_long(CHI_df, CHI_nodes)
BOS_nodes = lat_long(BOS_df, BOS_nodes)

The best way to visualize the given results is to see the position of the stations using sizes and colors to underline their different behaviours.

In [24]:
import geopandas as gpd

#### Interactive geoplot

In [25]:
import folium
import geopandas as gpd

# reading geojson file containing districts geographic data
NY_geo = gpd.read_file('data/ny_districts.geojson')
NY_geo

Unnamed: 0,BoroCD,Shape_Leng,Shape_Area,geometry
0,311,51566.991644,1.031759e+08,"POLYGON ((-73.97299 40.60881, -73.97259 40.606..."
1,404,36821.131610,6.563509e+07,"POLYGON ((-73.84751 40.73901, -73.84801 40.738..."
2,203,33489.439334,4.481448e+07,"POLYGON ((-73.88072 40.83752, -73.88074 40.837..."
3,308,38211.697490,4.560542e+07,"POLYGON ((-73.95829 40.67983, -73.95596 40.679..."
4,106,40626.734083,3.870487e+07,"MULTIPOLYGON (((-73.96421 40.74660, -73.96427 ..."
...,...,...,...,...
66,208,47853.661513,9.204531e+07,"POLYGON ((-73.89663 40.91142, -73.89663 40.911..."
67,109,34956.478384,4.189291e+07,"POLYGON ((-73.94017 40.83038, -73.93963 40.830..."
68,110,35839.110710,3.907864e+07,"POLYGON ((-73.93445 40.83598, -73.93456 40.835..."
69,407,139836.497120,3.284308e+08,"POLYGON ((-73.77061 40.78208, -73.77076 40.782..."


In [111]:
def geo_plot(df, value, fr, c):
    # creating map
    m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()],
                   tiles = 'cartodbpositron', zoom_start=12)
                   # tiles="OpenStreetMap", zoom_start=12)
    
    # inserting stations data
    for i in range(0,len(df)):
        station = df.iloc[i].copy()
        folium.Circle(
            location=[station['latitude'], station['longitude']],
            popup=station['Nodes'],
            tooltip=station['Nodes'] + ', ' + str(station[value]),
            radius=float(station[value])/fr,
            color=c,
            fill=True,
            fill_color=c
            ).add_to(m)
    
    return m

#### Geographic plots based on degree

In [113]:
geo_plot(NY_nodes, 'degree', 5, 'blue')

This visualization can be improved by plotting only a fraction of the data-points

In [114]:
geo_plot(NY_nodes.sort_values('degree', ascending=False).head(math.ceil(len(NY_nodes)/3)), 'degree', 6, 'blue')

In [115]:
geo_plot(CHI_nodes, 'degree', 2, 'crimson')

In [116]:
geo_plot(BOS_nodes, 'degree', 2, 'green')

#### Geographic plots based on PageRank