In [1]:
import networkx as nx
import pandas as pd
import numpy as np
from scipy import stats
import statistics

In [2]:
# Locations of the TTC GTFS feed (2016-10-03 snapshot) on the local data volume.
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
gtfs_dir = '/Volumes/Data2/RST/notebook/GTFS/TTC_2016-10-03/'
simplify_dir = '/Volumes/Data2/RST/notebook/GTFS/TTC_2016-10-03/GIS Simplify/'

In [3]:
# Table that is merged onto the per-intersection metrics further down; the
# later cells read its AREA_SHORT_CODE and AREA_NAME columns.
ward_int_path = '/Volumes/Data2/RST/notebook/GIS/int_wards.csv'
ward_int = pd.read_csv(ward_int_path)

In [4]:
# Load the AM-period network on its own. The per-period loop in a later cell
# rebinds G for every period, so this copy is only useful for ad-hoc inspection.
am_network_path = '/Volumes/Data2/RST/notebook/networks/AM-RM-16-singleDi.gexf'
G = nx.read_gexf(am_network_path)

In [7]:
# Build clustering / degree metrics for every service period.
#
# For each period this cell:
#   1. loads the period's network (GEXF) and its companion CSV,
#   2. computes the 'freq'-weighted clustering coefficient of every node,
#   3. computes, per node, the mean 'scaled_freq'-weighted degree of its
#      neighbours and the node's own weighted degree (kept only for nodes whose
#      unweighted degree exceeds TRANSFER_DEGREE_THRESHOLD, otherwise 0),
#   4. aggregates those metrics per ward and normalises the mean clustering
#      coefficient by 2 * E / N**2 of the ward's induced subgraph.
#
# Results:
#   cluster_list -- one per-intersection DataFrame per period
#   df_list      -- one per-ward DataFrame per period
NETWORK_DIR = '/Volumes/Data2/RST/notebook/networks/'
WARD_CODES = range(1, 26, 1)  # ward codes 1..25
# Per the original note, nodes with more than this many incident edges have
# neighbours beyond those of the same route.
TRANSFER_DEGREE_THRESHOLD = 4
METRIC_COLS = ['cc', 'neighbor_avg', 'transfer_avg']

df_list = []
cluster_list = []
for period in ['EM', 'AM', 'MD', 'PM', 'EV']:
    G = nx.read_gexf(NETWORK_DIR + period + '-RM-16-singleDi.gexf')
    # Not used inside this cell; kept so the name stays bound for other cells.
    graph_data = pd.read_csv(NETWORK_DIR + period + '-RM-16-singleDi.csv')

    clustering = nx.algorithms.cluster.clustering(G, weight='freq')

    cluster_df = pd.DataFrame(list(clustering.items()), columns=['INT_ID', 'cc'])
    # Node ids come back from the graph as strings; the ward table is joined on
    # the integer form.
    cluster_df['INT_ID'] = cluster_df['INT_ID'].astype(int)
    cluster_df = cluster_df.merge(ward_int, on='INT_ID')

    neighbor_degree = []
    for node in G.nodes():
        neighbours = list(G.neighbors(node))

        # no neighbours
        if not neighbours:
            neighbor_degree.append([int(node), 0, 0])
            continue

        neighbor_avg = statistics.mean(
            [G.degree(n, weight='scaled_freq') for n in neighbours]
        )

        # when there are neighbours beyond neighbours of the same route
        if G.degree(node) > TRANSFER_DEGREE_THRESHOLD:
            transfer = G.degree(node, weight='scaled_freq')
        else:
            transfer = 0

        neighbor_degree.append([int(node), neighbor_avg, transfer])

    neighbor_df = pd.DataFrame.from_records(
        neighbor_degree, columns=['INT_ID', 'neighbor_avg', 'transfer_avg']
    )
    cluster_df = cluster_df.merge(neighbor_df, on='INT_ID')

    cluster_int = cluster_df.copy()
    cluster_int['period'] = period
    cluster_list.append(cluster_int)

    avg_cluster = []
    for i in WARD_CODES:
        in_ward = cluster_df['AREA_SHORT_CODE'] == i
        int_list = list(cluster_df.loc[in_ward, 'INT_ID'].astype(str))
        G_ward = G.subgraph(int_list).copy()

        n_nodes = G_ward.number_of_nodes()
        if n_nodes == 0:
            # A ward without any node in this period would divide by zero; it
            # has no rows in the per-ward means either, so the inner merge
            # below would drop it anyway.
            continue
        avg_cluster.append([i, 2 * G_ward.number_of_edges() / (n_nodes ** 2)])

    # Select the numeric metric columns *before* averaging: cluster_df also
    # carries text columns such as AREA_NAME, and pandas >= 2.0 raises a
    # TypeError when asked to average those.
    cluster_ward = cluster_df.groupby('AREA_SHORT_CODE')[METRIC_COLS].mean().reset_index()
    cluster_ward = cluster_ward.merge(
        pd.DataFrame.from_records(avg_cluster, columns=['AREA_SHORT_CODE', 'mean_random']),
        on='AREA_SHORT_CODE',
    )

    cluster_ward['cc_normalized'] = cluster_ward['cc'] / cluster_ward['mean_random']

    cluster_ward = cluster_ward.merge(
        ward_int[['AREA_SHORT_CODE', 'AREA_NAME']].drop_duplicates(),
        on='AREA_SHORT_CODE',
    )

    cluster_ward['period'] = period

    df_list.append(cluster_ward)

In [9]:
# Stack the per-period intersection tables and write them to disk without the index.
cluster_int_all = pd.concat(cluster_list)
cluster_int_all.to_csv('cluster_int.csv', index=False)

In [14]:
# Stack the per-period ward summaries row-wise. Each period keeps its own
# 0-based index, so index labels repeat across periods.
cluster = pd.concat(df_list, axis=0)
cluster

Unnamed: 0,AREA_SHORT_CODE,cc,neighbor_avg,transfer_avg,mean_random,cc_normalized,AREA_NAME,period
0,1,0.000062,0.080221,0.018078,0.026996,0.002291,Etobicoke North,EM
1,2,0.001553,0.073643,0.012987,0.033466,0.046420,Etobicoke Centre,EM
2,3,0.000782,0.123061,0.016829,0.031901,0.024501,Etobicoke-Lakeshore,EM
3,4,0.001906,0.156527,0.044794,0.068371,0.027871,Parkdale-High Park,EM
4,5,0.000914,0.099830,0.021599,0.035918,0.025439,York South-Weston,EM
...,...,...,...,...,...,...,...,...
20,21,0.006500,0.456060,0.163659,0.022407,0.290090,Scarborough Centre,EV
21,22,0.008930,0.473257,0.184480,0.034787,0.256704,Scarborough-Agincourt,EV
22,23,0.004497,0.406297,0.159880,0.026614,0.168988,Scarborough North,EV
23,24,0.009587,0.540616,0.175776,0.035690,0.268618,Scarborough-Guildwood,EV


In [15]:
# Swap the source column names for shorter ones.
ward_column_names = {'AREA_SHORT_CODE': 'ward', 'AREA_NAME': 'ward_name'}
cluster = cluster.rename(columns=ward_column_names)
cluster

Unnamed: 0,ward,cc,neighbor_avg,transfer_avg,mean_random,cc_normalized,ward_name,period
0,1,0.000062,0.080221,0.018078,0.026996,0.002291,Etobicoke North,EM
1,2,0.001553,0.073643,0.012987,0.033466,0.046420,Etobicoke Centre,EM
2,3,0.000782,0.123061,0.016829,0.031901,0.024501,Etobicoke-Lakeshore,EM
3,4,0.001906,0.156527,0.044794,0.068371,0.027871,Parkdale-High Park,EM
4,5,0.000914,0.099830,0.021599,0.035918,0.025439,York South-Weston,EM
...,...,...,...,...,...,...,...,...
20,21,0.006500,0.456060,0.163659,0.022407,0.290090,Scarborough Centre,EV
21,22,0.008930,0.473257,0.184480,0.034787,0.256704,Scarborough-Agincourt,EV
22,23,0.004497,0.406297,0.159880,0.026614,0.168988,Scarborough North,EV
23,24,0.009587,0.540616,0.175776,0.035690,0.268618,Scarborough-Guildwood,EV


In [16]:
# Persist the ward-level table; the repeating per-period index is not written.
cluster.to_csv(path_or_buf='cluster.csv', index=False)

In [17]:
# AM-period wards, highest normalised clustering coefficient first.
am_wards = cluster.loc[cluster['period'].eq('AM')]
am_wards.sort_values('cc_normalized', ascending=False)

Unnamed: 0,ward,cc,neighbor_avg,transfer_avg,mean_random,cc_normalized,ward_name,period
14,15,0.011911,0.658062,0.216573,0.022216,0.536146,Don Valley West,AM
5,6,0.010095,0.690109,0.282617,0.02171,0.464987,York Centre,AM
11,12,0.01256,0.843914,0.349942,0.032908,0.381657,Toronto-St. Paul's,AM
23,24,0.013905,0.867824,0.306936,0.037051,0.375302,Scarborough-Guildwood,AM
21,22,0.01324,0.755359,0.305629,0.035359,0.374436,Scarborough-Agincourt,AM
20,21,0.008105,0.761893,0.281808,0.023275,0.348215,Scarborough Centre,AM
16,17,0.011563,0.737736,0.275409,0.034417,0.335955,Don Valley North,AM
9,10,0.012876,1.124662,0.495307,0.040311,0.319419,Spadina-Fort York,AM
2,3,0.00564,0.513436,0.161248,0.018098,0.311608,Etobicoke-Lakeshore,AM
24,25,0.006875,0.513782,0.1549,0.022186,0.309867,Scarborough-Rouge Park,AM


In [18]:
# AM-period wards, highest mean clustering coefficient first.
am_wards = cluster.loc[cluster['period'].eq('AM')]
am_wards.sort_values('cc', ascending=False)

Unnamed: 0,ward,cc,neighbor_avg,transfer_avg,mean_random,cc_normalized,ward_name,period
23,24,0.013905,0.867824,0.306936,0.037051,0.375302,Scarborough-Guildwood,AM
21,22,0.01324,0.755359,0.305629,0.035359,0.374436,Scarborough-Agincourt,AM
9,10,0.012876,1.124662,0.495307,0.040311,0.319419,Spadina-Fort York,AM
11,12,0.01256,0.843914,0.349942,0.032908,0.381657,Toronto-St. Paul's,AM
14,15,0.011911,0.658062,0.216573,0.022216,0.536146,Don Valley West,AM
16,17,0.011563,0.737736,0.275409,0.034417,0.335955,Don Valley North,AM
5,6,0.010095,0.690109,0.282617,0.02171,0.464987,York Centre,AM
12,13,0.009503,1.051516,0.479504,0.046339,0.205068,Toronto Centre,AM
20,21,0.008105,0.761893,0.281808,0.023275,0.348215,Scarborough Centre,AM
15,16,0.007724,0.705218,0.296891,0.03064,0.252097,Don Valley East,AM


In [19]:
# AM-period wards, highest neighbor_avg first.
am_wards = cluster.loc[cluster['period'].eq('AM')]
am_wards.sort_values('neighbor_avg', ascending=False)

Unnamed: 0,ward,cc,neighbor_avg,transfer_avg,mean_random,cc_normalized,ward_name,period
9,10,0.012876,1.124662,0.495307,0.040311,0.319419,Spadina-Fort York,AM
12,13,0.009503,1.051516,0.479504,0.046339,0.205068,Toronto Centre,AM
23,24,0.013905,0.867824,0.306936,0.037051,0.375302,Scarborough-Guildwood,AM
8,9,0.003567,0.854036,0.33174,0.033852,0.105369,Davenport,AM
11,12,0.01256,0.843914,0.349942,0.032908,0.381657,Toronto-St. Paul's,AM
10,11,0.005672,0.808257,0.389154,0.034844,0.162769,University-Rosedale,AM
13,14,0.006594,0.799614,0.29394,0.02961,0.222707,Toronto-Danforth,AM
20,21,0.008105,0.761893,0.281808,0.023275,0.348215,Scarborough Centre,AM
21,22,0.01324,0.755359,0.305629,0.035359,0.374436,Scarborough-Agincourt,AM
16,17,0.011563,0.737736,0.275409,0.034417,0.335955,Don Valley North,AM


In [20]:
# AM-period wards, highest transfer_avg first.
am_wards = cluster.loc[cluster['period'].eq('AM')]
am_wards.sort_values('transfer_avg', ascending=False)

Unnamed: 0,ward,cc,neighbor_avg,transfer_avg,mean_random,cc_normalized,ward_name,period
9,10,0.012876,1.124662,0.495307,0.040311,0.319419,Spadina-Fort York,AM
12,13,0.009503,1.051516,0.479504,0.046339,0.205068,Toronto Centre,AM
10,11,0.005672,0.808257,0.389154,0.034844,0.162769,University-Rosedale,AM
11,12,0.01256,0.843914,0.349942,0.032908,0.381657,Toronto-St. Paul's,AM
8,9,0.003567,0.854036,0.33174,0.033852,0.105369,Davenport,AM
23,24,0.013905,0.867824,0.306936,0.037051,0.375302,Scarborough-Guildwood,AM
21,22,0.01324,0.755359,0.305629,0.035359,0.374436,Scarborough-Agincourt,AM
15,16,0.007724,0.705218,0.296891,0.03064,0.252097,Don Valley East,AM
13,14,0.006594,0.799614,0.29394,0.02961,0.222707,Toronto-Danforth,AM
3,4,0.00725,0.702138,0.286741,0.035375,0.204953,Parkdale-High Park,AM
