In [1]:
import networkx as nx
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
# Input folders (relative to the notebook's working directory).
# simplify_dir holds 'int_stop.csv'; gtfs_dir holds 'stop_times_full.csv'.
simplify_dir = "GTFS/stop_snapping/GIS Simplify/"
gtfs_dir = "GTFS/TTC_2016-10-03/"

In [3]:
# One entry per analysis period: [period label, hour value].
# The label is used in file names; the hour is matched against the 'hr_o' column.
period_list = [
    ['EM', 4],
    ['AM', 7],
    ['MD', 11],
    ['PM', 17],
    ['EV', 20],
]

In [10]:
def connectivity(hr, period):
    """Compute node-level transfer possibilities and ward-level connectivity.

    For one period, reads the period's network graph and the stop-times
    table, derives for every node its 'freq'-weighted degree and the number
    of route/direction pairs that touch it, writes the node-level table, then
    aggregates those values per ward (codes 1..25) and writes the ward-level
    table.

    Parameters
    ----------
    hr
        Value compared for equality with the ``hr_o`` column of
        ``stop_times_full.csv`` to select the rows of this period.
    period : str
        Period label used to build the input graph path
        (``networks/<period>-RM-16-singleDi.gexf``) and the output paths.

    Returns
    -------
    None
        Two CSV files are written:
        ``networks/<period>/<period>_transfer-poss.csv`` and
        ``networks/<period>/<period>_connectivity.csv``.
    """
    G = nx.read_gexf('networks/' + period + '-RM-16-singleDi.gexf')

    # Weighted degree of every node, using the 'freq' edge attribute as weight.
    degree_df = pd.DataFrame(
        [[node, degree] for node, degree in dict(G.degree(weight='freq')).items()],
        columns=['node', 'degree'])
    # Node ids are cast to int so they can be joined with the INT_ID columns.
    degree_df['node'] = degree_df['node'].astype(int)

    stop_times_od = pd.read_csv(gtfs_dir + 'stop_times_full.csv')
    stop_times_od = stop_times_od[stop_times_od['hr_o'] == hr]

    graph_data = stop_times_od[['trip_id', 'INT_ID_o', 'INT_ID_d',
                                'route_short_name', 'direction_id', 'cost']]

    # Distinct (node, route, direction) triples, taking a node from either end
    # of a segment. pd.concat replaces DataFrame.append (removed in pandas 2.0).
    origin_side = graph_data[['INT_ID_o', 'route_short_name', 'direction_id']].drop_duplicates().rename(
        columns={'INT_ID_o': 'node'})
    dest_side = graph_data[['INT_ID_d', 'route_short_name', 'direction_id']].drop_duplicates().rename(
        columns={'INT_ID_d': 'node'})
    route_dir_int = pd.concat([origin_side, dest_side]).drop_duplicates()

    # Number of route/direction pairs per node.
    route_dir_int = route_dir_int.groupby('node').count().reset_index().rename(
        columns={'route_short_name': 'route-direction'})[['node', 'route-direction']]

    connect_df = degree_df.merge(route_dir_int, on='node')
    # NOTE(review): (count - 2) / 2 presumably removes the node's "own" route
    # (two directions) and converts directions to routes - confirm with author.
    connect_df['route-direction'] = ((connect_df['route-direction'] - 2) / 2).clip(lower=0)
    # NOTE(review): degree is halved, presumably to undo two-way counting - confirm.
    connect_df['degree'] = connect_df['degree'] / 2
    connect_df['transfer-poss'] = connect_df['route-direction'] * connect_df['degree']

    int_stops = pd.read_csv(simplify_dir + 'int_stop.csv')
    connect_df = connect_df.merge(int_stops, left_on=['node'], right_on=['INT_ID'])[
        ['INT_ID', 'degree', 'route-direction', 'transfer-poss', 'int_lon', 'int_lat']]

    connect_df.to_csv('networks/' + period + '/' + period + '_transfer-poss.csv', index=False)

    ward_int = pd.read_csv('GIS/int_wards.csv')

    edge_cols = ['INT_ID_o', 'INT_ID_d', 'route_short_name']
    # Distinct (origin, destination, route) triples.
    multiple_edges = graph_data[edge_cols].drop_duplicates()
    # Row count per (origin, destination, route). groupby().size() avoids
    # assigning a helper column on a slice (the SettingWithCopyWarning source).
    multiple_freq = graph_data[edge_cols].groupby(edge_cols).size().reset_index(name='freq')

    table = []

    # Ward codes 1..25 are iterated explicitly, as in the original analysis.
    for ward in range(1, 26):

        # Keep only edges whose two endpoints both lie in this ward.
        ward_int_list = list(ward_int[ward_int['AREA_SHORT_CODE'] == ward]['INT_ID'])
        multiple_edges_ward = multiple_edges[multiple_edges['INT_ID_o'].isin(ward_int_list) &
                                             multiple_edges['INT_ID_d'].isin(ward_int_list)]
        multiple_freq_ward = multiple_freq[multiple_freq['INT_ID_o'].isin(ward_int_list) &
                                           multiple_freq['INT_ID_d'].isin(ward_int_list)]

        # count_routes = number of routes on a node pair beyond the first one.
        multiple_edges_ward = multiple_edges_ward.groupby(['INT_ID_o', 'INT_ID_d']).count()
        multiple_edges_ward = multiple_edges_ward.reset_index()
        multiple_edges_ward['route_short_name'] = multiple_edges_ward['route_short_name'] - 1
        multiple_edges_ward = multiple_edges_ward.rename(columns={'route_short_name': 'count_routes'})

        multiple_freq_ward = multiple_freq_ward.merge(
            multiple_edges_ward, how='left', on=['INT_ID_o', 'INT_ID_d'])
        multiple_freq_ward = multiple_freq_ward[multiple_freq_ward['count_routes'] > 0]

        # For each node pair served by several routes, take the highest 'freq'
        # among its routes; sum over pairs and halve.
        e_multiple = multiple_freq_ward.sort_values(by='freq', ascending=False).drop_duplicates(
            subset=['INT_ID_o', 'INT_ID_d'], keep='first')['freq'].sum() / 2

        ward_connect = connect_df[connect_df['INT_ID'].isin(ward_int_list)]

        # Number of nodes in the ward with at least one transfer route.
        transfer_stop = int((ward_connect['route-direction'] > 0).sum())

        # Distribution shape of transfer possibilities within the ward.
        skew = stats.skew(ward_connect['transfer-poss'])
        kurt = stats.kurtosis(ward_connect['transfer-poss'])

        # Total transfer possibilities in the ward.
        tot_transfer = ward_connect['transfer-poss'].sum()

        # Local name differs from the function name to avoid shadowing it.
        if transfer_stop > 0:
            ward_connectivity = (tot_transfer - e_multiple) / transfer_stop
        else:
            ward_connectivity = 0

        table.append([ward, skew, kurt, tot_transfer, e_multiple, transfer_stop, ward_connectivity])

    # The column name 'total_transfer_oppurtunities' (sic) is kept unchanged so
    # existing consumers of the output file keep working.
    connectivity_df = pd.DataFrame(table, columns=['AREA_SHORT_CODE', 'skew', 'kurtosis',
                                                   'total_transfer_oppurtunities', 'multiple_edges',
                                                   'num_transfer_stops', 'connectivity'])

    # Attach ward names and put the columns in their output order.
    connectivity_df = connectivity_df.merge(
        ward_int[['AREA_NAME', 'AREA_SHORT_CODE']].drop_duplicates(), on='AREA_SHORT_CODE')
    connectivity_df = connectivity_df.rename(columns={'AREA_SHORT_CODE': 'Ward', 'AREA_NAME': 'Ward Name'})
    connectivity_df = connectivity_df[['Ward', 'Ward Name', 'skew', 'kurtosis',
                                       'total_transfer_oppurtunities', 'multiple_edges',
                                       'num_transfer_stops', 'connectivity']]

    connectivity_df.to_csv('networks/' + period + '/' + period + '_connectivity.csv', index=False)



In [11]:
# Run the analysis once per period; each entry is [label, hour].
for period_label, period_hour in period_list:
    connectivity(period_hour, period_label)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [8]:
    # Scratch cell: load the 'AM' period graph on its own for inspection.
    G = nx.read_gexf('networks/' + 'AM' + '-RM-16-singleDi.gexf')



In [9]:
    # Scratch cell: 'freq'-weighted degree of every node in G, one row per node.
    degree_df = pd.DataFrame(
        [[node_id, weighted_deg]
         for node_id, weighted_deg in dict(G.degree(weight='freq')).items()],
        columns=['node', 'degree'],
    )



In [38]:
# Display the node/degree table built from G (scratch inspection).
degree_df

Unnamed: 0,node,degree
0,100,222
1,101,250
2,102,190
3,103,144
4,104,156
...,...,...
3521,13452472,3
3522,13458342,23
3523,13458080,2
3524,13460221,1


In [39]:
    # Scratch cell: load the stop-times table and keep only rows with hr_o == 7.
    stop_times_od = (
        pd.read_csv(gtfs_dir + 'stop_times_full.csv')
        .loc[lambda frame: frame['hr_o'] == 7]
    )

In [40]:
    # Scratch cell: rebuild the distinct (node, route, direction) table from the
    # filtered stop_times_od frame.
    graph_data = stop_times_od[['trip_id', 'INT_ID_o', 'INT_ID_d','route_short_name', 'direction_id', 'cost']]

    route_dir_int = graph_data[['INT_ID_o', 'route_short_name', 'direction_id']].drop_duplicates()
    route_dir_int = route_dir_int.rename(columns = {'INT_ID_o':'node'})

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
    # origin-side and destination-side rows the same way.
    route_dir_int = pd.concat([
        route_dir_int,
        graph_data[['INT_ID_d', 'route_short_name', 'direction_id']].drop_duplicates().rename(
            columns = {'INT_ID_d':'node'}),
    ]).drop_duplicates()

In [41]:
# Display the distinct (node, route, direction) rows (scratch inspection).
route_dir_int

Unnamed: 0,node,route_short_name,direction_id
8584,13464667.0,506,0
8585,13464586.0,506,0
8586,13464490.0,506,0
8587,13464269.0,506,0
8588,13464177.0,506,0
...,...,...,...
1182560,13466500.0,502,1
1185095,13466454.0,514,0
1188249,13468779.0,514,1
1192089,203.0,35,0


In [43]:
    # Scratch cell: keep a handle on the un-aggregated table, then count the
    # route/direction rows per node and report how many nodes there are.
    route_dir = route_dir_int
    route_dir_int = (
        route_dir_int
        .groupby('node')
        .count()
        .reset_index()
        .rename(columns={'route_short_name': 'route-direction'})
        [['node', 'route-direction']]
    )
    print(len(route_dir_int))

3524
