In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Render figures at 450 dpi.
plt.rcParams.update({'figure.dpi': 450})

In [2]:
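# Windows-style values for data_dir / root_dir, kept commented out;
# the active values are assigned in the next cell.
# data_dir = 'D:\\RST\\notebook\\networks\\'
# root_dir = 'D:\\RST\\notebook\\'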

In [3]:
# Base directory of the project data, and the networks sub-directory inside it.
# data_dir is derived from root_dir so the two cannot drift apart: other cells
# build the same location as root_dir + 'networks/'.
root_dir = '/Volumes/Data2/RST/notebook/'
data_dir = root_dir + 'networks/'

In [4]:
# Load the TTS table and rename its endpoint columns to o_node / d_node.
tts = (
    pd.read_csv(root_dir + 'TTS OD/TTS_TYPE.csv')
    .rename(columns={'origin': 'o_node', 'destination': 'd_node'})
)

In [5]:
# Build one normalised, demand-weighted edge table per (period, weight type) pair.
df_list = []
for period in ['EM', 'AM', 'MD', 'PM', 'EV']:

    # Both CSVs depend only on the period, so read them once per period
    # instead of once per weight type.
    bc_avg = pd.read_csv(data_dir + period + '/' + 'BC-' + period + '_edge-avg.csv')

    # Shift node ids down by 1000 before the join with `tts`
    # (presumably the BC export offsets ids by 1000 -- TODO confirm).
    bc_avg['o_node'] = bc_avg['o_node'] - 1000
    bc_avg['d_node'] = bc_avg['d_node'] - 1000

    stop_times_od = pd.read_csv(root_dir + 'networks/' + period + '-TE-16-8.csv')
    # N = number of distinct ids appearing as either endpoint.
    # pd.concat replaces Series.append, which was removed in pandas 2.0.
    N = len(pd.concat([stop_times_od['INT_ID_o'], stop_times_od['INT_ID_d']]).drop_duplicates())

    for weight_type in ['raw', 'lico', 'c0', 'imm', 'rac', 'bl', 'lep']:

        # Rows of `tts` whose type label matches this weight type and period.
        period_tts = tts[tts['type'] == weight_type + '_' + period.lower()]

        # Natural join on the columns the two frames share.
        bc_all = bc_avg.merge(period_tts)
        # Not read again in this cell; retained in case later cells use it.
        bc_od = bc_all.copy()
        bc_all['weighted'] = (bc_all['bc_single'] * bc_all['total'])
        bc_all = bc_all.groupby(['INT_ID_o', 'INT_ID_d', 'route']).agg({'weighted': 'sum'}).reset_index()
        # Normalise by (N - 1)(N - 2) and by the summed `total` of this type/period.
        bc_all['weighted'] = bc_all['weighted'] / ((N - 1) * (N - 2) * period_tts['total'].sum())
        bc_all['type'] = weight_type + '_' + period.lower()

        df_list.append(bc_all)


In [6]:
# Stack the per-(period, weight type) tables; each table keeps its own row labels.
bc_type = pd.concat(df_list, axis=0, ignore_index=False)

In [7]:
# Display the combined table.
bc_type

Unnamed: 0,INT_ID_o,INT_ID_d,route,weighted,type
0,100,101,1.0,5.655994e-10,raw_em
1,100,151,1.0,7.530298e-10,raw_em
2,100,13467909,72.0,3.962498e-11,raw_em
3,100,13467909,317.0,2.535999e-11,raw_em
4,100,13467909,320.0,8.929864e-11,raw_em
...,...,...,...,...,...
7737,30112870,13455296,51.0,1.355256e-11,lep_ev
7738,30112870,13455296,54.0,1.694013e-11,lep_ev
7739,30113231,13446900,167.0,1.644184e-11,lep_ev
7740,30113634,13446075,85.0,8.386178e-11,lep_ev


In [8]:
# Show only the rows labelled 'raw_am'.
bc_type.loc[bc_type['type'] == 'raw_am']

Unnamed: 0,INT_ID_o,INT_ID_d,route,weighted,type
0,100,101,1.0,1.546427e-09,raw_am
1,100,151,1.0,1.551585e-09,raw_am
2,100,13467163,6.0,3.625105e-11,raw_am
3,100,13467239,121.0,9.870576e-11,raw_am
4,100,13467593,121.0,4.196941e-11,raw_am
...,...,...,...,...,...
9630,30112870,13455296,54.0,1.053335e-10,raw_am
9631,30113231,13446581,24.0,1.826229e-11,raw_am
9632,30113231,13446900,24.0,2.537128e-11,raw_am
9633,30113634,13446075,85.0,2.297380e-10,raw_am


In [9]:
# Ids treated as "inner" stations: three contiguous id ranges.
inner_to_stn = [*range(100, 113), *range(151, 160), *range(203, 225)]

# Route numbers classified as streetcar routes.
streetcar_routes = [501, 502, 503, 504, 505, 506, 509, 510, 511, 512, 514]

# Route numbers classified as frequent routes.
frequent_routes = [
    7, 22, 24, 25, 29, 32, 34, 35, 36, 39, 43, 44, 47,
    52, 53, 54, 60, 63, 69, 70, 72, 76, 84, 85, 86, 87, 89, 94, 100, 102,
    116, 129, 165,
]

# "Outer" stations: every distinct INT_ID in stations.csv that is not an inner station.
outer_to_stn = list(
    pd.read_csv(root_dir + 'GIS/stations.csv')
    .loc[lambda stations: ~stations['INT_ID'].isin(inner_to_stn), 'INT_ID']
    .drop_duplicates()
)

In [10]:
# GTFS stop times, joined to the weekday trips table on the columns they share.
stop_times = pd.read_csv(root_dir + 'GTFS/TTC_2016-10-03/stop_times_reduced.csv')
stop_trips = pd.merge(
    stop_times,
    pd.read_csv(root_dir + 'GTFS/TTC_2016-10-03/trips_weekday.csv'),
)

In [11]:
# Read the '-TE-16-8' table of every period and stack them into one frame.
od_df_list = [
    pd.read_csv(data_dir + period + '-TE-16-8.csv')
    for period in ['EM', 'AM', 'MD', 'PM', 'EV']
]

stop_times_od = pd.concat(od_df_list)

In [12]:
# Distinct (origin, destination, route) triples, with the route column renamed to 'route'.
edge_route = (
    stop_times_od[['INT_ID_o', 'INT_ID_d', 'route_short_name']]
    .drop_duplicates()
    .rename(columns={'route_short_name': 'route'})
)

In [13]:
# Edges on route 1 or 2 whose two endpoints are both inner stations.
inner_edges = (
    edge_route.loc[
        edge_route['INT_ID_o'].isin(inner_to_stn)
        & edge_route['INT_ID_d'].isin(inner_to_stn)
        & edge_route['route'].isin([1, 2]),
        ['INT_ID_o', 'INT_ID_d', 'route'],
    ]
    .drop_duplicates()
    .assign(edge_type='Inner Subway Station')
)

In [14]:
# Edges on routes 1-4 that touch at least one outer station, restricted to
# edges whose two endpoint ids are both below 1000.
outer_edges = (
    edge_route.loc[
        (
            edge_route['INT_ID_o'].isin(outer_to_stn)
            | edge_route['INT_ID_d'].isin(outer_to_stn)
        )
        & edge_route['route'].isin([1, 2, 3, 4]),
        ['INT_ID_o', 'INT_ID_d', 'route'],
    ]
    .drop_duplicates()
    .loc[lambda edges: (edges['INT_ID_o'] < 1000) & (edges['INT_ID_d'] < 1000)]
    .assign(edge_type='Outer Subway Station')
)

In [15]:
# Start the restricted-edge table from the outer and inner edges.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
restricted_edges = pd.concat([outer_edges, inner_edges])

In [16]:
# Distinct edges served by any of the streetcar routes.
streetcar_edges = edge_route.loc[
    edge_route['route'].isin(streetcar_routes),
    ['INT_ID_o', 'INT_ID_d', 'route'],
].drop_duplicates()

In [17]:
streetcar_edges['edge_type'] = 'Streetcar Route'
# Rebuild restricted_edges from its parts instead of appending to itself:
# DataFrame.append was removed in pandas 2.0, and a self-append would add the
# streetcar rows again every time this cell is re-run.
restricted_edges = pd.concat([outer_edges, inner_edges, streetcar_edges])

In [18]:
# Distinct edges on a frequent route that is neither a streetcar route nor route 1-4.
frequent_edges = edge_route.loc[
    edge_route['route'].isin(frequent_routes)
    & ~edge_route['route'].isin(streetcar_routes)
    & ~edge_route['route'].isin([1, 2, 3, 4]),
    ['INT_ID_o', 'INT_ID_d', 'route'],
].drop_duplicates()

In [19]:
frequent_edges['edge_type'] = 'Frequent Bus Route'

# Rebuild restricted_edges from its parts instead of appending to itself:
# DataFrame.append was removed in pandas 2.0, and a self-append would add the
# frequent rows again every time this cell is re-run.
restricted_edges = pd.concat([outer_edges, inner_edges, streetcar_edges, frequent_edges])

In [20]:
# An edge is "local" when its exact (INT_ID_o, INT_ID_d, route) triple is absent
# from restricted_edges. Testing each column with isin() independently would
# also drop unrestricted edges whose three values merely appear somewhere in
# restricted_edges, so whole rows are matched with a left merge instead.
local_edges = edge_route.loc[
    # Positional boolean mask: a left merge against de-duplicated keys keeps
    # edge_route's row order and row count, so the mask lines up with edge_route.
    edge_route[['INT_ID_o', 'INT_ID_d', 'route']]
    .merge(
        restricted_edges[['INT_ID_o', 'INT_ID_d', 'route']].drop_duplicates(),
        on=['INT_ID_o', 'INT_ID_d', 'route'],
        how='left',
        indicator=True,
    )['_merge']
    .eq('left_only')
    .to_numpy(),
    ['INT_ID_o', 'INT_ID_d', 'route'],
].drop_duplicates()

local_edges['edge_type'] = 'Bus Route'

In [21]:
# Stack all edge classes into one lookup table. Order matters: a later cell
# de-duplicates with keep='first', so earlier classes win on overlapping edges.
# pd.concat replaces the chained DataFrame.append calls (removed in pandas 2.0).
edge_directory = pd.concat(
    [inner_edges, outer_edges, streetcar_edges, frequent_edges, local_edges]
)

In [22]:
# Display the stacked edge table.
edge_directory

Unnamed: 0,INT_ID_o,INT_ID_d,route,edge_type
7691,112.0,111.0,1,Inner Subway Station
7692,111.0,110.0,1,Inner Subway Station
7693,110.0,109.0,1,Inner Subway Station
7694,109.0,108.0,1,Inner Subway Station
7695,108.0,107.0,1,Inner Subway Station
...,...,...,...,...
122717,13444398.0,13444486.0,395,Bus Route
122718,13444486.0,13444592.0,395,Bus Route
122719,13444592.0,13444613.0,395,Bus Route
122720,13444613.0,13444687.0,395,Bus Route


In [23]:
# Keep only the first row for each (INT_ID_o, INT_ID_d, route) triple.
edge_directory = edge_directory.loc[
    ~edge_directory.duplicated(subset=['INT_ID_o', 'INT_ID_d', 'route'], keep='first')
]
edge_directory

Unnamed: 0,INT_ID_o,INT_ID_d,route,edge_type
7691,112.0,111.0,1,Inner Subway Station
7692,111.0,110.0,1,Inner Subway Station
7693,110.0,109.0,1,Inner Subway Station
7694,109.0,108.0,1,Inner Subway Station
7695,108.0,107.0,1,Inner Subway Station
...,...,...,...,...
122717,13444398.0,13444486.0,395,Bus Route
122718,13444486.0,13444592.0,395,Bus Route
122719,13444592.0,13444613.0,395,Bus Route
122720,13444613.0,13444687.0,395,Bus Route


In [24]:
# Inner join of the weighted edges with their edge class, on the three columns
# the two frames have in common.
bc_mode = pd.merge(
    bc_type,
    edge_directory,
    how='inner',
    on=['INT_ID_o', 'INT_ID_d', 'route'],
)

In [25]:
# Non-null counts per edge class for the 'raw_am' rows.
(
    bc_mode.loc[bc_mode['type'] == 'raw_am']
    .groupby('edge_type')
    .count()
)

Unnamed: 0_level_0,INT_ID_o,INT_ID_d,route,weighted,type
edge_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bus Route,5934,5934,5934,5934,5934
Frequent Bus Route,2769,2769,2769,2769,2769
Inner Subway Station,88,88,88,88,88
Outer Subway Station,52,52,52,52,52
Streetcar Route,792,792,792,792,792


In [26]:
# Write bc_mode without its index; the relative path resolves against the
# current working directory.
bc_mode.to_csv('bc_mode_edge.csv', index = False)

In [27]:
# 'raw_am' subsets of both tables. Explicit copies are taken because a later
# cell assigns a new column into them, and assigning into a filtered slice
# triggers SettingWithCopyWarning.
bc_mode_am = bc_mode[bc_mode['type'] == 'raw_am'].copy()
bc_type_am = bc_type[bc_type['type'] == 'raw_am'].copy()

In [28]:
# Display the 'raw_am' subset of bc_mode.
bc_mode_am

Unnamed: 0,INT_ID_o,INT_ID_d,route,weighted,type,edge_type
7,100,101,1.0,1.546427e-09,raw_am,Inner Subway Station
42,100,151,1.0,1.551585e-09,raw_am,Inner Subway Station
76,100,13467909,72.0,8.210862e-11,raw_am,Frequent Bus Route
121,100,13467909,509.0,1.324850e-10,raw_am,Streetcar Route
155,101,100,1.0,3.581136e-09,raw_am,Inner Subway Station
...,...,...,...,...,...,...
262191,30112870,13453421,54.0,4.819033e-11,raw_am,Frequent Bus Route
262219,30112870,13455296,51.0,2.380289e-11,raw_am,Bus Route
262246,30112870,13455296,54.0,1.053335e-10,raw_am,Frequent Bus Route
262273,30113231,13446581,24.0,1.826229e-11,raw_am,Frequent Bus Route


In [29]:
# Add a constant `key` column to both frames. assign() returns new frames, so
# nothing is written into a filtered slice (the in-place form raised
# SettingWithCopyWarning).
bc_mode_am = bc_mode_am.assign(key=True)
bc_type_am = bc_type_am.assign(key=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bc_mode_am['key'] = True
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bc_type_am['key'] = True


In [30]:
# Rows of bc_type_am with no counterpart in bc_mode_am: after a left join on the
# shared columns, edge_type is null for exactly those rows.
test = (
    bc_type_am.merge(bc_mode_am, how='left')
    .loc[lambda joined: joined['edge_type'].isna()]
)

In [31]:
# Per-route non-null counts of the unmatched rows held in `test`.
test.sort_values(by = 'weighted').groupby('route').count()

Unnamed: 0_level_0,INT_ID_o,INT_ID_d,weighted,type,key,edge_type
route,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [32]:
# Display the restricted-edge table.
restricted_edges

Unnamed: 0,INT_ID_o,INT_ID_d,route,edge_type
7687,116.0,115.0,1,Outer Subway Station
7688,115.0,114.0,1,Outer Subway Station
7689,114.0,113.0,1,Outer Subway Station
7690,113.0,112.0,1,Outer Subway Station
7712,159.0,160.0,1,Outer Subway Station
...,...,...,...,...
361110,13454288.0,13454774.0,70,Frequent Bus Route
361111,13454774.0,13454909.0,70,Frequent Bus Route
387997,13452251.0,13453059.0,84,Frequent Bus Route
31295,228.0,13452869.0,116,Frequent Bus Route


In [33]:
# Edges whose origin never appears as a restricted origin AND whose destination
# never appears as a restricted destination (each column is tested independently).
edge_route.loc[
    ~(
        edge_route['INT_ID_o'].isin(restricted_edges['INT_ID_o'])
        | edge_route['INT_ID_d'].isin(restricted_edges['INT_ID_d'])
    ),
    ['INT_ID_o', 'INT_ID_d', 'route'],
].drop_duplicates()

Unnamed: 0,INT_ID_o,INT_ID_d,route
15157,20058480.0,13447680.0,196
15158,13447680.0,20015401.0,196
15159,20015401.0,13448413.0,196
15160,13448413.0,20015388.0,196
15161,20015388.0,13448065.0,196
...,...,...,...
373835,13461129.0,13460752.0,75
373836,13460752.0,13460415.0,75
373837,13460415.0,13460540.0,75
373838,13460540.0,13460816.0,75


In [34]:
# Edges for which at least one of origin, destination or route is missing from
# the corresponding restricted_edges column. Each column is tested independently,
# so this is a column-wise check, not a per-edge (row-wise) membership test.
edge_route.loc[
    ~edge_route['INT_ID_o'].isin(restricted_edges['INT_ID_o'])
    | ~edge_route['INT_ID_d'].isin(restricted_edges['INT_ID_d'])
    | ~edge_route['route'].isin(restricted_edges['route']),
    ['INT_ID_o', 'INT_ID_d', 'route'],
].drop_duplicates()

Unnamed: 0,INT_ID_o,INT_ID_d,route
15157,20058480.0,13447680.0,196
15158,13447680.0,20015401.0,196
15159,20015401.0,13448413.0,196
15160,13448413.0,20015388.0,196
15161,20015388.0,13448065.0,196
...,...,...,...
122717,13444398.0,13444486.0,395
122718,13444486.0,13444592.0,395
122719,13444592.0,13444613.0,395
122720,13444613.0,13444687.0,395
