In [1]:
import sys
import json
from pathlib import Path
from dateutil import parser
from math import pi

import requests
from shapely.geometry import shape, Point
import geopandas as gpd
import pandas as pd
import numpy as np


# DIR is the parent of the current working directory. It is appended to
# sys.path before the imports below run, so modules that live there can be found.
DIR = Path('..')
sys.path.append(str(DIR))

# NOTE(review): some of these are presumably project-local modules that only
# resolve because of the sys.path entry above — confirm which ones.
import gtfstk as gt
import pyotp as otp
import arcgdfconvertor as agc
import gtfsanalyst as ga


# Input and output folders, both relative to DIR.
DATA_DIR = DIR/'data/'
OUT_DIR = DIR/'output/'


# IPython autoreload in mode 2: modules are reloaded before each cell executes,
# so edits to imported .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [22]:
# Analysis configuration: the city selects the input files, dt is the analysis
# date and time.
city = 'Vancouver'
dt = parser.parse('11/Jan/2018 8:00:00 AM')

# All inputs live under DATA_DIR and are named after the city.
od_points_path = str(DATA_DIR.joinpath('Raw_data.gdb', '{0}_Hex_Point'.format(city)))
feed_path = str(DATA_DIR.joinpath('gtfs feeds', '{0}_GTFS.zip'.format(city)))
od_csv_path = str(DATA_DIR.joinpath('OD results', '{0}_detailed_od.csv'.format(city)))

# OD matrix

In [23]:
# Load the detailed OD legs and discard the first column (shown as
# 'Unnamed: 0' in the preview, i.e. a row index saved with the CSV).
od = pd.read_csv(od_csv_path)
od = od.drop(columns=[od.columns[0]])
od.head()

Unnamed: 0,trip_name,leg_id,mode,from,from_name,to,to_name,route_id,trip_id,distance,duration,startTime,endTime,waitTime,geometry
0,from 1396 to 1384,0,WALK,POINT (-122.770926197 49.27512711800006),Origin,POINT (-122.775046 49.274163),1:3867,,,425.143,329.0,1515686659000,1515686988000,1000.0,"LINESTRING (-12.277108 4.927512, -12.277109 4...."
1,from 1396 to 1384,1,BUS,POINT (-122.775046 49.274163),3867,POINT (-122.799011 49.274659),1:3195,1:32373,9280306.0,2447.870792,431.0,1515686989000,1515687420000,0.0,"LINESTRING (-12.277495 4.927413, -12.277507 4...."
2,from 1396 to 1384,2,WALK,POINT (-122.799011 49.274659),3195,POINT (-122.800656 49.274744),1:12234,,,420.911,319.0,1515687420000,1515687739000,161000.0,"LINESTRING (-12.2799 4.927465, -12.279902 4.92..."
3,from 1396 to 1384,3,RAIL,POINT (-122.800656 49.274744),12234,POINT (-123.079049 49.265873),1:10375,1:32499,9302512.0,22280.696146,1980.0,1515687900000,1515689880000,0.0,"LINESTRING (-12.280063 4.927479, -12.28019 4.9..."
4,from 1396 to 1384,4,WALK,POINT (-123.079049 49.265873),10375,POINT (-123.079463 49.265482),1:10926,,,202.098,154.0,1515689880000,1515690034000,326000.0,"LINESTRING (-12.307905 4.926587, -12.307843 4...."


In [24]:
# Work on a copy so the legs loaded into `od` stay untouched by later steps.
od_matrix = od.copy()

# Calculate awt

In [25]:
# Load a feed
feed = gt.read_gtfs(feed_path, dist_units='km')

#load od points
in_gdf = agc.fc_to_gdf(od_points_path)

#calculate travel times
# The service date is derived from dt (the analysis date set in the
# configuration cell) instead of being typed a second time, so the two cannot
# drift apart. For the configured date this is '20180111'.
service_date = dt.strftime('%Y%m%d')
pt_edges = ga.pt_edges(
    ga.validate_feed(feed, service_date),
    max_cuf_off = 2 * 3600,
    analysis_start = ga.text2sec('07:00:00'),
    analysis_end = ga.text2sec('09:00:00'),
    convert_to_gpd = False)

# Attach awt to each leg by matching (trip_id, from_name) against the feed
# edges' (trip_id, o_stop). The result is built from the untouched `od` frame
# rather than from od_matrix itself, so re-running this step does not merge awt
# a second time or divide the same columns again.
# NOTE(review): the rendered summary shows awt = 0 in every displayed row, which
# is what a left merge with no matching keys produces. trip_id appears as a
# float (e.g. 9280306.0) in the OD preview; confirm it has the same dtype and
# format as pt_edges['trip_id'].
od_matrix = od.merge(
    pt_edges[['trip_id', 'o_stop', 'awt']],
    left_on=['trip_id', 'from_name'],
    right_on=['trip_id', 'o_stop'],
    how='left').drop('o_stop', axis=1)

#convert durations into minutes
# waitTime is additionally divided by 1000 first (presumably stored in
# milliseconds — confirm against the OD export).
od_matrix = od_matrix.assign(
    duration=od_matrix['duration'] / 60,
    waitTime=od_matrix['waitTime'] / 1000 / 60,
    awt=od_matrix['awt'] / 60)

#make a summary table
# One row per trip_name; columns are ('sum', <value>, <mode>). The aggregation
# is given as the string 'sum', which keeps the top column level labelled 'sum'
# and avoids the deprecation pandas raises for a numpy callable here.
od_summary = pd.pivot_table(
    od_matrix,
    index=['trip_name'],
    values=['duration', 'distance', 'waitTime', 'awt'],
    columns=['mode'],
    aggfunc=['sum'],
    fill_value=0).reset_index()


def _mode_total(summary, value, modes):
    """Add up the ('sum', value, mode) columns for the modes present in summary.

    The pivot only creates a column for a mode that occurs in the data, so a
    data set without, say, FERRY legs has no FERRY columns. Missing modes are
    skipped here instead of raising KeyError; if none of the modes is present
    the total is 0 for every row.
    """
    present = [('sum', value, m) for m in modes if ('sum', value, m) in summary.columns]
    if not present:
        return pd.Series(0.0, index=summary.index)
    total = summary[present[0]]
    for col in present[1:]:
        total = total + summary[col]
    return total


TRANSIT_MODES = ('BUS', 'FERRY', 'RAIL')

# trip_name has the form 'from <origin> to <destination>'
od_summary['from']= od_summary.trip_name.apply(lambda x: x.split()[1])
od_summary['to']= od_summary.trip_name.apply(lambda x: x.split()[3])

od_summary['invehicle_time']= _mode_total(od_summary, 'duration', TRANSIT_MODES)

od_summary['walk_time']= _mode_total(od_summary, 'duration', ('WALK',))

# NOTE(review): only wait time recorded on BUS/FERRY/RAIL legs is counted; wait
# time recorded on WALK legs is left out of waitTime and travel_time — confirm
# this is intended.
od_summary['waitTime']= _mode_total(od_summary, 'waitTime', TRANSIT_MODES)

od_summary['awt']= _mode_total(od_summary, 'awt', TRANSIT_MODES)

od_summary['travel_time']= (od_summary['walk_time']+
                            od_summary['invehicle_time']+
                            od_summary['waitTime'])


# od_summary has three-level columns, so the OD-point table is given the same
# layout (('sum', '', <name>)) before the two are merged.
pow_key = ('sum', '', 'Unique_ID')
pow_value = ('sum', '', 'P_O_W')

pow_df = in_gdf.copy()
pow_df.columns = pd.MultiIndex.from_product([['sum'], [''], pow_df.columns])

# Look up P_O_W for each trip's 'to' point; the join key is dropped afterwards.
od_summary = od_summary.merge(
    pow_df[[pow_key, pow_value]],
    left_on=['to'],
    right_on=[pow_key],
    how='left')
od_summary = od_summary.drop(pow_key, axis=1)

od_summary.head()

Unnamed: 0_level_0,trip_name,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,from,to,invehicle_time,walk_time,waitTime,awt,travel_time,sum
Unnamed: 0_level_1,Unnamed: 1_level_1,awt,awt,awt,awt,distance,distance,distance,distance,duration,...,waitTime,waitTime,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
mode,Unnamed: 1_level_2,BUS,FERRY,RAIL,WALK,BUS,FERRY,RAIL,WALK,BUS,...,RAIL,WALK,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,P_O_W
0,from 1001 to 1002,0,0,0,0,4469.154894,0.0,0.0,635.468,6.366667,...,0.0,0.016667,1001,1002,6.366667,8.25,0.016667,0,14.633333,4607.948958
1,from 1001 to 1003,0,0,0,0,32663.7855,0.0,0.0,327.763,76.633333,...,0.0,0.016667,1001,1003,76.633333,4.2,75.233333,0,156.066667,3276.004753
2,from 1001 to 1004,0,0,0,0,15962.099872,0.0,0.0,1014.383,37.033333,...,0.0,108.483333,1001,1004,37.033333,12.833333,0.016667,0,49.883333,1530.963181
3,from 1001 to 1006,0,0,0,0,25941.482705,0.0,0.0,702.926,53.633333,...,0.0,0.016667,1001,1006,53.633333,9.016667,143.75,0,206.4,2795.636396
4,from 1001 to 1007,0,0,0,0,24711.07841,0.0,0.0,1228.988,47.416667,...,0.0,65.05,1001,1007,47.416667,16.033333,0.016667,0,63.466667,1854.79774


In [26]:
# Transposed preview of the first two trips: one line per (multi-level) column.
od_summary.head(2).T

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1
Unnamed: 0_level_1,Unnamed: 1_level_1,mode,Unnamed: 3_level_1,Unnamed: 4_level_1
trip_name,,,from 1001 to 1002,from 1001 to 1003
sum,awt,BUS,0,0
sum,awt,FERRY,0,0
sum,awt,RAIL,0,0
sum,awt,WALK,0,0
sum,distance,BUS,4469.15,32663.8
sum,distance,FERRY,0,0
sum,distance,RAIL,0,0
sum,distance,WALK,635.468,327.763
sum,duration,BUS,6.36667,76.6333


In [27]:
# For each threshold i = 5, 10, ..., 60 add a column
#     dist_decay_<i> = 1 / (1 + exp(0.2 * (travel_time - i))) * P_O_W
# i.e. a logistic weight on travel_time multiplied by the P_O_W value that was
# merged in for the trip's 'to' point.
# np.exp is used on the whole column: this notebook only imports `pi` from
# math, so `math.exp` raises NameError on a fresh kernel, and math.exp would
# also raise OverflowError for very large travel times where np.exp yields a
# weight of 0.
travel_time = od_summary['travel_time']
pow_values = od_summary[('sum', '', 'P_O_W')]
for i in range(5, 65, 5):
    f_name = 'dist_decay_{0}'.format(i)
    od_summary[f_name] = 1 / (1 + np.exp(0.2 * (travel_time - i))) * pow_values

od_summary.head().T

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,2,3,4
Unnamed: 0_level_1,Unnamed: 1_level_1,mode,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
trip_name,,,from 1001 to 1002,from 1001 to 1003,from 1001 to 1004,from 1001 to 1006,from 1001 to 1007
sum,awt,BUS,0,0,0,0,0
sum,awt,FERRY,0,0,0,0,0
sum,awt,RAIL,0,0,0,0,0
sum,awt,WALK,0,0,0,0,0
sum,distance,BUS,4469.15,32663.8,15962.1,25941.5,24711.1
sum,distance,FERRY,0,0,0,0,0
sum,distance,RAIL,0,0,0,0,0
sum,distance,WALK,635.468,327.763,1014.38,702.926,1228.99
sum,duration,BUS,6.36667,76.6333,37.0333,53.6333,47.4167


In [28]:
# Aggregation spec: every dist_decay_<i> column (i = 5, 10, ..., 60) is summed.
f_names = {'dist_decay_{0}'.format(i): 'sum' for i in range(5, 65, 5)}

# One row per 'from' value holding the column totals over all of its trips.
Hex_dist_decay_summary = (
    od_summary
    .groupby('from', as_index=False)
    .agg(f_names)
)
Hex_dist_decay_summary.head()

  return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)


Unnamed: 0,from,dist_decay_5,dist_decay_10,dist_decay_15,dist_decay_20,dist_decay_25,dist_decay_30,dist_decay_35,dist_decay_40,dist_decay_45,dist_decay_50,dist_decay_55,dist_decay_60
0,1001,2493.001641,6305.276656,14912.227927,32541.842758,64943.309285,115078.471948,176035.927506,235613.663946,287804.599623,334837.879684,380540.089688,423558.171142
1,1002,4709.543747,11401.998281,25673.50132,52862.410833,98239.921699,162144.267187,234590.592468,303289.221552,364739.105864,422170.926808,480533.758416,540987.969984
2,1003,3153.269391,8110.413074,19555.297491,42855.025084,84295.916551,147102.00137,223761.406416,299745.710562,367394.934642,427425.521869,484990.282574,543216.312267
3,1004,496.374653,1277.962098,3095.68625,6900.359513,14145.490259,26808.608241,47597.982652,81018.984702,133367.837168,208857.595138,302301.489777,400323.797203
4,1006,955.719613,2124.564882,4151.434028,7411.795217,12929.060628,22765.004869,39674.588322,66963.055537,110503.251777,177450.462683,266073.883716,362755.128257


In [29]:
# Save the per-'from' totals under OUT_DIR, named after the city. to_csv writes
# the row index as the first column by default.
Hex_dist_decay_summary.to_csv(str(OUT_DIR/'{0}_CRL_Hex_dist_decay_summary.csv'.format(city)))