In [56]:
import sys
import json

general = {'step_size': 0.01, 'use_road_network': True, 'duration': 24 * 60}
catchment_radius = {'bus': '500', 'subway': '800', 'rail': '1000', 'bus#am': 1}
params = {'general': general, 'catchment_radius': catchment_radius}

default = {'training_folder': '../../', 'scenario': 'montreal_periods', 'params': params, 'period': 'am'}
manual, argv = (True, default) if 'ipykernel' in sys.argv[0] else (False, dict(default, **json.loads(sys.argv[1])))
print(argv)


{'training_folder': '../../', 'scenario': 'montreal_periods', 'params': {'general': {'step_size': 0.01, 'use_road_network': True, 'duration': 1440}, 'catchment_radius': {'bus': '500', 'subway': '800', 'rail': '1000', 'bus#am': 1}}, 'period': 'am'}


In [57]:
import os
import geopandas as gpd
import pandas as pd

import numpy as np
from shapely.geometry import Point, LineString

sys.path.insert(0, r'../../../quetzal')  # Add path to quetzal

from quetzal.io.quenedi import read_parameters, restrict_df_to_variant

on_lambda = bool(os.environ.get('AWS_EXECUTION_ENV'))


In [58]:
sys.path.insert(0, r'../../')  # Add path to quetzal
from utils import get_epsg, population_to_mesh, get_acf_distances, get_routing_distances

# Folders stucture and params

In [59]:
period = argv.get('period', '')

everything is on S3 (nothing on ECR) so no direct input folder. just scenarios/{scen}/inputs/

In [60]:
scenario = argv['scenario']
training_folder = argv['training_folder']

# if local. add the path to the scenario scenarios/<scenario>/
local_scen_path = '' if on_lambda else os.path.join('scenarios/', scenario)

input_folder = os.path.join(training_folder, 'inputs/')
scenario_folder = os.path.join(training_folder, local_scen_path, 'inputs/')
model_folder = os.path.join(training_folder, local_scen_path, 'model/')
output_folder = os.path.join(training_folder, local_scen_path, 'outputs/', period)

if not os.path.exists(output_folder):
	os.makedirs(output_folder)

print('input folder: ', scenario_folder)
print('output folder: ', output_folder)

input folder:  ../../scenarios/montreal_periods/inputs/
output folder:  ../../scenarios/montreal_periods/outputs/am


In [61]:
params = read_parameters(argv['params'], period=period)

In [62]:
catchment_radius = params.get('catchment_radius', {})
catchment_radius = {k: float(v) for k, v in catchment_radius.items()}
default_catchment_radius = 500


In [63]:
# step est en degrés (default: 0.005 ~500m)

step_size_min = 0.0005  # (0.0005 ~50m)
step_size = max(float(params['general'].get('step_size')), step_size_min)
use_road_network = params['general'].get('use_road_network')


# inputs

In [64]:
links = gpd.read_file(os.path.join(scenario_folder, 'pt', 'links.geojson'))
nodes = gpd.read_file(os.path.join(scenario_folder, 'pt', 'nodes.geojson'))
links = links.set_index('index')
nodes = nodes.set_index('index')

In [65]:
population_file = os.path.join(scenario_folder, 'population.geojson')
population_file_provided = os.path.isfile(population_file)
if population_file_provided:
	population = gpd.read_file(population_file)
	if 'index' in population.columns:
		population = population.set_index('index')
	else:
		population.index.name = 'index'
	assert 'density' in population.columns, 'need density column. in km2'
	assert population.crs == 4326, 'population.geojson CRS must be EPSG:4326'
print('population?', population_file_provided)

population? True


In [66]:
rnodes_file = os.path.join(scenario_folder, 'road', 'road_nodes.geojson')
rnodes_file_provided = os.path.isfile(rnodes_file)
use_road_network = rnodes_file_provided & use_road_network
if use_road_network:
	rnodes = gpd.read_file(os.path.join(scenario_folder, 'road', 'road_nodes.geojson'))
	rnodes = rnodes.set_index('index')
	rlinks = gpd.read_file(os.path.join(scenario_folder, 'road', 'road_links.geojson'))
	rlinks = rlinks.set_index('index')
print('road network provided?', rnodes_file_provided)
print('use roads?', use_road_network)

road network provided? False
use roads? False


In [67]:
od_file = os.path.join(scenario_folder, 'od', 'od.geojson')
od_file_provided = os.path.isfile(od_file)
if od_file_provided:
	od_test = gpd.read_file(od_file)
	if 'name' not in od_test.columns:
		od_test['name'] = od_test['index']
	od_test['name'] = od_test['name'].fillna(od_test['index'].astype(str))
print('od?', od_file_provided)

od? True


# population preparation

In [68]:
# find meters CRS
centroid = [*LineString(nodes.centroid.values).centroid.coords][0]
crs = get_epsg(centroid[1], centroid[0])
crs

32618

In [69]:
if population_file_provided:
	population['area (km2)'] = population.to_crs(crs).area * 1e-6
	population['area (km2)'].sum()

In [70]:
if population_file_provided:
	population = restrict_df_to_variant(population, period)

keep and rename:  {}
drops:  []


In [71]:
if population_file_provided:
	population['population'] = population['density'] * population['area (km2)']
	print('population', population['population'].sum())

population 4884999.000000021


# population mesh

In [72]:
if not population_file_provided:
	mesh = gpd.GeoDataFrame(
		index=[0], data={'zone': 'centroid', 'population': 0}, geometry=[Point(centroid[0], centroid[1])]
	)
	mesh.index.name = 'index'
	mesh.crs = 4326
	if use_road_network:
		mesh['node_index'] = rnodes.index[0]
elif use_road_network:
	# use rnodes as mesh.
	print('using road_nodes')
	mesh = population_to_mesh(population, mesh=rnodes, step=step_size, col='population', fill_missing='nearest')
else:
	# create a mesh
	# 0.01 = 1km 0.005 = 500m
	mesh = population_to_mesh(population, step=step_size, col='population', fill_missing='centroid')


1863 nodes in multiple zones. will be match to a single zone.
568 unfounded zones
Unfound zones centroid will be added to mesh


In [73]:
mesh.to_file(os.path.join(output_folder, 'population_mesh.geojson'), driver='GeoJSON')

# restrict TC to period

In [74]:
links = restrict_df_to_variant(links, period)
# only keep links with headway not 0
links = links[links['headway'] != 0]

keep and rename:  {'time#am': 'time', 'headway#am': 'headway', 'speed#am': 'speed'}
drops:  ['time#pm', 'headway#pm', 'speed#pm']


# catchment

In [75]:
# find TC nodes to mesh distance

In [76]:
max_dist = max(max(catchment_radius.values()), default_catchment_radius)

In [77]:
if use_road_network:
	print('using road_nodes')
	node_dist = get_routing_distances(nodes, rnodes, rlinks, mesh, 'length', max_dist)
else:
	node_dist = get_acf_distances(nodes, mesh, crs, max_dist)

# metrics

In [78]:
print('num route_id:', len(links['route_id'].unique()))
print('num route_type:', len(links['route_type'].unique()))

num route_id: 190
num route_type: 2


In [79]:
# init results dfs
df_route_id = pd.DataFrame(index=links['route_id'].unique())
df_route_id.index.name = 'route_id'

df_route_type = pd.DataFrame(index=links['route_type'].unique())
df_route_type.index.name = 'route_type'

In [80]:
def get_catchment(col='route_id'):
	# get all nodes with col filter
	link = links.groupby(col)[['a', 'b', 'route_type']].agg({'a': set, 'b': set, 'route_type': 'first'})
	link['node'] = link.apply(lambda row: row['a'].union(row['b']), axis=1)
	link = link.drop(columns=['a', 'b'])
	# add catchment radius for the route_type
	link['catchment_radius'] = link['route_type'].apply(lambda x: catchment_radius.get(x, default_catchment_radius))

	col_exist = col == 'route_type'  # cannot explode if index == route_type (a column)
	link = link.explode('node').reset_index(drop=col_exist)
	link = node_dist.merge(link, left_on='node_index', right_on='node')
	# filter by distance
	link = link[link['distances'] <= link['catchment_radius']]
	# drop duplicated mesh nodes (we count only one time)
	link = link.drop_duplicates(subset=['mesh_index', col], keep='first')

	return link.groupby(col)['population'].sum().to_dict()

In [81]:
res = get_catchment('route_id')

df_route_id['catchment'] = res
df_route_id['catchment'] = df_route_id['catchment'].fillna(0)
print(sum([item for key, item in res.items()]))


807178.000000009


In [82]:
res = get_catchment('route_type')

df_route_type['catchment'] = res
df_route_type['catchment'] = df_route_type['catchment'].fillna(0)

print(sum([item for key, item in res.items()]))

663573.000000003


# frequency

In [83]:
links['frequency'] = 1 / links['headway']

In [84]:
res = (links.groupby('route_id')['frequency'].agg('mean') * 3600).to_dict()

df_route_id['frequency (veh/hours)'] = res
print(np.nansum([item for key, item in res.items()]))

541.1115105236645


In [85]:
res = (links.groupby('route_type')['frequency'].agg('mean') * 3600).to_dict()

df_route_type['frequency (veh/hours)'] = res
print(sum([item for key, item in res.items()]))

14.843999246460045


In [86]:
link = links.groupby(['route_id', 'trip_id'])[['frequency']].agg('mean') * 3600
res = link.reset_index().set_index('route_id')['frequency'].to_dict()
print(np.nansum([item for key, item in res.items()]))

471.29212917397285


In [87]:
link = links.groupby(['route_type', 'trip_id'])[['frequency']].agg('mean') * 3600
res = link.reset_index().set_index('route_type')['frequency'].to_dict()
print(np.nansum([item for key, item in res.items()]))

13.403578247648682


# operational Fleet

In [88]:
def get_fleet(col='route_id'):
	link = links.groupby([col, 'trip_id'])[['time', 'frequency']].agg({'time': 'mean', 'frequency': 'mean'})
	link['fleet'] = np.ceil(link['frequency'] * link['time'])
	return link.reset_index().groupby(col)['fleet'].agg('sum').to_dict()

In [89]:
res = get_fleet('route_id')

df_route_id['fleet'] = res
print(sum([item for key, item in res.items()]))

407.0


In [90]:
res = get_fleet('route_type')

df_route_type['fleet'] = res
print(sum([item for key, item in res.items()]))

407.0


# Line Length

In [91]:
def get_length(col='route_id', length_col='length'):
	link = links.groupby([col, 'trip_id'])[[length_col]].agg('sum')
	return link.reset_index().groupby(col)[length_col].agg('sum').to_dict()

In [92]:
# preparation. if length is NaN, or if shape dist travel exist.

length_col = None
if 'length' in links.columns and length_col == None:
	if len(links[links['length'].isnull()]) == 0:
		length_col = 'length'

if 'shape_dist_traveled' in links.columns and length_col == None:
	if len(links[links['shape_dist_traveled'].isnull()]) == 0:
		length_col = 'shape_dist_traveled'

if length_col == None:
	print('create length from geometry')
	links['length'] = links.to_crs(crs).length
	length_col = 'length'


In [93]:
res = get_length('route_id', length_col)

df_route_id['length (m)'] = res
print(sum([item for key, item in res.items()]))

4400040.830544667


In [94]:
res = get_length('route_type', length_col)

df_route_type['length (m)'] = res
print(sum([item for key, item in res.items()]))

4400040.830544667


# Number of station per line

In [95]:
# o-->o-->o-->o and  o<--o<--o<--o
# est-ce que j'ai 8 ou 4 stations ?
# j'ai 4 stations par trip et 4 stations par route (si c'est les memes).
# comment savoir si cest les memes. clustering?
# pour linstant. on prend tous les noeds unique par route_id ou route_type (col='route_id', route_id)
def get_num_station(col='route_id'):
	link = links.groupby(col)[['a', 'b']].agg({'a': set, 'b': set})
	link['node_len'] = link.apply(lambda row: len(row['a'].union(row['b'])), axis=1)
	return link['node_len'].to_dict()


In [96]:
res = get_num_station('route_id')

df_route_id['num station'] = res
print(sum([item for key, item in res.items()]))

13122


In [97]:
res = get_num_station('route_type')

df_route_type['num station'] = res
print(sum([item for key, item in res.items()]))

13122


# Vehicle revenue KM 

In [98]:
def get_veh_kmh(col='route_id'):
	link = links.groupby([col, 'trip_id'])[[length_col, 'frequency']].agg({length_col: 'sum', 'frequency': 'mean'})
	link['veh_km/h'] = np.ceil(link['frequency'] * link[length_col]) * 3600 / 1000  # to km/H
	return link.reset_index().groupby(col)['veh_km/h'].agg('sum').to_dict()

In [99]:
res = get_veh_kmh('route_id')

df_route_id['veh.km/h'] = res
print(sum([item for key, item in res.items()]))

13734.0


In [100]:
res = get_veh_kmh('route_type')

df_route_type['veh.km/h'] = res
print(sum([item for key, item in res.items()]))

13734.0


# Round trip time

In [101]:
def get_round_trip_time(col='route_id'):
	link = links.groupby([col, 'trip_id'])[['time']].agg('sum')
	return link.reset_index().groupby(col)['time'].agg('sum').to_dict()

In [102]:
res = get_round_trip_time('route_id')

df_route_id['round trip time (s)'] = res
print(sum([item for key, item in res.items()]))

839581.0


# export dfs to csv

In [103]:
# round numbers
for col in ['catchment', 'frequency (veh/hours)', 'length (m)', 'veh.km/h', 'round trip time (s)']:
	df_route_id[col] = df_route_id[col].apply(lambda x: np.round(x, 2))
	df_route_id[col] = df_route_id[col].apply(lambda x: np.round(x, 2))


In [104]:
# df_route_id = df_route_id.fillna('null')
# df_route_type = df_route_type.fillna('null')

In [105]:
df_route_id.to_csv(os.path.join(output_folder, 'route_id_metrics.csv'))
df_route_id

Unnamed: 0_level_0,catchment,frequency (veh/hours),fleet,length (m),num station,veh.km/h,round trip time (s)
route_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
STM_100,0.0,4.52,2.0,28756.63,80,140.4,4620.0
STM_101,0.0,2.17,2.0,31147.03,87,72.0,6447.0
STM_102,0.0,1.85,2.0,12013.75,51,25.2,2400.0
STM_103,0.0,4.19,2.0,10142.89,47,46.8,2514.0
STM_104,0.0,2.96,2.0,19371.55,80,64.8,4569.0
...,...,...,...,...,...,...,...
STM_99,0.0,2.01,2.0,9661.49,51,21.6,2400.0
STM_1,264471.5,14.91,2.0,38570.75,53,579.6,4260.0
STM_2,360835.0,10.78,3.0,79346.37,89,882.0,7920.0
STM_4,20612.0,10.71,2.0,7620.80,6,86.4,720.0


In [106]:
df_route_type.to_csv(os.path.join(output_folder, 'route_type_metrics.csv'))
df_route_type

Unnamed: 0_level_0,catchment,frequency (veh/hours),fleet,length (m),num station,veh.km/h
route_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
bus,0.0,2.706206,398.0,4255468.0,12950,11962.8
subway,663573.0,12.137794,9.0,144572.7,172,1771.2


# geomatic outputs

In [107]:
# using get catchment. get the catchment radius of each node (get larger one if used by many mode.)
link = links.groupby('route_type')[['a', 'b', 'route_type']].agg({'a': set, 'b': set, 'route_type': 'first'})
link['node'] = link.apply(lambda row: row['a'].union(row['b']), axis=1)
link = link.drop(columns=['a', 'b'])
# add catchment radius for the route_type
link['catchment_radius'] = link['route_type'].apply(lambda x: catchment_radius.get(x, default_catchment_radius))
link = link.explode('node').reset_index(drop=True)
link = link.sort_values('catchment_radius', ascending=False).drop_duplicates('node', keep='first')
link = node_dist.merge(link, left_on='node_index', right_on='node')
link = link[link['distances'] <= link['catchment_radius']]

temp_dict = link.groupby('node_index')['population'].sum().to_dict()
nodes['catchment'] = nodes.index.map(temp_dict.get)

temp_dict = link.groupby('node_index')['catchment_radius'].agg('first').to_dict()
nodes['catchment_radius'] = nodes.index.map(temp_dict.get)


In [108]:
nodes.to_file(os.path.join(output_folder, 'nodes.geojson'), driver='GeoJSON')

# test