In [247]:
import sys
import json

general = {'step_size': '0.001', 'use_road_network': True, 'coef_day_to_year': '300', 'clustering_radius': '500'}

params = {
    'general': general,
    }

default = {'training_folder': '../../scenarios/clermont', 'params':params} # Default execution parameters
manual, argv = (True, default) if 'ipykernel' in sys.argv[0] else (False, dict(default, **json.loads(sys.argv[1])))
print(argv)

{'training_folder': '../../scenarios/clermont', 'params': {'general': {'step_size': '0.001', 'use_road_network': True, 'coef_day_to_year': '300', 'clustering_radius': '500'}}}


In [248]:
import os
import time
import geopandas as gpd
import pandas as pd
sys.path.insert(0, r'../../../quetzal') # Add path to quetzal
import numpy as np
import random
import matplotlib.pyplot as plt
from shapely.geometry import Point, LineString
from typing import Literal
import numba as nb
from collections import defaultdict
from sklearn.cluster import DBSCAN
#num_cores = 1
print('numba threads',nb.config.NUMBA_NUM_THREADS)

on_lambda = bool(os.environ.get('AWS_EXECUTION_ENV'))
io_engine = 'pyogrio' 

numba threads 16


In [249]:
sys.path.insert(0, r'../../') # Add path
from utils import get_epsg, population_to_mesh, get_acf_distances, get_routing_distances

# Folders stucture and params

Everything is on S3 (nothing on ECR) so no direct input folder. just scenarios/{scen}/inputs/

In [250]:
argv['training_folder']

'../../scenarios/clermont'

In [251]:
argv['params']

{'general': {'step_size': '0.001',
  'use_road_network': True,
  'coef_day_to_year': '300',
  'clustering_radius': '500'}}

In [252]:
base_folder = argv['training_folder']
input_folder = os.path.join(base_folder,'inputs/')
pt_folder  = os.path.join(input_folder,'pt/')
road_folder = os.path.join(input_folder,'road/') ## réseau ferré
od_folder =  os.path.join(input_folder,'od/')

output_folder = os.path.join(base_folder,'outputs/')
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

model_folder = os.path.join(input_folder, 'model/')

In [253]:
# Read general params
step_size_min = 0.0005
step_size = max(float(argv['params']['general'].get('step_size')), step_size_min)
use_road_network = argv['params']['general'].get('use_road_network') ## param use r_ntw
coef_day_to_year = float(argv['params']['general'].get('coef_day_to_year'))
clustering_radius = float(argv['params']['general'].get('clustering_radius'))

In [254]:
# Default pt_links params in case not filled by the user
default_catchment_radius = 500      # meters
default_capex = 0.3                 # €/veh.km
default_capacity = 60               # vehicle capacity in PAX
default_service_hours = 12          # nb d'heures par jour de fonctionnement
#ajouter : nb heures service/jour / headway (mais il y est déjà)
#TODO: voir si on peut ajouter les champs défaut dans les liens ==> suppr. ==> adapter script

# Inputs

PT links

In [255]:
with open(pt_folder + 'links.geojson') as f:
    links_ = json.load(f)

columns = set()
for feature in links_['features']:
    for key in feature['properties'].keys():
        columns.add(key)

links = pd.DataFrame(links_['features'])
for col in columns:
    links[col] = links.apply(lambda x: x['properties'].get(col, None), 1)
links['geometry'] = links['geometry'].apply(lambda x: LineString(x['coordinates']))
links.drop(columns=['type', 'properties'], inplace=True)

links = links.set_index('index')
links = gpd.GeoDataFrame(links, geometry='geometry', crs='EPSG:4326')

In [256]:
nodes = gpd.read_file(pt_folder + 'nodes.geojson', engine=io_engine)
nodes = nodes.set_index('index')
nodes = nodes[~pd.isna(nodes.geometry)]

In [257]:
# links['catchment_radius_marche'] = 800
# links['catchment_radius_velo'] = 3000
# links['catchment_radius_voiture'] = 5000

In [258]:
if 'capacity' not in links.columns:
    links['capacity'] = default_capacity

In [259]:
if 'capex' not in links.columns:
    links['capex'] = default_capex

In [260]:
catchment_radii = [x for x in links.columns if 'catchment_radius' in x]
catchment_radii_provided = (len(catchment_radii) > 0)
if catchment_radii_provided:
    for x in catchment_radii:
        M = links[x].max()
        if not links[x].equals(links[x].fillna(M)):
            print('!! Catchemnt radius values missing in column {} !!'.format(x))
        links[x] = links[x].fillna(M)
else:
    links['catchment_radius'] = default_catchment_radius

In [261]:
default_service_hours = 12
if 'nb_service_hours' not in links.columns:
    links['nb_service_hours'] = default_service_hours

In [262]:
if 'departures' not in links.columns:
    links['departures'] = None
if 'arrivals' not in links.columns:
    links['arrivals'] = None

Input data zoning file

In [263]:
# find meters CRS
centroid = [*LineString(nodes.centroid.values).centroid.coords][0]
crs = get_epsg(centroid[1],centroid[0])
crs

32631

In [264]:
zonage_file = os.path.join(input_folder, 'zonage.geojson')
zonage_file_provided = os.path.isfile(zonage_file)
if zonage_file_provided :
    zonage = gpd.read_file(input_folder + 'zonage.geojson', engine=io_engine).to_crs(epsg='4326')
    zonage['area (km2)'] = zonage.to_crs(crs).area / 10**6
else:
    print('No zonage file in the input folder...')

In [265]:
densities = [x for x in zonage.columns if 'density' in x]
assert len(densities) > 0, 'Please provide densities as input data in the zoning file'

In [266]:
display_ph_columns = ('headway_ph' in links.columns)

Road network

In [267]:
## road network here
rnodes_file = os.path.join(road_folder, 'road_nodes.geojson')
rnodes_file_provided = os.path.isfile(rnodes_file)
if rnodes_file_provided:
    rnodes = gpd.read_file(os.path.join(road_folder, 'road_nodes.geojson'), engine=io_engine)
    rnodes = rnodes.set_index('index').to_crs(epsg='4326')
    rlinks = gpd.read_file(os.path.join(road_folder, 'road_links.geojson'), engine=io_engine)
    rlinks = rlinks.set_index('index').to_crs(epsg='4326')
print('road network ?',rnodes_file_provided)

road network ? True


In [268]:
od_file = os.path.join(od_folder, 'od.geojson')
od_file_provided = os.path.isfile(od_file)
if od_file_provided:
    od_test = gpd.read_file(od_file, engine=io_engine)
    if 'name' not in od_test.columns:
        od_test['name'] = od_test['index']
    od_test['name'] = od_test['name'].fillna(od_test['index'].astype(str))
print('od ?',od_file_provided)

od ? False


# Init result dataframes

In [269]:
df_route_id = pd.DataFrame(index=links['route_id'].unique())
df_route_id.index.name = 'route_id'
df_route_id = df_route_id.reset_index()
if display_ph_columns:   
    df_route_id = df_route_id.merge(links[['route_id', 'route_type', 'capacity', 'headway', 'headway_ph', 'headway_oph', 'nb_peak_hours']], on='route_id', how='left')
else:
    df_route_id = df_route_id.merge(links[['route_id', 'route_type', 'capacity', 'headway']], on='route_id', how='left')
df_route_id = df_route_id.rename(columns={'capacity': 'veh_capacity (PAX)'})
df_route_id = df_route_id.drop_duplicates()
df_route_id = df_route_id.set_index('route_id')

df_trip_id = pd.DataFrame(index=links['trip_id'].unique())
df_trip_id.index.name = 'trip_id'
df_trip_id = df_trip_id.reset_index()
if display_ph_columns:   
    df_trip_id = df_trip_id.merge(links[['trip_id', 'route_id', 'route_type', 'capacity', 'headway', 'headway_ph', 'headway_oph', 'nb_peak_hours']], on='trip_id', how='left')
else:
    df_trip_id = df_trip_id.merge(links[['trip_id', 'route_id', 'route_type', 'capacity', 'headway']], on='trip_id', how='left')
df_trip_id = df_trip_id.rename(columns={'capacity': 'veh_capacity (PAX)'})
df_trip_id = df_trip_id.drop_duplicates()
df_trip_id = df_trip_id.set_index('trip_id')

In [270]:
df_route_type = pd.DataFrame(index=links['route_type'].unique())
df_route_type.index.name='route_type'

In [271]:
# Make sure headways are consistent : one single headway for both way and return
# Otherwise can't calculate KPIs later

df_route_id = df_route_id[~df_route_id.index.duplicated(keep='first')]
route_headway = dict(zip(df_route_id.index, df_route_id['headway']))
links.headway = links.route_id.map(route_headway)

# Catchment calculation

In [272]:
def get_catchment_by_mode(col='route_id', pop_col='population', node_dist=None):
    #get all nodes with col filter
    link = links.groupby(col)[['a','b','route_type', 'catchment_radius']].agg({'a':set,'b':set,'route_type':'first', 'catchment_radius': 'first'})
    link['node'] = link.apply(lambda row: row['a'].union(row['b']), axis=1)
    link = link.drop(columns=['a','b'])

    col_exist = col == 'route_type' # cannot explode if index == route_type (a column)
    link = link.explode('node').reset_index(drop=col_exist)
    link = node_dist.merge(link, left_on='node_index', right_on='node')
    #filter by distance
    link = link[link['distances'] <= link['catchment_radius']]
    #drop duplicated mesh nodes (we count only one time)
    link = link.drop_duplicates(subset=['mesh_index',col],keep='first')

    return link.groupby(col)[pop_col].sum().to_dict()

In [273]:
def get_catchment_by_access(col='route_id', pop_col='population', catchment_col='catchment_radius', node_dist=None):
    #get all nodes with col filter
    link = links.groupby(col)[['a','b','route_type', catchment_col]].agg({'a':set,'b':set,'route_type':'first', catchment_col:'first'})
    link['node'] = link.apply(lambda row: row['a'].union(row['b']), axis=1)
    link = link.drop(columns=['a','b'])

    col_exist = col == 'route_type' # cannot explode if index == route_type (a column)
    link = link.explode('node').reset_index(drop=col_exist)
    link = node_dist.merge(link, left_on='node_index', right_on='node')
    #filter by distance
    link = link[link['distances'] <= link[catchment_col]]
    #drop duplicated mesh nodes (we count only one time)
    link = link.drop_duplicates(subset=['mesh_index',col],keep='first')

    return link.groupby(col)[pop_col].sum().to_dict()

In [274]:
meshes = {}
## road network here
for density in densities:
    if density == 'density' and 'population_density' not in densities:
        tag = 'population'
    elif density == 'density':
        tag = 'x'
    else:
        tag = density.split('_density')[0]

    print(tag)

    zonage[tag] = zonage[density] * zonage['area (km2)']

    if rnodes_file_provided and use_road_network:
        # use rnodes as mesh_pop.
        print('using road_nodes') ## road network here
        mesh = population_to_mesh(zonage, mesh=rnodes, step=step_size, col=tag, fill_missing='closest')
    else:
        # create a mesh
        mesh = population_to_mesh(zonage, mesh=None, step=step_size, col=tag, fill_missing='centroid')

    #mesh.to_file(output_folder + 'population_mesh.geojson',driver='GeoJSON',engine=io_engine)
    if catchment_radii_provided:
        max_dist = max([links[catchment_radius].max() for catchment_radius in catchment_radii])
    else:
        # max_dist = max(max(input_catchment_radius.values()),default_catchment_radius)
        max_dist = default_catchment_radius

    ## road network here, what to use if not road network?
    if rnodes_file_provided: 
        print('using road_nodes')
        node_dist = get_routing_distances(nodes, rnodes, rlinks, mesh, tag, 'length', max_dist)
    else:
        node_dist = get_acf_distances(nodes, mesh, tag, crs, max_dist)

    if catchment_radii_provided:
        for catchment_radius in catchment_radii:
            suf = catchment_radius.split('catchment_radius_')[1]

            res_trip = get_catchment_by_access('trip_id', tag, catchment_radius, node_dist)
            res_route = get_catchment_by_access('route_id', tag, catchment_radius, node_dist)
            res_mode = get_catchment_by_access('route_type', tag, catchment_radius, node_dist)

            if suf == '':
                df_trip_id['catchment {}'.format(tag)] = res_trip
                df_trip_id['catchment {}'.format(tag)] = df_trip_id['catchment {}'.format(tag)].fillna(0) 

                df_route_id['catchment {}'.format(tag)] = res_route
                df_route_id['catchment {}'.format(tag)] = df_route_id['catchment {}'.format(tag)].fillna(0) 

                df_route_type['catchment {}'.format(tag)] = res_mode
                df_route_type['catchment {}'.format(tag)] = df_route_type['catchment {}'.format(tag)].fillna(0) 

            else:
                df_trip_id['catchment {} {}'.format(tag, suf)] = res_trip
                df_trip_id['catchment {} {}'.format(tag, suf)] = df_trip_id['catchment {} {}'.format(tag, suf)].fillna(0) 

                df_route_id['catchment {} {}'.format(tag, suf)] = res_route
                df_route_id['catchment {} {}'.format(tag, suf)] = df_route_id['catchment {} {}'.format(tag, suf)].fillna(0) 

                df_route_type['catchment {} {}'.format(tag, suf)] = res_mode
                df_route_type['catchment {} {}'.format(tag, suf)] = df_route_type['catchment {} {}'.format(tag, suf)].fillna(0) 

    else:
        res_trip = get_catchment_by_mode('trip_id', tag, node_dist)
        res_route = get_catchment_by_mode('route_id', tag, node_dist)
        res_mode = get_catchment_by_mode('route_type', tag, node_dist)

        df_trip_id['catchment {}'.format(tag)] = res_trip
        df_trip_id['catchment {}'.format(tag)] = df_trip_id['catchment {}'.format(tag)].fillna(0) 

        df_route_id['catchment {}'.format(tag)] = res_route
        df_route_id['catchment {}'.format(tag)] = df_route_id['catchment {}'.format(tag)].fillna(0) 

        df_route_type['catchment {}'.format(tag)] = res_mode
        df_route_type['catchment {}'.format(tag)] = df_route_type['catchment {}'.format(tag)].fillna(0)

population
using road_nodes
0 nodes in multiple zones. will be match to a single zone.
13 unfounded zones
using road_nodes


# Frequency

In [275]:
# # Suppose that headway is not the same in both directions : keep the minimum value
# idx = df_route_id.groupby(level=0)['headway'].idxmin()
# df_route_id = df_route_id.loc[idx]

# df_route_id = df_route_id.rename(columns={'headway': 'headway (s)'})
# df_route_id = df_route_id.sort_values('route_type', ascending=False)

In [276]:
# links['frequency'] = 1/links['headway']

In [277]:
# res = (links.groupby('route_id')['frequency'].agg('mean')*3600).to_dict()

# df_route_id['frequency (veh/hours)'] = res
# print(np.nansum([item for key, item in res.items()]))

In [278]:
# res = (links.groupby('route_type')['frequency'].agg('mean')*3600).to_dict()

# df_route_type['frequency (veh/hours)'] = res
# print(sum([item for key, item in res.items()]))

In [279]:
if 'departures' in links.columns:
    links['link_has_timetable'] = links['departures'].apply(lambda x: 0 if x is None else 1)
    lines_with_timetable = links.groupby('route_id')['link_has_timetable'].min()
    links['line_has_timetable'] = links['route_id'].map(lines_with_timetable)
    links.drop(columns='link_has_timetable', inplace=True)
else:
    links['line_has_timetable'] = 0

In [280]:
default = {'headway_ph': links['headway'].max(),
           'headway_oph': links['headway'].max(),
           'nb_service_hours': 14,
           'nb_peak_hours': 14}

dic_headway = links.groupby('route_id')['headway'].min()

for col in ['headway_ph', 'headway_oph']:
    if col not in links.columns:
        links[col] = default[col]
    else: 
        dic_col = links.groupby('route_id')[col].min()
        links[col] = links['route_id'].map(dic_col)
        links.loc[links[col].isna(), col] = links.loc[links[col].isna(), 'route_id'].map(dic_headway)
        links[col] = links[col].astype(int)

for col in ['nb_service_hours', 'nb_peak_hours']:
    if col not in links.columns:
        links[col] = default[col]
    else:
        links[col] = links[col]
        dic_col = links.groupby('route_id')[col].max()
        links[col] = links['route_id'].map(dic_col).fillna(default[col])
        links[col] = links[col].astype(int)

In [281]:
from datetime import datetime

def time_seconds(time_str):
    time_obj = datetime.strptime(time_str, '%H:%M:%S')
    return time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second

def retrieve_avg_headway(departures):
    deps_seconds = [time_seconds(t) for t in departures]
    if len(deps_seconds) >= 1:
        gaps = [deps_seconds[i] - deps_seconds[i-1] for i in range(1, len(deps_seconds))]
        return int(np.average(gaps))
    return None

def retrieve_oph_headway(departures):
    deps_seconds = [time_seconds(t) for t in departures]
    if len(deps_seconds) >= 1:
        gaps = [deps_seconds[i] - deps_seconds[i-1] for i in range(1, len(deps_seconds))]
        return max(gaps)
    return None

def retrieve_ph_headway(departures):
    deps_seconds = [time_seconds(t) for t in departures]
    if len(deps_seconds) >= 1:
        gaps = [deps_seconds[i] - deps_seconds[i-1] for i in range(1, len(deps_seconds))]
        return min(gaps)
    return None

def retrieve_service_hours(departures):
    deps_seconds = [time_seconds(t) for t in departures]
    if len(deps_seconds) >= 1:
        return (deps_seconds[-1] - deps_seconds[0])/3600

In [282]:
links.loc[links['line_has_timetable'] == 1, 'frequency_per_day'] = links.loc[links['line_has_timetable'] == 1].apply(lambda x: len(x['departures']), 1)
links.loc[links['line_has_timetable'] == 0, 'frequency_per_day'] = links.loc[links['line_has_timetable'] == 0].apply(lambda x: np.ceil(3600*(x['nb_peak_hours']/x['headway_ph'] + (x['nb_service_hours'] - x['nb_peak_hours'])/x['headway_oph'])), 1)

In [283]:
links.loc[links['line_has_timetable']==1, 'headway'] = links.loc[links['line_has_timetable']==1, 'departures'].apply(retrieve_avg_headway)
links.loc[links['line_has_timetable']==1, 'headway_ph'] = links.loc[links['line_has_timetable']==1, 'departures'].apply(retrieve_ph_headway)
links.loc[links['line_has_timetable']==1, 'headway_oph'] = links.loc[links['line_has_timetable']==1, 'departures'].apply(retrieve_oph_headway)
links.loc[links['line_has_timetable']==1, 'nb_service_hours'] = links.loc[links['line_has_timetable']==1, 'departures'].apply(retrieve_service_hours)
links.loc[links['line_has_timetable']==1, 'nb_peak_hours'] = None

In [284]:
links['frequency (veh/hour)'] = 1/links['headway']*3600
links['frequency ph (veh/hour)'] = 1/links['headway_ph']*3600
links['frequency oph (veh/hour)'] = 1/links['headway_oph']*3600

In [285]:
links['headway'] = links.apply(lambda x: x['headway'] if (x['headway_ph'] == x['headway_oph']) else None, 1)
links['frequency (veh/hour)'] = links.apply(lambda x: x['frequency (veh/hour)'] if (x['frequency ph (veh/hour)'] == x['frequency oph (veh/hour)']) else None, 1)

In [286]:
res_hour = (links.groupby('route_id')['frequency (veh/hour)'].agg('mean')).to_dict()
res_day = (links.groupby('route_id')['frequency_per_day'].agg('mean')).to_dict()

df_route_id['frequency (veh/day)'] = res_day
df_route_id['frequency (veh/hour)'] = res_hour

if display_ph_columns:
    res_ph = (links.loc[links['frequency ph (veh/hour)'] != links['frequency oph (veh/hour)']].groupby('route_id')['frequency ph (veh/hour)'].agg('mean')*3600).to_dict()
    res_oph = (links.loc[links['frequency ph (veh/hour)'] != links['frequency oph (veh/hour)']].groupby('route_id')['frequency oph (veh/hour)'].agg('mean')*3600).to_dict()

    df_route_id['frequency ph (veh/hour)'] = res_ph
    df_route_id['frequency oph (veh/hour)'] = res_oph

In [287]:
res_hour = (links.groupby('trip_id')['frequency (veh/hour)'].agg('mean')).to_dict()
res_day = (links.groupby('trip_id')['frequency_per_day'].agg('mean')).to_dict()

df_trip_id['frequency (veh/day)'] = res_day
df_trip_id['frequency (veh/hour)'] = res_hour

if display_ph_columns:
    res_ph = (links.loc[links['frequency ph (veh/hour)'] != links['frequency oph (veh/hour)']].groupby('trip_id')['frequency ph (veh/hour)'].agg('mean')*3600).to_dict()
    res_oph = (links.loc[links['frequency ph (veh/hour)'] != links['frequency oph (veh/hour)']].groupby('trip_id')['frequency oph (veh/hour)'].agg('mean')*3600).to_dict()

    df_trip_id['frequency ph (veh/hour)'] = res_ph
    df_trip_id['frequency oph (veh/hour)'] = res_oph

In [288]:
res_hour = (links.groupby('route_type')['frequency (veh/hour)'].agg('mean')).to_dict()
res_day = (links.groupby('route_type')['frequency_per_day'].agg('mean')).to_dict()

df_route_type['frequency (veh/day)'] = res_day
df_route_type['frequency (veh/hour)'] = res_hour

# Line Length

In [289]:
def get_length(col='route_id', length_col='length'):
    link = links.groupby([col,'trip_id'])[[length_col]].agg(np.nansum)
    if col == 'route_type':
        return link.reset_index().groupby(col)[length_col].agg(np.nansum).to_dict()
    else:
        return link.reset_index().groupby(col)[length_col].agg(np.nanmean).to_dict()

In [290]:
# preparation. if length is NaN, or if shape dist travel exist.

length_col = None
if 'length' in links.columns and length_col == None:
    if len(links[links['length'].isnull()])==0:
        length_col = 'length'
        
if 'shape_dist_traveled' in links.columns and length_col == None:
    if len(links[links['shape_dist_traveled'].isnull()])==0:
        length_col = 'shape_dist_traveled'

if length_col == None:
    print('create length from geometry')
    links['length'] = links.to_crs(crs).length
    length_col = 'length'

In [291]:
res = get_length('route_id',length_col)

df_route_id['length (m)'] = res
print(sum([item for key,item in res.items()]))

80760.0


In [292]:
res = get_length('route_type',length_col)

df_route_type['length (m)'] = res
print(sum([item for key,item in res.items()]))

161520


# Number of station per line

In [293]:
# o-->o-->o-->o and  o<--o<--o<--o
# est-ce que j'ai 8 ou 4 stations ?
# j'ai 4 stations par trip et 4 stations par route (si c'est les memes).
# comment savoir si cest les mêmes : clustering?
# pour l'instant on prend tous les noeuds unique par route_id ou route_type (col='route_id', route_id)
def get_num_station(col='route_id'):
    link = links.groupby(col)[['a','b']].agg({'a':set,'b':set})
    link['node_len'] = link.apply(lambda row: len(row['a'].union(row['b'])), axis=1)
    return link['node_len'].to_dict()

In [294]:
nodes['nindex'] = nodes.reset_index().index
nodes['stop_name'] = nodes.apply(lambda x: x['nindex'] if (pd.isna(x['stop_name']) or x['stop_name'] is None) else x['stop_name'], 1)
nodes.drop(columns='nindex', inplace=True)

links['a_name'] = links['a'].map(nodes['stop_name'].to_dict())
links['b_name'] = links['b'].map(nodes['stop_name'].to_dict())

if len(nodes['stop_name'].values.tolist()) > len(nodes['stop_name'].unique()):
    print('!! Duplicates in node names !!')

!! Duplicates in node names !!


In [295]:
dict_nb_trips = links[['route_id', 'trip_id']].drop_duplicates().groupby('route_id')['trip_id'].count().to_dict()
df_route_id['type'] = df_route_id.index.map(dict_nb_trips)
df_route_id['type'] = df_route_id['type'].apply(lambda x: 'circular' if x == 1 else 'linear')

In [296]:
def get_node_sequence(route_id):
    links_route = links.loc[links.route_id == route_id]
    if df_route_id.loc[route_id]['type'] == 'linear':
        trip_id = route_id + '_0'
    links_route = links_route.loc[links_route.trip_id == trip_id]
    links_route = links_route.sort_values(by='link_sequence')
    nodes_seq = []
    for i in range(len(links_route)):
        nodes_seq += [links_route.iloc[i]['a']]
    nodes_seq += [links_route.iloc[-1]['b']]
    return nodes_seq

In [297]:
nodes_stops = dict(zip(nodes.index, nodes['stop_name']))

def get_stops_sequence(route_id):
    nodes_seq = get_node_sequence(route_id)
    stops_seq = []
    for node in nodes_seq:
        stops_seq += [nodes_stops[node]]
    return stops_seq

In [298]:
df_route_id['stations sequence'] = [get_stops_sequence(route_id) for route_id in df_route_id.index]
df_route_id['nodes sequence'] = [get_node_sequence(route_id) for route_id in df_route_id.index]
df_route_id['nb stations'] = df_route_id['stations sequence'].apply(lambda x: len(x))

In [299]:
stations_route_type = pd.DataFrame(df_route_id.groupby('route_type')['stations sequence'].agg(lambda x: list(set(sum(x, [])))))
stations_route_type['nb stations'] = stations_route_type['stations sequence'].apply(lambda x: len(x))
df_route_type = df_route_type.merge(stations_route_type, left_on=df_route_type.index, right_on=stations_route_type.index, how='left')
df_route_type = df_route_type.rename(columns={'key_0': 'route_type'})
df_route_type = df_route_type.set_index('route_type')

## Connections

In [300]:
iterable = list(zip(nodes['stop_name'], nodes.index))

stops_nodes = defaultdict(set)
for key, value in iterable:
    stops_nodes[key].add(value)
stops_nodes = dict(stops_nodes)

In [301]:
iterable = list(zip(links['a'], links['route_id']))
iterable = iterable + list(zip(links['b'], links['route_id']))

nodes_routes = defaultdict(set)
for key, value in iterable:
    nodes_routes[key].add(value)
nodes_routes = dict(nodes_routes)

In [302]:
stops_routes = {}

for stop, node_list in stops_nodes.items():
    routes = set()
    for node in node_list:
        if node in nodes_routes:
            routes.update(nodes_routes[node])
    stops_routes[stop] = routes

In [303]:
hubs = pd.DataFrame.from_dict(stops_routes, orient='index')
hubs['lines'] = hubs.apply(lambda row: [val for val in row if pd.notnull(val)], axis=1)
hubs = hubs.drop(columns=[i for i in range(len(hubs.columns) - 1)])
hubs['nb_lines'] = hubs['lines'].apply(lambda x: len(x))
hubs = hubs.sort_values(by='nb_lines', ascending=False)

In [304]:
dict_route_type = dict(zip(df_route_id.index, df_route_id['route_type']))
dict_veh = dict(zip(df_route_id['route_type'], df_route_id['veh_capacity (PAX)']))
route_order = sorted(dict_veh, key=lambda x: int(dict_veh[x]), reverse=True)

def lines_to_dict(lines):
    route_dict = {route_type: [] for route_type in route_order}
    for line in lines:
        route_type = dict_route_type.get(line)
        if route_type in route_dict:
            route_dict[route_type].append(line)
    route_dict = {k: sorted(v) for k, v in route_dict.items() if v}
    return route_dict

hubs['lines'] = hubs['lines'].apply(lines_to_dict)

In [305]:
from shapely.ops import unary_union

def centroid(geometries):
    combined_geometry = unary_union(geometries)
    return combined_geometry.centroid

centroids = pd.DataFrame(nodes.groupby('stop_name')['geometry'].agg(centroid))
hubs = hubs.merge(centroids, left_on=hubs.index, right_on=centroids.index, how='left')

hubs = hubs.rename(columns={'key_0': 'stop_name'})
hubs = hubs.set_index('stop_name')

In [306]:
# hubs['stop_radius'] = hubs['lines'].apply(lambda x: max(catchment_radius[mode] for mode in x.keys()))
hubs['stop_radius'] = default_catchment_radius

In [307]:
def get_connections(row):
    route_id = row.name
    connections = set()
    for station in row['stations sequence']:
        if station in stops_routes:
            connections.update(stops_routes[station])
    connections.discard(route_id)  # Supprimer la route_id de l'ensemble des connexions
    return lines_to_dict(connections), len(connections)

df_route_id[['connexions', 'nb lines connected']] = df_route_id.apply(lambda row: pd.Series(get_connections(row)), axis=1)

# df_route_id[['connexions']].loc[df_route_id['connexions'] == 'tertiary']

In [308]:
# df_route_id

# Operational Fleet

In [309]:
# def get_fleet(col='route_id'):
#     link = links.groupby([col,'trip_id'])[['time', 'frequency']].agg({'time': np.nansum, 'frequency': np.nanmean})
#     link['fleet'] = np.ceil(link['frequency'] * (link['time'] + 300))
#     return link.reset_index().groupby(col)['fleet'].agg(np.nansum).to_dict()

In [310]:
# res = get_fleet('route_id')

# #df_route_id['fleet'] = res
# #print(sum([item for key,item in res.items()]))
# res

In [311]:
def get_fleet_frequency(route_id):
    link = links.loc[links['route_id'] == route_id].groupby(['route_id','trip_id'])[['time','frequency']].agg({'time': np.nansum,'frequency': 'mean'})
    link['fleet'] = np.ceil(link['frequency'] * link['time'])
    return link['fleet'].sum()

In [312]:
def get_fleet_timetable(route_id):
    link = links.loc[links['route_id'] == route_id]
    stations_sequence = df_route_id.loc[route_id]['nodes sequence']
    circular_line = df_route_id.loc[route_id]['type'] == 'circular'

    termini = [stations_sequence[0], stations_sequence[-1]]
    link['a_terminus'] = link['a'].isin(termini)
    link['b_terminus'] = link['b'].isin(termini)

    if not circular_line:
        dep0_times = link.loc[(link['trip_id'] == route_id + '_0') & link['a_terminus']]['departures'].values.tolist()[0]
        dep1_times = link.loc[(link['trip_id'] == route_id + '_1') & link['a_terminus']]['departures'].values.tolist()[0]
        arr0_times = link.loc[(link['trip_id'] == route_id + '_0') & link['b_terminus']]['arrivals'].values.tolist()[0]
        arr1_times = link.loc[(link['trip_id'] == route_id + '_1') & link['b_terminus']]['arrivals'].values.tolist()[0]

        dep0 = [time_seconds(t) for t in dep0_times]
        dep1 = [time_seconds(t) for t in dep1_times]
        arr0 = [time_seconds(t) for t in arr0_times]
        arr1 = [time_seconds(t) for t in arr1_times]

        travel_time0 = arr0[0] - dep0[0]
        travel_time1 = arr1[0] - dep1[0]

        nb_interbuses = []

        for bus in dep0:
            departure = bus
            arrival = departure + travel_time0
            if arrival < max(dep1):
                departure_ = min([dep for dep in dep1 if dep>arrival])
                tmax = departure_ + travel_time1
            else:
                tmax = max([max(dep0), max(arr1)])

            t = bus
            necessary_buses = 1
            reserve_buses = 0
            departures = [dep for dep in dep0 if dep>t]
            arrivals = [arr for arr in arr1 if arr>t]

            while t < tmax:
                try:
                    next_event = min([min(departures), min(arrivals)])
                except ValueError:
                    break
                t=next_event
                if (next_event in departures) and (next_event in arrivals):
                    departures.pop(0)
                    arrivals.pop(0)
                    pass
                elif next_event in departures:
                    departures.pop(0)
                    if reserve_buses >= 1:
                        reserve_buses -= 1
                    else:
                        necessary_buses += 1
                elif next_event in arrivals:
                    arrivals.pop(0)
                    necessary_buses += 1
                    reserve_buses += 1

            nb_interbuses.append(necessary_buses)

        for bus in dep1:
            departure = bus
            arrival = departure + travel_time1
            if arrival < max(dep0):
                departure_ = min([dep for dep in dep0 if dep>arrival])
                tmax = departure_ + travel_time0
            else:
                tmax = max([max(dep1), max(arr0)])

            t = bus
            necessary_buses = 1
            reserve_buses = 0
            departures = [dep for dep in dep1 if dep>t]
            arrivals = [arr for arr in arr0 if arr>t]

            while t < tmax:
                try:
                    next_event = min([min(departures), min(arrivals)])
                except ValueError:
                    break
                t=next_event
                if (next_event in departures) and (next_event in arrivals):
                    departures.pop(0)
                    arrivals.pop(0)
                    pass
                elif next_event in departures:
                    departures.pop(0)
                    if reserve_buses >= 1:
                        reserve_buses -= 1
                    else:
                        necessary_buses += 1
                elif next_event in arrivals:
                    arrivals.pop(0)
                    necessary_buses += 1
                    reserve_buses += 1

            nb_interbuses.append(necessary_buses)
    
    else:
        dep0_times = link.loc[(link['trip_id'] == route_id + '_0') & link['a_terminus']]['departures'].values.tolist()[0]
        arr0_times = link.loc[(link['trip_id'] == route_id + '_0') & link['b_terminus']]['arrivals'].values.tolist()[0]

        dep0 = [time_seconds(t) for t in dep0_times]
        arr0 = [time_seconds(t) for t in arr0_times]

        travel_time0 = arr0[0] - dep0[0]

        nb_interbuses = []
        for bus in dep0:
            departure = bus
            arrival = departure + travel_time0
            n_buses = len([dep for dep in dep0 if (dep>=bus and dep<arrival)])
            nb_interbuses.append(n_buses)

    return max(max(nb_interbuses), 2)

In [313]:
def get_service_hours_timetable(route_id):
    link = links.loc[links['route_id'] == route_id]
    stations_sequence = df_route_id.loc[route_id]['nodes sequence']
    circular_line = df_route_id.loc[route_id]['type'] == 'circular'

    termini = [stations_sequence[0], stations_sequence[-1]]
    link['a_terminus'] = link['a'].isin(termini)
    link['b_terminus'] = link['b'].isin(termini)

    if not circular_line:
        dep0_times = link.loc[(link['trip_id'] == route_id + '_0') & link['a_terminus']]['departures'].values.tolist()[0]
        dep1_times = link.loc[(link['trip_id'] == route_id + '_1') & link['a_terminus']]['departures'].values.tolist()[0]
        arr0_times = link.loc[(link['trip_id'] == route_id + '_0') & link['b_terminus']]['arrivals'].values.tolist()[0]
        arr1_times = link.loc[(link['trip_id'] == route_id + '_1') & link['b_terminus']]['arrivals'].values.tolist()[0]

        dep0 = [time_seconds(t) for t in dep0_times]
        dep1 = [time_seconds(t) for t in dep1_times]
        arr0 = [time_seconds(t) for t in arr0_times]
        arr1 = [time_seconds(t) for t in arr1_times]

        time_range = np.ceil((max([max(arr0), max(arr1)]) - min([min(dep0), min(dep1)]))/3600)

    return time_range

In [314]:
links.loc[links['line_has_timetable']==1, 'fleet'] = links.loc[links['line_has_timetable']==1, 'route_id'].apply(get_fleet_timetable)
links.loc[links['line_has_timetable']==1, 'nb_service_hours'] = links.loc[links['line_has_timetable']==1, 'route_id'].apply(get_service_hours_timetable)

links.loc[links['line_has_timetable']==0, 'fleet'] = links.loc[links['line_has_timetable']==0, 'route_id'].apply(get_fleet_frequency)

In [320]:
links['nb_service_hours'].values.tolist()

[14, 12, 12, 14, 14, 14, 14, 12, 12, 14]

In [317]:
res = (links.groupby('trip_id')['nb_service_hours'].agg('mean')).to_dict()
df_trip_id['nb_service_hours'] = res

In [322]:
res = (links.groupby('route_id')['fleet'].agg('mean')).to_dict()
df_route_id['fleet'] = res

res = (links.groupby('route_id')['nb_service_hours'].agg('mean')).to_dict()
df_route_id['nb_service_hours'] = res

In [324]:
res = df_route_id.reset_index().groupby('route_type')['fleet'].agg('sum').to_dict()
df_route_type['fleet'] = res

res = df_route_id.reset_index().groupby('route_type')['nb_service_hours'].agg('max').to_dict()
df_route_type['nb_service_hours'] = res

In [326]:
links

Unnamed: 0_level_0,geometry,b,nb_peak_hours,headway_oph,route_id,route_short_name,pickup_type,a,trip_id,time,...,capex,catchment_radius,line_has_timetable,frequency_per_day,frequency (veh/hour),frequency ph (veh/hour),frequency oph (veh/hour),a_name,b_name,fleet
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
link_qTNjaUDLm9x3yJrS9ubX9T,"LINESTRING (3.2042 46.0974, 3.20418 46.09764, ...",node_6CErzt5T6gFonN6RkpvAM6,4.0,700,Gannat,Gannat,0,node_2zTroDrJuDCKmipdvjBrtL,Gannat_0,3295,...,0.3,500,0,81.0,,7.2,5.142857,Gannat,Vichy,12.0
link_wvPcELKEW52njEnuK7AeSe,"LINESTRING (3.06 45.92018, 3.0606 45.92013, 3....",node_b8e2FB6qm9PUQTLvQvNucR,,7200,ChatelGuyon,ChatelGuyon,0,node_f8Fb3oREhTRCmTVNW7TQzE,ChatelGuyon_0,1175,...,0.3,500,1,5.0,0.5,0.5,0.5,Chatel Guyon,Riom,3.0
link_vTbX3V9qJJxThakJ1WZbbk,"LINESTRING (3.07285 45.8958, 3.07269 45.89586,...",node_wbHMs2n7zPVS1pp1cXnNw5,,7200,ChatelGuyon,ChatelGuyon,0,node_b8e2FB6qm9PUQTLvQvNucR,ChatelGuyon_0,2980,...,0.3,500,1,5.0,0.5,0.5,0.5,Riom,Clermont,3.0
link_8EachMgBpjP7T2bWPiAc3E,"LINESTRING (3.50444 45.85987, 3.50422 45.85993...",node_a3ZLvXzFnMqGNGu4hjEYT6,14.0,600,Thiers,Thiers,0,node_4b2VnuC1zTsPzd9ueovVuH,Thiers_0,2344,...,0.3,500,0,84.0,6.0,6.0,6.0,Thiers,Ornon,24.0
link_purLhqauca5SLLCFkShEC7,"LINESTRING (3.38307 45.84869, 3.38239 45.84873...",node_rwwVbaGFJtCHoKTZ6B83Ax,14.0,600,Thiers,Thiers,0,node_a3ZLvXzFnMqGNGu4hjEYT6,Thiers_0,4744,...,0.3,500,0,84.0,6.0,6.0,6.0,Ornon,Clermont,24.0
link_4hsYkThjafc1NVpc8kLeid,"LINESTRING (3.0996 45.77888, 3.0994 45.77877, ...",node_a3ZLvXzFnMqGNGu4hjEYT6,14.0,600,Thiers,Thiers,0,node_rwwVbaGFJtCHoKTZ6B83Ax,Thiers_1,4744,...,0.3,500,0,84.0,6.0,6.0,6.0,Clermont,Ornon,24.0
link_1ck6aSDmpEy7Jo2fbhNAxE,"LINESTRING (3.38307 45.84869, 3.38445 45.8486,...",node_4b2VnuC1zTsPzd9ueovVuH,14.0,600,Thiers,Thiers,0,node_a3ZLvXzFnMqGNGu4hjEYT6,Thiers_1,2344,...,0.3,500,0,84.0,6.0,6.0,6.0,Ornon,Thiers,24.0
link_2sfH8EPaWDjhzsZiZJMz1d,"LINESTRING (3.10296 45.78059, 3.10359 45.78092...",node_b8e2FB6qm9PUQTLvQvNucR,,7200,ChatelGuyon,ChatelGuyon,0,node_wbHMs2n7zPVS1pp1cXnNw5,ChatelGuyon_1,2980,...,0.3,500,1,6.0,0.5,0.5,0.5,Clermont,Riom,3.0
link_oCfgaw42pToYChhHThwCwy,"LINESTRING (3.07285 45.8958, 3.07269 45.89586,...",node_f8Fb3oREhTRCmTVNW7TQzE,,7200,ChatelGuyon,ChatelGuyon,0,node_b8e2FB6qm9PUQTLvQvNucR,ChatelGuyon_1,1175,...,0.3,500,1,6.0,0.5,0.5,0.5,Riom,Chatel Guyon,3.0
link_dy68NZEgkJFiakszXTBmBp,"LINESTRING (3.41873 46.12042, 3.41844 46.12023...",node_2zTroDrJuDCKmipdvjBrtL,4.0,700,Gannat,Gannat,0,node_6CErzt5T6gFonN6RkpvAM6,Gannat_1,3295,...,0.3,500,0,81.0,,7.2,5.142857,Vichy,Gannat,12.0


# Vehicle revenue KM 

In [329]:
#frequency = freq moy jour
def get_veh_kmh(col='route_id', length_col='length'):
    link = links.groupby([col, 'trip_id'])[[length_col, 'frequency_per_day', 'nb_service_hours']].agg({length_col:'sum', 'frequency_per_day': 'mean', 'nb_service_hours': 'mean'})
    link['veh_km/h'] = np.ceil(link['frequency_per_day'] * link[length_col]) / 1000 / link['nb_service_hours'] #to km/H
    return link.reset_index().groupby(col)['veh_km/h'].agg('sum').to_dict()

In [330]:
res = get_veh_kmh('route_id', 'length')
df_route_id['veh_km/h'] = res
df_route_id['capex'] = df_route_id['veh_km/h'] * default_capex

In [336]:
res = df_route_id.reset_index().groupby('route_type')['capex'].sum()
df_route_type['capex'] = res

In [None]:
#TODO : multiplier par l'amplitude horaire pour avoir une valeur journalière
#TODO : multiplier par capex pour avoir les coûts d'exploitation afférents

# Round trip time

In [338]:
def get_round_trip_time(col='route_id'):
    link = links.groupby([col,'trip_id'])[['time']].agg('sum')
    return link.reset_index().groupby(col)['time'].agg('sum').to_dict()

In [None]:
res = get_round_trip_time('route_id')

df_route_id['round trip time (s)'] = res

In [340]:
df_route_id

Unnamed: 0_level_0,route_type,veh_capacity (PAX),headway,headway_ph,headway_oph,nb_peak_hours,catchment population,frequency (veh/day),frequency (veh/hour),frequency ph (veh/hour),...,stations sequence,nodes sequence,nb stations,connexions,nb lines connected,fleet,nb_service_hours,veh_km/h,capex,round trip time (s)
route_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Gannat,bus,60,600,500.0,800.0,4.0,1078.244331,81.0,,25920.0,...,"[Gannat, Vichy]","[node_2zTroDrJuDCKmipdvjBrtL, node_6CErzt5T6gF...",2,{},0,12.0,14.0,211.815,63.5445,6590
ChatelGuyon,bus,60,600,,,,1610.494104,5.5,0.5,,...,"[Chatel Guyon, Riom, Clermont]","[node_f8Fb3oREhTRCmTVNW7TQzE, node_b8e2FB6qm9P...",3,{'bus': ['Thiers']},1,3.0,12.0,21.157583,6.347275,8310
Thiers,bus,60,600,,,,2002.746734,84.0,6.0,,...,"[Thiers, Ornon, Clermont]","[node_4b2VnuC1zTsPzd9ueovVuH, node_a3ZLvXzFnMq...",3,{'bus': ['ChatelGuyon']},1,24.0,14.0,472.488,141.7464,14176


# Export results

## Tables

In [78]:
#TODO : formater les tableaux de sortie de df_route_id ==> caractéristiques / accessibilité / réponse au besoin
# Dans caractéristiques : longueur (km), temps de parcours (min), vitesse moyenne (km/h), nombre d'arrêts, fréquence attendue (nb services / sens / jour), flotte, vehicle capacity, veh.km/jour, capex/jour, veh.km/an, capex/an
# Dans accessibilité : toutes les valeurs de catchment par item et mode d'accès
# Dans réponse au besoin : estimation du volume de flux TC desservis sans correspondance pour une PM de 20%, estimation du taux de remplissage TC sans correspondance pour une PM TC de 10%

#TODO: ajouter les tableaux globaux df_route_type (longueur totale, nombre de stations, flotte, veh.km/jour, capex/jour, veh.km/an, capex/an) et hubs

In [79]:
# round numbers
#TODO : change label catchment
# for col in ['catchment population', 'frequency (veh/hours)','length (m)','veh.km/h','round trip time (s)']:
#     df_route_id[col] = df_route_id[col].apply(lambda x :np.round(x,2))
#     df_route_id[col] = df_route_id[col].apply(lambda x :np.round(x,2))

In [80]:
#df_route_id = df_route_id.fillna('null')
#df_route_type = df_route_type.fillna('null')

In [81]:
df_route_id.to_csv(output_folder + 'route_id_metrics.csv')
# df_route_id

In [82]:
df_route_type.to_csv(output_folder + 'route_type_metrics.csv')
# df_route_type

## Geomatic outputs

Hubs

In [83]:
hubs_plot = hubs.copy()
hubs_plot['lines'] = hubs_plot['lines'].apply(lambda x: str(x).replace(',', ';').replace("'", '')[1:-1])
hubs_plot.to_csv(output_folder + 'hubs.csv')

hubs = gpd.GeoDataFrame(hubs, geometry='geometry', crs='EPSG:4326')
hubs.to_file(output_folder + 'hubs.geojson', driver='GeoJSON', engine=io_engine)

Common sections

In [84]:
# Renvoie un fichier geojson avec les tronçons en commun entre plusieurs lignes

In [85]:
# clustering de 500m pour a et b et cluster d'appartenance
coords_nodes = np.array(nodes['geometry'].apply(lambda point: (point.x, point.y)).tolist())

# Convertir 500 mètres en degrés : 111 km = 1 degré de latitude
eps_lat = clustering_radius / (111 * 1000)  # Environ 0.0045 degrés

# 1 degré de longitude dépend de la latitude
mean_latitude = np.mean(coords_nodes[:, 1])
eps_lon = clustering_radius / (111 * 1000 * np.cos(np.radians(mean_latitude)))

# Appliquer DBSCAN avec une distance euclidienne pondérée
db = DBSCAN(eps=1, min_samples=1, metric='euclidean').fit(coords_nodes / [eps_lon, eps_lat])

# Ajouter les labels de cluster au GeoDataFrame
nodes['cluster'] = db.labels_

In [86]:
nodes = nodes.reset_index()

In [87]:
links = links.merge(nodes[['index', 'cluster']], left_on='a', right_on='index', how='left').drop(columns='index').rename(columns={'cluster': 'a_clustered'})
links = links.merge(nodes[['index', 'cluster']], left_on='b', right_on='index', how='left').drop(columns='index').rename(columns={'cluster': 'b_clustered'})

In [88]:
l_troncons = links.groupby(['a_clustered', 'b_clustered'])['route_id'].agg(list).reset_index()
l_troncons['nb_lines'] = l_troncons['route_id'].apply(lambda x: len(x))

In [89]:
l_troncons_communs = l_troncons[l_troncons.nb_lines > 1.]

In [90]:
l_troncons_communs['geometry'] = l_troncons_communs.apply(
    lambda row : links[(links.a_clustered == row['a_clustered']) & (links.b_clustered == row['b_clustered'])].drop_duplicates(subset=['a_clustered', 'b_clustered'], keep='first').geometry.values[0],
    axis=1
    )
l_troncons_communs['stations_a'] = l_troncons_communs['a_clustered'].apply(lambda x: list(nodes[nodes.cluster == x]['stop_name'].unique()))
l_troncons_communs['stations_b'] = l_troncons_communs['b_clustered'].apply(lambda x: list(nodes[nodes.cluster == x]['stop_name'].unique()))
l_troncons_communs = gpd.GeoDataFrame(l_troncons_communs[['stations_a', 'stations_b', 'route_id', 'nb_lines', 'geometry']], geometry='geometry', crs='EPSG:4326')

In [91]:
l_troncons_communs.to_file(output_folder + 'pt_common_sections.geojson', driver='GeoJSON', engine=io_engine)

Nodes catchment

In [92]:
#TODO pcq c'est visuel

In [93]:
# # Using get catchment : get the catchment radius of each node (get larger one if used by many modes)
# link = links.groupby('route_type')[['a', 'b', 'route_type']].agg({'a': set, 'b': set, 'route_type': 'first'})
# link['node'] = link.apply(lambda row: row['a'].union(row['b']), axis=1)
# link = link.drop(columns=['a','b'])
# # add catchment radius for the route_type
# link['catchment_radius'] = link['route_type'].apply(lambda x: catchment_radius.get(x,default_catchment_radius))
# link = link.explode('node').reset_index(drop=True)
# link = link.sort_values('catchment_radius',ascending=False).drop_duplicates('node',keep='first')
# link = node_dist.merge(link, left_on='node_index', right_on='node')
# link = link[link['distances'] <= link['catchment_radius']]

# temp_dict = link.groupby('node_index')['population'].sum().to_dict()
# nodes['catchment'] = nodes.index.map(temp_dict.get)

# temp_dict = link.groupby('node_index')['catchment_radius'].agg('first').to_dict() 
# nodes['catchment_radius'] = nodes.index.map(temp_dict.get)

# nodes.to_file(output_folder + 'nodes.geojson', driver='GeoJSON', engine=io_engine)

## Graphs and pictures

In [94]:
# plot = df_route_type.reset_index().plot(kind='bar', x='route_type', y='catchment', color='#559bb4', rot=0, figsize=[10, 5])
# plot.set_title('Couverture population par mode')
# plot.set_ylabel('')
# plot.set_xlabel("route_type")
# plot.legend([])