In [111]:
import sys
import json

catchment_radius={'bus':'500','subway':'800','rail':'1000'}

params = {'catchment_radius':catchment_radius}
         
default = {'training_folder': '../../scenarios/base', 'params':params} # Default execution parameters
manual, argv = (True, default) if 'ipykernel' in sys.argv[0] else (False, dict(default, **json.loads(sys.argv[1])))
print(argv)


{'training_folder': '../../scenarios/base', 'params': {'catchment_radius': {'bus': '500', 'subway': '800', 'rail': '1000'}}}


io (read): 10 secs <br>
mesh: 5-9 secs <br>
acf_dist : 6-9 secs <br>
metrics: 1.2 secs<br>
<br>
tot: 22-30 seconds


In [112]:
import os
import time
import geopandas as gpd
import pandas as pd
sys.path.insert(0, r'../../../quetzal') # Add path to quetzal
import numpy as np
import random
import matplotlib.pyplot as plt
from shapely.geometry import Point, LineString
from syspy.spatial.spatial import add_geometry_coordinates, nearest
from sklearn.neighbors import NearestNeighbors
from typing import Literal
from numba import jit, njit
import numba as nb
#num_cores = 1

In [113]:
# from quetzal_cyclops
def get_epsg(lat: float, lon: float) -> int:
    '''
    lat, lon or y, x
    return EPSG in meter for a given (lat,lon)
    lat is north south 
    lon is est west
    '''
    return int(32700 - round((45 + lat) / 90, 0) * 100 + round((183 + lon) / 6, 0))

# from quetzal_cyclops
def zones_nearest_node(zones,nodes,drop_duplicates=False):
    # getting zones centroids
    centroid = zones.copy()
    centroid['geometry'] = centroid.centroid
    # finding nearest node
    neigh = nearest(centroid, nodes, n_neighbors=1).rename(columns={'ix_one': 'zone_index', 'ix_many': 'node_index'})
    zone_node_dict = neigh.set_index('zone_index')['node_index'].to_dict()
    centroid['node_index'] = centroid.index.map(zone_node_dict.get)
    #print('max_distance found: ', neigh['distance'].max())
    # check for duplicated nodes. if there is. drop the duplicated zones.
    if drop_duplicates:
        if len(centroid.drop_duplicates('node_index')) != len(centroid):
            print('there is zones associates to the same road_node')
            # duplicated = centroid[centroid['node_index'].duplicated()]['node_index'].values
            print('dropping zones: ')
            print(centroid[centroid['node_index'].duplicated()].index.values)
            centroid = centroid.drop_duplicates('node_index')
    return centroid



In [114]:
from quetzal.engine.pathfinder_utils import simple_routing,sparse_matrix
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import dijkstra

@jit(nopython=True)
def _unstack(mat):
    # return non inf values in mat as [[row,col,val],[row,col,val]]. so, [o,d,val].
    # pd.DataFrame of this gives us [origin, destination, value] as columns
    row, col = np.where(np.isfinite(mat))
    res = np.zeros((len(row),3))
    for it in nb.prange(len(col)):
        i=row[it]
        j=col[it]
        d=mat[i,j]
        res[it]=[i,j,d]
    return res

def routing(origin, destination, links, weight_col='time', dijkstra_limit=np.inf):
    mat, node_index = sparse_matrix(links[['a', 'b', weight_col]].values)
    index_node = {v: k for k, v in node_index.items()}
    # liste des origines pour le dijkstra
    origin_sparse = [node_index[x] for x in origin]
    origin_dict =  {i:val for i,val in enumerate(origin_sparse)}
    # list des destinations 
    destination_sparse = [node_index[x] for x in destination]
    destination_dict =  {i:val for i,val in enumerate(destination_sparse)}
    # dijktra on the road network from node = incices to every other nodes.
    # from b to a.
    dist_matrix = dijkstra(
        csgraph=mat,
        directed=True,
        indices=origin_sparse,
        return_predecessors=False,
        limit=dijkstra_limit
    )
    # remove non-used destination
    dist_matrix = dist_matrix[:,destination_sparse]
    # unstack amtrix
    dist_matrix = pd.DataFrame(_unstack(dist_matrix),columns=['origin', 'destination', weight_col])
    # rename origin and destination with original indexes.
    dist_matrix['origin'] = dist_matrix['origin'].apply(lambda x: index_node.get(origin_dict.get(x)))
    dist_matrix['destination'] = dist_matrix['destination'].apply(lambda x: index_node.get(destination_dict.get(x)))
    return dist_matrix

In [115]:
def get_catchment_dist(link: gpd.GeoDataFrame, catchment_radius: dict, default: float=500):
    route_type = link['route_type'].unique()
    if len(route_type)>1:
        print('multiple route type for a single route_id.. using first one for catchment radius')
    route_type = route_type[0]
    return catchment_radius.get(route_type, default)


def nearest_radius(one, many, radius=100, geometry=False):
    try:
        # Assert df_many.index.is_unique
        assert one.index.is_unique
        assert many.index.is_unique
    except AssertionError:
        msg = 'Index of one or many should not contain duplicates'
        print(msg)
        warnings.warn(msg)
    many = add_geometry_coordinates(many, columns=['x_geometry', 'y_geometry'], add_centroids=False)
    one = add_geometry_coordinates(one, columns=['x_geometry', 'y_geometry'], add_centroids=False)
    
    x = many[['x_geometry', 'y_geometry']].values
    # Fit Nearest neighbors model
    #nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree').fit(x)
    nbrs = NearestNeighbors(radius=radius,algorithm='ball_tree').fit(x)


    # x = df_many[['x_geometry','y_geometry']].values
    y = one[['x_geometry', 'y_geometry']].values

    #distances, indices = nbrs.kneighbors(y,return_distance=True)
    distances, indices = nbrs.radius_neighbors(y, radius = radius, return_distance=True)

    indices = pd.DataFrame(indices)
    indices = pd.DataFrame(indices.stack(), columns=['index_nn']).reset_index().rename(
        columns={'level_0': 'ix_one', 'level_1': 'rank'}
    )
    indices['distances'] = distances
    return indices

def create_mesh(zones: gpd.GeoDataFrame ,step: float = 0.01) -> gpd.GeoDataFrame:
    '''
    create a mesh in the zones total bbox at every step (in the units of the zones crs)
    step: degree if crs=4326, else meters. 0.01 deg ~ 1km
    '''
    x_max, y_max = zones.bounds.max()[['maxx','maxy']].values
    x_min, y_min = zones.bounds.min()[['minx','miny']].values

    points = []
    x = x_min
    while x<x_max:
        y = y_min
        while y<y_max:
            points.append(Point(x,y))
            y += step
        x += step
    points = gpd.GeoDataFrame(geometry=points,crs=zones.crs)
    points.index.name='index'
    return points

# https://stackoverflow.com/questions/36399381/whats-the-fastest-way-of-checking-if-a-point-is-inside-a-polygon-in-python

@jit(nopython=True)
def fast_point_in_polygon(x: float, y: float , poly: np.ndarray) -> bool:
    n = len(poly)
    inside = False
    p2x = 0.0
    p2y = 0.0
    xints = 0.0
    p1x,p1y = poly[0]
    for i in nb.prange(n+1):
        p2x,p2y = poly[i % n]
        if y > min(p1y,p2y):
            if y <= max(p1y,p2y):
                if x <= max(p1x,p2x):
                    if p1y != p2y:
                        xints = (y-p1y)*(p2x-p1x)/(p2y-p1y)+p1x
                    if p1x == p2x or x <= xints:
                        inside = not inside
        p1x,p1y = p2x,p2y
        
    return inside


@njit(parallel=True)
def fast_points_in_polygon(points:np.ndarray, polygon:np.ndarray) -> np.ndarray:
    D = np.empty(len(points), dtype=nb.boolean) 
    for i in nb.prange(0, len(points)):
        D[i] = fast_point_in_polygon(points[i,0], points[i,1], polygon)
    return np.where(D)[0]

def points_in_polygon(points:np.ndarray, polygon:gpd.GeoDataFrame) -> np.ndarray:
    '''
    return a list of point in the polygon. values are the index in the points array.
    
    points:np.array[np.array[float,float]]
        list of all the points coords (x,y)
    polygon: gpd.GeoDataFrame
        geodataframe of multiples polygons.
    '''
    polygon = np.array([*polygon.exterior.coords])
    return fast_points_in_polygon(points,polygon)

def population_to_mesh(population: gpd.GeoDataFrame,
                       mesh: gpd.GeoDataFrame = None,
                       step: float = 0.01,
                       col: str = 'population', 
                       fill_missing: Literal['centroid', 'nearest', None] = 'centroid') ->  gpd.GeoDataFrame:
    '''
    create a mesh in the zones total bbox at every step (in the units of the zones crs)
    and assign the population to each node equaly (if 2 node in a zone. they have each 50% of the population)
    population:
        geodataframe with total population by zones ans zones geomerty
     mesh:
        road nodes for example. if None. it will be created with equal step (variable step.)
    step: 
        if mesh is None, Distance between each point degree if crs=4326, else meters. 0.01 deg ~ 1km
    col:
        column name with data to aggregation (population)
    fill_missing: 'centroid', 'nearest', or None
        centroid: zones centroid with no mesh node inside will be added to the mesh
        nearest: zones population with no mesh point inside will be added to the nearest mesh point.
    '''
    import warnings
    warnings.filterwarnings('ignore')
    population=population.copy()
    if population.index.name != 'index':
        population.index.name = 'index'
    # use existing mesh (points .geosjon) or create one.
    if mesh is not None:
        # we need numerical indexes. also,
        # new nodes will be added (new index) for zones with no points inside.
        points = mesh.copy()
        points = points.reset_index(names='node_index')
        points.index.name='index'
    else:
        points = create_mesh(population, step=step)
        
    points_coords = np.array([point.coords[0] for point in points['geometry'].values])
    
    population['nodes'] = population['geometry'].apply(lambda x: points_in_polygon(points_coords,x))
    
    nodes = population.reset_index()[['index','nodes',col]].copy()
    nodes = nodes.explode('nodes').dropna()
    print(len(nodes[nodes['nodes'].duplicated()]),'nodes in multiple zones. will be match to a single zone.')
    
    
    zone_index_dict = nodes.set_index('nodes')['index'].to_dict()
    points['zone'] = points.index.map(zone_index_dict)

    pop_dict = nodes.set_index('nodes')[col].to_dict()
    points[col] = points.index.map(pop_dict)
    points = points.dropna()
    
    # get number of points per zones. divide population equaly between each points
    len_dict = points.groupby('zone')[col].agg(len).to_dict()
    points['num_points'] = points['zone'].apply(lambda x:len_dict.get(x))
    points[col] = points[col] / points['num_points']
    points = points.drop(columns = ['num_points'])
    
    print(len(population) - len(points['zone'].unique()),'unfounded zones')
    
    zones_list = points['zone'].unique()
    unfounded_zones = population.loc[~population.index.isin(zones_list)][['geometry',col]]
    if fill_missing == 'centroid':
        print('Unfound zones centroid will be added to mesh')
        # append unfounded zones centroids as in mesh
        unfounded_zones['geometry'] = unfounded_zones.centroid
        unfounded_zones = unfounded_zones.reset_index().rename(columns={'index':'zone'})
        points = pd.concat([points,unfounded_zones]).reset_index(drop=True)
        points.index.name='index'
    elif fill_missing == 'nearest':
        print('unfound zone will be added to nearest mesh node. zone_index will be lost')
        unfounded_zones = zones_nearest_node(unfounded_zones,points)
        pop_to_append = unfounded_zones.groupby('node_index')[[col]].sum()

        points = points.merge(pop_to_append,left_index=True,right_index=True,how='left')
        points[col+'_y'] = points[col+'_y'].fillna(0)

        points[col] = points[col+'_x'] + points[col+'_y']
        points = points.drop(columns=[col+'_x', col+'_y'])
    else:
        pass
    
    
    points.index.name='index'
    
    return points

In [116]:
def get_acf_distances(nodes: gpd.GeoDataFrame, 
                      mesh: gpd.GeoDataFrame, 
                      crs:int,
                      max_dist: float = 3000) -> gpd.GeoDataFrame:
    '''
    with nearest kneibor in a radius.
    for pt node in nodes, get all mesh nodes in a distance < max_dist
    
    return gpd.Geodateframe with [node_index, mesh_index, distances, population]
    '''

    node_dist = nearest_radius(nodes.to_crs(crs), mesh.to_crs(crs), radius=max_dist)
    node_dist = node_dist.rename(columns={'ix_one': 'node_index','index_nn':'mesh_index'}).drop(columns='rank')

    nodes_index_dict = nodes.reset_index()['index'].to_dict()
    node_dist['node_index'] = node_dist['node_index'].apply(lambda x: nodes_index_dict.get(x))

    node_dist = node_dist.explode(['mesh_index','distances'])
    population_dict = mesh['population'].to_dict()
    node_dist['population'] = node_dist['mesh_index'].apply(lambda x: population_dict.get(x))
    return node_dist

def get_routing_distances(nodes: gpd.GeoDataFrame, 
                         rnodes: gpd.GeoDataFrame, 
                         rlinks: gpd.GeoDataFrame, 
                         mesh: gpd.GeoDataFrame, 
                         weight_col:str = 'length', 
                         dijkstra_limit: float = np.inf) -> gpd.GeoDataFrame:
    '''
    with dijktra on road network.
    for pt node in nodes, get all mesh nodes in a distance < max_dist. can be change with weight_col
    ex: weight_col = 'time', and dijkstra_limit = 120secs
    
    return gpd.Geodateframe with [node_index, mesh_index, distances, population]
    '''

    # transform PT nodes to nearest road nodes
    node_to_rnode_df = zones_nearest_node(nodes,rnodes)[['node_index']]

    node_rnodes_dict = node_to_rnode_df['node_index'].to_dict()
    rnodes_node_dict = node_to_rnode_df.reset_index().groupby('node_index').agg(list)['index'].to_dict()

    # there may be multiples nodes pointing to the same rnode. so rnodes_node_dict values are lists.
    # need to added them back at the end when we go from rnode to nodes
    origins = list(set(node_rnodes_dict.values()))
    destinations = mesh['node_index'].values
    mat = routing(origins, destinations, rlinks, weight_col=weight_col, dijkstra_limit=dijkstra_limit)

    mat = mat.merge(mesh.reset_index()[['index','node_index','population']],left_on='destination',right_on='node_index',how='left')
    mat = mat.drop(columns=['destination','node_index']).rename(columns={'index':'mesh_index'})

    mat['origin'] = mat['origin'].apply(lambda x: rnodes_node_dict.get(x))
    mat = mat.explode('origin')
    mat = mat.rename(columns={'origin':'node_index', weight_col:'distances'})
    return mat

In [117]:
base_folder = argv['training_folder']
pt_folder = base_folder + '/inputs/pt/'
road_folder = base_folder + '/inputs/road/'
input_folder = base_folder +'/inputs/'
od_folder = base_folder + '/inputs/od/'
output_folder = base_folder +'/outputs/'
print(pt_folder)
if not os.path.exists(output_folder):
    os.makedirs(output_folder)


../../scenarios/base/inputs/pt/


In [120]:
catchment_radius = argv['params' ]['catchment_radius']
catchment_radius = {k:float(v) for k,v in catchment_radius.items()}
default_catchment_radius = 500

# inputs

In [66]:
#cst_incline = argv['params' ]['constant']['cst_incline']
#cst_road = argv['params']['road_weight']
#cst_shared = argv['params']['shared_cycleway_weight']

In [67]:
links = gpd.read_file(pt_folder + 'links.geojson') 
nodes = gpd.read_file(pt_folder + 'nodes.geojson')
links = links.set_index('index')
nodes = nodes.set_index('index')

In [68]:
population = gpd.read_file(input_folder + 'population.geojson')
if 'index' in population.columns:
    population = population.set_index('index')
else:
    population.index.name='index'
assert 'density' in population.columns, 'need density column. in km2'

In [69]:
rnodes_file = road_folder + 'road_nodes.geojson'
rnodes_file_provided = os.path.isfile(rnodes_file)
if rnodes_file_provided:
    rnodes = gpd.read_file(road_folder + 'road_nodes.geojson')
    rnodes = rnodes.set_index('index')
    rlinks = gpd.read_file(road_folder + 'road_links.geojson')
    rlinks = rlinks.set_index('index')
print('rnodes?',rnodes_file_provided)

rnodes? False


In [70]:
od_file = od_folder + 'od.geojson'
od_file_provided = os.path.isfile(od_file)
if od_file_provided:
    od_test = gpd.read_file(od_folder + 'od.geojson')
    if 'name' not in od_test.columns:
        od_test['name'] = od_test['index']
    od_test['name'] = od_test['name'].fillna(od_test['index'].astype(str))

# population preapation

In [71]:
# find meters CRS
centroid = [*LineString(population.centroid.values).centroid.coords][0]
crs = get_epsg(centroid[1],centroid[0])
crs

32618

In [72]:
population['area (km2)'] = population.to_crs(crs).area*1e-6
population['area (km2)'].sum()

11574.507300457291

In [73]:
population['population'] = population['density']*population['area (km2)']
population['population'].sum()

4884999.000000022

# population mesh

In [74]:
if rnodes_file_provided:
    # use rnodes as mesh.
    print('using road_nodes')
    mesh = population_to_mesh(population, mesh=rnodes, step=0.005, col='population', fill_missing='nearest')
else:
    # create a mesh
    #0.01 = 1km 0.005 = 500m
    mesh = population_to_mesh(population, step=0.005, col = 'population', fill_missing='centroid')


7458 nodes in multiple zones. will be match to a single zone.
146 unfounded zones
Unfound zones centroid will be added to mesh


In [75]:
len(mesh)

46153

In [76]:
mesh.to_file(output_folder + 'population_mesh.geojson',driver='GeoJSON')

# catchment

In [77]:
# find TC nodes to mesh distance

In [78]:
max_dist = max(max(catchment_radius.values()),default_catchment_radius)

In [79]:
if rnodes_file_provided:
    print('using road_nodes')
    node_dist = get_routing_distances(nodes, rnodes, rlinks, mesh, 'length', max_dist)
else:
    node_dist = get_acf_distances(nodes, mesh, crs, max_dist)

# metrics

In [80]:
print('num route_id:',len(links['route_id'].unique()))
print('num route_type:',len(links['route_type'].unique()))

num route_id: 495
num route_type: 3


In [81]:
#init results dfs
df_route_id = pd.DataFrame(index=links['route_id'].unique())
df_route_id.index.name='route_id'

df_route_type = pd.DataFrame(index=links['route_type'].unique())
df_route_type.index.name='route_type'

In [82]:

def get_catchment(col='route_id'):
    #get all nodes with col filter
    link = links.groupby(col)[['a','b','route_type']].agg({'a':set,'b':set,'route_type':'first'})
    link['node'] = link.apply(lambda row: row['a'].union(row['b']), axis=1)
    link = link.drop(columns=['a','b'])
    # add catchment radius for the route_type
    link['catchment_radius'] = link['route_type'].apply(lambda x: catchment_radius.get(x,default_catchment_radius))

    col_exist = col == 'route_type' # cannot explode if index == route_type (a column)
    link = link.explode('node').reset_index(drop=col_exist)
    link = node_dist.merge(link, left_on='node_index', right_on='node')
    #filter by distance
    link = link[link['distances'] <= link['catchment_radius']]
    #drop duplicated mesh nodes (we count only one time)
    link = link.drop_duplicates(subset=['mesh_index',col],keep='first')

    return link.groupby(col)['population'].sum().to_dict()
    

In [83]:
res = get_catchment('route_id')

df_route_id['catchment'] = res
print(sum([item for key,item in res.items()]))


18178349.199833468


In [84]:
res = get_catchment('route_type')

df_route_type['catchment'] = res
print(sum([item for key,item in res.items()]))

4418728.561923779


In [85]:
if False:
    links['network']=True
    res=[]
    dists = [0,1,10,20,50,100,250,500,800,1000]
    for dist in dists:
        col='network'
        link = links.groupby(col)[['a','b','route_type']].agg({'a':set,'b':set,'route_type':'first'})
        link['node'] = link.apply(lambda row: row['a'].union(row['b']), axis=1)
        link = link.drop(columns=['a','b'])
        # add catchment radius for the route_type
        link['catchment_radius'] =dist

        col_exist = col == 'route_type' # cannot explode if index == route_type (a column)
        link = link.explode('node').reset_index(drop=col_exist)
        link = node_dist.merge(link, left_on='node_index', right_on='node')
        #filter by distance
        link = link[link['distances'] <= link['catchment_radius']]
        #drop duplicated mesh nodes (we count only one time)
        link = link.drop_duplicates(subset=['mesh_index',col],keep='first')
        volume = link['population'].sum()
        res.append(volume)
    plt.plot(dists,res)

# frequency

In [86]:
links['frequency'] = 1/links['headway']

In [87]:
res = (links.groupby('route_id')['frequency'].agg(np.nanmean)*3600).to_dict()

df_route_id['frequency (veh/hours)'] = res
print(np.nansum([item for key,item in res.items()]))

1179.0267365679042


In [88]:
res = (links.groupby('route_type')['frequency'].agg(np.nanmean)*3600).to_dict()

df_route_type['frequency (veh/hours)'] = res
print(sum([item for key,item in res.items()]))

20.888085455759082


In [89]:
link = (links.groupby(['route_id','trip_id'])[['frequency']].agg(np.nanmean)*3600)
res = link.reset_index().set_index('route_id')['frequency'].to_dict()
print(np.nansum([item for key,item in res.items()]))

975.8001839437252


In [90]:
link = (links.groupby(['route_type','trip_id'])[['frequency']].agg(np.nanmean)*3600)
res = link.reset_index().set_index('route_type')['frequency'].to_dict()
print(np.nansum([item for key,item in res.items()]))

16.018955285524857


# operational Fleet

In [91]:
def get_fleet(col='route_id'):
    link = links.groupby([col,'trip_id'])[['time','frequency']].agg({'time':np.nansum,'frequency':np.nanmean})
    link['fleet'] = np.ceil(link['frequency'] * link['time'])
    return link.reset_index().groupby(col)['fleet'].agg(np.nansum).to_dict()
    

In [92]:
res = get_fleet('route_id')

df_route_id['fleet'] = res
print(sum([item for key,item in res.items()]))

1623.0


In [93]:
res = get_fleet('route_type')

df_route_type['fleet'] = res
print(sum([item for key,item in res.items()]))

1623.0


# Line Length

In [94]:
def get_length(col='route_id'):
    link = links.groupby([col,'trip_id'])[[length_col]].agg(np.nansum)
    return link.reset_index().groupby(col)[length_col].agg(np.nansum).to_dict()
    

In [95]:
# rpeparation. if legnth is NaN, or if shsape dist travel exist.

length_col = None
if 'shape_dist_traveled' in links.columns and length_col == None:
    if len(links[links['shape_dist_traveled'].isnull()])==0:
        length_col = 'shape_dist_traveled'
if 'length' in links.columns and length_col == None:
    if len(links[links['length'].isnull()])==0:
        length_col = 'length'

if length_col == None:
    print('create length from geometry')
    links['length'] = links.to_crs(crs).length
    length_col = 'length'


In [96]:
res = get_length('route_id')

df_route_id['length (m)'] = res
print(sum([item for key,item in res.items()]))

15622877.232517038


In [97]:
res = get_length('route_type')

df_route_type['length (m)'] = res
print(sum([item for key,item in res.items()]))

15622877.232517032


# Number of station per line

In [98]:
# o-->o-->o-->o and  o<--o<--o<--o
# est-ce que j'ai 8 ou 4 stations ?
# j'ai 4 stations par trip et 4 stations par route (si c'est les memes).
# comment savoir si cest les memes. clustering?
# pour linstant. on prend tous les noeds unique par route_id ou route_type (col='route_id', route_id)
def get_num_station(col='route_id'):
    link = links.groupby(col)[['a','b']].agg({'a':set,'b':set})
    link['node_len'] = link.apply(lambda row: len(row['a'].union(row['b'])), axis=1)
    return link['node_len'].to_dict()


In [99]:
res = get_num_station('route_id')

df_route_id['num station'] = res
print(sum([item for key,item in res.items()]))

41517


In [100]:
res = get_num_station('route_type')

df_route_type['num station'] = res
print(sum([item for key,item in res.items()]))

32871


# Vehicle revenue KM 

In [101]:
def get_veh_kmh(col='route_id'):
    link = links.groupby([col,'trip_id'])[[length_col,'frequency']].agg({length_col:np.nansum,'frequency':np.nanmean})
    link['veh_km/h'] = np.ceil(link['frequency'] * link[length_col]) * 3600/1000 #to km/H
    return link.reset_index().groupby(col)['veh_km/h'].agg(np.nansum).to_dict()

In [102]:
res = get_veh_kmh('route_id')

df_route_id['veh.km/h'] = res
print(sum([item for key,item in res.items()]))

30308.399999999998


In [103]:
res = get_veh_kmh('route_type')

df_route_type['veh.km/h'] = res
print(sum([item for key,item in res.items()]))

30308.399999999998


# Round trip time

In [104]:
def get_round_trip_time(col='route_id'):
    link = links.groupby([col,'trip_id'])[['time']].agg(np.nansum)
    return link.reset_index().groupby(col)['time'].agg(np.nansum).to_dict()
    

In [105]:
res = get_round_trip_time('route_id')

df_route_id['round trip time (s)'] = res
print(sum([item for key,item in res.items()]))

2035849.0


In [106]:
df_route_id.to_csv(output_folder+'route_id_metrics.csv')
df_route_id

Unnamed: 0_level_0,catchment,frequency (veh/hours),fleet,length (m),num station,veh.km/h,round trip time (s)
route_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
OMITSJU_350,2789.790909,2.514685,2.0,42180.0,4,111.6,2460.0
OMITSJU_330,7231.541126,0.670391,1.0,28995.0,8,21.6,2760.0
OMITSJU_325,7592.053030,1.005587,1.0,53450.0,10,28.8,5040.0
OMITSJU_600,6968.436364,3.128492,4.0,71398.0,5,194.4,4500.0
OMITSJU_3,12851.074516,2.682563,2.0,9871.0,42,28.8,1440.0
...,...,...,...,...,...,...,...
CITROUS_38,9115.812500,2.011173,1.0,7505.0,28,18.0,960.0
CITROUS_40,14626.619318,2.682563,1.0,9467.0,37,28.8,1320.0
CITROUS_43,21376.767045,,0.0,16744.0,61,0.0,2280.0
CITROUS_210,7895.450000,2.011173,3.0,48139.0,5,97.2,3600.0


In [107]:
df_route_type.to_csv(output_folder+'route_type_metrics.csv')
df_route_type

Unnamed: 0_level_0,catchment,frequency (veh/hours),fleet,length (m),num station,veh.km/h
route_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
bus,3245550.0,2.508795,1545.0,14885480.0,32527,27802.8
subway,659274.9,16.763005,63.0,147313.2,174,2062.8
rail,513903.5,1.616285,15.0,590083.0,170,442.8


# geomatic outputs

In [108]:
#using get catchment. get the catchment radius of each node (get larger one if used by many mode.)
link = links.groupby('route_type')[['a','b','route_type']].agg({'a':set,'b':set,'route_type':'first'})
link['node'] = link.apply(lambda row: row['a'].union(row['b']), axis=1)
link = link.drop(columns=['a','b'])
# add catchment radius for the route_type
link['catchment_radius'] = link['route_type'].apply(lambda x: catchment_radius.get(x,default_catchment_radius))
link = link.explode('node').reset_index(drop=True)
link = link.sort_values('catchment_radius',ascending=False).drop_duplicates('node',keep='first')
link = node_dist.merge(link, left_on='node_index', right_on='node')
link = link[link['distances'] <= link['catchment_radius']]

temp_dict = link.groupby('node_index')['population'].sum().to_dict()
nodes['catchment'] = nodes.index.map(temp_dict.get)

temp_dict = link.groupby('node_index')['catchment_radius'].agg('first').to_dict() 
nodes['catchment_radius'] = nodes.index.map(temp_dict.get)


In [109]:
nodes.to_file(output_folder+'nodes.geojson',driver='GeoJSON')

# test