In [89]:
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely import geometry
from quetzal.model import stepmodel
from quetzal.engine import engine, connectivity

# Preparation of the transport network.
## Saves access and egress links for each zone.
## Needs all networks.


In [2]:
input_path = '../input/'
output_path = '../output/'
model_path = '../model/'

In [3]:
sm = stepmodel.read_json(model_path + 'de_pt_network')
bus = stepmodel.read_json(model_path + 'de_pt_network_bus')
road = stepmodel.read_json(model_path + 'de_road_network')

### Clean zones

In [4]:
# Drop unused zones for later steps to work 
print(len(sm.zones.index))
sm.zones = sm.zones.loc[sm.zones['NUTS_ID'].str.startswith('DE')]
sm.zones = sm.zones[sm.zones["LEVL_CODE"]==3]
print(len(sm.zones.index))

401
401


In [5]:
# Compute controids
sm.preparation_ntlegs(
    zone_to_transit=False,
    zone_to_road=False)

## Assumptions

In [6]:
# Walking vs. driving to the train station
threshold = 500 # in m
# Average speeds from MiD calibration data
speed_non_motorised_modes = 17 # in km/h
speed_car = 44 # in km/h
speed_footpaths = 5 # in km/h, assumed
# Max. distances for accessing the transport nodes
# in m
threshold_dict = {'road': 10000,
                  'road_pt': 500,
                  'pt': 12000,
                  'footpaths': 500,
                  'footpaths_air': 2200,
                  'cycling_zones': 20000}

## Generate footpaths table

In [7]:
sm.nodes.shape

(14990, 3)

In [9]:
# The Voronoi algorithm is too slow for this amount of nodes
#sm.footpaths = connectivity.build_footpaths(
#    sm.nodes,
#    speed=speed_footpaths,
#    max_length=threshold_dict['footpaths'],
#    n_clusters=1000,
#    coordinates_unit=sm.coordinates_unit)

In [8]:
# Generate footpaths between modes
sm.footpaths = sm.footpaths.iloc[0:0]
for o in ['rail_short_distance']:
    for d in [t for t in sm.pt_route_types if t!='bus']:
        if o != d:
            ntlegs = engine.ntlegs_from_centroids_and_nodes(
                sm.nodes.loc[sm.nodes['route_type']==o],
                sm.nodes.loc[sm.nodes['route_type']==d],
                short_leg_speed=speed_footpaths,
                long_leg_speed=speed_non_motorised_modes,
                threshold=threshold,
                n_neighbors=1,
                coordinates_unit=sm.coordinates_unit)
            if d == 'air':
                ntlegs = ntlegs.loc[ntlegs['distance']<=
                                    threshold_dict['footpaths_air']]
            else:
                ntlegs = ntlegs.loc[ntlegs['distance']<=
                                    threshold_dict['footpaths']]
            sm.footpaths = sm.footpaths.append(ntlegs)
len(sm.footpaths.index)

6632

In [9]:
# Add bus connections
for d in [t for t in sm.pt_route_types if t!='bus']:
    ntlegs = engine.ntlegs_from_centroids_and_nodes(
        bus.nodes,
        sm.nodes.loc[sm.nodes['route_type']==d],
        short_leg_speed=speed_footpaths,
        long_leg_speed=speed_non_motorised_modes,
        threshold=threshold,
        n_neighbors=1,
        coordinates_unit=sm.coordinates_unit)
    if d == 'air':
        ntlegs = ntlegs.loc[ntlegs['distance']<=
                            threshold_dict['footpaths_air']]
    else:
        ntlegs = ntlegs.loc[ntlegs['distance']<=
                            threshold_dict['footpaths']]
    sm.footpaths = sm.footpaths.append(ntlegs)
len(sm.footpaths.index)

104830

In [10]:
# Number of nodes that overlay each other
sm.footpaths.loc[sm.footpaths['distance']==0].shape

(3700, 11)

In [11]:
# Keep only one footpath to stops of the same trip
# List of footpath nodes
node_list = list(sm.footpaths[['a', 'b']].stack())
n_foot_dict = sm.footpaths[['a', 'b']].stack().value_counts().to_dict()
# Reduce list by keeping the most connected stops of each trip
links = sm.links.append(bus.links)
links = links.loc[
    (links['a'].isin(node_list)) | (links['b'].isin(node_list))]
links['n_foot'] = links['a'].map(n_foot_dict) + links['b'].map(n_foot_dict)
node_list = set(list(links.sort_values('n_foot').drop_duplicates(
    'trip_id')[['a', 'b']].stack()))

In [12]:
len(node_list)

56521

In [13]:
# Restrict footpaths to one to two connections between stops of every two trip_ids
sm.footpaths = sm.footpaths.loc[(sm.footpaths['a'].isin(node_list)) |
                                (sm.footpaths['b'].isin(node_list))]
sm.footpaths.shape

(97232, 11)

In [14]:
# Generate footpaths between centroids
ntlegs = engine.ntlegs_from_centroids_and_nodes(
    sm.centroids,
    sm.centroids,
    short_leg_speed=speed_non_motorised_modes,
    long_leg_speed=speed_non_motorised_modes,
    threshold=threshold,
    n_neighbors=2,
    coordinates_unit=sm.coordinates_unit)
ntlegs = ntlegs.loc[ntlegs['distance']<=
                    threshold_dict['cycling_zones']]
ntlegs = ntlegs.loc[ntlegs['distance']!=0]
ntlegs.drop_duplicates(['direction', 'distance', 'time'], inplace=True)
sm.footpaths = sm.footpaths.append(ntlegs)
ntlegs.shape

(228, 11)

In [15]:
# Reindex
sm.footpaths.reset_index(drop=True, inplace=True)
sm.footpaths.index = 'foot_' + pd.Series(sm.footpaths.index).astype(str)

In [16]:
sm.footpaths.sample(n=3)

Unnamed: 0,a,b,direction,distance,geometry,long_leg_speed,rank,short_leg_speed,speed,speed_factor,time
foot_13057,rail_long_node_113,bus_n_383802,eggress,306.050315,"LINESTRING (8.09268 50.38083, 8.09611 50.38250)",17.0,0.0,5.0,5.0,0.782369,220.356227
foot_3590,rail_short_node_6947,coach_node_FLIXBUS:291,access,254.308681,"LINESTRING (8.25872 50.00112, 8.25773 50.00331)",17.0,0.0,5.0,5.0,0.713174,183.10225
foot_56361,rail_short_node_11069,bus_n_243284,eggress,198.991159,"LINESTRING (7.26032 49.65442, 7.26242 49.65559)",17.0,0.0,5.0,5.0,0.630858,143.273634


## Add access and egress links from zone centroids

In [90]:
# When having centroids computed as geometric centers of
# political zones, it can happen that centroids come very
# close to each other. This requires special filtering
# of connectors for not making routes unrealistically short
def filter_connectors(df, keep=2, length_weight=None, dist_centr_weight=None,
                      conn_weight=None, conn_dict=None):
    # Each link exists as both access and egress
    ac = df.loc[df['direction']=='access']
    eg = df.loc[df['direction']=='eggress']
    analysis_cols = []
    if length_weight:
        # Link length (in km) weighted
        ac['length'] = - np.power(ac['distance'] / 1e3, length_weight)
        eg['length'] = - np.power(eg['distance'] / 1e3, length_weight)
        analysis_cols.append('length')
    if dist_centr_weight:
        # Calculate distance to the nearest centroid (except own one)
        ac['centr_dist'] = np.power([min([p.distance(c) for c in list(
            sm.centroids.loc[sm.centroids['NUTS_ID']!=own_c, 'geometry'])]
                                        ) / 1.4e-2 for own_c, p in zip(
            list(ac['a']), [geometry.Point(l.coords[-1]) for l in ac['geometry']])],
                                    dist_centr_weight)
        eg['centr_dist'] = np.power([min([p.distance(c) for c in list(
            sm.centroids.loc[sm.centroids['NUTS_ID']!=own_c, 'geometry'])]
                                        ) / 1.4e-2 for own_c, p in zip(
            list(eg['b']), [geometry.Point(l.coords[0]) for l in eg['geometry']])],
                                    dist_centr_weight)
        analysis_cols.append('centr_dist')
    if conn_weight:
        # Calculate connectivity
        ac['connectivity'] = np.power(ac['b'].map(n_links_dict), conn_weight)
        eg['connectivity'] = np.power(eg['a'].map(n_links_dict), conn_weight)
        analysis_cols.append('connectivity')
    # Calculate link performance with given attributes
    ac['sum'] = ac[analysis_cols].sum(axis=1)
    eg['sum'] = eg[analysis_cols].sum(axis=1)
    # Get the n most wanted links
    ac = ac.sort_values('sum', ascending=False).groupby('a').head(keep)
    eg = eg.sort_values('sum', ascending=False).groupby('b').head(keep)
    return ac.append(eg).reset_index()[df.columns]

In [61]:
# Compute road access and egress links
sm.zone_to_road = engine.ntlegs_from_centroids_and_nodes(
    sm.centroids,
    road.road_nodes,
    short_leg_speed=speed_footpaths,
    long_leg_speed=speed_non_motorised_modes,
    threshold=threshold,
    n_neighbors=20,
    coordinates_unit=sm.coordinates_unit)
sm.zone_to_road = sm.zone_to_road.loc[
    sm.zone_to_road['distance']<=threshold_dict['road']]
len(sm.zone_to_road.index)

15766

In [64]:
sm.zone_to_road = filter_connectors(sm.zone_to_road, keep=1,
                                    length_weight=None,
                                    dist_centr_weight=1)
len(sm.zone_to_road.index)

802

In [65]:
# Every zone must have an access and an egress link to roads
assert sm.zones['NUTS_ID'].isin(list(sm.zone_to_road['a'])).all()
assert sm.zones['NUTS_ID'].isin(list(sm.zone_to_road['b'])).all()

In [66]:
# Compute road - PT links
sm.road_to_transit = engine.ntlegs_from_centroids_and_nodes(
    sm.nodes,
    road.road_nodes,
    short_leg_speed=speed_footpaths,
    long_leg_speed=speed_non_motorised_modes,
    threshold=threshold,
    n_neighbors=1,
    coordinates_unit=sm.coordinates_unit)
sm.road_to_transit = sm.road_to_transit.loc[
    sm.road_to_transit['distance']<=threshold_dict['road_pt']]
len(sm.road_to_transit.index)

15584

In [67]:
# Every route type is saved once in this attribute
assert len(sm.pt_route_types) == len(sm.links['route_type'].unique())
sm.pt_route_types

['rail_long_distance', 'rail_short_distance', 'coach', 'air']

In [69]:
# How many links does every node have?
n_links_dict = sm.links[['a', 'b']].append(bus.links[['a', 'b']]).append(
    sm.footpaths[['a', 'b']]).stack().value_counts().to_dict()

In [91]:
# Compute PT access and egress links by route type
sm.zone_to_transit = sm.zone_to_transit.iloc[0:0]
for t in sm.pt_route_types:
    ntlegs = engine.ntlegs_from_centroids_and_nodes(
        sm.centroids,
        sm.nodes.loc[sm.nodes['route_type']==t],
        short_leg_speed=speed_footpaths,
        long_leg_speed=speed_non_motorised_modes,
        threshold=threshold,
        n_neighbors=5,
        coordinates_unit=sm.coordinates_unit)
    # Cut off long links
    ntlegs = ntlegs.loc[ntlegs['distance']<=
                        threshold_dict['pt']]
    ntlegs['route_type'] = t
    sm.zone_to_transit = sm.zone_to_transit.append(filter_connectors(
        ntlegs, keep=1, length_weight=1, dist_centr_weight=2,
        conn_weight=.5, conn_dict=n_links_dict))
sm.zone_to_transit.reset_index(drop=True, inplace=True)
len(sm.zone_to_transit.index)

1650

In [93]:
# Add bus connections
ntlegs = engine.ntlegs_from_centroids_and_nodes(
    sm.centroids,
    bus.nodes,
    short_leg_speed=speed_footpaths,
    long_leg_speed=speed_non_motorised_modes,
    threshold=threshold,
    n_neighbors=5, # Generate a bunch and clean later
    coordinates_unit=sm.coordinates_unit)
# Cut off long links
ntlegs = ntlegs.loc[ntlegs['distance']<=
                    threshold_dict['pt']]
ntlegs['route_type'] = 'bus'
len(ntlegs.index)

3992

In [94]:
sm.zone_to_transit = sm.zone_to_transit.append(filter_connectors(
    ntlegs, keep=1, length_weight=1, dist_centr_weight=2,
    conn_weight=.5, conn_dict=n_links_dict))
sm.zone_to_transit.reset_index(drop=True, inplace=True)
len(sm.zone_to_transit.index)

2450

In [95]:
# Number of legs by route type
sm.zone_to_transit.groupby(['route_type', 'direction'])['a'].count()

route_type           direction
air                  access        25
                     eggress       25
bus                  access       400
                     eggress      400
coach                access       221
                     eggress      221
rail_long_distance   access       187
                     eggress      187
rail_short_distance  access       392
                     eggress      392
Name: a, dtype: int64

In [96]:
# Every zone must have an access and an egress link to PT
#sm.zones.loc[sm.zones['NUTS_ID'].isin(list(sm.zone_to_transit['a']))==False]
assert sm.zones['NUTS_ID'].isin(list(sm.zone_to_transit['a'])).all()
assert sm.zones['NUTS_ID'].isin(list(sm.zone_to_transit['b'])).all()

### Parametrise access and egress links
Only zone-PT connectors will be handled in a seperate step

In [26]:
# Road - PT connectors
sm.road_to_transit['distance'] = 0
sm.road_to_transit['time'] = 5*60 # in seconds
sm.road_to_transit.sample(n=2)

Unnamed: 0,a,b,direction,distance,geometry,rank,speed_factor,short_leg_speed,long_leg_speed,speed,time
5294,rail_short_node_9000,903673001,access,0,"LINESTRING (13.668919 51.073086, 13.6683718 51...",0,0.299735,5,17,5.0,300
25991,1486400569,rail_short_node_7536,eggress,0,"LINESTRING (11.106115 49.473976, 11.1049355 49...",0,0.602147,5,17,5.0,300


In [27]:
# Road - centroid connectors
sm.zone_to_road['distance'] = 0
sm.zone_to_road['time'] = 0
sm.zone_to_road.sample(n=2)

Unnamed: 0,a,b,direction,distance,geometry,rank,speed_factor,short_leg_speed,long_leg_speed,speed,time
370,DEG0C,292109095,access,0,LINESTRING (10.69382058262155 50.9106100665805...,0,1.737356,5,17,8.686778,0
5,DE715,1393731601,access,0,LINESTRING (8.660199656582915 49.6176876292569...,0,1.261475,5,17,6.307376,0


In [28]:
# Parameterisation comes later for PT
sm.zone_to_transit.sample(n=2)

Unnamed: 0,a,b,direction,distance,geometry,long_leg_speed,n_links,rank,route_type,short_leg_speed,speed,speed_factor,time
526,DE40C,rail_short_node_11698,access,7682.334875,"LINESTRING (14.21903 52.24394, 14.25097 52.17768)",17.0,27.0,0.0,rail_short_distance,5.0,17.0,3.919779,1626.847385
784,rail_short_node_14503,DE126,eggress,2567.736473,"LINESTRING (8.50049 49.49943, 8.47777 49.51720)",17.0,38.0,2.0,rail_short_distance,5.0,11.330791,2.266158,815.816949


## Save model

In [29]:
# Drop unneccessary columns
cols = ['speed_factor', 'short_leg_speed', 'long_leg_speed', 'rank']
sm.footpaths.drop(cols, axis=1, inplace=True)
sm.zone_to_transit.drop(cols, axis=1, inplace=True)
sm.zone_to_road.drop(cols, axis=1, inplace=True)
sm.road_to_transit.drop(cols, axis=1, inplace=True)
sm.zone_to_transit.drop(['n_links'], axis=1, inplace=True, errors='ignore')

In [32]:
# Make tables lighter
cols = ['distance', 'speed', 'time']
sm.footpaths[cols] = sm.footpaths[cols].astype(int)
sm.zone_to_transit[cols] = sm.zone_to_transit[cols].astype(int)
sm.zone_to_road[cols] = sm.zone_to_road[cols].astype(int)
sm.road_to_transit[cols] = sm.road_to_transit[cols].astype(int)

In [33]:
sm.footpaths.sample()

Unnamed: 0,a,b,direction,distance,geometry,speed,time
foot_12744,rail_long_node_1080,bus_n_41640,eggress,131,"LINESTRING (7.15786 50.68442, 7.15956 50.68393)",5,94


In [34]:
# Saving model...
tables = ['centroids', 'footpaths', 'zone_to_transit']
sm.to_json(model_path + 'de_pt_access_egress',
           only_attributes=tables, encoding='utf-8')
sm.to_json(model_path + 'de_road_access_egress',
           only_attributes=['centroids', 'zone_to_road', 'road_to_transit'],
           encoding='utf-8')

to_hdf(overwriting): 100%|█████████████████████████████████████████████████████████████| 36/36 [00:16<00:00,  2.24it/s]
to_hdf(overwriting): 100%|█████████████████████████████████████████████████████████████| 36/36 [00:03<00:00, 11.07it/s]
