In [1]:
import sys # for automation and parallelisation
# Interactive kernel sessions always work on the 'base' scenario. Script runs
# (automation / parallelisation) take the scenario name from the first
# command-line argument.
manual = 'ipykernel' in sys.argv[0]
if manual:
    scenario = 'base'
elif len(sys.argv) > 1:
    scenario = sys.argv[1]
else:
    # No scenario passed on the command line: use 'base' instead of failing
    # with an IndexError on sys.argv[1]
    scenario = 'base'

In [2]:
import json

import pandas as pd
import numpy as np
import geopandas as gpd
from quetzal.model import stepmodel
from quetzal.engine.engine import od_volume_from_zones
from quetzal.io import excel
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
from biogeme import expressions as ex

PyTables is not installed. No support for HDF output.
SQLalchemy is not installed. No support for SQL output.


In [3]:
# Directory layout relative to the notebook; only the model directory
# depends on the scenario
model_path = f'../model/{scenario}/'
input_path = '../input/'
input_static_path = '../input_static/'
output_path = '../output/'

In [4]:
# Read the parameter set of the current scenario from the parameter workbook
parameter_file = '../input/parameters.xls'
params = excel.read_var(file=parameter_file, scenario=scenario)

# OD matrix generation

## Needs zone attributes and destination choice model results

## Saves volumes table

In [5]:
# Transport demand segments, given as a ';'-separated list in the parameters
segments = [name.strip() for name in params['general']['demand_segments'].split(';')]
# Trip purposes, grouped into compulsory and optional activities
purposes_compulsory = ['commuting', 'business', 'education']
purposes_optional = ['buy/execute', 'leisure', 'accompany']
# A segment belongs to the group of the purpose that precedes its first '_'
segments_compulsory = [name for name in segments
                       if name.partition('_')[0] in purposes_compulsory]
segments_optional = [name for name in segments
                     if name.partition('_')[0] in purposes_optional]

In [6]:
# Load the zones of this scenario into a step model
zones_file = model_path + 'de_zones'
sm = stepmodel.read_json(zones_file)

## Calculate probabilities of inner/inter-zonal choice for optional trips

In [7]:
# Fill POI values
# Raw POI counts and the list that assigns OSM key/value pairs to categories
pois = pd.read_csv(input_path + 'spatial_num_pois_raw.csv', index_col='index')
cats = pd.read_excel(input_path + 'spatial_OSM_POI_list.xlsx', sheet_name='categories')
# Column labels of the POI table: '<key> <value>', or just '<key>' without value
cats['label'] = (cats['key'] + ' ' + cats['value'].fillna('')).str.strip()
# One list of POI column labels per category; rows without category are skipped
labels_by_category = cats.dropna(subset=['category']).groupby('category')['label'].agg(list)
for category, columns in labels_by_category.items():
    # Sum the category's POI columns, look the total up by the zone's FID
    # (zones without entry get 0) ...
    poi_count = sm.zones['FID'].map(pois[columns].sum(axis=1)).fillna(0)
    # ... and scale it with the scenario's factor for the zone's urbanisation degree
    poi_change = sm.zones['urbanisation'].map(params['poi_change']).astype(float)
    sm.zones[category] = poi_count * poi_change

In [8]:
# Load inter-zonal composite cost from mode choice step
# Prefer the results of the current scenario, otherwise use the base scenario
cc_file = '/mode_choice_od_composite_cost.csv'
try:
    cc = pd.read_csv(output_path + scenario + cc_file)
except FileNotFoundError:
    try:
        cc = pd.read_csv(output_path + 'base' + cc_file)
    except FileNotFoundError:
        raise FileNotFoundError(
            'You can compute composite cost with the mode choice model or download them in the latest major release')
cc = cc.set_index(['origin', 'destination'])
# Split the segment names into purpose and an integer car availability code
car_av_codes = {'no': 0, 'car': 1}
cc.columns = pd.MultiIndex.from_tuples(
    [(name.split('_')[0], car_av_codes[name.split('_')[1]])
     for name in cc.columns],
    names=['purpose_model', 'car_av'])
# Reshape the table into a mergable format:
# destinations move to the columns so that the row-wise mean is the mean CC
# to all destinations (used as accessibility); the result is indexed by
# (origin, car_av) with one column per purpose
cc = (
    cc.unstack('destination')
      .stack('purpose_model')
      .stack('car_av')
      .mean(axis=1)
      .unstack('purpose_model')
)

In [9]:
# Load inner/inter choice model results
# A scenario-specific parameter file takes precedence over the general one
try:
    betas = pd.read_csv(f'{input_path}inner-inter_betas_{scenario}.csv', index_col=0)
except FileNotFoundError:
    betas = pd.read_csv(f'{input_path}inner-inter_betas.csv', index_col=0)

In [10]:
# Simulate the probabilities from the zones table using biogeme
inner_probs = pd.DataFrame(index=sm.zones.index)
car_names = {0: '_no_car', 1: '_car'}
# Zone attributes that form the attraction term of alternative 0, by purpose.
# The utility formulations differ only in these columns.
attraction_columns = {
    'commuting': ['employment'],
    'business': ['employment'],
    'education': ['childcare', 'school', 'higher_education'],
    'buy/execute': ['shop', 'medical', 'special_shop'],
    'leisure': ['daily_leisure', 'holiday', 'occasional_leisure'],
    'accompany': ['medical', 'school', 'childcare'],
}
poi_categories = list(cats.loc[cats['category'].notna(), 'category'].unique())
for car in [0,1]:
    for p in purposes_optional:
        if p not in attraction_columns:
            # Fail loudly instead of silently reusing the utilities of the
            # previous purpose
            raise KeyError('No utility formulation defined for purpose ' + p)

        # Add accessibility for this segment to database
        sm.zones['acc_'+p] = sm.zones.merge(cc.xs(car, level='car_av'),
                                            how='left', left_index=True, right_index=True
                                           )[p]
        # Create the database
        zones_db = db.Database('zones', sm.zones.loc[
            sm.zones['acc_'+p].notna(), # might be NaN from previous sampling in mode choice step
            ['employment', 'population', 'area', 'acc_'+p] + poi_categories])

        # Define utility formulations as in cal22
        # Define parameters (values are supplied from `betas` at simulation time)
        ASC_0 = ex.Beta('ASC_0', 0, None, None, 1)
        ASC_1 = ex.Beta('ASC_1', 0, None, None, 0)
        b_pop = ex.Beta('b_pop', 0, None, None, 0)
        b_attr = ex.Beta('b_attr', 0, None, None, 0)
        b_acc = ex.Beta('b_acc', 0, None, None, 0)

        # Database columns are referenced by name through ex.Variable, so
        # nothing has to be injected into the notebook's global namespace
        # 1 + sum of the purpose's attraction columns
        attraction = sum((ex.Variable(col) for col in attraction_columns[p]), 1)
        density = (1 + ex.Variable('population')) / ex.Variable('area')
        # Alternative 0 is described by zone attributes, alternative 1 by the
        # accessibility (mean composite cost to all destinations)
        V = {0:
             ASC_0
             + ex.log(attraction)*b_attr
             + ex.log(density)*b_pop,
             1:
             ASC_1
             + ex.Variable('acc_'+p) * b_acc}

        # Simulate
        simulate = {j: models.logit(V, None, int(j)) for j in [0,1]}
        model = bio.BIOGEME(zones_db, simulate, numberOfThreads=1)
        probs = model.simulate(theBetaValues=betas[p+car_names[car]].to_dict())

        # Keep the probability of alternative 0 as this segment's inner-zonal
        # probability. Zones without accessibility were left out of the
        # database and stay NaN here.
        inner_probs[p+car_names[car]] = probs[0]

In [11]:
# If all that doesn't make sense:
# Take inner-zonal probabilities from MiD2017 data by urbanisation degree
#inner_probs = pd.read_csv(input_path + 'inner-zonal_probabilities_agg_urban.csv', index_col=0)
#inner_probs

## Compute volumes (OD matrix) for optional trips

$F_{ods} = N_{os} * p^{dest}_{ods} * (1 - p^{inner}_{os})$

Indices:
* o: origin zone
* d: destination zone
* s: demand segment

In [12]:
# Load probability results from logit step
try:
    dm = stepmodel.read_zippedpickles(model_path + 'de_destination_choice')
except FileNotFoundError:
    # Fall back to the base scenario. Only the last occurrence of the scenario
    # name (the scenario folder) is swapped: str.replace would also rewrite
    # other parts of the path that contain it, e.g. scenario 'de' in 'model'.
    base_model_path = 'base'.join(model_path.rsplit(scenario, 1))
    dm = stepmodel.read_zippedpickles(base_model_path + 'de_destination_choice')

utility_values: 100%|████████████████████████████████████████████████████████████████████| 7/7 [00:03<00:00,  2.28it/s]


In [13]:
# Extract probability values and create new table from it:
# one row per (origin, destination) pair, one column per segment
prob = dm.probabilities.set_index(['origin', 'segment']).drop(columns=['destination', 'root'])
prob.columns.name = 'destination'
volumes = prob.stack('destination').unstack('segment').reset_index()
volumes.columns.name = None
# Fill 0 for segments that were not covered by logit models.
# Iterate over the segment list rather than a set so that the column order is
# the same in every run.
for seg in segments:
    if seg not in volumes.columns:
        volumes[seg] = 0
volumes.sample(2)

Unnamed: 0,origin,destination,accompany_car,accompany_no_car,buy/execute_car,buy/execute_no_car,leisure_car,leisure_no_car,business_car,education_no_car,commuting_car,education_car,commuting_no_car,business_no_car
1993453,DE406_2,DE133_1,1.108507e-07,2e-06,8.238047e-26,8.89979e-13,5e-06,5.745226e-09,0,0,0,0,0,0
3651911,DEA47_1,DEA29_2,6.177531e-07,2.3e-05,7.556094e-09,2.408245e-05,2.4e-05,0.0002702592,0,0,0,0,0,0


In [None]:
# Drop empty rows: keep only OD pairs with a positive total over all segments
volumes = volumes[volumes[segments].sum(axis=1).gt(0)]
len(volumes)

In [14]:
# Create inner-zonal probabilities
in_mask = volumes['origin']==volumes['destination']
zone_list = list(set(volumes['origin']))
if not in_mask.any():
    # The table holds no inner-zonal pairs yet: append one (o, o) row per
    # origin. ignore_index renumbers the rows without leaving the old index
    # behind as a stray 'index' column, which a plain reset_index() would do.
    volumes = pd.concat([volumes,
                         pd.DataFrame({'origin': zone_list,
                                       'destination': zone_list})
                        ], ignore_index=True)
    in_mask = volumes['origin']==volumes['destination']
urban_dict = sm.zones['urbanisation'].to_dict()
for seg in segments_optional:
    probs = inner_probs[seg]
    # Key under which each row finds its inner-zonal probability
    if len(probs) < len(zone_list):
        # aggregated probabilities by urbanisation degree
        prob_key = volumes['origin'].map(urban_dict)
    else:
        # one probability per origin zone
        prob_key = volumes['origin']
    p_inner = prob_key.map(probs)
    # Correct probabilities in volumes:
    # inner-zonal pairs carry p_inner, all other pairs are scaled by (1 - p_inner)
    volumes.loc[in_mask, seg] = p_inner[in_mask]
    volumes.loc[~in_mask, seg] = volumes.loc[~in_mask, seg] * (1 - p_inner[~in_mask])

In [15]:
# make sure all probabilities sum up to 1 for each zone in each segments
#volumes.groupby('origin')[segments_optional].sum().describe()

In [16]:
# calculate number of trips
try:
    generation = pd.read_csv(model_path + 'generation_volumes.csv', index_col=0)
except FileNotFoundError:
    # Fall back to the base scenario. Only the last occurrence of the scenario
    # name (the scenario folder) is swapped: str.replace would also rewrite
    # other parts of the path that contain it, e.g. scenario 'de' in 'model'.
    base_model_path = 'base'.join(model_path.rsplit(scenario, 1))
    generation = pd.read_csv(base_model_path + 'generation_volumes.csv', index_col=0)
# Manually correct the choice model
correct = params['correction_generation'].astype(float)
# Turn probabilities into trips: probability * generated trips of the origin
# * correction factor of the segment.
# NOTE: this overwrites the probability columns, so running the cell twice
# scales the volumes twice.
for s in segments_optional:
    volumes[s] = volumes[s] * volumes['origin'].map(generation[s]) * correct[s]

In [17]:
# check the sum (million trips per day)
trips_per_day_mio = volumes[segments_optional].sum().sum() / 1e6 / 365
print(f'Million trips per day: {trips_per_day_mio}')

127.2037539287193

In [18]:
# inter-zonal trips (billion per year)
inter_mask = volumes['origin'].ne(volumes['destination'])
volumes.loc[inter_mask, segments_optional].sum().sum() / 1e9

16.335694337327656

## Sparsify

In [None]:
# Load OD set
# If the model folder holds an OD set, restrict the volumes to these pairs;
# otherwise keep the full table.
try:
    # The context manager closes the file handle after reading
    with open(model_path + 'od_set.json') as od_file:
        od_set = [tuple(pair) for pair in json.load(od_file)]
except FileNotFoundError:
    print('Saving the full OD set')
else:
    print('Found OD set with {} pairs'.format(len(od_set)))
    volumes = volumes.set_index(['origin', 'destination']).loc[od_set].reset_index()
    print('Million trips per day after sparsification: {}'.format(
        volumes[segments].sum().sum() / 1e6 / 365))

## Policies

In [None]:
# Multi-purpose densified quarters in agglomeration areas around large cities
# The policy is inactive unless a positive maximum distance is configured.
# Parameters are cast to float as for the other parameter tables.
max_dist = float(params['trip_reduction_suburban_quarters']['max_dist_to_city'])
if max_dist > 0:
    # Find quarters
    # Project to a metric CRS for Germany so that max_distance is in meters:
    # EPSG:5243 is ETRS89 / LCC Germany (E-N). The previously used EPSG:5234
    # is the Kandawala / Sri Lanka Grid, which distorts distances in Germany.
    zones = gpd.GeoDataFrame(sm.zones, crs=sm.epsg).to_crs('EPSG:5243') # distance to meter
    # Large cities: urbanisation degree 1 and more than 200,000 inhabitants
    cities = zones.loc[(zones['urbanisation']==1) & (zones['population']>200000)]
    # Candidate quarters: centroids of all zones with urbanisation degree 2
    centroids = gpd.GeoDataFrame(index=zones.loc[zones['urbanisation']==2].index,
                                 geometry=zones.loc[zones['urbanisation']==2, 'geometry'].centroid)
    # Keep the candidates that lie within max_dist of a large city
    quarters = gpd.sjoin_nearest(centroids, cities[['geometry']], max_distance=max_dist)
    # Reduce trips that originate in a quarter by the segment's reduction share
    quarter_mask = volumes['origin'].isin(quarters.index)
    for seg in segments:
        volumes.loc[quarter_mask, seg] *= \
            1 - float(params['trip_reduction_suburban_quarters'][seg])

## Save

In [34]:
# Save volumes table
# Attach the OD volumes to the model with a fresh consecutive row index and
# write only this attribute to the scenario's model folder
sm.volumes = volumes.reset_index(drop=True)
sm.to_zippedpickles(f'{model_path}de_volumes_choice', only_attributes=['volumes'])

volumes: 100%|███████████████████████████████████████████████████████████████████████████| 9/9 [00:24<00:00,  2.67s/it]
