In [30]:
import sys # for automation and parallelisation
manual, scenario = (True, 'base') if 'ipykernel' in sys.argv[0] else (False, sys.argv[1])
if manual:
    %matplotlib inline

In [31]:
import pandas as pd
import numpy as np
import geopandas as gpd
from quetzal.model import stepmodel
from quetzal.engine.engine import od_volume_from_zones
from quetzal.io import excel
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
from biogeme import expressions as ex

In [32]:
model_path = '../model/' + scenario + '/'
input_path = '../input/'
input_static_path = '../input_static/'
output_path = '../output/'

In [33]:
# Load scenario parameters
params = excel.read_var(file='../input/parameters.xls', scenario=scenario)

# OD matrix generation

## Needs zone attributes and destination choice model results

## Saves volumes table

In [34]:
# Transport demand segments
segments = [s.strip() for s in params['general']['demand_segments'].split(';')]
purposes_compulsory = ['commuting', 'business', 'education']
purposes_optional = ['buy/execute', 'leisure', 'accompany']
segments_compulsory = [s for s in segments if s.split('_')[0] in purposes_compulsory]
segments_optional = [s for s in segments if s.split('_')[0] in purposes_optional]

In [35]:
# Load zones
sm = stepmodel.read_json(model_path + 'de_zones')

## Calculate probabilities of inner/inter-zonal choice for optional trips

In [36]:
# Fill POI values
pois = pd.read_csv(input_path + 'spatial_num_pois_raw.csv', index_col='index')
cats = pd.read_excel(input_path + 'spatial_OSM_POI_list.xlsx', sheet_name='categories')
cats['label'] = (cats['key'] + ' ' + cats['value'].fillna('')).str.strip()
for category, columns in cats.loc[cats['category'].notna()
                                 ].groupby('category').agg(
                                {'label': list})['label'].items():
    sm.zones[category] = sm.zones['FID'].map(pois[columns].sum(axis=1)).fillna(0)

In [37]:
# Load inter-zonal composite cost from mode choice step
cc = pd.read_csv(output_path + scenario + '/mode_choice_od_composite_cost.csv')
cc.set_index(['origin', 'destination'], inplace=True)
# Rename segments to integer values
cc.columns = pd.MultiIndex.from_tuples(
    [(seg.split('_')[0], {'no': 0, 'car': 1}[seg.split('_')[1]])
     for seg in cc.columns],
    names=['purpose_model', 'car_av'])
# Reshape the table into a mergable format
# Use the mean CC to all destinations as accessibility
cc = cc.unstack('destination').stack('purpose_model').stack('car_av').mean(axis=1)
cc = cc.unstack('purpose_model')

In [38]:
# Load inner/inter choice model results
betas = pd.read_csv(input_path + 'inner-inter_betas.csv', index_col=0)

In [39]:
# Simulate the probabilities from the zones table using biogeme
inner_probs = pd.DataFrame(index=sm.zones.index)
car_names = {0: '_no_car', 1: '_car'}
for car in [0,1]:
    for p in purposes_optional:
        # Add accessibility for this segment to database
        sm.zones['acc_'+p] = sm.zones.merge(cc.xs(car, level='car_av'),
                                            how='left', left_index=True, right_index=True
                                           )[p]
        # Create the database
        zones_db = db.Database('zones', sm.zones[
            ['employment', 'population', 'area', 'acc_'+p]
            + list(cats.loc[cats['category'].notna(), 'category'].unique())])
        globals().update(zones_db.variables)
        
        # Define utility formulations as in cal22
        # Define parameters
        ASC_0 = ex.Beta('ASC_0', 0, None, None, 1)
        ASC_1 = ex.Beta('ASC_1', 0, None, None, 0)
        b_pop = ex.Beta('b_pop', 0, None, None, 0)
        b_attr = ex.Beta('b_attr', 0, None, None, 0)
        b_acc = ex.Beta('b_acc', 0, None, None, 0)

        # Define the utility formulation by purpose
        if p == 'commuting':
            V= {0:
                ASC_0
                + ex.log(1+employment)*b_attr
                + ex.log((1+population)/area)*b_pop,
                1:
                ASC_1
                + acc_commuting * b_acc}
        elif p == 'business':
            V= {0:
                ASC_0
                + ex.log(1+employment)*b_attr
                + ex.log((1+population)/area)*b_pop,
                1:
                ASC_1
                + acc_business * b_acc}
        elif p == 'education':
            V= {0:
                ASC_0
                + ex.log(1+childcare + school + higher_education)*b_attr
                + ex.log((1+population)/area)*b_pop,
                1:
                ASC_1
                + acc_education * b_acc}
        elif p == 'buy/execute':
            V= {0:
                ASC_0
                + ex.log(1+shop + medical + special_shop)*b_attr
                + ex.log((1+population)/area)*b_pop,
                1:
                ASC_1
                + globals()['acc_buy/execute'] * b_acc}
        elif p == 'leisure':
            V= {0:
                ASC_0
                + ex.log(1+daily_leisure + holiday + occasional_leisure)*b_attr
                + ex.log((1+population)/area)*b_pop,
                1:
                ASC_1
                + acc_leisure * b_acc}
        elif p == 'accompany':
            V= {0:
                ASC_0
                + ex.log(1+medical + school + childcare)*b_attr
                + ex.log((1+population)/area)*b_pop,
                1:
                ASC_1
                + acc_accompany * b_acc}
        
        # Simulate
        simulate = {j: models.logit(V, None, int(j)) for j in [0,1]}
        model = bio.BIOGEME(zones_db, simulate)
        probs = model.simulate(theBetaValues=betas[p+car_names[car]].to_dict())
        
        # Calculate generation volumes
        inner_probs[p+car_names[car]] = probs[0]
        print('{}_{} av. prob. inner: {}'.format(p,car,np.round(probs[0].mean(), 2)))

buy/execute_0 av. prob. inner: 0.55
leisure_0 av. prob. inner: 0.57
accompany_0 av. prob. inner: 0.55
buy/execute_1 av. prob. inner: 0.54
leisure_1 av. prob. inner: 0.55
accompany_1 av. prob. inner: 0.54


In [40]:
# If all that doesn't make sense:
# Take inner-zonal probabilities from MiD2017 data by urbanisation degree
#inner_probs = pd.read_csv(input_path + 'inner-zonal_probabilities_agg_urban.csv', index_col=0)
#inner_probs

## Compute volumes (OD matrix) for optional trips

$F_{ods} = N_{os} * p^{dest}_{ods} * (1 - p^{inner}_{os})$

Indecies:
* o: origin zone
* d: destination zone
* s: demand segment

In [41]:
# Load probability results from logit step
dm = stepmodel.read_zippedpickles(model_path + 'de_destination_choice')

utility_values: 100%|████████████████████████████████████████████████████████████████████| 7/7 [00:02<00:00,  2.81it/s]


In [42]:
# Extract probability values and create new table from it
prob = dm.probabilities.set_index(['origin', 'segment']).drop(columns=['destination', 'root'])
prob.columns.name = 'destination'
volumes = prob.stack('destination').unstack('segment').reset_index()
volumes.columns.name = None
# Fill 0 for segments that were not covered by logit models
for seg in set(segments) - set(volumes.columns):
    volumes[seg] = 0
volumes.sample(2)

Unnamed: 0,origin,destination,accompany_car,accompany_no_car,buy/execute_car,buy/execute_no_car,leisure_car,leisure_no_car,business_car,commuting_no_car,education_no_car,education_car,commuting_car,business_no_car
4347778,DED53_3,DEF0C_6,1.213911e-10,3.551938e-07,1.106815e-17,1.092361e-10,1.740014e-07,0.0,0,0,0,0,0,0
689437,DE21B_3,DE21B_6,0.0030745,0.003819316,0.001957222,0.002123872,0.001742281,0.0,0,0,0,0,0,0


In [43]:
# Create inner-zonal probabilities
in_mask = volumes['origin']==volumes['destination']
if len(volumes.loc[in_mask]) == 0:
    volumes = volumes.append(pd.DataFrame({'origin': list(sm.zones.index),
                                           'destination': list(sm.zones.index)})
                            ).reset_index()
    in_mask = volumes['origin']==volumes['destination']
urban_dict = sm.zones['urbanisation'].to_dict()
for seg in segments_optional:
    probs = inner_probs[seg]
    # Correct probabilities in volumes
    if len(probs) < len(sm.zones): # aggregated probabilities by urbanisation degree
        volumes.loc[in_mask, seg] = volumes.loc[in_mask, 'origin'].map(urban_dict).map(probs)
        volumes.loc[~in_mask, seg] = volumes.loc[~in_mask, seg] * \
                                     volumes.loc[~in_mask, 'origin'].map(urban_dict).map(1 - probs)
    else:
        volumes.loc[in_mask, seg] = volumes.loc[in_mask, 'origin'].map(probs)
        volumes.loc[~in_mask, seg] = volumes.loc[~in_mask, seg] * \
                                     volumes.loc[~in_mask, 'origin'].map(1 - probs)

In [44]:
# make sure all probabilities sum up to 1 for each zone in each segments
#volumes.groupby('origin')[segments_optional].sum().describe()

In [45]:
# calculate number of trips
generation = pd.read_csv(input_path + 'generation_volumes.csv', index_col=0)
for s in segments_optional:
    volumes[s] = volumes[s] * volumes['origin'].map(generation[s])

In [46]:
# check the sum (million trips per day)
volumes[segments_optional].sum().sum() / 1e6 / 365

82.4499299043983

In [47]:
# inter-zonal trips (billion per year)
volumes.loc[volumes['origin']!=volumes['destination'], segments_optional].sum().sum() / 1e9

10.04342748758862

## Compulsory trips

Choose the doubly constrained gravity model as distribution method because logit models don't perform well for purposes commuting, business and education.

In [48]:
# Load inter-zonal composite cost from mode choice step
cc = pd.read_csv(output_path + scenario + '/mode_choice_od_composite_cost.csv')
cc.set_index(['origin', 'destination'], inplace=True)
# And inner-zonal CC
cc_inner = pd.read_csv(input_path + 'inner-zonal_cc.csv', index_col='Unnamed: 0')

In [78]:
# Get the resistance to move (composite cost from mode choice)
def get_deterrence(segment):
    deterrence_matrix = cc[segment].unstack('destination')
    # Add inner-zonal resistance
    cc_inner_mean = cc_inner[segment].mean()
    for z in deterrence_matrix.index:
        try:
            deterrence_matrix.loc[z,z] = cc_inner.loc[z, segment]
        except KeyError: # zone not found
            deterrence_matrix.loc[z,z] = cc_inner_mean
    return deterrence_matrix.fillna(deterrence_matrix.max() * 10)

In [76]:
# Volumes for commuting with car
sm.zones['emission'] = sm.zones['population'] * sm.zones['car_avail_hh']
sm.zones['attraction'] = sm.zones['employment'] * sm.zones['car_avail_hh']
vol = od_volume_from_zones(sm.zones, get_deterrence('commuting_car'))
volumes['commuting_car'] = volumes.merge(vol, how='left', on=['origin', 'destination'])['volume'].fillna(0)

In [None]:
# Volumes for commuting without car
sm.zones['emission'] = sm.zones['population'] * (1 - sm.zones['car_avail_hh'])
sm.zones['attraction'] = sm.zones['employment'] * (1 - sm.zones['car_avail_hh'])
vol = od_volume_from_zones(sm.zones, get_deterrence('commuting_no_car'))
volumes['commuting_no_car'] = vol.set_index(['origin', 'destination']).fillna(0)

## Sparsify the OD set

Reduce the number of OD pairs to a sample while keeping the number of trips constant.

In [27]:
# Only sample, if the parameter is specified
sample_size = params['general']['od_sparse_sample']
if sample_size > 0:
    # Set a stable seed, generating the same OD set in every scenario run
    np.random.seed(42)
    # Choice probabilities of OD pairs weighted by trip volumes
    od_probabilities = volumes[segments].sum(axis=1) / volumes[segments].sum().sum()
    sample = np.random.choice(a=volumes.index,
                              size=sample_size,
                              p=od_probabilities)
    # Reduce the volumes matrix
    for seg in segments:
        expansion = volumes[seg].sum() / sample_size
        volumes[seg] = pd.Series(sample).value_counts() * expansion

In [28]:
# Check the sum (million trips per day)
volumes.loc[volumes.notna().all(axis=1), segments].sum().sum() / 365 / 1e6

157.9334806821937

In [29]:
volumes = volumes.loc[volumes.notna().all(axis=1)]

## Save

In [25]:
# check the sum (million trips per day)
volumes[segments].sum().sum() / 1e6 / 365

89.65594118230415

In [26]:
# inter-zonal trips (billion per year)
volumes.loc[volumes['origin']!=volumes['destination'], segments].sum().sum() / 1e9

10.94356178163238

In [27]:
# Save volumes table
sm.volumes = volumes.reset_index(drop=True)
sm.to_zippedpickles(model_path + 'de_volumes', only_attributes=['volumes'])

volumes: 100%|█████████████████████████████████████████████████████████████████████████| 34/34 [00:24<00:00,  1.42it/s]
