In [1]:
import sys # for automation and parallelisation
manual, scenario = (True, 'base') if 'ipykernel' in sys.argv[0] else (False, sys.argv[1])
if manual:
    %matplotlib inline


Bad key text.latex.preview in file C:\Users\marlin.arnz\AppData\Local\Continuum\miniconda3\envs\quetzal\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle, line 123 ('text.latex.preview : False')
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.5.2/matplotlibrc.template
or from the matplotlib source distribution

Bad key mathtext.fallback_to_cm in file C:\Users\marlin.arnz\AppData\Local\Continuum\miniconda3\envs\quetzal\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle, line 155 ('mathtext.fallback_to_cm : True  # When True, use symbols from the Computer Modern')
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.5.2/matplotlibrc.template
or from the matplotlib source distribution

Bad key savefig.jpeg_quality in file C:\Users\marlin.arnz\AppData\Local\Continuum\miniconda3\envs\quetzal\lib\site-packages\matplotlib\mpl-data\stylelib\

In [12]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from quetzal.model import stepmodel
from quetzal.io import excel
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.messaging as message
from biogeme import expressions as ex

In [10]:
model_path = '../model/' + scenario + '/'
input_path = '../input/'
zone_data_path = '../input_static/zones_Gebietsaenderungen/'
input_static_path = '../input_static/'
output_path = '../output/'

In [4]:
# Load parameters for settings
params = excel.read_var(file='../input/parameters.xls', scenario=scenario)

# Calibration: inner/inter-zonal destination choice

## Requires MiD data

## Saves inner-zonal probabilities for zones

Assumptions apply as in `model_generation`

In [8]:
sm = stepmodel.read_json(model_path + 'de_zones')

In [19]:
# Transport demand segments
segments = [s.strip() for s in params['general']['demand_segments'].split(';')]
purposes = ['commuting', 'business', 'education', 'buy/execute', 'leisure', 'accompany']

In [6]:
# Load trips from MiD2017
trips = pd.read_csv(input_path + 'transport_demand/calibration_all_trips_MiD2017.csv',
                    usecols=['purpose_model', 'purpose2', 'car_avail', 'distance',
                             'origin', 'destination', 'P_ID', 'W_GEW'])
trips.rename(columns={'car_avail': 'car_av'}, inplace=True)
trips.shape

(260723, 8)

In [9]:
# Drop return trips
trips = trips.loc[~trips['purpose2'].isin([8,9])]
len(trips)

155235

## Add choice attributes

In [13]:
# Add employment to trips
zone_dict = sm.zones['employment'].to_dict()
trips['employment'] = trips['origin'].map(zone_dict)

In [14]:
# Load other attraction POIs
pois = pd.read_csv(input_path + 'spatial_num_pois_raw.csv', index_col='index')
cats = pd.read_excel(input_path + 'spatial_OSM_POI_list.xlsx', sheet_name='categories')
cats['label'] = (cats['key'] + ' ' + cats['value'].fillna('')).str.strip()
for category, columns in cats.loc[cats['category'].notna()
                                 ].groupby('category').agg(
                                {'label': list})['label'].items():
    trips[category] = trips['origin'].map(pois[columns].sum(axis=1)).fillna(0)
    sm.zones[category] = sm.zones['FID'].map(pois[columns].sum(axis=1)).fillna(0)

In [15]:
# Put zone data in trips table (population, area, urbanisation)
trips = trips.merge(sm.zones[['population', 'area', 'urbanisation']],
                    how='inner', # drop trips outside of model zones
                    left_on='origin', right_index=True)
trips.shape

(155235, 21)

In [23]:
# Add accessibility
# Load inter-zonal composite cost from mode choice step
cc = pd.read_csv(output_path + scenario + '/mode_choice_od_composite_cost.csv')
cc.set_index(['origin', 'destination'], inplace=True)
# Rename segments to integer values
cc.columns = pd.MultiIndex.from_tuples(
    [(seg.split('_')[0], {'no': 0, 'car': 1}[seg.split('_')[1]])
     for seg in cc.columns],
    names=['purpose_model', 'car_av'])
# Reshape the table into a mergable format
# Use the mean CC to all destinations as accessibility
cc = cc.unstack('destination').stack('purpose_model').stack('car_av').mean(axis=1)
cc = cc.unstack('purpose_model')
# Merge
for p in purposes:
    trips['acc_'+p] = trips.merge(cc[[p]], how='left', left_on=['origin', 'car_av'],
                                  right_index=True)[p]
    trips['acc_'+p] = trips['acc_'+p].fillna(trips['acc_'+p].max())

In [24]:
# Drop relevant NaN data
trips = trips.loc[trips['car_av'].isin([0,1])]
trips.shape

(155227, 27)

In [28]:
trips = trips.loc[trips.notna().all(axis=1)]
trips.shape

(155223, 28)

In [27]:
# Define a choice variable
trips['choice'] = (trips['origin']!=trips['destination']).astype(int)

In [30]:
# Probabilities of going to another zone
trips.groupby(['purpose_model', 'car_av']).agg({'choice': 'mean'})

Unnamed: 0_level_0,Unnamed: 1_level_0,choice
purpose_model,car_av,Unnamed: 2_level_1
accompany,0,0.170833
accompany,1,0.293776
business,0,0.255618
business,1,0.454887
buy/execute,0,0.142567
buy/execute,1,0.287372
commuting,0,0.216151
commuting,1,0.472394
education,0,0.139949
education,1,0.299156


## Build a choice model

The model is segmented in
* population segments: car availability in household
* trip purposes

The utility formulation for the choice between making a trip within the home zone or to another one is:

$V^{inner}_i= log(\sum_{j\in A} a_{i,j}) \beta^{ATTR}_i + log(\text{POP} / \text{AREA}) \beta^{POP}_i$

$V^{inter}_i=ASC^{inter}_i + ACC \beta^{ACC}_i$

Attraction attributes $a$ are chosen for each demand segment $i$ based on their relevance

In [54]:
# Result containers
betas = pd.DataFrame()
t_tests = pd.DataFrame()
# Save utility functions for later
Vs = {}

car_names = {0: '_no_car', 1: '_car'}
for car in [0,1]:
    # create a database for this population segment
    database = db.Database(car_names[car][1:], trips.loc[trips['car_av']==car].drop(
        ['origin', 'destination', 'purpose_model'], axis=1).copy())
    globals().update(database.variables)
    Vs[car] = {}
    
    # Define parameters
    ASC_0 = ex.Beta('ASC_0', 0, None, None, 1)
    ASC_1 = ex.Beta('ASC_1', 0, None, None, 0)
    b_pop = ex.Beta('b_pop', 0, None, None, 0)
    b_attr = ex.Beta('b_attr', 0, None, None, 0)
    b_acc = ex.Beta('b_acc', 0, None, None, 0)

    # Define the utility formulation by purpose
    Vs[car]['commuting'] = {0:
                            ASC_0
                            + ex.log(1+employment)*b_attr
                            + ex.log((1+population)/area)*b_pop,
                            1:
                            ASC_1
                            + acc_commuting * b_acc}
    Vs[car]['business'] = {0:
                            ASC_0
                            + ex.log(1+employment)*b_attr
                            + ex.log((1+population)/area)*b_pop,
                            1:
                            ASC_1
                            + acc_business * b_acc}
    Vs[car]['education'] = {0:
                            ASC_0
                            + ex.log(1+childcare + school + higher_education)*b_attr
                            + ex.log((1+population)/area)*b_pop,
                            1:
                            ASC_1
                            + acc_education * b_acc}
    Vs[car]['buy/execute'] = {0:
                            ASC_0
                            + ex.log(1+shop + medical + special_shop)*b_attr
                            + ex.log((1+population)/area)*b_pop,
                            1:
                            ASC_1
                            + globals()['acc_buy/execute'] * b_acc}
    Vs[car]['leisure'] = {0:
                            ASC_0
                            + ex.log(1+daily_leisure + holiday + occasional_leisure)*b_attr
                            + ex.log((1+population)/area)*b_pop,
                            1:
                            ASC_1
                            + acc_leisure * b_acc}
    Vs[car]['accompany'] = {0:
                            ASC_0
                            + ex.log(1+medical + school + childcare)*b_attr
                            + ex.log((1+population)/area)*b_pop,
                            1:
                            ASC_1
                            + acc_accompany * b_acc}

    for p in purposes:
        # Run the estimation
        logprob = models.loglogit(Vs[car][p], None, choice)
        formulas = {'loglike': logprob, 'weight': W_GEW} # give weights to the estimator
        model = bio.BIOGEME(database, formulas)
        model.modelName = p + car_names[car]
        model.generateHtml = False
        model.generatePickle = False
        model.saveIterations = False
        results = model.estimate()
        
        # Save results
        betas[model.modelName] = results.getEstimatedParameters()['Value']
        t_tests[model.modelName] = np.abs(results.getEstimatedParameters()['Rob. t-test'])

In [55]:
# Save results
betas.to_csv(input_path + 'inner-inter_betas.csv')
t_tests.to_csv(input_path + 'inner-inter_t_tests.csv')

In [56]:
np.round(betas, 3)

Unnamed: 0,commuting_no_car,business_no_car,education_no_car,buy/execute_no_car,leisure_no_car,accompany_no_car,commuting_car,business_car,education_car,buy/execute_car,leisure_car,accompany_car
ASC_1,5.148,2.095,0.72,1.46,2.526,0.647,4.305,4.801,0.094,0.889,1.012,0.173
b_acc,-0.049,0.467,-0.023,-0.049,-0.089,-0.011,-0.029,-0.148,-0.024,-0.006,0.019,-0.008
b_attr,0.812,0.869,0.837,0.86,0.705,0.872,0.708,0.713,0.689,0.674,0.589,0.688
b_pop,-0.578,-0.643,-0.496,-0.508,-0.268,-0.551,-0.488,-0.502,-0.402,-0.381,-0.231,-0.42


In [57]:
np.round(t_tests, 2)

Unnamed: 0,commuting_no_car,business_no_car,education_no_car,buy/execute_no_car,leisure_no_car,accompany_no_car,commuting_car,business_car,education_car,buy/execute_car,leisure_car,accompany_car
ASC_1,20.35,2.73,1.92,4.72,7.9,2.01,46.01,20.73,1.05,6.85,7.95,2.5
b_acc,4.58,2.91,3.69,2.17,4.65,2.91,6.1,4.3,2.98,0.56,1.04,1.46
b_attr,22.7,26.25,23.07,23.56,21.19,23.71,80.99,82.25,78.51,80.47,74.11,80.33
b_pop,10.74,12.71,9.91,10.27,6.03,10.82,48.7,51.33,43.53,42.24,29.63,45.05


## Calculate choice probabilities in model zones

In [58]:
# Simulate the probabilities from the zones table
inner_probs = pd.DataFrame(index=sm.zones.index)
for p in purposes:
    for car in [0,1]:
        # Add accessibility for this segment to database
        sm.zones['acc_'+p] = sm.zones.merge(cc.xs(car, level='car_av'),
                                            how='left', left_index=True, right_index=True
                                           )[p]
        zones_db = db.Database('zones', sm.zones[
            ['employment', 'population', 'area', 'acc_'+p]
            + list(cats.loc[cats['category'].notna(), 'category'].unique())])
        
        # Simulate
        simulate = {j: models.logit(Vs[car][p], None, int(j))
                    for j in list(trips['choice'].unique())}
        model = bio.BIOGEME(zones_db, simulate)
        model.generateHtml = False
        model.generatePickle = False
        model.saveIterations = False
        probs = model.simulate(theBetaValues=betas[p+car_names[car]].to_dict())
        
        # Calculate generation volumes
        inner_probs[p+car_names[car]] = probs[0]
        print('{}_{} av. prob. inner: {}'.format(p,car,np.round(probs[0].mean(), 2)))

commuting_0 av. prob. inner: 0.56
commuting_1 av. prob. inner: 0.54
business_0 av. prob. inner: 0.56
business_1 av. prob. inner: 0.54
education_0 av. prob. inner: 0.56
education_1 av. prob. inner: 0.55
buy/execute_0 av. prob. inner: 0.55
buy/execute_1 av. prob. inner: 0.54
leisure_0 av. prob. inner: 0.57
leisure_1 av. prob. inner: 0.55
accompany_0 av. prob. inner: 0.55
accompany_1 av. prob. inner: 0.54


In [61]:
for s in segments:
    sm.zones[str((s, 'generation'))] *= (1-inner_probs[s])

In [62]:
sm.zones[[str((s, 'generation')) for s in segments]].sum().sum() / 1e9

20.875440276185447

## Save results

In [32]:
inner_probs.to_csv(input_path + 'inner-zonal_probabilities.csv')

to_hdf(overwriting): 100%|█████████████████████████████████████████████████████████████| 33/33 [00:08<00:00,  3.80it/s]
