In [2]:
import sys # for automation and parallelisation
manual, scenario = (True, 'base') if 'ipykernel' in sys.argv[0] else (False, sys.argv[1])

In [1]:
import pandas as pd
import numpy as np
from quetzal.model import stepmodel
from quetzal.io import excel
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.messaging as message
from biogeme import expressions as ex

In [3]:
model_path = '../model/' + scenario + '/'
input_path = '../input/'
output_path = '../output/'

In [4]:
# Load parameters for settings
params = excel.read_var(file='../input/parameters.xls', scenario=scenario)

# Calibration: inner/inter-zonal destination choice

## Requires MiD data

## Saves inner-zonal probabilities for zones

Assumptions apply as in `model_generation`

In [5]:
sm = stepmodel.read_json(model_path + 'de_zones')

In [6]:
# Transport demand segments
segments = [s.strip() for s in params['general']['demand_segments'].split(';')]
purposes = [s.split('_')[0] for s in segments][::2]

In [8]:
# Load trips from MiD2017
trips = pd.read_csv(input_path + 'transport_demand/calibration_all_trips_MiD2017.csv',
                    usecols=['purpose_model', 'purpose2', 'car_avail', 'distance',
                             'origin', 'destination', 'P_ID', 'W_GEW'])
trips.rename(columns={'car_avail': 'car_av'}, inplace=True)
trips = trips.loc[trips['car_av'].isin([0,1])]
trips.shape

(254876, 8)

In [9]:
# Drop return trips
trips = trips.loc[~trips['purpose2'].isin([8,9])]
len(trips)

153608

## Add choice attributes

In [10]:
# Add employment to trips
zone_dict = sm.zones['employment'].to_dict()
trips['employment'] = trips['origin'].map(zone_dict)

In [11]:
# Load other attraction POIs
pois = pd.read_csv(input_path + 'spatial_num_pois_raw.csv', index_col='index')
cats = pd.read_excel(input_path + 'spatial_OSM_POI_list.xlsx', sheet_name='categories')
cats['label'] = (cats['key'] + ' ' + cats['value'].fillna('')).str.strip()
for category, columns in cats.loc[cats['category'].notna()
                                 ].groupby('category').agg(
                                {'label': list})['label'].items():
    trips[category] = trips['origin'].map(pois[columns].sum(axis=1)).fillna(0)
    sm.zones[category] = sm.zones['FID'].map(pois[columns].sum(axis=1)).fillna(0)

In [12]:
# Put zone data in trips table (population, area, urbanisation)
trips = trips.merge(sm.zones[['population', 'area', 'urbanisation']],
                    how='inner', # drop trips outside of model zones
                    left_on='origin', right_index=True)
trips.shape

(153608, 21)

In [15]:
# Add accessibility
# Load inter-zonal composite cost from mode choice step
cc = pd.read_csv(output_path + scenario + '/mode_choice_od_composite_cost.csv')
cc.set_index(['origin', 'destination'], inplace=True)
# Rename segments to integer values
cc.columns = pd.MultiIndex.from_tuples(
    [(seg.split('_')[0], {'no': 0, 'car': 1}[seg.split('_')[1]])
     for seg in cc.columns],
    names=['purpose_model', 'car_av'])
# Reshape the table into a mergable format
# Use the mean CC to all destinations as accessibility
cc = cc.unstack('destination').stack('purpose_model').stack('car_av').mean(axis=1)
cc = cc.unstack('purpose_model')
# Merge
for p in purposes:
    trips['acc_'+p] = trips.merge(cc[[p]], how='left', left_on=['origin', 'car_av'],
                                  right_index=True)[p]
    # Fill NaN with maximums
    trips['acc_'+p] = trips['acc_'+p].fillna(trips['acc_'+p].max())
    # Inner-zonal does not matter here
    trips.loc[trips['origin']==trips['destination'], 'acc_'+p] = 0

In [20]:
trips = trips.loc[trips.notna().all(axis=1)]
trips.shape

(153608, 27)

In [21]:
# Define a choice variable
trips['choice'] = (trips['origin']!=trips['destination']).astype(int)

In [24]:
# Probabilities of going to another zone
trips.groupby(['purpose_model', 'car_av']).agg({'choice': 'mean'})

Unnamed: 0_level_0,Unnamed: 1_level_0,choice
purpose_model,car_av,Unnamed: 2_level_1
accompany,0,0.241107
accompany,1,0.358839
business,0,0.255747
business,1,0.452851
buy/execute,0,0.183023
buy/execute,1,0.363827
commuting,0,0.239953
commuting,1,0.518339
education,0,0.209677
education,1,0.370278


## Build a choice model

The model is segmented in
* population segments: car availability in household
* trip purposes

The utility formulation for the choice between making a trip within the home zone or to another one is:

$V^{inner}_i= log(\sum_{j\in A} a_{i,j}) \beta^{ATTR}_i + log(\text{POP} / \text{AREA}) \beta^{POP}_i$

$V^{inter}_i=ASC^{inter}_i + ACC \beta^{ACC}_i$

Attraction attributes $a$ are chosen for each demand segment $i$ based on their relevance

In [33]:
# Result containers
betas = pd.DataFrame()
t_tests = pd.DataFrame()
# Save utility functions for later
Vs = {}

car_names = {0: '_no_car', 1: '_car'}
for car in [0,1]:
    # create a database for this population segment
    database = db.Database(car_names[car][1:], trips.loc[trips['car_av']==car].drop(
        ['origin', 'destination', 'purpose_model'], axis=1).copy())
    globals().update(database.variables)
    Vs[car] = {}
    
    # Define parameters
    ASC_0 = ex.Beta('ASC_0', 0, None, None, 1)
    ASC_1 = ex.Beta('ASC_1', 0, None, None, 0)
    b_pop = ex.Beta('b_pop', 0, None, None, 0)
    b_attr = ex.Beta('b_attr', 0, None, None, 0)
    b_acc = ex.Beta('b_acc', 0, None, None, 0)

    # Define the utility formulation by purpose
    Vs[car]['commuting'] = {0:
                            ASC_0
                            + ex.log(1+employment)*b_attr
                            + ex.log((1+population)/area)*b_pop,
                            1:
                            ASC_1
                            + acc_commuting * b_acc}
    Vs[car]['business'] = {0:
                            ASC_0
                            + ex.log(1+employment)*b_attr
                            + ex.log((1+population)/area)*b_pop,
                            1:
                            ASC_1
                            + acc_business * b_acc}
    Vs[car]['education'] = {0:
                            ASC_0
                            + ex.log(1+childcare + school + higher_education)*b_attr
                            + ex.log((1+population)/area)*b_pop,
                            1:
                            ASC_1
                            + acc_education * b_acc}
    Vs[car]['buy/execute'] = {0:
                            ASC_0
                            + ex.log(1+shop + medical + special_shop)*b_attr
                            + ex.log((1+population)/area)*b_pop,
                            1:
                            ASC_1
                            + globals()['acc_buy/execute'] * b_acc}
    Vs[car]['leisure'] = {0:
                            ASC_0
                            + ex.log(1+daily_leisure + holiday + occasional_leisure)*b_attr
                            + ex.log((1+population)/area)*b_pop,
                            1:
                            ASC_1
                            + acc_leisure * b_acc}
    Vs[car]['accompany'] = {0:
                            ASC_0
                            + ex.log(1+medical + school + childcare)*b_attr
                            + ex.log((1+population)/area)*b_pop,
                            1:
                            ASC_1
                            + acc_accompany * b_acc}

    for p in purposes:
        # Run the estimation
        logprob = models.loglogit(Vs[car][p], None, choice)
        formulas = {'loglike': logprob, 'weight': W_GEW} # give weights to the estimator
        model = bio.BIOGEME(database, formulas)
        model.modelName = p + car_names[car]
        model.generate_html = False
        model.generate_pickle = False
        model.saveIterations = False
        results = model.estimate()
        
        # Save results
        betas[model.modelName] = results.getEstimatedParameters()['Value']
        t_tests[model.modelName] = np.abs(results.getEstimatedParameters()['Rob. t-test'])

The sum of the weights (13032.724443093686) is different from the sample size (7498). Multiply the weights by 0.5753209954479892 to reconcile the two.
The sum of the weights (13032.724443093686) is different from the sample size (7498). Multiply the weights by 0.5753209954479892 to reconcile the two.
The sum of the weights (13032.724443093686) is different from the sample size (7498). Multiply the weights by 0.5753209954479892 to reconcile the two.
The sum of the weights (13032.724443093686) is different from the sample size (7498). Multiply the weights by 0.5753209954479892 to reconcile the two.
The sum of the weights (13032.724443093686) is different from the sample size (7498). Multiply the weights by 0.5753209954479892 to reconcile the two.
The sum of the weights (13032.724443093686) is different from the sample size (7498). Multiply the weights by 0.5753209954479892 to reconcile the two.
The sum of the weights (136602.64700843094) is different from the sample size (146102). Multip

In [36]:
# Correct betas for attraction, if negative and insignificant
for seg in segments:
    if betas.loc['b_attr', seg]<0 and t_tests.loc['b_attr', seg]<1.96:
        betas.loc['b_attr', seg] = 0

In [38]:
# Save results
betas.to_csv(input_path + 'estimation_results_inner-inter_betas.csv')
t_tests.to_csv(input_path + 'estimation_results_inner-inter_t_tests.csv')

In [37]:
np.round(betas, 3)

Unnamed: 0,commuting_no_car,business_no_car,education_no_car,buy/execute_no_car,leisure_no_car,accompany_no_car,commuting_car,business_car,education_car,buy/execute_car,leisure_car,accompany_car
ASC_1,-12.887,-9.303,-14.546,0.18,-12.201,-15.644,-8.448,-11.458,-10.713,-9.336,-10.23,-10.045
b_acc,3.91,4.977,1.715,4.435,3.097,0.642,2.3,15.477,7.253,5.857,15.226,3.817
b_attr,0.102,0.053,0.108,0.933,0.0,0.121,0.0,0.0,0.282,0.527,0.656,0.687
b_pop,0.183,0.726,-0.014,0.259,0.467,-0.047,1.294,0.795,0.609,0.718,0.444,0.535


In [35]:
np.round(t_tests, 2)

Unnamed: 0,commuting_no_car,business_no_car,education_no_car,buy/execute_no_car,leisure_no_car,accompany_no_car,commuting_car,business_car,education_car,buy/execute_car,leisure_car,accompany_car
ASC_1,20.87,17.34,34.8,0.11,16.73,52.36,39.12,78.51,81.19,48.89,65.83,64.34
b_acc,289.87,55.43,405.7,10.46,170.58,452.9,189.11,464.02,415.23,251.15,412.48,305.95
b_attr,4.32,0.53,2.94,3.56,1.19,4.24,1.11,0.85,5.79,8.64,18.32,11.23
b_pop,2.48,5.21,0.17,0.53,2.89,0.76,14.8,17.7,16.21,16.98,15.1,14.02


## Calculate choice probabilities in model zones

In [39]:
# Simulate the probabilities from the zones table
inner_probs = pd.DataFrame(index=sm.zones.index)
for p in purposes:
    for car in [0,1]:
        # Add accessibility for this segment to database
        sm.zones['acc_'+p] = sm.zones.merge(cc.xs(car, level='car_av'),
                                            how='left', left_index=True, right_index=True
                                           )[p]
        zones_db = db.Database('zones', sm.zones[
            ['employment', 'population', 'area', 'acc_'+p]
            + list(cats.loc[cats['category'].notna(), 'category'].unique())])
        
        # Simulate
        simulate = {j: models.logit(Vs[car][p], None, int(j))
                    for j in list(trips['choice'].unique())}
        model = bio.BIOGEME(zones_db, simulate)
        model.generate_html = False
        model.generate_pickle = False
        model.saveIterations = False
        probs = model.simulate(theBetaValues=betas[p+car_names[car]].to_dict())
        
        # Calculate generation volumes
        inner_probs[p+car_names[car]] = probs[0]
        #print('{}_{} av. prob. inner: {}'.format(p,car,np.round(probs[0].mean(), 2)))

In [41]:
inner_probs.mean()

commuting_no_car      1.476935e-03
commuting_car         8.870643e-04
business_no_car       8.755168e-04
business_car          8.700239e-04
education_no_car      1.662612e-04
education_car         8.706502e-04
buy/execute_no_car    1.008059e-02
buy/execute_car       8.710829e-04
leisure_no_car        8.714255e-04
leisure_car           8.696684e-04
accompany_no_car      4.311141e-07
accompany_car         8.704971e-04
dtype: float64

## Save results

In [32]:
inner_probs.to_csv(model_path + 'inner-zonal_probabilities.csv')

to_hdf(overwriting): 100%|█████████████████████████████████████████████████████████████| 33/33 [00:08<00:00,  3.80it/s]
