In [1]:
import sys # for automation and parallelisation
manual, scenario = (True, 'base') if 'ipykernel' in sys.argv[0] else (False, sys.argv[1])

In [2]:
import pandas as pd
import numpy as np
from quetzal.model import stepmodel
from quetzal.io import excel
from scipy.optimize import minimize
from scipy.special import expit  # Sigmoid function

In [3]:
model_path = '../model/' + scenario + '/'
input_path = '../input/'
output_path = '../output/'

In [4]:
# Load parameters for settings
params = excel.read_var(file='../input/parameters.xls', scenario=scenario)

# Calibration: inner/inter-zonal destination choice

## Requires MiD data

## Saves inner-zonal probabilities for zones

Assumptions apply as in `model_generation`

In [5]:
sm = stepmodel.read_json(model_path + 'de_zones')

In [6]:
# Transport demand segments
segments = [s.strip() for s in params['general']['demand_segments'].split(';')]
purposes = [s.split('_')[0] for s in segments][::2]

In [147]:
# Load trips from MiD2017
trips = pd.read_csv(input_path + 'transport_demand/calibration_all_trips_MiD2017.csv',
                    usecols=['purpose_model', 'purpose2', 'car_avail', 'distance',
                             'origin', 'destination', 'P_ID', 'W_GEW'])
trips.rename(columns={'car_avail': 'car_av'}, inplace=True)
trips = trips.loc[trips['car_av'].isin([0,1])]
trips.shape

(254861, 8)

In [148]:
# Drop return trips
trips = trips.loc[~trips['purpose2'].isin([8,9])]
len(trips)

153600

## Add choice attributes

In [149]:
# Add employment to trips
zone_dict = sm.zones['employment'].to_dict()
trips['employment'] = trips['origin'].map(zone_dict)

In [150]:
# Load other attraction POIs
pois = pd.read_csv(input_path + 'spatial_num_pois_raw.csv', index_col='index')
cats = pd.read_excel(input_path + 'spatial_OSM_POI_list.xlsx', sheet_name='categories')
cats['label'] = (cats['key'] + ' ' + cats['value'].fillna('')).str.strip()
for category, columns in cats.loc[cats['category'].notna()
                                 ].groupby('category').agg(
                                {'label': list})['label'].items():
    trips[category] = trips['origin'].map(pois[columns].sum(axis=1)).fillna(0)
    sm.zones[category] = sm.zones['FID'].map(pois[columns].sum(axis=1)).fillna(0)

In [151]:
# Put zone data in trips table (population, area, urbanisation)
trips = trips.merge(sm.zones[['population', 'area', 'urbanisation']],
                    how='inner', # drop trips outside of model zones
                    left_on='origin', right_index=True)
trips.shape

(153600, 21)

In [12]:
# Add accessibility
# Load inter-zonal composite cost from mode choice step
cc = pd.read_csv(output_path + scenario + '/mode_choice_od_composite_cost.csv')
cc.set_index(['origin', 'destination'], inplace=True)
# Rename segments to integer values
cc.columns = pd.MultiIndex.from_tuples(
    [(seg.split('_')[0], {'no': 0, 'car': 1}[seg.split('_')[1]])
     for seg in cc.columns],
    names=['purpose_model', 'car_av'])
# Reshape the table into a mergable format
# Use the mean CC to all destinations as accessibility
cc = cc.unstack('destination').stack('purpose_model').stack('car_av').mean(axis=1)
cc = cc.rename('acc').to_frame()

In [168]:
# Merge
trips['acc'] = trips.set_index(['origin', 'purpose_model', 'car_av']).index.map(cc['acc'])
trips.loc[trips['origin']==trips['destination'], 'acc'] = 0
for p in purposes:
    for car in [0,1]:
        mask = (trips['purpose_model']==p) & (trips['car_av']==car)
        trips.loc[mask, 'acc'] = trips.loc[mask, 'acc'].fillna(trips.loc[mask, 'acc'].max())

In [170]:
trips = trips.loc[trips.notna().all(axis=1)]
trips.shape

(153600, 23)

In [160]:
# Define a choice variable
trips['choice'] = (trips['origin']!=trips['destination']).astype(int)

In [161]:
# Probabilities of making an inner-zonal trip
np.round(1 - trips.groupby(['purpose_model', 'car_av']).apply(
    lambda df: (df['choice'] * df['W_GEW']).mean()), 3)*100

purpose_model  car_av
accompany      0         63.5
               1         63.7
business       0         64.4
               1         62.0
buy/execute    0         68.5
               1         67.9
commuting      0         55.2
               1         49.5
education      0         47.6
               1         61.5
leisure        0         54.1
               1         60.6
dtype: float64

## Build a choice model

The model is segmented in
* population segments: car availability in household
* trip purposes

The utility formulation for the choice between making a trip within the home zone or to another one is:

$V^{inner}_i= log(\sum_{j\in A} a_{i,j}) \beta^{ATTR}_i + log(\text{POP} / \text{AREA}) \beta^{POP}_i$

$V^{inter}_i=ASC^{inter}_i + ACC \beta^{ACC}_i$

Attraction attributes $a$ are chosen for each demand segment $i$ based on their relevance

In [210]:
# Results container
betas = pd.DataFrame(index=['ASC0', 'b_pop', 'b_attr', 'ASC1', 'b_acc'],
                     columns=segments)

# POI categories for utility functions in order of purposes
purpose_categories = [
    ['employment'], ['employment'], ['childcare', 'school', 'higher_education'],
    ['shop', 'medical', 'special_shop'],
    ['daily_leisure', 'occasional_leisure', 'holiday'],
    ['school', 'medical', 'childcare']
]
for p,poi_cats in zip(purposes, purpose_categories):
    for car, car_name in zip([0,1], ['_no_car', '_car']):
        
        # Utility formulation
        data = trips.loc[(trips['car_av']==car) & (trips['purpose_model']==p)].copy()
        data['pop_dens'] = np.log((1 + data['population']) / data['area'])
        data['attr'] = np.log(1 + data[poi_cats].sum(axis=1))
        # Design matrices for the choices
        X0 = np.column_stack([np.zeros(len(data)), data['pop_dens'], data['attr']])  # Choice 0
        X1 = np.column_stack([np.ones(len(data)), data['acc']])  # Choice 1
        
        # Define the log-likelihood function
        def log_likelihood(params, X, y):
            # Parameters for choice 0 and choice 1
            beta0 = params[:X0.shape[1]]
            beta1 = params[X0.shape[1]:]
            # Utility functions
            U0 = np.dot(X0, beta0)
            U1 = np.dot(X1, beta1)
            # Choice probabilities
            # Add a small epsilon to probabilities to avoid log(0)
            epsilon = 1e-10
            P0 = np.clip(expit(U0), epsilon, 1 - epsilon)
            P1 = np.clip(expit(U1), epsilon, 1 - epsilon)
            # Log-likelihood
            ll = np.sum(y * np.log(P1) + (1 - y) * np.log(P0))
            return -ll  # Negative because we will minimize
        
        # Optimize the log-likelihood function
        X = np.hstack([X0, X1])
        y = data['choice']
        initial_params = [0., 1., 1., 0., -1.]
        bounds = [(None,None), (None,None), (0, None), (None,None), (None,0)]
        result = minimize(log_likelihood, initial_params, args=(X, y), method='L-BFGS-B', bounds=bounds)
        #result = minimize(log_likelihood, initial_params, args=(X, y), method='BFGS')
        
        # Output the results
        betas[p+car_name] = result.x
        print("Estimation message for {} (N={}): {}".format(
            p+car_name, len(data), result.message))

Estimation message for commuting_no_car (N=1717): CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
Estimation message for commuting_car (N=28136): CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
Estimation message for business_no_car (N=348): CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
Estimation message for business_car (N=6278): CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
Estimation message for education_no_car (N=372): CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
Estimation message for education_car (N=6803): CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
Estimation message for buy/execute_no_car (N=2415): CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
Estimation message for buy/execute_car (N=51890): CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
Estimation message for leisure_no_car (N=2393): CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
Estimation message for leisure_car (N=38802): CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
Estimation message for acc

In [211]:
np.round(betas, 2)

Unnamed: 0,commuting_no_car,commuting_car,business_no_car,business_car,education_no_car,education_car,buy/execute_no_car,buy/execute_car,leisure_no_car,leisure_car,accompany_no_car,accompany_car
ASC0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b_pop,2.46,4.27,2.9,3.95,2.24,3.98,2.74,3.56,2.3,3.12,2.01,3.94
b_attr,1.77,2.24,1.79,2.23,1.5,2.06,2.33,2.63,2.12,2.57,1.67,1.99
ASC1,30.55,34.93,42.28,24.99,30.88,36.38,23.1,36.84,31.29,27.65,0.0,39.87
b_acc,-0.93,0.0,0.0,-0.89,-0.5,0.0,-0.66,-0.86,-0.9,-0.97,-1.0,-0.69


In [212]:
# Save results
betas.to_csv(input_path + 'estimation_results_inner-inter_betas.csv')

## Calculate choice probabilities in model zones

In [213]:
# Simulate the probabilities from the zones table
inner_probs = pd.DataFrame(index=sm.zones.index)
for p,poi_cats in zip(purposes, purpose_categories):
    for car, car_name in zip([0,1], ['_no_car', '_car']):
        # Add accessibility for this segment to database
        sm.zones['acc'] = sm.zones.index.map(cc.xs(car, level='car_av').xs(p, level='purpose_model')['acc'])
        sm.zones['pop_dens'] = np.log((1 + sm.zones['population']) / sm.zones['area'])
        sm.zones['attr'] = np.log(1 + sm.zones[poi_cats].sum(axis=1))
        # Design matrices for the choices
        X0 = np.column_stack([np.zeros(len(sm.zones)), sm.zones['pop_dens'], sm.zones['attr']])
        X1 = np.column_stack([np.ones(len(sm.zones)), sm.zones['acc']])
        X = np.hstack([X0, X1])
        sm.zones[p+car_name] = np.dot(X, betas[p+car_name])

In [227]:
# Inner-zonal probabilities
sm.zones[segments].mean()

commuting_no_car      inf
commuting_car         inf
business_no_car       inf
business_car          inf
education_no_car      inf
education_car         inf
buy/execute_no_car    inf
buy/execute_car       inf
leisure_no_car        inf
leisure_car           inf
accompany_no_car      inf
accompany_car         inf
dtype: float64

## Save results

In [24]:
inner_probs.to_csv(model_path + 'inner-zonal_probabilities.csv')