In [1]:
import sys # for automation and parallelisation
manual, scenario = (True, 'base') if 'ipykernel' in sys.argv[0] else (False, sys.argv[1])
if manual:
    %matplotlib inline


Bad key text.latex.preview in file C:\Users\marlin.arnz\AppData\Local\Continuum\miniconda3\envs\quetzal\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle, line 123 ('text.latex.preview : False')
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.5.2/matplotlibrc.template
or from the matplotlib source distribution

Bad key mathtext.fallback_to_cm in file C:\Users\marlin.arnz\AppData\Local\Continuum\miniconda3\envs\quetzal\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle, line 155 ('mathtext.fallback_to_cm : True  # When True, use symbols from the Computer Modern')
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.5.2/matplotlibrc.template
or from the matplotlib source distribution

Bad key savefig.jpeg_quality in file C:\Users\marlin.arnz\AppData\Local\Continuum\miniconda3\envs\quetzal\lib\site-packages\matplotlib\mpl-data\stylelib\

In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from quetzal.model import stepmodel
from quetzal.io import excel

# Calibration
## Preparation of input data
## Saves mean composite cost for inner-zonal trips

In [3]:
input_path = '../input/'
model_path = '../model/'
output_path = '../output/'

In [4]:
# Load scenario parameters
params = excel.read_var(file='../input/parameters.xls', scenario=scenario)

In [5]:
# Load calibration dataset
trips = pd.read_csv(input_path + 'transport_demand/calibration_all_trips_MiD2017.csv')
trips = trips.drop(columns='Unnamed: 0').reset_index(drop=True)
len(trips.index)

319867

In [6]:
# Filter for inner-zonal only
trips = trips.loc[trips['origin']==trips['destination']]

### Composite generalised cost

Calculated for each zone as the logsum of composite cost to every destination zone. Composite cost is the logsum of all available modes. Using the formula from Walker (1977) - see formula 6.10 in Ortúzar and Willumsen (2011) p. 213 - we can assume that the perception of alternatives is depicted properly.

The database should have columns with prices and travel times for each mode of the trip which can be used to calculate utilities of mode choice, given the estimated beta parameters from the mode choice step.

In [8]:
# Load mode choice parameters
beta_time = {} # time in minutes
beta_price = {}
excel = pd.ExcelFile(input_path + 'estimation_results.xls')
p_dict_model = {1:'commuting', 2:'business', 3:'education', 4:'buy/execute', 6:'leisure', 7:'accompany'}
for p, p_str in p_dict_model.items():
    beta_time[p] = {}
    beta_price[p] = {}
    for car, car_str in {1:'_car', 0:'_no_car'}.items():
        params_est = excel.parse(p_str.replace('/', '-')+car_str, index_col=0)
        beta_time[p][car] = params_est.loc['b_t', 'Value']
        try:
            beta_price[p][car] = params_est.loc['b_c', 'Value']
        except KeyError:
            beta_price[p][car] = 0

In [9]:
# Function to get utility of travel time
def time_ut(time, p, car):
    # Given time in minutes and the segment (purpose, car)
    # The fist part of the spline of the mode choice model is a log-power
    return np.power(np.log(time), 3) * beta_time[p][car]

In [10]:
# Function to get utility of price
def price_ut(price, p, car):
    # Given the price and the segment (purpose, car)
    return price * beta_price[p][car]

In [25]:
#function to calculate composite cost for one destination for all inner-zonal observations
p_dict_model_rev = dict(zip(p_dict_model.values(), p_dict_model.keys()))
def get_inner_cc(d, p, car):
    mask = (trips['origin']==d) & (trips['purpose_model']==p) & (trips['car_avail'].replace({9:0})==car)
    if len(trips.loc[mask]) > 0:
        car_av_s = trips.loc[mask, 'car_avail'].replace({9:0})
        p_s = trips.loc[mask, 'purpose_model'].map(p_dict_model_rev)
        t_rail_s = pd.Series([time_ut(t, p, car) if t!=np.inf else np.nan
                              for t,p,car in zip(trips.loc[mask, 'time_rail'], p_s, car_av_s)])
        t_bus_s = pd.Series([time_ut(t, p, car) if t!=np.inf else np.nan
                             for t,p,car in zip(trips.loc[mask, 'time_bus'], p_s, car_av_s)])
        t_car_s = pd.Series([time_ut(t, p, car) if car==1 else np.nan
                             for t,p,car in zip(trips.loc[mask, 'time_car'], p_s, car_av_s)])
        t_non_s = pd.Series([time_ut(t, p, car) if t!=np.inf else np.nan
                             for t,p,car in zip(trips.loc[mask, 'time_walk'], p_s, car_av_s)])
        p_rail_s = pd.Series([price_ut(t, p, car) if t!=np.inf else np.nan
                              for t,p,car in zip(trips.loc[mask, 'cost_rail'], p_s, car_av_s)])
        p_bus_s = pd.Series([price_ut(t, p, car) if t!=np.inf else np.nan
                             for t,p,car in zip(trips.loc[mask, 'cost_bus'], p_s, car_av_s)])
        p_car_s = pd.Series([price_ut(t, p, car) if car==1 else np.nan
                             for t,p,car in zip(trips.loc[mask, 'cost_bus'], p_s, car_av_s)])
        cc_d = sum([np.log(np.maximum(4, np.sum(pd.Series([np.exp(-1*(t_rail+p_rail)),
                                         np.exp(-1*(t_bus+p_bus)),
                                         np.exp(-1*(t_car+p_car)),
                                         np.exp(-1*t_non)
                                        ]))))
                    for t_rail, t_bus, t_car, t_non, p_rail, p_bus, p_car in zip(
                        t_rail_s, t_bus_s, t_car_s, t_non_s, p_rail_s, p_bus_s, p_car_s)
                   ]) / len(trips.loc[mask])
        return cc_d
    else:
        return np.nan

In [26]:
# Calculate CC
origins = list(trips['origin'].unique())
cc = pd.DataFrame(index=origins)
for p in tqdm(list(trips['purpose_model'].unique())):
    for car, car_str in {1:'_car', 0:'_no_car'}.items():
        cc[p+car_str] = pd.Series(index=origins,
                                  data=[get_inner_cc(o, p, car) for o in origins],
                                  dtype=np.float64)
        cc[p+car_str] = cc[p+car_str].fillna(cc[p+car_str].mean())

100%|███████████████████████████████████████████████████████████████████████████████████| 6/6 [13:05<00:00, 130.92s/it]


In [34]:
cc.describe()

Unnamed: 0,buy/execute_car,buy/execute_no_car,commuting_car,commuting_no_car,education_car,education_no_car,leisure_car,leisure_no_car,accompany_car,accompany_no_car,business_car,business_no_car
count,1802.0,1802.0,1802.0,1802.0,1802.0,1802.0,1802.0,1802.0,1802.0,1802.0,1802.0,1802.0
mean,5.083086,5.883757,5.337517,1.990056,14.242078,5.730267,3.556122,2.491868,4.168639,1.883649,2.937218,2.390209
std,0.286349,0.97418,0.366774,0.159469,0.337324,0.358077,0.249183,0.33079,0.151677,0.067431,0.22462,0.141427
min,4.573348,2.512268,4.365681,1.386294,13.649253,3.144647,3.139393,1.533854,3.474428,1.386294,2.622341,1.569895
25%,5.003274,5.883757,5.232167,1.990056,14.152795,5.730267,3.442381,2.491868,4.116967,1.883649,2.894076,2.390209
50%,5.072607,5.883757,5.337517,1.990056,14.242078,5.730267,3.546806,2.491868,4.168639,1.883649,2.937218,2.390209
75%,5.126138,5.883757,5.351048,1.990056,14.242078,5.730267,3.587292,2.491868,4.195362,1.883649,2.937218,2.390209
max,15.031021,16.992756,14.374999,7.275105,22.277186,14.209054,8.022295,10.379989,6.833756,3.144314,6.619084,5.32615


## Save inner-zonal CC

In [28]:
assert cc.isna().any(axis=1).max()==0

In [29]:
cc.to_csv(input_path + 'inner-zonal_cc.csv')