In [1]:
import sys # for automation and parallelisation
manual, scenario = (True, 'base') if 'ipykernel' in sys.argv[0] else (False, sys.argv[1])

In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm

# Calibration
## Preparation of input data
## Saves mean composite cost for inner-zonal trips

In [3]:
input_path = '../input/'
model_path = '../model/'
output_path = '../output/'

In [4]:
# Load calibration dataset
trips = pd.read_csv(input_path + 'transport_demand/calibration_all_trips_MiD2017.csv')
trips = trips.drop(columns='Unnamed: 0').reset_index(drop=True)
len(trips.index)

254876

In [7]:
# Filter for inner-zonal only
trips = trips.loc[trips['origin']==trips['destination']]
len(trips)

150187

### Composite generalised cost

Calculated for each zone as the logsum of composite cost to every destination zone. Composite cost is the logsum of all available modes. Using the formula from Walker (1977) - see formula 6.10 in Ortúzar and Willumsen (2011) p. 213 - we can assume that the perception of alternatives is depicted properly.

The database should have columns with prices and travel times for each mode of the trip which can be used to calculate utilities of mode choice, given the estimated beta parameters from the mode choice step.

In [8]:
# Load mode choice parameters
beta_time = {} # time in minutes
beta_price = {}
excel = pd.ExcelFile(input_path + 'estimation_results_mode.xlsx')
p_dict_model = {1:'commuting', 2:'business', 3:'education', 4:'shopping', 5:'errands', 6:'leisure', 7:'accompany'}
for p, p_str in p_dict_model.items():
    beta_time[p] = {}
    beta_price[p] = {}
    for car, car_str in {1:'_car', 0:'_no_car'}.items():
        params_est = excel.parse(p_str.replace('/', '-')+car_str, index_col=0)
        beta_time[p][car] = params_est.loc['b_t', 'Value']
        try:
            beta_price[p][car] = params_est.loc['b_c', 'Value']
        except KeyError:
            beta_price[p][car] = 0

In [9]:
# Function to get utility of travel time
def time_ut(time, p, car):
    # Given time in minutes and the segment (purpose, car)
    # The fist part of the spline of the mode choice model is a log-power
    return np.power(np.log(time), 3) * beta_time[p][car]

In [10]:
# Function to get utility of price
def price_ut(price, p, car):
    # Given the price and the segment (purpose, car)
    return price * beta_price[p][car]

In [12]:
#function to calculate composite cost for one destination for all inner-zonal observations
p_dict_model_rev = dict(zip(p_dict_model.values(), p_dict_model.keys()))
def get_inner_cc(d, p, car):
    mask = (trips['origin']==d) & (trips['purpose_model']==p) & (trips['car_avail'].replace({9:0})==car)
    if len(trips.loc[mask]) > 0:
        car_av_s = trips.loc[mask, 'car_avail'].replace({9:0})
        p_s = trips.loc[mask, 'purpose_model'].map(p_dict_model_rev)
        t_rail_s = pd.Series([time_ut(t, p, car) if t!=np.inf else np.nan
                              for t,p,car in zip(trips.loc[mask, 'time_rail_short'], p_s, car_av_s)])
        t_bus_s = pd.Series([time_ut(t, p, car) if t!=np.inf else np.nan
                             for t,p,car in zip(trips.loc[mask, 'time_bus'], p_s, car_av_s)])
        t_car_s = pd.Series([time_ut(t, p, car) if car==1 else np.nan
                             for t,p,car in zip(trips.loc[mask, 'time_car'], p_s, car_av_s)])
        t_non_s = pd.Series([time_ut(t, p, car) if t!=np.inf else np.nan
                             for t,p,car in zip(trips.loc[mask, 'time_walk'], p_s, car_av_s)])
        p_rail_s = pd.Series([price_ut(t, p, car) if t!=np.inf else np.nan
                              for t,p,car in zip(trips.loc[mask, 'cost_rail_short'], p_s, car_av_s)])
        p_bus_s = pd.Series([price_ut(t, p, car) if t!=np.inf else np.nan
                             for t,p,car in zip(trips.loc[mask, 'cost_bus'], p_s, car_av_s)])
        p_car_s = pd.Series([price_ut(t, p, car) if car==1 else np.nan
                             for t,p,car in zip(trips.loc[mask, 'cost_bus'], p_s, car_av_s)])
        cc_d = sum([np.log(np.maximum(4, np.sum(pd.Series([np.exp(-1*(t_rail+p_rail)),
                                             np.exp(-1*(t_bus+p_bus)),
                                             np.exp(-1*(t_car+p_car)),
                                             np.exp(-1*t_non)])
                                                )
                                     ))
                    for t_rail, t_bus, t_car, t_non, p_rail, p_bus, p_car in zip(
                        t_rail_s, t_bus_s, t_car_s, t_non_s, p_rail_s, p_bus_s, p_car_s)
                   ]) / len(trips.loc[mask])
        return cc_d
    else:
        return np.nan

In [13]:
# Calculate CC
origins = list(trips['origin'].unique())
cc = pd.DataFrame(index=origins)
for p in tqdm(list(trips['purpose_model'].unique())):
    for car, car_str in {1:'_car', 0:'_no_car'}.items():
        cc[p+car_str] = pd.Series(index=origins,
                                  data=[get_inner_cc(o, p, car) for o in origins],
                                  dtype=np.float64)
        #cc[p+car_str] = cc[p+car_str].fillna(cc[p+car_str].mean())

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [07:36<00:00, 76.09s/it]


In [14]:
cc.describe()

Unnamed: 0,buy/execute_car,buy/execute_no_car,commuting_car,commuting_no_car,education_car,education_no_car,leisure_car,leisure_no_car,accompany_car,accompany_no_car,business_car,business_no_car
count,2247.0,285.0,1661.0,122.0,1031.0,52.0,1969.0,226.0,1406.0,59.0,793.0,55.0
mean,3.362196,4.849161,3.200528,1.386294,4.096181,2.828804,2.105372,1.935136,3.872617,4.307274,1.987326,2.356271
std,0.691329,2.171773,0.384798,5.321572e-16,1.215077,0.622975,0.27558,0.376588,0.753932,1.474951,0.258315,0.536175
min,1.925299,2.009875,1.574972,1.386294,2.092351,2.055569,1.571192,1.386294,1.902731,1.80052,1.532196,1.543274
25%,2.792856,3.291623,2.954582,1.386294,3.069591,2.375462,1.884555,1.668061,3.267248,3.507575,1.796729,1.995571
50%,3.20336,4.140688,3.177505,1.386294,3.876979,2.735073,2.051825,1.801126,3.665493,4.10805,1.931529,2.284375
75%,3.973411,5.672564,3.424181,1.386294,5.044899,2.950557,2.326619,2.157916,4.470173,4.933013,2.164555,2.559131
max,5.500265,10.685294,4.568394,1.386294,7.658,4.888317,3.008337,3.251892,6.158476,8.911151,2.88932,3.976674


## Save inner-zonal CC

In [17]:
#assert cc.isna().any(axis=1).max()==0

In [18]:
cc.to_csv(input_path + 'inner-zonal_cc.csv')