In [None]:
import pandas as pd
import numpy as np
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.messaging as message
from biogeme.expressions import Beta

# Intra-zonal trips
## Parameter estimation
Intra-zonal trips cannot be assigned with the common methods of transport modelling. Instead, a Logit regression based on zonal attributes is required.

In [None]:
# Directory layout, relative to the location of this notebook
input_path, output_path, model_path = '../input/', '../output/', '../model/'

### Model formulation
The Logit regression model consists of observable utility functions, one for each mode j and demand group i:
> V_ij = ASC_ij + b_ac_ij * AC_j + b_pop_ij * POP + b_cars_ij * CARS

With explanatory variables
* AC: accessibility as average distance to and from PT stops in km, or a binary availability indicator for the car
* POP: population density
* CARS: car ownership density
* ROADS: road density in km/km² -- not implemented
* INCOME: household income -- not implemented

Index i marks the demand group. I = {'commuting' (1), 'education' (2), 'shopping/medical' (3), 'official' (4), 'private' (6)}

In [None]:
# Read the calibration sample of intra-cellular trips (MiD2017 file)
calibration_file = input_path + 'transport_demand/calibration_intra-cellular_trips_MiD2017.csv'
df = pd.read_csv(calibration_file)
# Report (rows, columns) as a quick sanity check of the load
print(df.shape)

In [None]:
# Translate the source column names into the short upper-case names that
# the model specification uses further down
col_dict = {
    'mode_model': 'MODE',
    'purpose_vp': 'PURPOSE',
    'pop_density': 'POP',
    'car_density': 'CARS',
    'accessibility_rail': 'AC_RAIL',
    'accessibility_bus': 'AC_BUS',
    'accessibility_car': 'AC_CAR',
    'accessibility_walk': 'AC_NM',
}
df = df.rename(columns=col_dict)

In [None]:
# Keep only the columns named as targets in col_dict; all others are unused
df = df[list(col_dict.values())]

In [None]:
# Car trips (MODE 6) observed with a car availability of zero are
# excluded because they irritate the MLE algorithm
is_car_trip = df['MODE'] == 6
mask = is_car_trip & (df['AC_CAR'] == 0)
# Share is computed before filtering so it refers to all car trips
share_dropped = len(df.loc[mask]) / len(df.loc[is_car_trip])
df = df.loc[~mask]
print('Share of car trips dropped: {}. New number of observations is {}'.format(
    share_dropped, len(df)))

### Build the calibration model with Biogeme

In [None]:
# Wrap a copy of the calibration data in a Biogeme database named 'MiD'
database = db.Database('MiD', df.copy())
# Publish the database's variables as notebook-level names so that later
# cells can refer to AC_RAIL, POP, CARS, MODE, PURPOSE, ... directly
# (presumably one Biogeme expression per data column -- TODO confirm)
globals().update(database.variables)
# Last expression of the cell: displays the number of observations
database.getSampleSize()

In [None]:
# Define Betas, grouped by mode.
# Beta arguments: name, starting value, lower bound, upper bound, status.
# Every parameter starts at 0 without bounds. Only asc_car carries status 1
# (NOTE(review): presumably held fixed as the reference alternative --
# confirm against the Biogeme documentation).

# Rail
asc_rail = Beta('asc_rail', 0, None, None, 0)
b_ac_rail = Beta('b_ac_rail', 0, None, None, 0)
b_pop_rail = Beta('b_pop_rail', 0, None, None, 0)
b_cars_rail = Beta('b_cars_rail', 0, None, None, 0)

# Bus
asc_bus = Beta('asc_bus', 0, None, None, 0)
b_ac_bus = Beta('b_ac_bus', 0, None, None, 0)
b_pop_bus = Beta('b_pop_bus', 0, None, None, 0)
b_cars_bus = Beta('b_cars_bus', 0, None, None, 0)

# Car
asc_car = Beta('asc_car', 0, None, None, 1)
b_ac_car = Beta('b_ac_car', 0, None, None, 0)
b_pop_car = Beta('b_pop_car', 0, None, None, 0)
b_cars_car = Beta('b_cars_car', 0, None, None, 0)

# Non-motorised
asc_nm = Beta('asc_nm', 0, None, None, 0)
b_ac_nm = Beta('b_ac_nm', 0, None, None, 0)
b_pop_nm = Beta('b_pop_nm', 0, None, None, 0)
b_cars_nm = Beta('b_cars_nm', 0, None, None, 0)

In [None]:
# Parameter for the nested logit structure: used below as the parameter of
# the public transport nest; start value 1, lower bound 1, upper bound 10
mu_pt = Beta('mu_pt', 1, 1, 10, 0)

In [None]:
# Utility functions, one per mode: alternative-specific constant plus
# mode-specific coefficients on the mode's accessibility (AC_*), the
# population density (POP) and the car ownership density (CARS)
V_RAIL = asc_rail + b_ac_rail * AC_RAIL + b_pop_rail * POP + b_cars_rail * CARS
V_BUS = asc_bus + b_ac_bus * AC_BUS + b_pop_bus * POP + b_cars_bus * CARS
V_CAR = asc_car + b_ac_car * AC_CAR + b_pop_car * POP + b_cars_car * CARS
V_NM = asc_nm + b_ac_nm * AC_NM + b_pop_nm * POP + b_cars_nm * CARS

In [None]:
# Define level of verbosity of the Biogeme messages.
# One level is active at a time; move the comment marker to switch.
logger = message.bioMessage()
#logger.setSilent()
logger.setWarning()  # active level
#logger.setGeneral()
#logger.setDetailed()

In [None]:
# Utility function of each alternative, keyed by the mode ID used in MODE.
# IDs 1 and 2 share the rail utility.
V = {
    1: V_RAIL,
    2: V_RAIL,
    4: V_BUS,
    6: V_CAR,
    7: V_NM,
}

In [None]:
# Availability of each alternative, keyed by the mode ID used in MODE.
# Every alternative is always available (1) ...
av = dict.fromkeys((1, 2, 4, 6, 7), 1)
# ... except the car, whose availability is taken from the AC_CAR variable
av[6] = AC_CAR

In [None]:
# Mode nests: one tuple per nest, holding the nest parameter and the list
# of alternative IDs that belong to the nest
nests = (
    (mu_pt, [1, 2, 4]),  # PT: estimated nest parameter mu_pt
    (1, [6]),            # Car: nest parameter set to 1
    (1, [7]),            # Non-motorised: nest parameter set to 1
)

In [None]:
# Choose the logarithmic nested logit model, built from the utilities V,
# the availabilities av, the nest structure and the observed choice MODE
nl = models.lognested(V, av, nests, MODE)

In [None]:
# All purposes: estimate one nested logit model on the whole database,
# without splitting the sample by purpose
model_nl = bio.BIOGEME(database, nl)
model_nl.modelName = 'NL'  # reused as the Excel sheet name further down
results = model_nl.estimate()

In [None]:
# Write results to a file: open the Excel workbook that receives one sheet
# per estimated model.
# NOTE(review): the workbook is created under input_path although
# output_path is defined -- confirm that this location is intended.
writer = pd.ExcelWriter(input_path + 'estimation_results_inner_cell.xlsx', engine='xlsxwriter')

In [None]:
# Table of estimated parameters, extended by the general statistics of the
# estimation as additional rows
params = results.getEstimatedParameters()
# A statistic fills the first two columns; the rest is padded with ''
n_padding = len(params.columns) - 2
for stat_name, stat in results.getGeneralStatistics().items():
    params.loc[stat_name] = [stat[0], stat[1]] + [''] * n_padding
params

In [None]:
# Store the table of the all-purposes model on a sheet named after the model
params.to_excel(writer, sheet_name=model_nl.modelName)

In [72]:
# Run all purposes: estimate the nested logit separately for every demand
# group and add one sheet per purpose to the Excel workbook.
# NOTE(review): the stored output of this cell shows a biogemeError raised
# during the estimation for purpose 1 -- verify that the loop runs through.
results = []
for p in (1, 2, 3, 4, 6):
    # Fresh database restricted to the trips of purpose p
    database = db.Database('MiD2017', df.copy())
    database.remove(PURPOSE != p)
    print('Sample size for purpose {}: {}'.format(p, database.getSampleSize()))
    # Same model formulation for every purpose, named after the purpose ID
    model = bio.BIOGEME(database, nl)
    model.modelName = 'NL_Fz' + str(p)
    estimation = model.estimate()
    results.append(estimation)
    # Parameter table with the general statistics appended as extra rows;
    # a statistic fills the first two columns, the rest is padded with ''
    output = estimation.getEstimatedParameters()
    for stat_name, stat in estimation.getGeneralStatistics().items():
        output.loc[stat_name] = [stat[0], stat[1]] + [''] * (len(output.columns) - 2)
    output.to_excel(writer, sheet_name=model.modelName)

Sample size for purpose 1: 42414


biogemeError: [-2.93451366e+00 -3.27530464e+00 -4.46636234e+00 -1.14221702e-06
  3.52581777e-04  4.38561872e-04  1.68587320e-02 -5.22827464e-09
  1.00000000e+00] is not feasible for the bounds [(-2.934513664784777, -2.934513664784776), (-3.275304642076927, -3.275304642076926), (-4.466362344738283, -4.466362344738283), (-1.1422170167982762e-06, -1.1422170161642509e-06), (0.00035258177677297323, 0.0003525817767736073), (0.00043856187182814057, 0.0004385618718287746), (0.016858732002223962, 0.016858732002224593), (-5.228272444991406e-09, -5.228271810966256e-09), (1.0, 1.0000000000000002)]

In [None]:
# Finalise the workbook. ExcelWriter.save() was deprecated in pandas 1.5 and
# removed in pandas 2.0; close() writes the file to disk and releases the
# file handle, and is also available in the older pandas versions.
writer.close()