# Nested Logit Model

In [None]:
import pandas as pd
import numpy as np
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import matplotlib.pyplot as plt

**Import Optima data**

In [None]:
# Read the tab-separated Optima file into a DataFrame and hand it to Biogeme.
# Note: the DataFrame is bound to the name `pandas`; the library itself is
# only available as `pd` in this notebook.
pandas = pd.read_csv(
    "data/optima.dat",
    sep="\t",
)
database = db.Database("data/optima", pandas)

**Use column names as variables**

In [None]:
from headers import *

**Exclude some unwanted entries**

In [None]:
# Build the row filter (Choice equal to -1) and drop the matching
# observations from the database.
# NOTE(review): -1 presumably encodes a missing choice — confirm against the
# data dictionary.
exclude = Choice == -1.
database.remove(exclude)

**Define some dummy variables**

In [None]:
# Gender indicators, one per code of the Gender column (1, 2 and -1).
male = Gender == 1
female = Gender == 2
unreportedGender = Gender == -1

# Occupation indicators: code 1 versus every other code of OccupStat.
fulltime = OccupStat == 1
notfulltime = OccupStat != 1

**Rescale some data**

In [None]:
# Divide the raw time, cost and distance variables by fixed constants; the
# *_scaled versions are the ones that enter the utility functions below.
# NOTE(review): the divisors presumably keep the estimated coefficients at
# comparable magnitudes — confirm.

# Travel times (public transport and car) share the same divisor.
TimePT_scaled = TimePT / 200
TimeCar_scaled = TimeCar / 200
# Costs (public transport and car) share the same divisor.
MarginalCostPT_scaled = MarginalCostPT / 10
CostCarCHF_scaled = CostCarCHF / 10
# Trip distance, used only in the V_SM utility.
distance_km_scaled = distance_km / 5

## Let's see what this dataset has

* Estimation of a mode choice behavior model for inhabitants in Switzerland using revealed preference data
* Objective was to show the market potential for combined mobility, especially within agglomerations, taking into account the factors that influence travellers in their choice of transport
* The survey was conducted between 2009 and 2010 for CarPostal, the public transport branch of the Swiss Postal Service
* 1124 completed surveys were collected. For each respondent, cyclic sequences of trips (starting and ending at the same location) were detected and their main transport mode was identified. The resulting database includes 1906 sequences of trips linked with psychometric indicators and socio-economic attributes of the respondents.

![](img/optima_var1.png)

![](img/optima_var2.png)

And many more variables...

In [None]:
# Work on the DataFrame held by the Biogeme database.
pandas = database.data

# Widen the pandas display limits so the summary table is shown in full.
for option, limit in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option, limit)

# Last expression of the cell: the notebook renders the summary statistics.
pandas.describe()

In [None]:
# Histogram of the 'frequency' column, one bar per category code.
# A fresh figure is opened instead of re-activating figure number 1, so this
# plot never draws on top of another one when the notebook runs with a
# non-inline backend or is exported to a script.
plt.figure()
plt.title('Frequency of pairs of trips per day')
data = pandas['frequency']
# Bin edges sit on the half-integers so that every bar is centred on its tick
# label (assumes integer category codes, as the integer tick positions imply).
plt.hist(data, bins=np.arange(data.min(), data.max() + 2) - 0.5)
# Labels and rotation are set in one call; the trailing semicolon keeps the
# notebook from echoing the return value.
plt.xticks(np.arange(4) + 1,
           ('Low',
            'Low-middle',
            'Middle-high',
            'High'),
           rotation=90);

In [None]:
# Histogram of the 'TripPurpose' column, one bar per category code.
# A fresh figure is opened instead of re-activating figure number 1, so this
# plot never draws on top of another one when the notebook runs with a
# non-inline backend or is exported to a script.
plt.figure()
plt.title('Trips purpose')
data = pandas['TripPurpose']
# Bin edges sit on the half-integers so that every bar is centred on its tick
# label (assumes integer category codes, as the integer tick positions imply).
plt.hist(data, bins=np.arange(data.min(), data.max() + 2) - 0.5)
# Ticks run from -1 to 3; position 0 carries an empty label.
# The labels fix the original misspelling of "leisure".
plt.xticks(np.arange(5) - 1,
           ('Missing data',
            '',
            'Work related',
            'Work and leisure related',
            'Leisure related'),
           rotation=90);

In [None]:
# Histogram of the 'ModeToSchool' column, one bar per category code.
# A fresh figure is opened instead of re-activating figure number 1, so this
# plot never draws on top of another one when the notebook runs with a
# non-inline backend or is exported to a script.
plt.figure()
plt.title('Mode of transportation used by the respondent (kid) to go to school')
data = pandas['ModeToSchool']
# Bin edges sit on the half-integers so that every bar is centred on its tick
# label (assumes integer category codes, as the integer tick positions imply).
plt.hist(data, bins=np.arange(data.min(), data.max() + 2) - 0.5)
# Code 0 is not labelled: the ticks jump from -1 straight to 1.
plt.xticks([-2, -1, 1, 2, 3, 4, 5, 6, 7, 8],
           ('Didn\'t answer',
            'Missing Data',
            'Car (passenger)',
            'Train',
            'Public Transportation',
            'Walking',
            'Cycling',
            'Motorbike',
            'Other',
            'Multiple modes'),
           rotation=90);

**Define the utility functions**

\begin{align}
V_{PT} & = \beta_{PT} + \beta_{time_{fulltime}} X_{time_{PT}} X_{fulltime} + \beta_{time_{other}} X_{time_{PT}} X_{not\_fulltime} + \beta_{cost} X_{cost_{PT}} \\
V_{car} & = \beta_{car} + \beta_{time_{fulltime}} X_{time_{car}} X_{fulltime} + \beta_{time_{other}} X_{time_{car}} X_{not\_fulltime} + \beta_{cost} X_{cost_{car}} \\
V_{SM} & = \beta_{SM} + \beta_{male} X_{distance} X_{male} + \beta_{female} X_{distance} X_{female} + \beta_{unreported} X_{distance} X_{unreported}
\end{align}

**Create parameters to be estimated**

In [None]:
# Coefficients that appear in the utility functions V_PT, V_CAR and V_SM.
# `Beta` is not imported explicitly in this notebook; it presumably arrives
# through `from headers import *` — verify.
# NOTE(review): the arguments presumably follow Biogeme's
# Beta(name, initial value, lower bound, upper bound, fixed flag) — confirm
# against the Biogeme version in use.

# Alternative-specific constants; ASC_PT is the only parameter whose last
# argument is 1 (presumably held fixed as the reference — confirm).
ASC_CAR              = Beta('ASC_CAR',0,None,None,0)
ASC_PT               = Beta('ASC_PT',0,None,None,1)
ASC_SM               = Beta('ASC_SM',0,None,None,0)
# Travel-time coefficients, split by the fulltime / notfulltime indicators.
BETA_TIME_FULLTIME   = Beta('BETA_TIME_FULLTIME',0,None,None,0)
BETA_TIME_OTHER      = Beta('BETA_TIME_OTHER',0,None,None,0)
# Distance coefficients, split by the three gender indicators.
BETA_DIST_MALE       = Beta('BETA_DIST_MALE',0,None,None,0)
BETA_DIST_FEMALE     = Beta('BETA_DIST_FEMALE',0,None,None,0)
BETA_DIST_UNREPORTED = Beta('BETA_DIST_UNREPORTED',0,None,None,0)
# Cost coefficient shared by V_PT and V_CAR.
BETA_COST            = Beta('BETA_COST',0,None,None,0)

**Define the utility functions**

In [None]:
# Public transport: constant, time split by occupation status, and cost.
V_PT = (
    ASC_PT
    + BETA_TIME_FULLTIME * TimePT_scaled * fulltime
    + BETA_TIME_OTHER * TimePT_scaled * notfulltime
    + BETA_COST * MarginalCostPT_scaled
)

# Car: same structure as public transport, using the car time and cost.
V_CAR = (
    ASC_CAR
    + BETA_TIME_FULLTIME * TimeCar_scaled * fulltime
    + BETA_TIME_OTHER * TimeCar_scaled * notfulltime
    + BETA_COST * CostCarCHF_scaled
)

# SM: constant plus a distance term with one coefficient per gender indicator.
V_SM = (
    ASC_SM
    + BETA_DIST_MALE * distance_km_scaled * male
    + BETA_DIST_FEMALE * distance_km_scaled * female
    + BETA_DIST_UNREPORTED * distance_km_scaled * unreportedGender
)

**Associate utility functions with alternatives and associate availability of alternatives**

In this example, all alternatives are available to every individual.

In [None]:
# Alternative id -> utility function (0: PT, 1: car, 2: SM).
V = {0: V_PT, 1: V_CAR, 2: V_SM}

# Alternative id -> availability; every alternative gets the constant 1.
av = {alternative: 1 for alternative in V}

**Define the nests**

1. Define the nest parameters
2. List alternatives in nests

In [None]:
# Parameter attached to the nest that groups the non-car alternatives.
# NOTE(review): the arguments presumably read as initial value 1 and lower
# bound 1 — confirm against Biogeme's Beta signature.
MU_NO_CAR = Beta('MU_NO_CAR', 1.0, 1.0, None, 0)

# Each nest is a (nest parameter, list of alternative ids) pair.
# Alternative 1 (car) sits alone in a nest whose parameter is the constant 1.
CAR_NEST = (1.0, [1])
# Alternatives 0 (PT) and 2 (SM) share the nest governed by MU_NO_CAR.
NO_CAR_NEST = (MU_NO_CAR, [0, 2])

nests = (CAR_NEST, NO_CAR_NEST)

**Define the model**

In [None]:
# Expression handed to Biogeme for estimation, built from the utilities,
# the availabilities, the nest structure and the chosen-alternative variable.
logprob = models.lognested(V, av, nests, Choice)

**Define the Biogeme object**

In [None]:
# Combine the database and the log-probability expression into the Biogeme
# object that is estimated in the next cell.
biogeme = bio.BIOGEME(database, logprob)
# The model name is reused as the file-name prefix when the output files are
# collected in the clean-up cell at the end of the notebook.
biogeme.modelName = "optima_nested_logit"

**Estimate the model**

In [None]:
# Switch on both report outputs before estimating.
biogeme.generatePickle = True
biogeme.generateHtml = True

# Run the estimation and keep the results object for the following cells.
results = biogeme.estimate()

# Report the names of the files recorded on the results object.
print(f"HTML file:    {results.data.htmlFileName}")
print(f"Pickle file:  {results.data.pickleFileName}")

**Print results**

In [None]:
# Number of entries in the results' beta vector.
print(f"Estimated betas: {len(results.data.betaValues)}")

# One line per parameter, with the name left-aligned in a 25-character column.
betas = results.getBetaValues()
for k, v in betas.items():
    print(f"{k:<25}= {v}")

**Clean up output files**

In [None]:
import glob
import os

# Move every file whose name starts with the model name into `result_dir`,
# leaving the notebook itself (modelName + '.ipynb') where it is.
result_dir = "results"
result_files = [
    path for path in glob.glob(biogeme.modelName + '*')
    if path != biogeme.modelName + '.ipynb'
]
if result_files:
    # Create the target directory on first use; moving into a missing
    # directory would otherwise raise FileNotFoundError.
    os.makedirs(result_dir, exist_ok=True)
    print('Moving the following files:')
    for result_file in result_files:
        print('\t', result_file)
        # os.replace overwrites a file left by a previous run on every
        # platform, whereas os.rename fails on Windows if the target exists.
        os.replace(result_file, os.path.join(result_dir, result_file))