# The nested logit model

## Specification of the utility functions

In [1]:
import pandas as pd
import biogeme.biogeme as bio
import biogeme.database as db
import biogeme.models as models
import biogeme.optimization as opt
from biogeme.expressions import Beta, log
from scipy.stats import chi2

In [2]:
# Swissmetro data file, read as a tab-separated table directly from GitHub
# (this cell therefore needs network access).
url = 'https://raw.githubusercontent.com/michelbierlaire/mooc-discrete-choice/master/swissmetro.dat'
df = pd.read_csv(url, sep='\t')
# Wrap the DataFrame in a Biogeme database named 'swissmetro'.
database = db.Database('swissmetro', df)

# The following statement allows you to use the names of the
# variables as Python variables (CHOICE, AGE, GA, ... in the next cells).
# Note that it writes directly into the notebook's global namespace.
globals().update(database.variables)

In [3]:
# Removing some observations: those for which CHOICE == 0
exclude = CHOICE == 0
database.remove(exclude)

# Dummy variables for segmentation.
# One indicator per AGE code 1..5; the names suggest the age brackets
# (TODO confirm against the data description). For any other AGE code,
# all five indicators are 0.
age_00_24 = AGE == 1
age_25_39 = AGE == 2
age_40_54 = AGE == 3
age_55_65 = AGE == 4
age_65_plus = AGE == 5


# Gender indicators; `1 - MALE` assumes MALE is coded 0/1 -- TODO confirm.
female = 1 - MALE
male = MALE
# Indicator of GA == 0.
noGA = GA == 0

# Class indicators; SECOND_CLASS is the indicator of FIRST == 0.
FIRST_CLASS = FIRST
SECOND_CLASS = FIRST == 0

# Parameters to be estimated.
# Each segmented coefficient is a sum of (parameter * indicator) terms, so
# that one parameter applies per segment.

# Alternative specific constants, segmented by gender
ASC_CAR_MALE = Beta('ASC_CAR_MALE', 0, None, None, 0)
ASC_CAR_FEMALE = Beta('ASC_CAR_FEMALE', 0, None, None, 0)
ASC_CAR = (
    ASC_CAR_MALE * male
    + ASC_CAR_FEMALE * female
)

ASC_TRAIN_MALE = Beta('ASC_TRAIN_MALE', 0, None, None, 0)
ASC_TRAIN_FEMALE = Beta('ASC_TRAIN_FEMALE', 0, None, None, 0)
ASC_TRAIN = (
    ASC_TRAIN_MALE * male
    + ASC_TRAIN_FEMALE * female
)

# Travel time coefficients: one for car, segmented by GA for train and SM
B_TIME_CAR = Beta('B_TIME_CAR', 0, None, None, 0)

B_TIME_TRAIN_GA = Beta('B_TIME_TRAIN_GA', 0, None, None, 0)
B_TIME_TRAIN_noGA = Beta('B_TIME_TRAIN_noGA', 0, None, None, 0)
B_TIME_TRAIN = (
    B_TIME_TRAIN_GA * GA
    + B_TIME_TRAIN_noGA * noGA
)

B_TIME_SM_GA = Beta('B_TIME_SM_GA', 0, None, None, 0)
B_TIME_SM_noGA = Beta('B_TIME_SM_noGA', 0, None, None, 0)
B_TIME_SM = (
    B_TIME_SM_GA * GA
    + B_TIME_SM_noGA * noGA
)

# Cost coefficients, segmented by class (FIRST_CLASS / SECOND_CLASS)
B_COST_CAR_FIRST = Beta('B_COST_CAR_FIRST', 0, None, None, 0)
B_COST_CAR_SECOND = Beta('B_COST_CAR_SECOND', 0, None, None, 0)
B_COST_CAR = (
    B_COST_CAR_FIRST * FIRST_CLASS
    + B_COST_CAR_SECOND * SECOND_CLASS
)

B_COST_TRAIN_FIRST = Beta('B_COST_TRAIN_FIRST', 0, None, None, 0)
B_COST_TRAIN_SECOND = Beta('B_COST_TRAIN_SECOND', 0, None, None, 0)
B_COST_TRAIN = (
    B_COST_TRAIN_FIRST * FIRST_CLASS
    + B_COST_TRAIN_SECOND * SECOND_CLASS
)

B_COST_SM_FIRST = Beta('B_COST_SM_FIRST', 0, None, None, 0)
B_COST_SM_SECOND = Beta('B_COST_SM_SECOND', 0, None, None, 0)
B_COST_SM = (
    B_COST_SM_FIRST * FIRST_CLASS
    + B_COST_SM_SECOND * SECOND_CLASS
)

# Headway coefficients, segmented by the five age indicators
B_HEADWAY_TRAIN_00_24 = Beta('B_HEADWAY_TRAIN_00_24', 0, None, None, 0)
B_HEADWAY_TRAIN_25_39 = Beta('B_HEADWAY_TRAIN_25_39', 0, None, None, 0)
B_HEADWAY_TRAIN_40_54 = Beta('B_HEADWAY_TRAIN_40_54', 0, None, None, 0)
B_HEADWAY_TRAIN_55_65 = Beta('B_HEADWAY_TRAIN_55_65', 0, None, None, 0)
B_HEADWAY_TRAIN_65_plus = Beta('B_HEADWAY_TRAIN_65_plus', 0, None, None, 0)

B_HEADWAY_TRAIN = (
    B_HEADWAY_TRAIN_00_24 * age_00_24
    + B_HEADWAY_TRAIN_25_39 * age_25_39
    + B_HEADWAY_TRAIN_40_54 * age_40_54
    + B_HEADWAY_TRAIN_55_65 * age_55_65
    + B_HEADWAY_TRAIN_65_plus * age_65_plus
)

B_HEADWAY_SM_00_24 = Beta('B_HEADWAY_SM_00_24', 0, None, None, 0)
B_HEADWAY_SM_25_39 = Beta('B_HEADWAY_SM_25_39', 0, None, None, 0)
B_HEADWAY_SM_40_54 = Beta('B_HEADWAY_SM_40_54', 0, None, None, 0)
B_HEADWAY_SM_55_65 = Beta('B_HEADWAY_SM_55_65', 0, None, None, 0)
B_HEADWAY_SM_65_plus = Beta('B_HEADWAY_SM_65_plus', 0, None, None, 0)

B_HEADWAY_SM = (
    B_HEADWAY_SM_00_24 * age_00_24
    + B_HEADWAY_SM_25_39 * age_25_39
    + B_HEADWAY_SM_40_54 * age_40_54
    + B_HEADWAY_SM_55_65 * age_55_65
    + B_HEADWAY_SM_65_plus * age_65_plus
)

# Definition of new variables, grouped by alternative.
# Costs of train and Swissmetro are multiplied by the (GA == 0) indicator,
# i.e. they are 0 whenever GA is not 0. Times and headways are divided
# by 60, costs by 100.
# NOTE(review): TRAIN_HE_SCALED and SM_HE_SCALED are not referenced by the
# utility functions V1/V2 of this cell, which use TRAIN_HE and SM_HE
# directly -- confirm which one is intended.

# Train
TRAIN_COST = TRAIN_CO * (GA == 0)
TRAIN_TT_SCALED = TRAIN_TT / 60
TRAIN_COST_SCALED = TRAIN_COST / 100
TRAIN_HE_SCALED = TRAIN_HE / 60

# Swissmetro
SM_COST = SM_CO * (GA == 0)
SM_TT_SCALED = SM_TT / 60
SM_COST_SCALED = SM_COST / 100
SM_HE_SCALED = SM_HE / 60

# Car
CAR_TT_SCALED = CAR_TT / 60
CAR_COST_SCALED = CAR_CO / 100


def piecewise_cost(x):
    """
    Piecewise linear transformation of a cost variable.

    The variable is split with ``models.piecewiseVariables`` using the
    breakpoints 0, 0.5, 1 and 1.75 (the trailing ``None`` leaves the last
    segment without an upper breakpoint). The first segment enters with a
    coefficient fixed to 1; each of the three remaining segments is
    multiplied by its own ``pw_cost_*`` parameter. Every call uses the same
    three parameter names.

    :param x: expression to transform.
    :return: expression summing the weighted segments.
    """
    segments = models.piecewiseVariables(x, [0, 0.5, 1, 1.75, None])

    # Slopes of the segments beyond the first one.
    slope_05_1 = Beta('pw_cost_0.5_1', 0, None, None, 0)
    slope_1_175 = Beta('pw_cost_1_1.75', 0, None, None, 0)
    slope_175_more = Beta('pw_cost_1.75_more', 0, None, None, 0)

    # Accumulate left to right, starting from the unweighted first segment.
    transformed = segments[0]
    transformed = transformed + slope_05_1 * segments[1]
    transformed = transformed + slope_1_175 * segments[2]
    transformed = transformed + slope_175_more * segments[3]
    return transformed

# Definition of the utility functions.
# Time enters in logarithm, cost through the piecewise linear transform,
# and headway through its square root.
# NOTE(review): headway enters unscaled (TRAIN_HE, SM_HE) while time and
# cost use their *_SCALED versions -- confirm that this is intended.
V1 = (
    ASC_TRAIN
    + B_TIME_TRAIN * log(TRAIN_TT_SCALED)
    + B_COST_TRAIN * piecewise_cost(TRAIN_COST_SCALED)
    + B_HEADWAY_TRAIN * TRAIN_HE**0.5
)

# No alternative specific constant for this alternative.
V2 = (
    B_TIME_SM * log(SM_TT_SCALED)
    + B_COST_SM * piecewise_cost(SM_COST_SCALED)
    + B_HEADWAY_SM * SM_HE**0.5
)

# No headway term for this alternative.
V3 = (
    ASC_CAR
    + B_TIME_CAR * log(CAR_TT_SCALED)
    + B_COST_CAR * piecewise_cost(CAR_COST_SCALED)
)

# Associate utility functions with the numbering of alternatives
V = {
    1: V1,
    2: V2,
    3: V3,
}

# Associate the availability conditions with the alternatives
av = {
    1: TRAIN_AV,
    2: SM_AV,
    3: CAR_AV,
}

## Estimation of a logit model

In [4]:
# Log of the logit probability of the chosen alternative (CHOICE), given
# the utilities V and the availability conditions av.
logprob = models.loglogit(V, av, CHOICE)
biogeme = bio.BIOGEME(database, logprob)
# The model name labels the results ("Results for model logit").
biogeme.modelName = 'logit'
# Estimate the parameters with the opt.bioNewton algorithm.
logit_results = biogeme.estimate(algorithm=opt.bioNewton)

In [5]:
# Headline fit statistics of the logit model: number of parameters,
# sample size, final log likelihood, AIC and BIC.
print(logit_results.shortSummary())

Results for model logit
Nbr of parameters:		28
Sample size:			10719
Excluded data:			9
Final log likelihood:		-7645.798
Akaike Information Criterion:	15347.6
Bayesian Information Criterion:	15551.43



In [6]:
# Table of estimates with classical and robust standard errors,
# t-tests and p-values.
logit_results.getEstimatedParameters()

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_CAR_FEMALE,-1.182429,0.145389,-8.132874,4.440892e-16,0.1562,-7.569989,3.730349e-14
ASC_CAR_MALE,-0.945398,0.138657,-6.818248,9.215739e-12,0.146449,-6.455476,1.07879e-10
ASC_TRAIN_FEMALE,1.422539,0.17967,7.917518,2.442491e-15,0.182264,7.804825,5.995204e-15
ASC_TRAIN_MALE,0.749154,0.174422,4.295066,1.746411e-05,0.17767,4.216539,2.480808e-05
B_COST_CAR_FIRST,-1.527848,0.223407,-6.838866,7.982282e-12,0.221335,-6.902889,5.09548e-12
B_COST_CAR_SECOND,-0.777313,0.15758,-4.932826,8.104831e-07,0.154176,-5.041709,4.613932e-07
B_COST_SM_FIRST,-2.21204,0.316082,-6.998319,2.590594e-12,0.335585,-6.591586,4.351519e-11
B_COST_SM_SECOND,-2.016679,0.273315,-7.378586,1.598721e-13,0.29648,-6.802078,1.03122e-11
B_COST_TRAIN_FIRST,-2.45676,0.342127,-7.18085,6.927792e-13,0.366677,-6.700071,2.083178e-11
B_COST_TRAIN_SECOND,-1.884222,0.269872,-6.981918,2.911671e-12,0.291028,-6.474359,9.521539e-11


## Nested logit

There are three possible ways to partition the choice set into two nests:

- [Car, Train]  and [Swissmetro],
- [Train, Swissmetro] and [Car],
- [Car, Swissmetro] and [Train].

The first one groups existing alternatives together. The second one groups public transportation modes together. The third one being less intuitive, we select the first two specifications.


### Nested logit: existing alternatives

In [7]:
# Nest parameter: starting value 1, lower bound 0, no upper bound.
MU = Beta('MU', 1, 0, None, 0)
# A nest is a (nest parameter, list of alternative ids) pair.
existing = (MU, [1, 3])  # alternatives 1 and 3 share the parameter MU
future = (1.0, [2])      # alternative 2 alone, parameter fixed to 1.0
nests = (existing, future)
# Log of the nested logit probability of the chosen alternative.
logprob = models.lognested(V, av, nests, CHOICE)
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'nested_existing'
nested_existing_results = biogeme.estimate(algorithm=opt.bioNewton)

In [8]:
# Headline fit statistics of the nested logit model (existing alternatives
# nested together): number of parameters, sample size, log likelihood, AIC, BIC.
print(nested_existing_results.shortSummary())

Results for model nested_existing
Nbr of parameters:		29
Sample size:			10719
Excluded data:			9
Final log likelihood:		-7640.153
Akaike Information Criterion:	15338.31
Bayesian Information Criterion:	15549.42



In [9]:
# Keep the table of estimates in a variable: the following cells read the
# MU row from it.
nested_existing_table = nested_existing_results.getEstimatedParameters()
nested_existing_table

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_CAR_FEMALE,-0.990797,0.14515,-6.826006,8.731016e-12,0.156796,-6.319022,2.632246e-10
ASC_CAR_MALE,-0.797239,0.134885,-5.910497,3.410778e-09,0.14271,-5.586415,2.318049e-08
ASC_TRAIN_FEMALE,1.296326,0.175505,7.38628,1.509903e-13,0.179873,7.206912,5.72431e-13
ASC_TRAIN_MALE,0.703357,0.166891,4.214466,2.503702e-05,0.170435,4.126832,3.677945e-05
B_COST_CAR_FIRST,-1.322511,0.216515,-6.108171,1.007793e-09,0.220086,-6.009059,1.866031e-09
B_COST_CAR_SECOND,-0.722826,0.146262,-4.942,7.732512e-07,0.143131,-5.050099,4.415809e-07
B_COST_SM_FIRST,-1.8114,0.301255,-6.01285,1.822895e-09,0.325482,-5.565279,2.617328e-08
B_COST_SM_SECOND,-1.694546,0.264149,-6.415107,1.407241e-10,0.285854,-5.928023,3.066037e-09
B_COST_TRAIN_FIRST,-1.902173,0.327262,-5.812381,6.159037e-09,0.35943,-5.29219,1.208604e-07
B_COST_TRAIN_SECOND,-1.485451,0.255247,-5.81966,5.896758e-09,0.278323,-5.337157,9.441514e-08


The nest parameter is greater than one, which is consistent with the theory.

In [10]:
# Estimated value of the nest parameter: row 'MU', column 'Value' of the
# table of estimates.
mu = nested_existing_table.loc['MU','Value']
mu

1.1885149822854972

To test the null hypothesis that the true value of MU is 1, we use a $t$-test:

In [11]:
# Robust standard error of the MU estimate.
mu_stderr = nested_existing_table.loc['MU', 'Rob. Std err']
# Value of MU under the null hypothesis.
tested_value = 1
# Note: the numerator is (tested value - estimate), so the statistic has the
# opposite sign of the usual (estimate - tested value) form. For a two-sided
# test, only its absolute value matters.
ttest = (tested_value - mu) / mu_stderr
ttest

-2.7645793685745135

The absolute value of the $t$ statistic (2.76) exceeds the critical value 1.96. Therefore, we can reject the null hypothesis at the 5% level. This means that the logit model is rejected.

We can also test the null hypothesis that the two models are equivalent using a likelihood ratio test: 

In [12]:
# Final log likelihoods of the logit model (restricted: MU = 1) and of the
# nested logit model (unrestricted).
LL_logit = logit_results.data.logLike
LL_nested_existing = nested_existing_results.data.logLike
# Likelihood ratio statistic: -2 * (LL restricted - LL unrestricted).
LR = -2 * (LL_logit - LL_nested_existing)
LR

11.290592779776489

Number of degrees of freedom:

In [13]:
# Difference in the number of estimated parameters between the two models.
dof = nested_existing_results.data.nparam - logit_results.data.nparam
dof

1

The threshold value of the $\chi^2$ test with one degree of freedom at the 5% level is:

In [14]:
# isf is the inverse survival function: the value exceeded with probability
# 0.05 by a chi-square variable with `dof` degrees of freedom.
chi2.isf(0.05, dof)

3.8414588206941285

The likelihood ratio statistic (11.29) exceeds this threshold (3.84). Therefore, the null hypothesis can be rejected, and the nested logit model is preferred.

### Nested logit: public transportation modes


In [15]:
# Nest parameter: starting value 1, lower bound 0, no upper bound.
MU = Beta('MU', 1, 0, None, 0)
# A nest is a (nest parameter, list of alternative ids) pair.
public = (MU, [1, 2])  # alternatives 1 and 2 share the parameter MU
private = (1.0, [3])   # alternative 3 alone, parameter fixed to 1.0
nests = (public, private)
# Log of the nested logit probability of the chosen alternative.
logprob = models.lognested(V, av, nests, CHOICE)
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'nested_public'
nested_public_results = biogeme.estimate(algorithm=opt.bioNewton)

In [16]:
# Keep the table of estimates in a variable: the MU row is read from it
# in a later cell.
nested_public_table = nested_public_results.getEstimatedParameters()
nested_public_table

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_CAR_FEMALE,-0.646488,0.19996,-3.23308,0.001224634,0.271346,-2.382528,0.01719424
ASC_CAR_MALE,-0.508096,0.17903,-2.838045,0.004539074,0.233262,-2.178222,0.02938949
ASC_TRAIN_FEMALE,2.278088,0.339101,6.718027,1.842015e-11,0.39066,5.831384,5.496938e-09
ASC_TRAIN_MALE,0.999636,0.288238,3.468092,0.0005241678,0.296444,3.372088,0.0007460058
B_COST_CAR_FIRST,-1.179124,0.239587,-4.921495,8.588569e-07,0.261515,-4.508825,6.518761e-06
B_COST_CAR_SECOND,-0.680224,0.158576,-4.289592,1.790015e-05,0.156337,-4.351012,1.355108e-05
B_COST_SM_FIRST,-1.57612,0.326593,-4.825943,1.393423e-06,0.370333,-4.255956,2.081573e-05
B_COST_SM_SECOND,-1.5334,0.295595,-5.187503,2.131322e-07,0.323589,-4.738733,2.150588e-06
B_COST_TRAIN_FIRST,-1.870259,0.392528,-4.764653,1.89179e-06,0.485269,-3.854068,0.0001161714
B_COST_TRAIN_SECOND,-1.256637,0.328303,-3.827678,0.0001293576,0.454013,-2.767844,0.005642849


The nest parameter is less than 1, which is inconsistent with the theory. The model is therefore rejected.

In [17]:
# Estimated value of the nest parameter MU for the public transportation nest.
nested_public_table.loc['MU','Value']

0.4954951641030181

In conclusion, among the three models, the nested logit model where the existing alternatives are in the same nest is preferred. 