Consider the logit model presented below. 

1. Include error components to obtain a model with alternative-specific variances. 
2. Estimate the model without any normalization.
3. Identify the scale parameter that must be normalized to zero. 
4. Compare the entries of the variance-covariance matrix for both models.
5. Perform the same analysis when the wrong scale parameter is normalized to zero.

In [1]:
import pandas as pd
import pickle
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.results as res
import biogeme.models as models
from biogeme.expressions import Beta, log, bioDraws, MonteCarlo

# The data

In [2]:
# Read the raw data file into a DataFrame (read_table splits fields on tabs
# by default).
data = pd.read_table('airline.dat')
# Show the column labels as the cell output.
data.columns

Index(['SubjectId', 'OriginGMT', 'DestinationGMT', 'Direction',
       'q02_TripPurpose', 'q03_WhoPays', 'q11_DepartureOrArrivalIsImportant',
       'q12_IdealDepTime', 'q13_IdealArrTime', 'q14_PartySize', 'q15_Age',
       'q16_Income', 'Cont_Income', 'q17_Gender', 'q19_Occupation',
       'q20_Education', 'AirlineFirstFlight_1', 'AirlineFirstFlight_2',
       'AirlineFirstFlight_3', 'AirlineSecondFlight_1',
       'AirlineSecondFlight_2', 'AirlineSecondFlight_3',
       'AirplaneFirstFlight_1', 'AirplaneFirstFlight_2',
       'AirplaneFirstFlight_3', 'AirplaneSecondFlight_1',
       'AirplaneSecondFlight_2', 'AirplaneSecondFlight_3',
       'DepartureTimeHours_1', 'DepartureTimeHours_2', 'DepartureTimeHours_3',
       'DepartureTimeMins_1', 'DepartureTimeMins_2', 'DepartureTimeMins_3',
       'ArrivalTimeHours_1', 'ArrivalTimeHours_2', 'ArrivalTimeHours_3',
       'ArrivalTimeMins_1', 'ArrivalTimeMins_2', 'ArrivalTimeMins_3',
       'FlyingTimeHours_1', 'FlyingTimeHours_2', 'FlyingTi

In [3]:
# Wrap the DataFrame in a Biogeme database labelled 'airline'.
database = db.Database('airline',data)
# Publish every entry of database.variables as a notebook-level name, so the
# data columns can be referenced directly by name in later expressions.
globals().update(database.variables)
# Discard the observations for which ArrivalTimeHours_1 equals -1
# (presumably -1 codes a missing arrival time -- TODO confirm against the
# data description). Note that this modifies the database in place.
exclude = (ArrivalTimeHours_1 == -1)
database.remove(exclude)

# The model 

Variables

In [4]:
def _schedule_delay(departure_mins, arrival_mins):
    """Signed gap between an alternative's schedule and the ideal schedule.

    The departure gap is counted when the respondent is departure-time
    sensitive, the arrival gap when the respondent is arrival-time
    sensitive. When neither indicator is set, both terms vanish.
    """
    departure_gap = departure_mins - DesiredDepartureTime
    arrival_gap = arrival_mins - DesiredArrivalTime
    return (
        DepartureTimeSensitive * departure_gap +
        ArrivalTimeSensitive * arrival_gap
    )


def _early_part(delay):
    """Magnitude of a negative delay, divided by 60; zero otherwise."""
    return -delay * (delay < 0) / 60


def _late_part(delay):
    """Value of a positive delay, divided by 60; zero otherwise."""
    return delay * (delay > 0) / 60


# Identifier of the chosen alternative, obtained by weighting each
# BestAlternative_k column by k (assumes exactly one of them equals 1 per
# observation -- TODO confirm).
chosenAlternative = (
    BestAlternative_1 * 1 + BestAlternative_2 * 2 + BestAlternative_3 * 3
)

# Respondent segments derived from the answer to question 11.
DepartureTimeSensitive = (q11_DepartureOrArrivalIsImportant == 1)
ArrivalTimeSensitive = (q11_DepartureOrArrivalIsImportant == 2)
# Flags answers that are neither 1 nor 2.
Missing = (
    (q11_DepartureOrArrivalIsImportant != 1) *
    (q11_DepartureOrArrivalIsImportant != 2)
)

# Ideal departure and arrival times stated by the respondent.
DesiredDepartureTime = q12_IdealDepTime
DesiredArrivalTime = q13_IdealArrTime

# Signed schedule delay of each alternative.
ScheduledDelay_1 = _schedule_delay(DepartureTimeMins_1, ArrivalTimeMins_1)
ScheduledDelay_2 = _schedule_delay(DepartureTimeMins_2, ArrivalTimeMins_2)
ScheduledDelay_3 = _schedule_delay(DepartureTimeMins_3, ArrivalTimeMins_3)

# Early component of each delay (presumably minutes converted to hours).
Opt1_SchedDelayEarly = _early_part(ScheduledDelay_1)
Opt2_SchedDelayEarly = _early_part(ScheduledDelay_2)
Opt3_SchedDelayEarly = _early_part(ScheduledDelay_3)

# Late component of each delay, on the same scale as the early component.
Opt1_SchedDelayLate = _late_part(ScheduledDelay_1)
Opt2_SchedDelayLate = _late_part(ScheduledDelay_2)
Opt3_SchedDelayLate = _late_part(ScheduledDelay_3)

Parameters

In [5]:
def _free_beta(name):
    """Create the parameter `name` with the settings shared by all betas here.

    Every parameter of this model is built with the same arguments after the
    name: starting value 0, no lower bound, no upper bound, and status 0
    (in Biogeme, status 0 marks a parameter to be estimated).
    """
    return Beta(name, 0, None, None, 0)


# Alternative specific constants (alternative 1 has none).
Constant2 = _free_beta('Constant2')
Constant3 = _free_beta('Constant3')

# Coefficients shared by the three alternatives.
Fare = _free_beta('Fare')
Legroom = _free_beta('Legroom')
SchedDE = _free_beta('SchedDE')
SchedDL = _free_beta('SchedDL')

# One trip-time coefficient per alternative.
Total_TT1 = _free_beta('Total_TT1')
Total_TT2 = _free_beta('Total_TT2')
Total_TT3 = _free_beta('Total_TT3')

Utility functions.

In [6]:
def _utility(fare, legroom, early, late, time_coef, trip_time, constant=None):
    """Assemble the utility expression of one alternative.

    The terms are, in order: the optional constant, fare, legroom, early
    schedule delay, late schedule delay, and trip time weighted by the
    coefficient `time_coef`.
    """
    terms = [
        Fare * fare,
        Legroom * legroom,
        SchedDE * early,
        SchedDL * late,
        time_coef * trip_time,
    ]
    if constant is not None:
        terms.insert(0, constant)
    # Accumulate left to right so that the sum is grouped exactly like a
    # plain `a + b + c + ...` chain.
    utility = terms[0]
    for term in terms[1:]:
        utility = utility + term
    return utility


# Alternative 1 carries no constant.
Opt1 = _utility(
    Fare_1, Legroom_1,
    Opt1_SchedDelayEarly, Opt1_SchedDelayLate,
    Total_TT1, TripTimeHours_1,
)
Opt2 = _utility(
    Fare_2, Legroom_2,
    Opt2_SchedDelayEarly, Opt2_SchedDelayLate,
    Total_TT2, TripTimeHours_2,
    constant=Constant2,
)
Opt3 = _utility(
    Fare_3, Legroom_3,
    Opt3_SchedDelayEarly, Opt3_SchedDelayLate,
    Total_TT3, TripTimeHours_3,
    constant=Constant3,
)

# Utility of each alternative, keyed by the alternative identifier.
V = {1: Opt1, 2: Opt2, 3: Opt3}

# Estimation of the logit model

In [7]:
# Log of the logit probability of the chosen alternative, given the utilities
# in V. None is passed as the availability argument (presumably meaning that
# every alternative is always available -- TODO confirm).
logprob = models.loglogit(V, None, chosenAlternative)
# Estimation object combining the database and the log likelihood expression.
biogeme  = bio.BIOGEME(database, logprob)

In [8]:
# Estimate the parameters of the logit model and keep the results object.
results = biogeme.estimate()

In [9]:
# Print the summary statistics of the estimation (log likelihood, fit
# measures, ...).
print(results.printGeneralStatistics())

Number of estimated parameters:	9
Sample size:	3609
Excluded observations:	0
Init log likelihood:	-2475.905
Final log likelihood:	-2320.447
Likelihood ratio test for the init. model:	310.9157
Rho-square for the init. model:	0.0628
Rho-square-bar for the init. model:	0.0592
Akaike Information Criterion:	4658.894
Bayesian Information Criterion:	4714.615
Final gradient norm:	1.1591E-02
Nbr of threads:	16



In [10]:
# Table of estimated parameters with their standard errors and tests,
# displayed as the cell output.
betas = results.getEstimatedParameters()
betas

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
Constant2,-1.429221,0.190335,-7.50896,5.950795e-14,0.182916,-7.813539,5.551115e-15
Constant3,-1.640739,0.198969,-8.246209,2.220446e-16,0.192298,-8.532286,0.0
Fare,-0.019294,0.0007,-27.580161,0.0,0.000802,-24.047321,0.0
Legroom,0.225589,0.02497,9.034392,0.0,0.026685,8.453921,0.0
SchedDE,-0.139382,0.015585,-8.943294,0.0,0.016341,-8.529672,0.0
SchedDL,-0.104223,0.012783,-8.15323,4.440892e-16,0.013738,-7.586558,3.28626e-14
Total_TT1,-0.331974,0.072128,-4.602604,4.172414e-06,0.073501,-4.516592,6.284295e-06
Total_TT2,-0.298833,0.069722,-4.286072,1.818597e-05,0.069634,-4.291493,1.774757e-05
Total_TT3,-0.301675,0.070035,-4.307481,1.651244e-05,0.069931,-4.313891,1.604057e-05
