# Project Testing

## Imports

In [19]:
from IPython.core.display_functions import display
from scipy.stats import chi2

# Biogeme
import biogeme.biogeme as bio
from biogeme.models import lognested, logcnl

# Results
from models.logit_lmpc12_model0 import results as res_mod0
from models.logit_lmpc12_model1 import results as res_mod1
from models.logit_lmpc12_model2 import results as res_mod2
from models.logit_lmpc12_model2bis import results as res_mod2bis
from models.logit_lmpc12_model3 import results as res_mod3
from models.logit_lmpc12_model4 import nests, nests_cross

# For the model 4
from models.logit_lmpc12_model3 import V_3 as V
from models.logit_lmpc12_model3 import chosen_alternative as choice
from models.logit_lmpc12_model3 import database



Informations about the data [here](https://transp-or.epfl.ch/documents/technicalReports/CS_LPMC.pdf).

## Base Model (Model 0)

In [2]:
print(res_mod0.print_general_statistics())

Number of estimated parameters:	5
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4682.809
Likelihood ratio test for the init. model:	4497.326
Rho-square for the init. model:	0.324
Rho-square-bar for the init. model:	0.324
Akaike Information Criterion:	9375.618
Bayesian Information Criterion:	9408.204
Final gradient norm:	4.7836E-04
Nbr of threads:	4



In [3]:
display(res_mod0.get_estimated_parameters())

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
beta_fare,-0.218834,0.016079,-13.609492,0.0
beta_travel_time,-4.912494,0.19688,-24.951709,0.0
constant_2,-3.730648,0.109509,-34.066896,0.0
constant_3,-0.330065,0.05525,-5.974008,2.314947e-09
constant_4,-1.076784,0.080742,-13.336125,0.0


## Alternative Specific Constant Model (Model 1)

In [4]:
print(res_mod1.print_general_statistics())

Number of estimated parameters:	8
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4238.158
Likelihood ratio test for the init. model:	5386.629
Rho-square for the init. model:	0.389
Rho-square-bar for the init. model:	0.387
Akaike Information Criterion:	8492.315
Bayesian Information Criterion:	8544.453
Final gradient norm:	5.7375E-03
Nbr of threads:	4



In [5]:
display(res_mod1.get_estimated_parameters())

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
beta_fare,-0.219858,0.019544,-11.249152,0.0
beta_travel_time_1,-8.786268,0.38875,-22.601346,0.0
beta_travel_time_2,-6.713743,0.611134,-10.985718,0.0
beta_travel_time_3,-3.176268,0.247092,-12.854589,0.0
beta_travel_time_4,-6.527898,0.399929,-16.322637,0.0
constant_2,-4.545392,0.202842,-22.408566,0.0
constant_3,-2.526224,0.131769,-19.171661,0.0
constant_4,-2.0074,0.128415,-15.632118,0.0


## Socioeconomic Model (Models 2 and 2 bis)

### Purpose of the Trip (Model 2)

In [6]:
print(res_mod2.print_general_statistics())

Number of estimated parameters:	12
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4199.163
Likelihood ratio test for the init. model:	5464.618
Rho-square for the init. model:	0.394
Rho-square-bar for the init. model:	0.392
Akaike Information Criterion:	8422.325
Bayesian Information Criterion:	8500.532
Final gradient norm:	7.7964E-02
Nbr of threads:	4



In [7]:
display(res_mod2.get_estimated_parameters())

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
beta_fare,-0.232366,0.02009,-11.566286,0.0
beta_travel_time,-7.930956,0.441479,-17.964508,0.0
beta_travel_time_2,-5.32117,0.626051,-8.499582,0.0
beta_travel_time_2_non-work-education-related,-3.83324,0.897029,-4.273261,1.926343e-05
beta_travel_time_3,-2.223139,0.312604,-7.111667,1.146416e-12
beta_travel_time_3_non-work-education-related,-1.890932,0.449656,-4.205281,2.607576e-05
beta_travel_time_4,-5.492683,0.540192,-10.168015,0.0
beta_travel_time_4_non-work-education-related,-1.74592,0.787452,-2.217176,0.02661105
beta_travel_time_non-work-education-related,-1.503396,0.397607,-3.781114,0.0001561283
constant_2,-4.417436,0.200685,-22.011822,0.0


### Purpose of the Trip (Model 2 bis)

In [8]:
print(res_mod2bis.print_general_statistics())

Number of estimated parameters:	11
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4200.876
Likelihood ratio test for the init. model:	5461.192
Rho-square for the init. model:	0.394
Rho-square-bar for the init. model:	0.392
Akaike Information Criterion:	8423.751
Bayesian Information Criterion:	8495.44
Final gradient norm:	8.6083E-02
Nbr of threads:	4



In [9]:
display(res_mod2bis.get_estimated_parameters())

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
beta_fare,-0.228996,0.020099,-11.393563,0.0
beta_travel_time,-8.838747,0.393644,-22.453671,0.0
beta_travel_time_2,-6.881482,0.624008,-11.027877,0.0
beta_travel_time_3,-3.219339,0.249202,-12.918588,0.0
beta_travel_time_4,-6.290598,0.403338,-15.596347,0.0
constant_2,-4.434174,0.246477,-17.990214,0.0
constant_2_non-work-education-related,-0.17428,0.193169,-0.902215,0.366942
constant_3,-2.484461,0.161799,-15.355255,0.0
constant_3_non-work-education-related,-0.064806,0.112226,-0.577458,0.56363
constant_4,-2.447807,0.163013,-15.01607,0.0


## Box Cox Model (Model 3)

In [10]:
print(res_mod3.print_general_statistics())

Number of estimated parameters:	13
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4178.17
Likelihood ratio test for the init. model:	5506.604
Rho-square for the init. model:	0.397
Rho-square-bar for the init. model:	0.395
Akaike Information Criterion:	8382.34
Bayesian Information Criterion:	8467.063
Final gradient norm:	9.9185E-02
Nbr of threads:	4



In [11]:
display(res_mod3.get_estimated_parameters())

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
beta_fare,-0.215776,0.019744,-10.928926,0.0
beta_travel_time,-10.598984,0.806315,-13.144973,0.0
beta_travel_time_2,-4.879018,0.45688,-10.679,0.0
beta_travel_time_2_non-work-education-related,-0.061181,0.236398,-0.258803,0.795787
beta_travel_time_3,-2.766186,0.244909,-11.294735,0.0
beta_travel_time_3_non-work-education-related,-0.160581,0.227404,-0.706148,0.480096
beta_travel_time_4,-4.065288,0.355195,-11.445238,0.0
beta_travel_time_4_non-work-education-related,-0.624018,0.170858,-3.652265,0.00026
beta_travel_time_non-work-education-related,0.522344,0.686213,0.761198,0.446539
constant_2,-11.962546,0.433974,-27.565127,0.0


## Nested Model (Model 4)

In [12]:
logprob_nested = lognested(V, None,nests, choice)
biogeme_nested = bio.BIOGEME(database, logprob_nested)
biogeme_nested.modelName = 'nested_model'
results_nested = biogeme_nested.estimate(recycle=False)
print(results_nested.print_general_statistics())

Number of estimated parameters:	14
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-4167.827
Final log likelihood:	-4167.826
Likelihood ratio test for the init. model:	0.002397314
Rho-square for the init. model:	2.88e-07
Rho-square-bar for the init. model:	-0.00336
Akaike Information Criterion:	8363.651
Bayesian Information Criterion:	8454.892
Final gradient norm:	1.5966E-01
Nbr of threads:	4



In [13]:
display(results_nested.get_estimated_parameters())

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
beta_fare,-0.075537,0.034333,-2.200107,0.027799
beta_travel_time,-8.742886,0.981457,-8.908068,0.0
beta_travel_time_2,-2.320468,0.604684,-3.837492,0.000124
beta_travel_time_2_non-work-education-related,0.092278,0.171094,0.53934,0.589652
beta_travel_time_3,-0.905092,0.425696,-2.126149,0.033491
beta_travel_time_3_non-work-education-related,-0.243102,0.110389,-2.202224,0.02765
beta_travel_time_4,-1.236023,0.601338,-2.055456,0.039835
beta_travel_time_4_non-work-education-related,-0.33065,0.130214,-2.539276,0.011108
beta_travel_time_non-work-education-related,0.111036,0.548394,0.202474,0.839546
constant_2,-8.618824,0.960779,-8.970658,0.0


## Cross Nested Model (Model 4)

In [14]:
logprob_cnl = logcnl(V, None ,nests_cross, choice)
biogeme_cnl = bio.BIOGEME(database, logprob_cnl)
biogeme_cnl.modelName = 'cnl'
results_cross = biogeme_cnl.estimate(recycle=False)
print(results_cross.print_general_statistics())

Number of estimated parameters:	16
Number of free parameters:	15
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-4166.636
Final log likelihood:	-4166.636
Likelihood ratio test for the init. model:	-0
Rho-square for the init. model:	0
Rho-square-bar for the init. model:	-0.00384
Akaike Information Criterion:	8365.271
Bayesian Information Criterion:	8469.546
Final gradient norm:	3.3220E+01
Nbr of threads:	4



In [15]:
display(results_cross.get_estimated_parameters())

Unnamed: 0,Value,Active bound,Rob. Std err,Rob. t-test,Rob. p-value
alpha_car_motorized,0.811472,0.0,0.785803,1.032667,0.3017598
beta_fare,-0.098144,0.0,0.095878,-1.023634,0.3060082
beta_travel_time,-9.02888,0.0,2.540843,-3.553498,0.0003801445
beta_travel_time_2,-2.74943,0.0,2.122208,-1.295552,0.19513
beta_travel_time_2_non-work-education-related,0.053929,0.0,0.365155,0.147688,0.8825891
beta_travel_time_3,-1.142197,0.0,1.000361,-1.141785,0.2535435
beta_travel_time_3_non-work-education-related,-0.284539,0.0,0.124557,-2.284415,0.02234714
beta_travel_time_4,-1.662316,0.0,1.980509,-0.839338,0.4012796
beta_travel_time_4_non-work-education-related,-0.39664,0.0,0.192557,-2.059855,0.03941244
beta_travel_time_non-work-education-related,0.245509,0.0,0.640263,0.383451,0.7013854


## Model Comparison with statistical tests

In [16]:
stat_mod0 = res_mod0.get_general_statistics()
stat_mod1 = res_mod1.get_general_statistics()
stat_mod2 = res_mod2.get_general_statistics()
stat_mod2bis = res_mod2bis.get_general_statistics()
stat_mod3 = res_mod3.get_general_statistics()
stat_nested = results_nested.get_general_statistics()
stat_cross = results_cross.get_general_statistics()

# model 0
log_mod0 = stat_mod0.get("Final log likelihood")[0]
num_param_mod0 = stat_mod0.get("Number of estimated parameters")[0]

# model 1
log_mod1 = stat_mod1.get("Final log likelihood")[0]
num_param_mod1 = stat_mod1.get("Number of estimated parameters")[0]

# model 2
log_mod2= stat_mod2.get("Final log likelihood")[0]
num_param_mod2 = stat_mod2.get("Number of estimated parameters")[0]

# model 2 bis
log_mod2bis = stat_mod2bis.get("Final log likelihood")[0]
num_param_mod2bis = stat_mod2bis.get("Number of estimated parameters")[0]

# model 3
log_mod3 = stat_mod3.get("Final log likelihood")[0]
num_param_mod3= stat_mod3.get("Number of estimated parameters")[0]

# model nested
log_nested = stat_nested.get("Final log likelihood")[0]
num_param_nested = stat_nested.get("Number of estimated parameters")[0]

# model cross-nested
log_cross = stat_cross.get("Final log likelihood")[0]
num_param_cross = stat_cross.get("Number of estimated parameters")[0]

In [17]:
# Model statistics (replace these with actual stats from your data)
models = {
    "mod0": {"log_likelihood": log_mod0, "num_parameters": num_param_mod0},
    "mod1": {"log_likelihood": log_mod1, "num_parameters": num_param_mod1},
    "mod2": {"log_likelihood": log_mod2, "num_parameters": num_param_mod2},
    #"mod2bis": {"log_likelihood": log_mod2bis, "num_parameters": num_param_mod2bis},
    "mod3": {"log_likelihood": log_mod3, "num_parameters": num_param_mod3},
    "nested": {"log_likelihood": log_nested, "num_parameters": num_param_nested},
    "cross": {"log_likelihood": log_cross, "num_parameters": num_param_cross},
}

# List of model names (in the order you want to test)
model_names = list(models.keys())

# Iterate over pairs of consecutive models
for i in range(1, len(model_names)):
    # Current and previous models
    current_model = models[model_names[i]]
    previous_model = models[model_names[i - 1]]
    
    # Extract log-likelihood and parameter counts
    LL_simple = previous_model["log_likelihood"]
    LL_complex = current_model["log_likelihood"]
    df = current_model["num_parameters"] - previous_model["num_parameters"]  # Degrees of freedom
    
    # Statistical test
    test_stat = -2 * (LL_simple - LL_complex)
    p_value = 1 - chi2.cdf(test_stat, df)
    
    # Print the results
    print(f"Comparing {model_names[i - 1]} vs {model_names[i]}:")
    print(f"  Test Statistic: {test_stat:.4f}")
    print(f"  P-value: {p_value:.4f}")
    
    # Interpretation
    if p_value < 0.05:
        print("  The complex model provides a significantly better fit.\n")
    else:
        print("  No significant improvement in the complex model.\n")


Comparing mod0 vs mod1:
  Test Statistic: 889.3028
  P-value: 0.0000
  The complex model provides a significantly better fit.

Comparing mod1 vs mod2:
  Test Statistic: 77.9898
  P-value: 0.0000
  The complex model provides a significantly better fit.

Comparing mod2 vs mod3:
  Test Statistic: 41.9855
  P-value: 0.0000
  The complex model provides a significantly better fit.

Comparing mod3 vs nested:
  Test Statistic: 20.6885
  P-value: 0.0000
  The complex model provides a significantly better fit.

Comparing nested vs cross:
  Test Statistic: 2.3800
  P-value: 0.3042
  No significant improvement in the complex model.



In [18]:
#Comparison Model 1 and Model 2 bis
# Extract log-likelihood and parameter counts
LL_model1 = -4238.158
LL_model2bis = -4200.876
df1 = 3  # Degrees of freedom

# Statistical test
test_stat1 = -2 * (LL_model1 - LL_model2bis)
p_value1 = 1 - chi2.cdf(test_stat1, df1)

# Print the results
print(f"Comparing Model 1 vs Model 2 bis :")
print(f"  Test Statistic: {test_stat1:.4f}")
print(f"  P-value: {p_value1:.4f}")

# Interpretation
if p_value1 < 0.05:
    print("  The complex model provides a significantly better fit.\n")
else:
    print("  No significant improvement in the complex model.\n")


#Comparison Model 2 and Model 2 bis
# Extract log-likelihood and parameter counts
LL_model2 = -4199.162
LL_model2bis = -4200.876
df2 = 1  # Degrees of freedom

# Statistical test
test_stat2 = -2 * (LL_model2 - LL_model2bis)
p_value2 = 1 - chi2.cdf(test_stat2, df2)

# Print the results
print(f"Comparing Model 2 vs Model 2 bis :")
print(f"  Test Statistic: {test_stat2:.4f}")
print(f"  P-value: {p_value2:.4f}")

# Interpretation
if p_value2 < 0.05:
    print("  The complex model provides a significantly better fit.\n")
else:
    print("  No significant improvement in the complex model.\n")


Comparing Model 1 vs Model 2 bis :
  Test Statistic: 74.5640
  P-value: 0.0000
  The complex model provides a significantly better fit.

Comparing Model 2 vs Model 2 bis :
  Test Statistic: -3.4280
  P-value: 1.0000
  No significant improvement in the complex model.

