In [None]:
#check BIOGEME version
import biogeme.version as ver
print("Biogeme version:", ver.getVersion())

#Import necessary libraries
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta, Variable
from tqdm.autonotebook import tqdm

import numpy as np
import math as math
import scipy.stats as stats

In [None]:
# Read the telephone dataset (comma-separated; adjust the path if the CSV lives elsewhere)
df = pd.read_csv("Telephone2.csv")
# Report the number of missing values found in each column
print(df.isna().sum())

In [None]:
# Wrap the pandas DataFrame in a Biogeme Database object labelled "Telephone";
# the models below are estimated on this object.
database = db.Database("Telephone", df)
# Show the data held by the Biogeme database as the cell output.
# NOTE(review): `_df` is a private attribute (presumably the underlying DataFrame) — confirm
# whether the installed Biogeme version offers a public accessor.
database._df

In [None]:
"""
Use the loop below to list all of the column names.
for col in df.columns:
    print(f'{col} = Variable("{col}")')

1. The code will generate the Variable for each column.    
2. Copy the output and paste it below to define the variables.
"""
# Print one ready-to-paste line of the form  name = Variable("name")  per DataFrame column
for col in df.columns:
    print(f'{col} = Variable("{col}")')

In [None]:
# Define the variables
# One Biogeme Variable per dataset column; the string passed to Variable is the column name
# (these lines have the format printed by the column-listing loop over df.columns).

# Chosen alternative (used as the choice variable in the loglogit calls)
choice = Variable("choice")

# Household / respondent characteristics
area = Variable("area")
users = Variable("users")
inc = Variable("inc")
age0 = Variable("age0")
age1 = Variable("age1")
age2 = Variable("age2")
age3 = Variable("age3")
age4 = Variable("age4")
age5 = Variable("age5")
age6 = Variable("age6")
age7 = Variable("age7")
status = Variable("status")
employ = Variable("employ")

# Alternative-specific attributes: cost and availability of alternatives 1 and 2
cost1 = Variable("cost1")
cost2 = Variable("cost2")
avail1 = Variable("avail1")
avail2 = Variable("avail2")

# Observation identifier
ID = Variable("ID")
# REMEMBER to define new variables if you have modified the dataset!

## MODEL 1

In [None]:
# Parameters
## Beta(name, starting value, lower bound, upper bound, status)
## Per the Biogeme documentation the last argument is a status flag:
## 0 = the parameter is estimated, 1 = it is kept fixed at its starting value.
## asc_std is fixed at 0, so the standard alternative acts as the reference.
asc_budget = Beta('asc_budget', 0, None, None, 0)
asc_std = Beta('asc_std', 0, None, None, 1)
b_cost = Beta('b_cost', 0, None, None, 0)

# Utility of each alternative: constant + cost effect
V_budget = asc_budget + b_cost * cost1
V_std = asc_std + b_cost * cost2

# Availability condition and utility function, keyed by alternative id
av = {
    1: avail1,
    2: avail2,
}
V = {
    1: V_budget,
    2: V_std,
}

# Log of the logit choice probability:
# loglogit(utilities, availabilities, chosen alternative)
logprob = models.loglogit(V, av, choice)

# Set up the estimation problem and name the model
the_biogeme = bio.BIOGEME(database, logprob)
the_biogeme.model_name = 'MODEL 1'

# Null log-likelihood, computed from the availability conditions
the_biogeme.calculate_null_loglikelihood(av)

# Estimate the model and keep the results for later comparisons
model_1_results = the_biogeme.estimate()

# Report: summary text first, then the table of estimates as the cell output
print(model_1_results.short_summary())
model_1_results.get_estimated_parameters()

## MODEL 2

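We need to add new columns containing log(cost) to the dataset: `math.log()` only works on plain numbers, so it cannot be applied to a BIOGEME `Variable` inside the model specification. (Alternatively, BIOGEME provides its own `log` expression in `biogeme.expressions`.)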

In [None]:
# Add log-transformed cost columns to the dataset (numpy is imported as np in the first cell).
# Note: np.log yields -inf for a cost of 0 and NaN for a negative cost,
# so both cost columns are expected to be strictly positive.
df['log_cost1'] = np.log(df['cost1'])
df['log_cost2'] = np.log(df['cost2'])

# Rebuild the BIOGEME-specific database so that it contains the new columns
# (same database label as the one used when the data were first loaded)
database = db.Database("Telephone", df)

# Declare the new columns as BIOGEME variables
log_cost1 = Variable("log_cost1")
log_cost2 = Variable("log_cost2")

# Display the updated database; only the last expression of a cell is rendered,
# so this has to come after the assignments
database._df

In [None]:
# Parameters
## Beta(name, starting value, lower bound, upper bound, status)
## Per the Biogeme documentation the last argument is a status flag:
## 0 = the parameter is estimated, 1 = it is kept fixed at its starting value.
## asc_std is fixed at 0, so the standard alternative acts as the reference.
asc_budget = Beta('asc_budget', 0, None, None, 0)
asc_std = Beta('asc_std', 0, None, None, 1)
b_cost = Beta('b_cost', 0, None, None, 0)

# Utility of each alternative: constant + effect of the log of cost
V_budget = asc_budget + b_cost * log_cost1
V_std = asc_std + b_cost * log_cost2

# Availability condition and utility function, keyed by alternative id
av = {
    1: avail1,
    2: avail2,
}
V = {
    1: V_budget,
    2: V_std,
}

# Log of the logit choice probability:
# loglogit(utilities, availabilities, chosen alternative)
logprob = models.loglogit(V, av, choice)

# Set up the estimation problem and name the model
the_biogeme = bio.BIOGEME(database, logprob)
the_biogeme.model_name = 'MODEL 2'

# Null log-likelihood, computed from the availability conditions
the_biogeme.calculate_null_loglikelihood(av)

# Estimate the model and keep the results for later comparisons
model_2_results = the_biogeme.estimate()

# Report: summary text first, then the table of estimates as the cell output
print(model_2_results.short_summary())
model_2_results.get_estimated_parameters()

## MODEL 3

In [None]:
# Parameters
## Beta(name, starting value, lower bound, upper bound, status)
## Per the Biogeme documentation the last argument is a status flag:
## 0 = the parameter is estimated, 1 = it is kept fixed at its starting value.
## asc_std is fixed at 0, so the standard alternative acts as the reference.
asc_budget = Beta('asc_budget', 0, None, None, 0)
asc_std = Beta('asc_std', 0, None, None, 1)
b_cost = Beta('b_cost', 0, None, None, 0)
b_users = Beta('b_users', 0, None, None, 0)
b_inc_2 = Beta('b_inc_2', 0, None, None, 0)
b_inc_3 = Beta('b_inc_3', 0, None, None, 0)
b_inc_4 = Beta('b_inc_4', 0, None, None, 0)
b_inc_5 = Beta('b_inc_5', 0, None, None, 0)

# Utilities. The socio-economic terms enter the budget alternative only.
# Each (inc == k) comparison gets its own coefficient; inc == 1 has no term.
V_budget = (
    asc_budget
    + b_cost * cost1
    + b_users * users
    + b_inc_2 * (inc == 2)
    + b_inc_3 * (inc == 3)
    + b_inc_4 * (inc == 4)
    + b_inc_5 * (inc == 5)
)
V_std = asc_std + b_cost * cost2

# Availability condition and utility function, keyed by alternative id
av = {
    1: avail1,
    2: avail2,
}
V = {
    1: V_budget,
    2: V_std,
}

# Log of the logit choice probability:
# loglogit(utilities, availabilities, chosen alternative)
logprob = models.loglogit(V, av, choice)

# Set up the estimation problem and name the model
the_biogeme = bio.BIOGEME(database, logprob)
the_biogeme.model_name = 'MODEL 3'

# Null log-likelihood, computed from the availability conditions
the_biogeme.calculate_null_loglikelihood(av)

# Estimate the model and keep the results for later comparisons
model_3_results = the_biogeme.estimate()

# Report: summary text first, then the table of estimates as the cell output
print(model_3_results.short_summary())
model_3_results.get_estimated_parameters()

## MODEL 4

In [None]:
# Parameters
## Beta(name, starting value, lower bound, upper bound, status)
## Per the Biogeme documentation the last argument is a status flag:
## 0 = the parameter is estimated, 1 = it is kept fixed at its starting value.
## asc_std is fixed at 0, so the standard alternative acts as the reference.
asc_budget = Beta('asc_budget', 0, None, None, 0)
asc_std = Beta('asc_std', 0, None, None, 1)
b_cost = Beta('b_cost', 0, None, None, 0)
b_users = Beta('b_users', 0, None, None, 0)
b_inc_2 = Beta('b_inc_2', 0, None, None, 0)
b_inc_3 = Beta('b_inc_3', 0, None, None, 0)
b_inc_4 = Beta('b_inc_4', 0, None, None, 0)
b_inc_5 = Beta('b_inc_5', 0, None, None, 0)
b_status = Beta('b_status', 0, None, None, 0)

# Utilities. The socio-economic terms enter the budget alternative only.
# Each (inc == k) comparison gets its own coefficient; inc == 1 has no term.
# b_status applies when status equals 1.
V_budget = (
    asc_budget
    + b_cost * cost1
    + b_users * users
    + b_inc_2 * (inc == 2)
    + b_inc_3 * (inc == 3)
    + b_inc_4 * (inc == 4)
    + b_inc_5 * (inc == 5)
    + b_status * (status == 1)
)
V_std = asc_std + b_cost * cost2

# Availability condition and utility function, keyed by alternative id
av = {
    1: avail1,
    2: avail2,
}
V = {
    1: V_budget,
    2: V_std,
}

# Log of the logit choice probability:
# loglogit(utilities, availabilities, chosen alternative)
logprob = models.loglogit(V, av, choice)

# Set up the estimation problem and name the model
the_biogeme = bio.BIOGEME(database, logprob)
the_biogeme.model_name = 'MODEL 4'

# Null log-likelihood, computed from the availability conditions
the_biogeme.calculate_null_loglikelihood(av)

# Estimate the model and keep the results for later comparisons
model_4_results = the_biogeme.estimate()

# Report: summary text first, then the table of estimates as the cell output
print(model_4_results.short_summary())
model_4_results.get_estimated_parameters()

## MODEL 5

In [None]:
# Parameters
## Beta(name, starting value, lower bound, upper bound, status)
## Per the Biogeme documentation the last argument is a status flag:
## 0 = the parameter is estimated, 1 = it is kept fixed at its starting value.
## asc_std is fixed at 0, so the standard alternative acts as the reference.
asc_budget = Beta('asc_budget', 0, None, None, 0)
asc_std = Beta('asc_std', 0, None, None, 1)
b_cost = Beta('b_cost', 0, None, None, 0)
b_users = Beta('b_users', 0, None, None, 0)
b_inc_2 = Beta('b_inc_2', 0, None, None, 0)
b_inc_3 = Beta('b_inc_3', 0, None, None, 0)
b_inc_4 = Beta('b_inc_4', 0, None, None, 0)
b_inc_5 = Beta('b_inc_5', 0, None, None, 0)
b_status = Beta('b_status', 0, None, None, 0)
b_area = Beta('b_area', 0, None, None, 0)

# Utilities. The socio-economic terms enter the budget alternative only.
# Each (inc == k) comparison gets its own coefficient; inc == 1 has no term.
# b_status applies when status equals 1; b_area applies when area is 2 or 1.
V_budget = (
    asc_budget
    + b_cost * cost1
    + b_users * users
    + b_inc_2 * (inc == 2)
    + b_inc_3 * (inc == 3)
    + b_inc_4 * (inc == 4)
    + b_inc_5 * (inc == 5)
    + b_status * (status == 1)
    + b_area * ((area == 2) | (area == 1))
)
V_std = asc_std + b_cost * cost2

# Availability condition and utility function, keyed by alternative id
av = {
    1: avail1,
    2: avail2,
}
V = {
    1: V_budget,
    2: V_std,
}

# Log of the logit choice probability:
# loglogit(utilities, availabilities, chosen alternative)
logprob = models.loglogit(V, av, choice)

# Set up the estimation problem and name the model
the_biogeme = bio.BIOGEME(database, logprob)
the_biogeme.model_name = 'MODEL 5'

# Null log-likelihood, computed from the availability conditions
the_biogeme.calculate_null_loglikelihood(av)

# Estimate the model and keep the results for later comparisons
model_5_results = the_biogeme.estimate()

# Report: summary text first, then the table of estimates as the cell output
print(model_5_results.short_summary())
model_5_results.get_estimated_parameters()

In [None]:
# Define LR-Test and BST-Test here

def LR_test(model_1, model_2):
    """Likelihood-ratio test between two estimated models.

    The model with fewer parameters is treated as the restricted (null)
    model, regardless of the order in which the two models are passed.
    The test is only meaningful when the restricted model is nested in the
    larger one; this is not checked here.

    Args:
        model_1: estimation results exposing ``number_of_parameters``,
            ``final_loglikelihood`` and ``model_name``.
        model_2: estimation results with the same attributes.

    Returns:
        dict with the keys ``LR_statistic``, ``Degrees_of_freedom``,
        ``Critical_value_5%``, ``p_value`` and ``Reject_null`` (a sentence
        stating the outcome of the test at the 5% level).

    Raises:
        ValueError: if both models have the same number of parameters.
    """
    if model_1.number_of_parameters == model_2.number_of_parameters:
        raise ValueError("Both models have the same number of parameters. LR test is not applicable.")

    # Order the models by size: the smaller one is the null hypothesis
    if model_1.number_of_parameters < model_2.number_of_parameters:
        model_simple, model_complex = model_1, model_2
    else:
        model_simple, model_complex = model_2, model_1

    # LR statistic: -2 * (LL_restricted - LL_unrestricted)
    LR = -2 * (model_simple.final_loglikelihood - model_complex.final_loglikelihood)
    # Degrees of freedom: number of additional parameters in the larger model
    q = model_complex.number_of_parameters - model_simple.number_of_parameters

    # 5% critical value and p-value of the chi-square distribution with q d.o.f.
    critvalue = float(stats.chi2.ppf(0.95, q))
    # The survival function keeps precision for very small p-values (unlike 1 - cdf)
    p_value = float(stats.chi2.sf(LR, q))

    if LR > critvalue:
        verdict = ("Reject the null hypothesis: The " + model_complex.model_name
                   + " provides a significantly better fit.")
    else:
        verdict = ("Fail to reject the null hypothesis: The " + model_simple.model_name
                   + " is sufficient.")

    return {
        "LR_statistic": LR,
        "Degrees_of_freedom": q,
        "Critical_value_5%": critvalue,
        "p_value": p_value,
        "Reject_null": verdict
    }


def BST_Test(model_1, model_2):
    """Ben-Akiva & Swait test for comparing two non-nested models.

    Under the null hypothesis that the model with the lower adjusted
    rho-square is the true one, the probability of observing a difference
    of at least z = rho_bar_hi - rho_bar_lo is bounded by

        Phi( -sqrt( -2 * z * L0 + (K_hi - K_lo) ) )

    where L0 is the null log-likelihood, K the number of parameters and Phi
    the standard normal CDF.

    Args:
        model_1: estimation results exposing ``rho_bar_square_null``,
            ``number_of_parameters``, ``model_name`` and
            ``get_general_statistics()``; its 'Null log likelihood' entry is
            used as L0 (both models are expected to share the same data).
        model_2: estimation results with the same attributes.

    Returns:
        dict with the keys ``BST_value`` (the argument of Phi), ``p_value``
        (the bound) and ``Reject_null`` (a sentence stating the outcome at
        the 5% level).

    Raises:
        ValueError: if the expression under the square root is negative, in
            which case the bound cannot be evaluated.
    """
    L0 = float(model_1.get_general_statistics().get('Null log likelihood'))

    # Order the models by adjusted rho-square (ties keep model_2 as the "hi" model)
    if model_1.rho_bar_square_null > model_2.rho_bar_square_null:
        model_hi, model_lo = model_1, model_2
    else:
        model_hi, model_lo = model_2, model_1

    # z is the difference of the adjusted rho-squares, not of the log-likelihoods
    z = model_hi.rho_bar_square_null - model_lo.rho_bar_square_null
    radicand = -2 * z * L0 + (model_hi.number_of_parameters - model_lo.number_of_parameters)
    if radicand < 0:
        raise ValueError(
            "BST test is not applicable: the term under the square root is negative "
            f"({radicand})."
        )

    # Test value and the corresponding upper bound on the p-value
    bst_value = -1 * math.sqrt(radicand)
    p_value = stats.norm.cdf(bst_value)

    # Name the models by their role in the test, not by argument position
    if p_value < 0.05:
        verdict = ("Reject the null hypothesis: The " + model_hi.model_name
                   + " provides a significantly better fit.")
    else:
        verdict = ("Fail to reject the null hypothesis: The " + model_lo.model_name
                   + " is sufficient.")

    return {
        "BST_value": bst_value,
        "p_value": float(p_value),
        "Reject_null": verdict
    }