# BIOGEME
1. Installation and library imports
2. Cleaning the data and defining variables
3. Binary Logit model

# 1. Installation and library imports

In [None]:
pip install biogeme

In [None]:
# Report the installed Biogeme version (useful when reproducing results).
# get_version() is the snake_case accessor, consistent with the snake_case
# Biogeme API (model_name, short_summary, ...) used later in this notebook.
import biogeme.version as ver
print("Biogeme version:", ver.get_version())

#Import necessary libraries
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta, Variable

# 2. Cleaning the data and defining variables

**Important: all data must be numeric.**

In [None]:
# Read the mode-choice CSV into a pandas DataFrame.
# The file is comma-separated, which is the pandas default delimiter.
# Adjust the path if the CSV is not in the working directory.
df = pd.read_csv("ModeChoiceData_RP.csv")

In [None]:
# Display the loaded DataFrame to inspect its contents
df

In [None]:
# Count the missing (NaN) entries in every column
print(df.isna().sum())

In [None]:
# Two ways of dealing with missing values:
#   Option 1 (disabled): drop the columns that contain them
## df = df.drop(columns=["SP_task"])
#   Option 2 (used here): replace every missing value with -1
df = df.fillna(value=-1)
# Confirm that no missing values remain (every count should now be 0)
print(df.isna().sum())

# Add a 1-based row identifier derived from the DataFrame index
df["RowID"] = df.index + 1
df

In [None]:
# Wrap the pandas DataFrame in a Biogeme Database named "ModeChoice";
# this object is what the models below are estimated on.
database = db.Database("ModeChoice", df)
# Show the data held by the Biogeme database (as the last expression of the
# cell, the notebook renders it)
database.data

In [None]:
"""
Use the loop below to list all column names:
for col in df.columns:
    print(f'{col} = Variable("{col}")')

1. The code generates a Variable definition for each column.
2. Copy the output and paste it into the next cell to define the variables.
"""
# Print one ready-to-paste line of the form  name = Variable("name")
# for every column of the DataFrame.
for col in df.columns:
    print(f'{col} = Variable("{col}")')


In [None]:
# Define the variables.
# Each Variable("<name>") is a Biogeme expression that refers to the database
# column called <name>; the Python identifier is kept identical to the column
# name so the utility functions below read like the dataset.
ID = Variable("ID")
RP = Variable("RP")
SP = Variable("SP")
RP_journey = Variable("RP_journey")
# Availability indicators: used as the values of the `av` dictionaries below
av_car = Variable("av_car")
av_rail = Variable("av_rail")
# Time and cost attributes: enter the car and rail utility functions
time_car = Variable("time_car")
cost_car = Variable("cost_car")
time_rail = Variable("time_rail")
cost_rail = Variable("cost_rail")
# Enters the rail utility function of Model C
business = Variable("business")
access_rail = Variable("access_rail")
service_rail = Variable("service_rail")
female = Variable("female")
income = Variable("income")
# Chosen alternative (coded 1: Car, 2: Rail); passed to loglogit below
choice = Variable("choice")

# 3. Binary Logit Model


#### Model A

In [None]:
# ---- Parameters to be estimated ------------------------------------------
# Beta(name, initial value, lower bound, upper bound, status)
#   status = 0 -> the parameter is estimated
#   status = 1 -> the parameter is kept fixed at its initial value
# asc_car is fixed at 0, which makes car the reference alternative:
# asc_rail is then measured relative to car.
asc_car = Beta('asc_car', 0, None, None, 1)
asc_rail = Beta('asc_rail', 0, None, None, 0)
b_cost = Beta('b_cost', 0, None, None, 0)

# ---- Utility functions (cost only) ----------------------------------------
V_car = asc_car + b_cost * cost_car
V_rail = asc_rail + b_cost * cost_rail

# ---- Alternatives ----------------------------------------------------------
# Coding of the alternatives in the dataset: 1 = Car, 2 = Rail.
# Both dictionaries are keyed by these codes.
V = {1: V_car, 2: V_rail}      # utility of each alternative
av = {1: av_car, 2: av_rail}   # availability of each alternative

# ---- Choice model ----------------------------------------------------------
# loglogit(utilities, availabilities, chosen alternative)
logprob = models.loglogit(V, av, choice)

# ---- Estimation ------------------------------------------------------------
the_biogeme = bio.BIOGEME(database, logprob)
the_biogeme.model_name = 'Model_A'  # name under which the model is reported

# Log-likelihood of the null model (equal probability for every available
# alternative), reported alongside the estimation results
the_biogeme.calculate_null_loglikelihood(av)

# Run the estimation and keep the results object
Model_A = the_biogeme.estimate()

# ---- Results ---------------------------------------------------------------
print(Model_A.short_summary())
Model_A.get_estimated_parameters()

#### Model B

In [None]:
# ---- Parameters to be estimated ------------------------------------------
# Beta(name, initial value, lower bound, upper bound, status)
#   status = 0 -> the parameter is estimated
#   status = 1 -> the parameter is kept fixed at its initial value
# asc_car is fixed at 0, which makes car the reference alternative:
# asc_rail is then measured relative to car.
asc_car = Beta('asc_car', 0, None, None, 1)
asc_rail = Beta('asc_rail', 0, None, None, 0)
b_cost = Beta('b_cost', 0, None, None, 0)
b_tt = Beta('b_tt', 0, None, None, 0)

# ---- Utility functions (cost and travel time) -----------------------------
V_car = asc_car + b_cost * cost_car + b_tt * time_car
V_rail = asc_rail + b_cost * cost_rail + b_tt * time_rail

# ---- Alternatives ----------------------------------------------------------
# Coding of the alternatives in the dataset: 1 = Car, 2 = Rail.
# Both dictionaries are keyed by these codes.
V = {1: V_car, 2: V_rail}      # utility of each alternative
av = {1: av_car, 2: av_rail}   # availability of each alternative

# ---- Choice model ----------------------------------------------------------
# loglogit(utilities, availabilities, chosen alternative)
logprob = models.loglogit(V, av, choice)

# ---- Estimation ------------------------------------------------------------
the_biogeme = bio.BIOGEME(database, logprob)
the_biogeme.model_name = 'Model_B'  # name under which the model is reported

# Log-likelihood of the null model (equal probability for every available
# alternative), reported alongside the estimation results
the_biogeme.calculate_null_loglikelihood(av)

# Run the estimation and keep the results object
Model_B = the_biogeme.estimate()

# ---- Results ---------------------------------------------------------------
print(Model_B.short_summary())
Model_B.get_estimated_parameters()

#### Model C

In [None]:
# ---- Parameters to be estimated ------------------------------------------
# Beta(name, initial value, lower bound, upper bound, status)
#   status = 0 -> the parameter is estimated
#   status = 1 -> the parameter is kept fixed at its initial value
# asc_car is fixed at 0, which makes car the reference alternative:
# asc_rail is then measured relative to car.
asc_car = Beta('asc_car', 0, None, None, 1)
asc_rail = Beta('asc_rail', 0, None, None, 0)
b_cost = Beta('b_cost', 0, None, None, 0)
b_tt = Beta('b_tt', 0, None, None, 0)
b_business_rail = Beta('b_business_rail', 0, None, None, 0)

# ---- Utility functions -----------------------------------------------------
# Cost and travel time enter both utilities; the `business` variable enters
# the rail utility only.
V_car = asc_car + b_cost * cost_car + b_tt * time_car
V_rail = asc_rail + b_cost * cost_rail + b_tt * time_rail + b_business_rail * business

# ---- Alternatives ----------------------------------------------------------
# Coding of the alternatives in the dataset: 1 = Car, 2 = Rail.
# Both dictionaries are keyed by these codes.
V = {1: V_car, 2: V_rail}      # utility of each alternative
av = {1: av_car, 2: av_rail}   # availability of each alternative

# ---- Choice model ----------------------------------------------------------
# loglogit(utilities, availabilities, chosen alternative)
logprob = models.loglogit(V, av, choice)

# ---- Estimation ------------------------------------------------------------
the_biogeme = bio.BIOGEME(database, logprob)
the_biogeme.model_name = 'Model_C'  # name under which the model is reported

# Log-likelihood of the null model (equal probability for every available
# alternative), reported alongside the estimation results
the_biogeme.calculate_null_loglikelihood(av)

# Run the estimation and keep the results object
Model_C = the_biogeme.estimate()

# ---- Results ---------------------------------------------------------------
print(Model_C.short_summary())
Model_C.get_estimated_parameters()