In [1]:
import pandas as pd
from biogeme.database import Database
from biogeme import models
from biogeme.expressions import Beta, Variable
import biogeme.biogeme as bio

  from tqdm.autonotebook import tqdm


In [2]:
# Load the data
df = pd.read_csv('Exp_bg_WithTC.csv')
database = Database('MM_exp1', df)

# Step 1: Identify the chosen alternative for each (RID, DESIGN_ROW) group
chosen_alternatives = df[df["CHOICE"] == 1].groupby(["RID", "Choice_set"])["alt"].first()

# Step 2: Map the chosen alternative back to all rows in the group
df["choice_biogeme"] = df.groupby(["RID", "Choice_set"])["alt"].transform(
    lambda x: chosen_alternatives.get((x.name[0], x.name[1])))
df.to_csv("Exp_bg_WithTC_recoded.csv", index=False)
print(df)

      RID  SCENARIO  Choice_set  alt  CHOICE   x1   x2   x3  x4  \
0       5         4           4    1       1  3.0  1.8  1.0  10   
1       5         4           4    2       0  1.7  5.5  0.2   0   
2       5         4           4    3       0  6.0  1.8  0.5   5   
3       5         4           4    4       0  0.0  0.0  0.0   0   
4       5         5           5    1       1  4.5  1.8  0.5   0   
...   ...       ...         ...  ...     ...  ...  ...  ...  ..   
1903  568         5           2    4       0  0.0  0.0  0.0   0   
1904  568         6           3    1       0  1.7  5.5  0.2   5   
1905  568         6           3    2       0  6.0  1.8  1.0   5   
1906  568         6           3    3       1  3.0  1.8  0.5   5   
1907  568         6           3    4       0  0.0  0.0  0.0   0   

      choice_biogeme  
0                  1  
1                  1  
2                  1  
3                  1  
4                  1  
...              ...  
1903               2  
1904       

In [3]:
# Variables (use ONLY columns that exist)
Choice = Variable('choice_biogeme')   # chosen alternative id (per your biogeme loglogit signature)
Price = Variable('x1')
Time = Variable('x2')
AvailProb = Variable('x3')     # renamed to avoid confusion with old "Avail" variable
WalkTime = Variable('x4')

# Coefficients
B_PRICE = Beta('B_PRICE', 0, None, None, 0)
B_TIME  = Beta('B_TIME',  0, None, None, 0)
B_WALK  = Beta('B_WALK',  0, None, None, 0)
B_AVAIL = Beta('B_AVAIL', 0, None, None, 0)
ASC_2 = Beta('ASC_2', 0, None, None, 0)
ASC_3 = Beta('ASC_3', 0, None, None, 0)
ASC_OPT_OUT = 0


In [4]:
# Identify opt-out
optout = df['alt'] == 4

# Zero out all attributes for opt-out
df.loc[optout, ['x1', 'x2', 'x3', 'x4']] = 0

# Utility functions
V = {
    1: B_PRICE * Price + B_TIME * Time + B_WALK * WalkTime + B_AVAIL * AvailProb,
    2: ASC_2 + B_PRICE * Price + B_TIME * Time + B_WALK * WalkTime + B_AVAIL * AvailProb,
    3: ASC_3 + B_PRICE * Price + B_TIME * Time + B_WALK * WalkTime + B_AVAIL * AvailProb,
    4: ASC_OPT_OUT,
}

# Availability (all available)
av = {1: 1, 2: 1, 3: 1, 4: 1}


In [8]:

# Logit model (your biogeme version: 3 args)
logprob = models.loglogit(V, av, Choice)

biogeme = bio.BIOGEME(database, logprob)
biogeme.model_name = 'mnl_withTC'

results = biogeme.estimate()
results.get_estimated_parameters()



  results.get_estimated_parameters()


Unnamed: 0,Name,Value,Robust std err.,Robust t-stat.,Robust p-value
0,B_PRICE,0.219718,0.049573,4.432198,9.327746e-06
1,B_TIME,0.158204,0.054965,2.878268,0.003998649
2,B_WALK,0.01332,0.025479,0.522778,0.6011288
3,B_AVAIL,0.347406,0.293404,1.184054,0.2363916
4,ASC_2,0.292626,0.058373,5.013025,5.358104e-07
5,ASC_3,0.221857,0.059367,3.737013,0.000186219


In [9]:
df.groupby('CHOICE')[['x1','x2','x3','x4']].mean()


Unnamed: 0_level_0,x1,x2,x3,x4
CHOICE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2.910063,2.040531,0.344235,3.794549
1,2.961006,3.033333,0.667296,3.668763


In [11]:
df[df["alt"] != 4].groupby("CHOICE")[["x1","x2","x3","x4"]].mean()

Unnamed: 0_level_0,x1,x2,x3,x4
CHOICE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,4.253626,2.982635,0.503166,5.546476
1,3.124779,3.201106,0.704204,3.871681


In [10]:
df.groupby('alt')[['x1','x2','x3','x4']].mean()
df.groupby('alt')['CHOICE'].mean()


alt
1    0.295597
2    0.337526
3    0.314465
4    0.052411
Name: CHOICE, dtype: float64