In [1]:
import pandas as pd
from biogeme.database import Database
from biogeme import models
from biogeme.expressions import Beta, Variable
import biogeme.biogeme as bio

  from tqdm.autonotebook import tqdm


In [2]:
# Load the data
df = pd.read_csv('Exp_bg_NTC.csv')
database = Database('MM_exp2', df)

# Step 1: Identify the chosen alternative for each (RID, DESIGN_ROW) group
chosen_alternatives = df[df["CHOICE"] == 1].groupby(["RID", "Choice_set"])["alt"].first()

# Step 2: Map the chosen alternative back to all rows in the group
df["choice_biogeme"] = df.groupby(["RID", "Choice_set"])["alt"].transform(
    lambda x: chosen_alternatives.get((x.name[0], x.name[1])))
df.to_csv("Exp_bg_NTC_recoded.csv", index=False)
print(df)

      RID  SCENARIO  Choice_set  alt  CHOICE   x1   x2   x3  x4  \
0       5         1           1    1       0  6.0  0.8  0.5  10   
1       5         1           1    2       1  1.7  5.5  1.0   5   
2       5         1           1    3       0  6.0  3.6  0.2   0   
3       5         1           1    4       0  0.0  0.0  0.0   0   
4       5         2           2    1       0  6.0  3.6  0.5   5   
...   ...       ...         ...  ...     ...  ...  ...  ...  ..   
1903  568         2          17    4       0  0.0  0.0  0.0   0   
1904  568         3          18    1       1  1.7  3.6  1.0   0   
1905  568         3          18    2       0  6.0  3.6  0.5   5   
1906  568         3          18    3       0  4.5  0.8  0.2  10   
1907  568         3          18    4       0  0.0  0.0  0.0   0   

      choice_biogeme  
0                  2  
1                  2  
2                  2  
3                  2  
4                  3  
...              ...  
1903               2  
1904       

In [3]:
# Variables (use ONLY columns that exist)
Choice = Variable('choice_biogeme')   # chosen alternative id (per your biogeme loglogit signature)
Price = Variable('x1')
Time = Variable('x2')
AvailProb = Variable('x3')     # renamed to avoid confusion with old "Avail" variable
WalkTime = Variable('x4')

# Coefficients
B_PRICE = Beta('B_PRICE', 0, None, None, 0)
B_TIME  = Beta('B_TIME',  0, None, None, 0)
B_WALK  = Beta('B_WALK',  0, None, None, 0)
B_AVAIL = Beta('B_AVAIL', 0, None, None, 0)
ASC_2 = Beta('ASC_2', 0, None, None, 0)
ASC_3 = Beta('ASC_3', 0, None, None, 0)
ASC_OPT_OUT = 0


In [4]:
# Identify opt-out
optout = df['alt'] == 4

# Zero out all attributes for opt-out
df.loc[optout, ['x1', 'x2', 'x3', 'x4']] = 0

# Utility functions
V = {
    1: B_PRICE * Price + B_TIME * Time + B_WALK * WalkTime + B_AVAIL * AvailProb,
    2: ASC_2 + B_PRICE * Price + B_TIME * Time + B_WALK * WalkTime + B_AVAIL * AvailProb,
    3: ASC_3 + B_PRICE * Price + B_TIME * Time + B_WALK * WalkTime + B_AVAIL * AvailProb,
    4: ASC_OPT_OUT,
}

# Availability (all available)
av = {1: 1, 2: 1, 3: 1, 4: 1}


In [5]:

# Logit model (your biogeme version: 3 args)
logprob = models.loglogit(V, av, Choice)

biogeme = bio.BIOGEME(database, logprob)
biogeme.model_name = 'mnl_NTC'

results = biogeme.estimate()
results.get_estimated_parameters()



  results.get_estimated_parameters()


Unnamed: 0,Name,Value,Robust std err.,Robust t-stat.,Robust p-value
0,B_PRICE,0.177768,0.055757,3.188251,0.001431364
1,B_TIME,0.178552,0.052651,3.391246,0.0006957549
2,B_WALK,0.032401,0.028083,1.153762,0.2485979
3,B_AVAIL,0.378718,0.341064,1.110401,0.2668263
4,ASC_2,0.581794,0.059003,9.860373,0.0
5,ASC_3,0.365385,0.061743,5.917781,3.263142e-09


In [7]:
df.groupby('CHOICE')[['x1','x2','x3','x4']].mean()


Unnamed: 0_level_0,x1,x2,x3,x4
CHOICE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2.975472,1.921943,0.354088,3.679245
1,2.814885,3.392662,0.637736,3.930818


In [9]:
df[df["alt"] != 4].groupby("CHOICE")[["x1","x2","x3","x4"]].mean()

Unnamed: 0_level_0,x1,x2,x3,x4
CHOICE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,4.367077,2.820821,0.519692,5.4
1,2.944518,3.548904,0.667105,4.111842


In [8]:
df.groupby('alt')[['x1','x2','x4','x3']].mean()
df.groupby('alt')['CHOICE'].mean()


alt
1    0.255765
2    0.387841
3    0.312369
4    0.044025
Name: CHOICE, dtype: float64