In [1]:
import pandas as pd
from biogeme.database import Database
from biogeme import models
from biogeme.expressions import Beta, Variable
import biogeme.biogeme as bio

ArviZ is undergoing a major refactor to improve flexibility and extensibility while maintaining a user-friendly interface.
Some upcoming changes may be backward incompatible.
For details and migration guidance, visit: https://python.arviz.org/en/latest/user_guide/migration_guide.html
  warn(
  from tqdm.autonotebook import tqdm


In [2]:
# Load the data
df = pd.read_csv('MM_exp_biogeme.csv')
database = Database('MM_exp', df)

# Step 1: Identify the chosen alternative for each (RID, DESIGN_ROW) group
chosen_alternatives = df[df["CHOICE"] == 1].groupby(["RID", "Choice_set"])["alt"].first()

# Step 2: Map the chosen alternative back to all rows in the group
df["choice_biogeme"] = df.groupby(["RID", "Choice_set"])["alt"].transform(
    lambda x: chosen_alternatives.get((x.name[0], x.name[1])))
df.to_csv("MM_exp_biogeme_recoded.csv", index=False)
print(df)

      RID  SCENARIO  Choice_set  alt  CHOICE   x1   x2   x3  x4  \
0       5         1           1    1       0  6.0  0.8  0.5  10   
1       5         1           1    2       1  1.7  5.5  1.0   5   
2       5         1           1    3       0  6.0  3.6  0.2   0   
3       5         1           1    4       0  0.0  0.0  0.0   0   
4       5         2           2    1       0  6.0  3.6  0.5   5   
...   ...       ...         ...  ...     ...  ...  ...  ...  ..   
3811  568         5           2    4       0  0.0  0.0  0.0   0   
3812  568         6           3    1       0  1.7  5.5  0.2   5   
3813  568         6           3    2       0  6.0  1.8  1.0   5   
3814  568         6           3    3       1  3.0  1.8  0.5   5   
3815  568         6           3    4       0  0.0  0.0  0.0   0   

      choice_biogeme  
0                  2  
1                  2  
2                  2  
3                  2  
4                  3  
...              ...  
3811               2  
3812       

In [3]:
# Variables (use ONLY columns that exist)
Choice = Variable('choice_biogeme')   # chosen alternative id (per your biogeme loglogit signature)
Price = Variable('x1')
Time = Variable('x2')
AvailProb = Variable('x3')     # renamed to avoid confusion with old "Avail" variable
WalkTime = Variable('x4')

# Coefficients
B_PRICE = Beta('B_PRICE', 0, None, None, 0)
B_TIME  = Beta('B_TIME',  0, None, None, 0)
B_WALK  = Beta('B_WALK',  0, None, None, 0)
B_AVAIL = Beta('B_AVAIL', 0, None, None, 0)
ASC_2 = Beta('ASC_2', 0, None, None, 0)
ASC_3 = Beta('ASC_3', 0, None, None, 0)
ASC_OPT_OUT = 0


In [4]:
# Identify opt-out
optout = df['alt'] == 4

# Zero out all attributes for opt-out
df.loc[optout, ['x1', 'x2', 'x3', 'x4']] = 0

# Utility functions
V = {
    1: B_PRICE * Price + B_TIME * Time + B_WALK * WalkTime + B_AVAIL * AvailProb,
    2: ASC_2 + B_PRICE * Price + B_TIME * Time + B_WALK * WalkTime + B_AVAIL * AvailProb,
    3: ASC_3 + B_PRICE * Price + B_TIME * Time + B_WALK * WalkTime + B_AVAIL * AvailProb,
    4: ASC_OPT_OUT,
}

# Availability (all available)
av = {1: 1, 2: 1, 3: 1, 4: 1}


In [6]:

# Logit model (your biogeme version: 3 args)
logprob = models.loglogit(V, av, Choice)

biogeme = bio.BIOGEME(database, logprob)
biogeme.model_name = 'mnl_full'

results = biogeme.estimate()
results.get_estimated_parameters()



  results.get_estimated_parameters()


Unnamed: 0,Name,Value,Robust std err.,Robust t-stat.,Robust p-value
0,B_PRICE,0.200107,0.037138,5.388189,7.117133e-08
1,B_TIME,0.167257,0.038626,4.33012,1.490283e-05
2,B_WALK,0.021982,0.018873,1.164683,0.2441474
3,B_AVAIL,0.362669,0.223935,1.619523,0.1053348
4,ASC_2,0.436924,0.041397,10.55449,0.0
5,ASC_3,0.290929,0.042768,6.802417,1.028777e-11


In [9]:
df.groupby('CHOICE')[['x1','x2','x4','x3']].mean()


Unnamed: 0_level_0,x1,x2,x4,x3
CHOICE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2.942767,1.981237,3.736897,0.349161
1,2.887945,3.212998,3.79979,0.652516


In [10]:
df[df["alt"] != 4].groupby("CHOICE")[["x1","x2","x3","x4"]].mean()

Unnamed: 0_level_0,x1,x2,x3,x4
CHOICE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,4.310235,2.901894,0.511412,5.473388
1,3.034251,3.375771,0.685573,3.992291


In [18]:
df.groupby('alt')[['x1','x2','x4','x3']].mean()
df.groupby('alt')['CHOICE'].mean()


alt
1    0.275681
2    0.362683
3    0.313417
4    0.048218
Name: CHOICE, dtype: float64

In [7]:
df["x1"].unique()
df["x2"].unique()
df["x3"].unique()
df["x4"].unique()


array([10,  5,  0])

In [8]:
for c in ["x1","x2","x3","x4"]:
    u = sorted(df[c].dropna().unique())
    print(c, u, "count=", len(u))


x1 [np.float64(0.0), np.float64(1.7), np.float64(3.0), np.float64(4.5), np.float64(6.0)] count= 5
x2 [np.float64(0.0), np.float64(0.8), np.float64(1.8), np.float64(3.6), np.float64(5.5)] count= 5
x3 [np.float64(0.0), np.float64(0.2), np.float64(0.5), np.float64(1.0)] count= 4
x4 [np.int64(0), np.int64(5), np.int64(10)] count= 3


In [11]:
df[["x1","x2","x3","x4"]].corr()


Unnamed: 0,x1,x2,x3,x4
x1,1.0,0.256373,0.490419,0.420487
x2,0.256373,1.0,0.440051,0.351667
x3,0.490419,0.440051,1.0,0.32365
x4,0.420487,0.351667,0.32365,1.0
