# Summary of time taken and brier scores for jaxlogit, xlogit, and biogeme
Where the estimation is taking place on the same data used to train

| | jaxlogit | xlogit | biogeme |
|---|---|---|---|
|Making Model | 43.9s | 18.22s | 4:24 |
|Estimating | 1.4s | 0.2s | 1:07 |
|Brier Score | 0.6275 | 0.6275 | 0.62714 |

# Setup

In [31]:
import pandas as pd
import numpy as np
import jax
import pathlib
import xlogit
import sklearn

from jaxlogit.mixed_logit import MixedLogit, ConfigData

#  64bit precision
jax.config.update("jax_enable_x64", True)

# Get the full electricity dataset

In [32]:
df = pd.read_csv(pathlib.Path.cwd() / "electricity_long.csv")
varnames = ['pf', 'cl', 'loc', 'wk', 'tod', 'seas']
n_draws = 600

In [33]:
X = df[varnames]
y = df['choice']

ids = df['chid']
alts = df['alt']
panels = df['id']
randvars = {'pf': 'n', 'cl': 'n', 'loc': 'n', 'wk': 'n', 'tod': 'n', 'seas': 'n'}

model_jax = MixedLogit()
model_x = xlogit.MixedLogit()

config = ConfigData(
    panels=panels,
    n_draws=n_draws,
    skip_std_errs=True,  # skip standard errors to speed up the example
    batch_size=None,
    optim_method="L-BFGS-B",
)
init_coeff = None

# Make the model in jaxlogit

In [34]:
model_jax.fit(
    X=X,
    y=y,
    varnames=varnames,
    ids=ids,
    alts=alts,
    randvars=randvars,
    config=config
)
display(model_jax.summary())
init_coeff_j = model_jax.coeff_

    Message: CONVERGENCE: RELATIVE REDUCTION OF F <= FACTR*EPSMCH
    Iterations: 80
    Function evaluations: 97
Estimation time= 41.4 seconds
---------------------------------------------------------------------------
Coefficient              Estimate      Std.Err.         z-val         P>|z|
---------------------------------------------------------------------------
pf                     -0.9972244     1.0000000    -0.9972244         0.319    
cl                     -0.2196763     1.0000000    -0.2196763         0.826    
loc                     2.2901926     1.0000000     2.2901926        0.0221 *  
wk                      1.6943196     1.0000000     1.6943196        0.0903 .  
tod                    -9.6753913     1.0000000    -9.6753913       6.4e-22 ***
seas                   -9.6962087     1.0000000    -9.6962087      5.24e-22 ***
sd.pf                  -1.3984445     1.0000000    -1.3984445         0.162    
sd.cl                  -0.6750223     1.0000000    -0.6750223       

None

# Make the model in xlogit

In [None]:
model_x.fit(
    X=X,
    y=y,
    varnames=varnames,
    ids=ids,
    alts=alts,
    randvars=randvars,
    panels=panels,
    n_draws=n_draws,
    skip_std_errs=True,  # skip standard errors to speed up the example
    batch_size=None,
    optim_method="L-BFGS-B",
)
display(model_x.summary())
init_coeff_x = model_x.coeff_

# Predict from the model using jaxlogit

In [None]:
model = model_jax 
config = ConfigData(
    panels=panels,
    n_draws=n_draws,
    skip_std_errs=True,  # skip standard errors to speed up the example
    batch_size=None,
    optim_method="L-BFGS-B",
)
config.init_coeff = init_coeff_j

In [None]:
prob_jj = model.predict(X, varnames, alts, ids, randvars, config)

# Predict from the model using xlogit

In [None]:
model = model_x
_, prob_xx = model.predict(X, varnames, alts, ids, isvars=None, panels=panels, n_draws=n_draws, return_proba=True)

# Setup Biogeme

In [None]:
import biogeme.biogeme_logging as blog
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta, bioDraws, log, MonteCarlo, PanelLikelihoodTrajectory
import biogeme.database as db
from biogeme.expressions import Variable
import pandas as pd
import numpy as np
import pathlib
import sklearn

logger = blog.get_screen_logger()
logger.setLevel(blog.ERROR)

In [None]:
varnames = ['pf', 'cl', 'loc', 'wk', 'tod', 'seas']
df_long = pd.read_csv(pathlib.Path.cwd() / "electricity_long.csv")
choice_df = df_long.loc[df_long['choice'] == 1, ['id', 'chid', 'alt']]
choice_df = choice_df.rename(columns={'alt': 'choice'})
df_wide = df_long.pivot(index=['id', 'chid'], columns='alt', values=varnames)
df_wide.columns = [f'{var}_{alt}' for var, alt in df_wide.columns]
df_wide = df_wide.reset_index()
df = df_wide.merge(
    choice_df,
    on=['id', 'chid'],
    how='inner',
    validate='one_to_one'
)

database = db.Database('electricity', df)
database.panel('id')

In [None]:
X = {
    name: {
        j: Variable(f"{name}_{j}")
        for j in [1,2,3,4]
    }
    for name in varnames
}

alt_1 = Beta('alt_1', 0, None, None, 0)
alt_2 = Beta('alt_2', 0, None, None, 0)
alt_3 = Beta('alt_3', 0, None, None, 0)
alt_4 = Beta('alt_4', 0, None, None, 1)

pf_mean = Beta('pf_mean', 0, None, None, 0)
pf_sd = Beta('pf_sd', 1, None, None, 0)
cl_mean = Beta('cl_mean', 0, None, None, 0)
cl_sd = Beta('cl_sd', 1, None, None, 0)
loc_mean = Beta('loc_mean', 0, None, None, 0)
loc_sd = Beta('loc_sd', 1, None, None, 0)
wk_mean = Beta('wk_mean', 0, None, None, 0)
wk_sd = Beta('wk_sd', 1, None, None, 0)
tod_mean = Beta('tod_mean', 0, None, None, 0)
tod_sd = Beta('tod_sd', 1, None, None, 0)
seas_mean = Beta('seas_mean', 0, None, None, 0)
seas_sd = Beta('seas_sd', 1, None, None, 0)

pf_rnd = pf_mean + pf_sd * bioDraws('pf_rnd', 'NORMAL')
cl_rnd = cl_mean + cl_sd * bioDraws('cl_rnd', 'NORMAL')
loc_rnd = loc_mean + loc_sd * bioDraws('loc_rnd', 'NORMAL')
wk_rnd = wk_mean + wk_sd * bioDraws('wk_rnd', 'NORMAL')
tod_rnd = tod_mean + tod_sd * bioDraws('tod_rnd', 'NORMAL')
seas_rnd = seas_mean + seas_sd * bioDraws('seas_rnd', 'NORMAL')

choice = Variable('choice')

V = {
    j: pf_rnd * X['pf'][j] + cl_rnd * X['cl'][j] + loc_rnd * X['loc'][j] + wk_rnd * X['wk'][j] + tod_rnd * X['tod'][j] + seas_rnd * X['seas'][j]
    for j in [1,2,3,4]
}

# Make the Biogeme Model

In [None]:
prob = models.logit(V, None, choice)
logprob = log(MonteCarlo(PanelLikelihoodTrajectory(prob)))

the_biogeme = bio.BIOGEME(
    database, logprob, number_of_draws=n_draws, seed=999, generate_yaml=False, generate_html=False
)
the_biogeme.model_name = 'model_b'
results = the_biogeme.estimate()
print(results)

# Predict using Biogeme

In [None]:
database_sim = db.Database('electricity', df)

P = {
    j: MonteCarlo(models.logit(V, None, j))
    for j in [1, 2, 3, 4]
}

simulate = {
    f'Prob_alt{j}': P[j]
    for j in [1, 2, 3, 4]
}

biogeme_sim = bio.BIOGEME(database_sim, simulate)
biogeme_sim.model_name = 'per_choice_probs'

probs = biogeme_sim.simulate(results.get_beta_values())

# Test the results

Compare the probabilities and results

Format:
[prob prob ... prob] : [which chosen]

In [None]:
prob = prob_jj
for i in range(prob.shape[0]):
    print(f"{prob[i]} : {y[i]}")

Compute the brier score:

In [None]:
# Jaxlogit
y = np.reshape(y, (prob_jj.shape[0], -1))
print(sklearn.metrics.brier_score_loss(y, prob_jj))

In [None]:
# xlogit
y = np.reshape(y, (prob_xx.shape[0], -1))
print(sklearn.metrics.brier_score_loss(y, prob_xx))

In [None]:
# Biogeme
y = df['choice']
print(sklearn.metrics.brier_score_loss(y, probs))