# Setup

In [28]:
import pandas as pd
import numpy as np
import jax
import pathlib
import xlogit
import sklearn

from jaxlogit.mixed_logit import MixedLogit, ConfigData

In [29]:
#  64bit precision
jax.config.update("jax_enable_x64", True)

# Get the full electricity dataset

In [30]:
df = pd.read_csv(pathlib.Path.cwd() / "electricity_long.csv")
varnames = ['pf', 'cl', 'loc', 'wk', 'tod', 'seas']
n_draws = 600

In [31]:
X = df[varnames]
y = df['choice']

ids = df['chid']
alts = df['alt']
panels = df['id']
randvars = {'pf': 'n', 'cl': 'n', 'loc': 'n', 'wk': 'n', 'tod': 'n', 'seas': 'n'}

model = MixedLogit()

config = ConfigData(
    panels=panels,
    n_draws=n_draws,
    skip_std_errs=True,  # skip standard errors to speed up the example
    batch_size=None,
    optim_method="L-BFGS-B",
)
init_coeff = None

# Make the model in jaxlogit

In [32]:
model.fit(
    X=X,
    y=y,
    varnames=varnames,
    ids=ids,
    alts=alts,
    randvars=randvars,
    config=config
)
display(model.summary())
init_coeff = model.coeff_

    Message: CONVERGENCE: RELATIVE REDUCTION OF F <= FACTR*EPSMCH
    Iterations: 80
    Function evaluations: 97
Estimation time= 39.3 seconds
---------------------------------------------------------------------------
Coefficient              Estimate      Std.Err.         z-val         P>|z|
---------------------------------------------------------------------------
pf                     -0.9972244     1.0000000    -0.9972244         0.319    
cl                     -0.2196763     1.0000000    -0.2196763         0.826    
loc                     2.2901926     1.0000000     2.2901926        0.0221 *  
wk                      1.6943196     1.0000000     1.6943196        0.0903 .  
tod                    -9.6753913     1.0000000    -9.6753913       6.4e-22 ***
seas                   -9.6962087     1.0000000    -9.6962087      5.24e-22 ***
sd.pf                  -1.3984445     1.0000000    -1.3984445         0.162    
sd.cl                  -0.6750223     1.0000000    -0.6750223       

None

# Make the model in xlogit

In [27]:
model = xlogit.MixedLogit()

model.fit(
    X=X,
    y=y,
    varnames=varnames,
    ids=ids,
    alts=alts,
    randvars=randvars,
    panels=panels,
    n_draws=n_draws,
    skip_std_errs=True,  # skip standard errors to speed up the example
    batch_size=None,
    optim_method="L-BFGS-B",
)
display(model.summary())

Optimization terminated successfully.
    Message: CONVERGENCE: RELATIVE REDUCTION OF F <= FACTR*EPSMCH
    Iterations: 54
    Function evaluations: 60
Estimation time= 19.5 seconds
---------------------------------------------------------------------------
Coefficient              Estimate      Std.Err.         z-val         P>|z|
---------------------------------------------------------------------------
pf                     -0.9971860     1.0000000    -0.9971860         0.319    
cl                     -0.2196661     1.0000000    -0.2196661         0.826    
loc                     2.2902861     1.0000000     2.2902861        0.0221 *  
wk                      1.6943008     1.0000000     1.6943008        0.0903 .  
tod                    -9.6751588     1.0000000    -9.6751588      6.42e-22 ***
seas                   -9.6960039     1.0000000    -9.6960039      5.25e-22 ***
sd.pf                   0.2207141     1.0000000     0.2207141         0.825    
sd.cl                   0.4115

None

# Predict from the model

In [33]:
config = ConfigData(
    panels=panels,
    n_draws=n_draws,
    skip_std_errs=True,  # skip standard errors to speed up the example
    batch_size=None,
    optim_method="L-BFGS-B",
)

In [34]:
config.init_coeff = init_coeff
print(config.init_coeff)

[-0.99722437 -0.21967635  2.29019256  1.69431961 -9.67539134 -9.69620866
 -1.39844447 -0.67502234  1.60015514  0.8837811   2.16735603  1.22957323]


In [35]:
prob = model.predict(X, varnames, alts, ids, randvars, config)

Test the results

In [None]:
y = np.reshape(y, (prob.shape[0], -1))

total_counted = 0
correct = 0
for i in range(prob.shape[0]):
    y_index = np.argmax(y[i])
    if prob[i][y_index] == np.max(prob[i]):
        correct += 1
    total_counted += 1
print(f"percentage correct = {correct/total_counted}")

Compare the probabilities and results

Format:
[prob prob ... prob] : [which chosen]

In [None]:
for i in range(prob.shape[0]):
    print(f"{prob[i]} : {y[i]}")

In [None]:
y = np.reshape(y, (prob.shape[0], -1))
print(sklearn.metrics.brier_score_loss(y, prob))

ValueError: Found input variables with inconsistent numbers of samples: [4308, 17232]

# Don't forget to clear output when done