# Binary Logit Model

## Utility Functions
The utility for each alternative is defined as follows:

$$
U_{\text{car}} = \text{asc\_car} + b_{\text{time}} \cdot \text{auto\_time}
$$

$$
U_{\text{transit}} = b_{\text{time}} \cdot \text{transit\_time}
$$

### Normalization
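- The alternative-specific constant $\text{asc\_transit}$ for the transit alternative is **normalized to 0** to ensure model identification. Only differences in utility affect the choice probabilities, so with two alternatives only one constant can be estimated; fixing the other at 0 is standard practice in logit models and removes the redundant parameter.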

## Choice Probability
The probability of choosing an alternative $i$ (either car or transit) is given by the logit formula, which here is the two-alternative (binary) case of the multinomial logit model:

$$
P_i = \frac{\exp(U_i)}{\exp(U_{\text{car}}) + \exp(U_{\text{transit}})}
$$

Where:
- $P_{\text{car}}$ corresponds to the probability of choosing the car alternative.
- $P_{\text{transit}}$ corresponds to the probability of choosing the transit alternative.

## Log-Likelihood
The log-likelihood function is the sum of the log-probabilities of the chosen alternatives across all observations:

$$
\mathcal{L} = \sum_{n=1}^N \ln P_{i,n}
$$

Where:
- $N$: Total number of observations.
- $P_{i,n}$: Probability of the alternative $i$ that was actually chosen in observation $n$, evaluated with that observation's travel times.

### Negative Log-Likelihood (for Optimization)
For parameter estimation, the negative log-likelihood is minimized:

$$
-\mathcal{L} = -\sum_{n=1}^N \ln P_{i,n}
$$

## Model Estimation
- **Parameters to Estimate:**  
  - $\text{asc\_car}$: Alternative-specific constant for the car alternative.
  - $b_{\text{time}}$: Coefficient representing the influence of travel time on the utility.
  
- **Optimization:**  
  The parameters are estimated using maximum likelihood estimation (MLE) by minimizing the negative log-likelihood.

In [18]:
import pandas as pd
from biogeme.database import Database
from biogeme.expressions import Variable, Beta
from biogeme.models import loglogit
from biogeme.biogeme import BIOGEME

# Build the 21-observation dataset: one travel time per mode plus the
# recorded choice (coded to match the keys of `utilities` below).
data = {
    "ID": pd.Series(range(1, 22)),
    "auto_time": pd.Series([
        52.9, 4.1, 4.1, 56.2, 51.8, 0.2, 27.6,
        89.9, 41.5, 95.0, 99.1, 18.5, 82.0, 8.6,
        22.5, 51.4, 81.0, 51.0, 62.2, 95.1, 41.6,
    ]),
    "transit_time": pd.Series([
        4.4, 28.5, 86.9, 31.6, 20.2, 91.2, 79.7,
        2.2, 24.5, 43.5, 8.4, 84.0, 38.0, 1.6,
        74.1, 83.8, 19.2, 85.0, 90.1, 22.2, 91.5,
    ]),
    "choice": pd.Series([
        1, 1, 0, 1, 1, 0, 0,
        1, 1, 1, 1, 0, 0, 1,
        0, 0, 1, 0, 0, 1, 0,
    ]),
}
pandas_dataframe = pd.DataFrame(data)

# Wrap the frame in a Biogeme database so expressions can refer to its
# columns by name.
biogeme_database = Database("ben_akiva_lerman", pandas_dataframe)

# Symbolic handles on the database columns used by the model.
auto_time = Variable("auto_time")
transit_time = Variable("transit_time")
choice = Variable("choice")

# Free parameters, both starting at 0 with no bounds; the trailing 0
# flags them as "to be estimated".
asc_car = Beta("asc_car", 0, None, None, 0)
b_time = Beta("b_time", 0, None, None, 0)

# Systematic utilities. Transit has no constant (normalized to 0).
utility_car = asc_car + b_time * auto_time
utility_transit = b_time * transit_time

# Map each value of `choice` to its utility: 0 -> car, 1 -> transit.
utilities = {0: utility_car, 1: utility_transit}

# Log of the logit probability of the chosen alternative; `None` means
# no availability conditions are supplied.
log_choice_probability = loglogit(utilities, None, choice)

# Assemble the estimation problem and give it a name.
biogeme_object = BIOGEME(biogeme_database, log_choice_probability)
biogeme_object.modelName = "first_model"

# Run the maximum likelihood estimation.
results = biogeme_object.estimate()

# Headline fit statistics.
print(results.short_summary())

# Table of estimates (shown as the notebook cell's output).
results.getEstimatedParameters()

Results for model first_model
Nbr of parameters:		2
Sample size:			21
Excluded data:			0
Final log likelihood:		-6.166042
Akaike Information Criterion:	16.33208
Bayesian Information Criterion:	18.42113



Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_car,-0.237573,0.805174,-0.295058,0.76795
b_time,-0.05311,0.021672,-2.450673,0.014259


In [28]:
import jax
import jax.numpy as jnp
from jax import grad, hessian
from scipy.optimize import minimize
import pandas as pd
import numpy as np
from scipy.stats import norm

class BinaryLogitModel:
    """Binary (car vs. transit) logit model estimated by maximum likelihood.

    Utilities are::

        U_car     = asc_car + b_time * auto_time
        U_transit =           b_time * transit_time

    ``choice`` holds the index of the chosen alternative for every
    observation: 0 = car, 1 = transit.

    NOTE(review): JAX computes in 32-bit floats unless ``jax_enable_x64``
    is switched on, so estimates and standard errors carry single-precision
    accuracy. Enable x64 before constructing the model if tighter agreement
    with other software is required.
    """

    def __init__(self, auto_time, transit_time, choice):
        """Store the data as JAX arrays after basic validation.

        Args:
            auto_time: 1-D sequence of car travel times.
            transit_time: 1-D sequence of transit travel times.
            choice: 1-D sequence of chosen alternatives (0 = car, 1 = transit).

        Raises:
            ValueError: if the inputs are not 1-D sequences of equal length,
                or if ``choice`` contains anything other than 0 and 1.
        """
        choice_codes = np.asarray(choice)
        if choice_codes.ndim != 1:
            raise ValueError("choice must be a one-dimensional sequence")
        n_obs = len(choice_codes)
        if np.shape(auto_time) != (n_obs,) or np.shape(transit_time) != (n_obs,):
            raise ValueError(
                "auto_time, transit_time and choice must have the same length"
            )
        # JAX clamps out-of-range indices instead of raising, so a different
        # coding (e.g. 1/2) would silently produce wrong probabilities.
        if not np.isin(choice_codes, (0, 1)).all():
            raise ValueError("choice must be coded 0 (car) or 1 (transit)")

        self.auto_time = jnp.array(auto_time)
        self.transit_time = jnp.array(transit_time)
        # Integer dtype is required because `choice` is used as an index.
        self.choice = jnp.asarray(choice_codes, dtype=jnp.int32)
        self.n_obs = n_obs

    def utility_functions(self, beta):
        """Return ``(U_car, U_transit)`` for ``beta = (asc_car, b_time)``."""
        asc_car, b_time = beta
        U_car = asc_car + b_time * self.auto_time
        U_transit = b_time * self.transit_time
        return U_car, U_transit

    def log_likelihood(self, beta):
        """Return the NEGATIVE log-likelihood at ``beta`` (for minimization).

        Log-probabilities come from ``log_softmax`` rather than from
        ``log(exp(U) / sum(exp(U)))``: the latter overflows for large
        utilities and needed an epsilon that biased the result.
        """
        U_car, U_transit = self.utility_functions(beta)
        # axis=1 is the alternative dimension: col 0 = car, col 1 = transit.
        utilities = jnp.stack([U_car, U_transit], axis=1)
        log_probabilities = jax.nn.log_softmax(utilities, axis=1)

        # Pick the log-probability of the alternative chosen in each row.
        chosen_log_prob = log_probabilities[jnp.arange(self.n_obs), self.choice]
        return -jnp.sum(chosen_log_prob)

    def fit(self, initial_guess=None):
        """Estimate the parameters, print a summary and return the results.

        Args:
            initial_guess: optional starting values ``(asc_car, b_time)``.
                Defaults to zeros, the same start the Biogeme example uses.

        Returns:
            Tuple ``(beta_hat, hessian_matrix, std_errors)`` of NumPy arrays.
            ``hessian_matrix`` is the Hessian of the negative log-likelihood
            at ``beta_hat``. ``std_errors`` are the classical standard errors
            from its inverse; they are not the robust (sandwich) standard
            errors that the Biogeme output reports as "Rob. Std err".

        Raises:
            numpy.linalg.LinAlgError: if the Hessian is singular.
        """
        import warnings

        if initial_guess is None:
            initial_guess = np.zeros(2)
        start = np.asarray(initial_guess, dtype=np.float64)

        # One compiled function yields value and gradient together, so each
        # optimizer step costs a single evaluation.
        value_and_grad_func = jax.jit(jax.value_and_grad(self.log_likelihood))
        hess_func = hessian(self.log_likelihood)

        def objective(beta):
            value, gradient = value_and_grad_func(jnp.asarray(beta))
            # SciPy's L-BFGS-B works in double precision.
            return float(value), np.asarray(gradient, dtype=np.float64)

        result = minimize(
            fun=objective,
            x0=start,
            method="L-BFGS-B",
            jac=True,
            options={"gtol": 1e-8, "maxiter": 1000},
        )
        if not result.success:
            warnings.warn(
                f"Optimizer did not report convergence: {result.message}",
                RuntimeWarning,
                stacklevel=2,
            )

        beta_hat = result.x
        hessian_matrix = np.asarray(
            hess_func(jnp.asarray(beta_hat)), dtype=np.float64
        )
        # Inverse Hessian of the negative log-likelihood estimates the
        # variance-covariance matrix of the parameters.
        vcov = np.linalg.inv(hessian_matrix)
        std_errors = np.sqrt(np.diag(vcov))
        t_stats = beta_hat / std_errors
        # Survival function keeps precision for large |t|, unlike 1 - cdf.
        p_values = 2 * norm.sf(np.abs(t_stats))

        # result.fun is the negative log-likelihood, so flip the sign back.
        final_ll = -result.fun
        k = len(beta_hat)
        n = self.n_obs
        aic = 2 * k - 2 * (final_ll)
        bic = k * np.log(n) - 2 * (final_ll)

        print(f"Nbr of parameters:\t{k}")
        print(f"Sample size:\t\t{n}")
        print(f"Final log likelihood:\t{final_ll:.6f}")
        print(f"AIC:\t\t\t{aic:.6f}")
        print(f"BIC:\t\t\t{bic:.6f}")
        print("\nParameter Estimates:")
        for i, param in enumerate(["asc_car", "b_time"]):
            print(f"{param}\t{beta_hat[i]:.6f}\t{std_errors[i]:.6f}\t{t_stats[i]:.6f}\t{p_values[i]:.6f}")

        return beta_hat, hessian_matrix, std_errors

# Same 21 observations as the Biogeme example: travel time by mode and
# the chosen alternative (0 = car, 1 = transit).
data = {
    "ID": pd.Series(range(1, 22)),
    "auto_time": pd.Series([
        52.9, 4.1, 4.1, 56.2, 51.8, 0.2, 27.6,
        89.9, 41.5, 95.0, 99.1, 18.5, 82.0, 8.6,
        22.5, 51.4, 81.0, 51.0, 62.2, 95.1, 41.6,
    ]),
    "transit_time": pd.Series([
        4.4, 28.5, 86.9, 31.6, 20.2, 91.2, 79.7,
        2.2, 24.5, 43.5, 8.4, 84.0, 38.0, 1.6,
        74.1, 83.8, 19.2, 85.0, 90.1, 22.2, 91.5,
    ]),
    "choice": pd.Series([
        1, 1, 0, 1, 1, 0, 0,
        1, 1, 1, 1, 0, 0, 1,
        0, 0, 1, 0, 0, 1, 0,
    ]),
}

pandas_dataframe = pd.DataFrame(data)

# Pull the three model inputs out of the frame as NumPy arrays.
auto_time, transit_time, choice = (
    pandas_dataframe[column].to_numpy()
    for column in ("auto_time", "transit_time", "choice")
)

# Estimate with the JAX implementation; fit() prints the summary table.
model = BinaryLogitModel(auto_time, transit_time, choice)
beta_hat, hessian_matrix, std_errors = model.fit()


Nbr of parameters:	2
Sample size:		21
Final log likelihood:	-6.166041
AIC:			16.332083
BIC:			18.421128

Parameter Estimates:
asc_car	-0.237589	0.750477	-0.316584	0.751559
b_time	-0.053110	0.020642	-2.572865	0.010086
