In [7]:
import numpy as np
import pandas as pd
import scipy
from scipy.optimize import minimize
import numdifftools
from tabulate import tabulate
import matplotlib.pyplot as plt
%matplotlib inline

A data "Mode" from mlogit is used.  
Choose a transportation from "car", "carpool", "bus", and "rail" based on "cost" and "time".

In [2]:
data_wide = pd.read_csv("../../data/mode_wide.csv", index_col=0)
data_wide.head()

Unnamed: 0,choice,cost.car,cost.carpool,cost.bus,cost.rail,time.car,time.carpool,time.bus,time.rail
1,car,1.50701,2.335612,1.800512,2.35892,18.5032,26.338233,20.867794,30.033469
2,rail,6.056998,2.896919,2.237128,1.855451,31.311107,34.256956,67.181889,60.293126
3,car,5.794677,2.137454,2.576385,2.747479,22.547429,23.255171,63.309057,49.171643
4,car,1.869144,2.572427,1.903518,2.268276,26.090282,29.896023,19.752704,13.472675
5,car,2.498952,1.72201,2.686,2.973866,4.69914,12.414084,43.092039,39.743252


In [3]:
data_long = pd.read_csv("../../data/mode_long.csv", index_col=0)
data_long.head(10)

Unnamed: 0,choice,alt,cost,time,chid
1.bus,False,bus,1.800512,20.867794,1
1.car,True,car,1.50701,18.5032,1
1.carpool,False,carpool,2.335612,26.338233,1
1.rail,False,rail,2.35892,30.033469,1
2.bus,False,bus,2.237128,67.181889,2
2.car,False,car,6.056998,31.311107,2
2.carpool,False,carpool,2.896919,34.256956,2
2.rail,True,rail,1.855451,60.293126,2
3.bus,False,bus,2.576385,63.309057,3
3.car,True,car,5.794677,22.547429,3


log-Likelihood

$$LL(\beta) = \sum_n \sum_i y_{in} \left\{\sum_k \beta_k x_{ik}-\log\sum_j \exp\left(\sum_k \beta_k x_{ik}\right) \right\}$$

In [13]:
# Log-likelihood
def LL(beta):
    data = data_long
    LL_ = 0
    a1, a2, a3, b1, b2 = beta
    term2 = 0
    for i in range(data.chid[-1]):
        if i%4 == 0:
            a = 0
        elif i%4 == 1:
            a = a1
        elif i%4 == 2:
            a = a2
        elif i%4 == 3:
            a = a3
            
        if data.choice[i] is np.bool_(True):
            term1 = b1*data.cost[i]+b2*data.time[i]+a
            each_alt = np.exp(b1*data.cost[i]+b2*data.time[i]+a)
            term2 += each_alt
        else:
            each_alt = np.exp(b1*data.cost[i]+b2*data.time[i]+a)
            term2 += each_alt
        if i%4 == 3:
            LL_ += term1 - np.log(term2)
            term2 = 0
            
    return -LL_

# Log-likelihood with only intercept
def LL0(beta):
    data = data_long
    LL_ = 0
    a1, a2, a3 = beta
    term2 = 0
    for i in range(data.chid[-1]):
        if i%4 == 0:
            a = 0
        elif i%4 == 1:
            a = a1
        elif i%4 == 2:
            a = a2
        elif i%4 == 3:
            a = a3
            
        if data.choice[i] is np.bool_(True):
            term1 = a
            each_alt = np.exp(a)
            term2 += each_alt
        else:
            each_alt = np.exp(a)
            term2 += each_alt
        if i%4 == 3:
            LL_ += term1 - np.log(term2)
            term2 = 0
    return -LL_

In [14]:
beta0 = [0, 0, 0, 0, 0]
result = minimize(LL, beta0, method="L-BFGS-B", options={"gtol":1e-18, "disp": True})

opt_beta = result.x
hess = numdifftools.core.Hessian(LL)(opt_beta)
stdev = np.sqrt(np.diagonal(np.linalg.inv(hess)))

beta0 = [0, 0, 0]
result0 = minimize(LL0, beta0, method="L-BFGS-B", options={"gtol":1e-18, "disp": True})

LL = -result.fun
LL0 = -result0.fun

In [15]:
print("log-Likelihood:", -result.fun)
print("McFadden R2:", 1-(LL/LL0))

headers = ["parameter", "estimated", "t-value", "p-value"]
table = [["car (intercept):", opt_beta[0], opt_beta[0]/stdev[0], 0],
         ["carpool (intercept):", opt_beta[1], opt_beta[1]/stdev[1], 0],
         ["rail (intercept):", opt_beta[2], opt_beta[2]/stdev[2], 0],
         ["cost:", opt_beta[3], opt_beta[3]/stdev[3], 0],
         ["time:", opt_beta[4], opt_beta[4]/stdev[4], 0]]
result = tabulate(table, headers, tablefmt="grid")
print(result)

log-Likelihood: -84.68914125204095
McFadden R2: 0.3659587359482097
+----------------------+-------------+-----------+-----------+
| parameter            |   estimated |   t-value |   p-value |
| car (intercept):     |   3.94002   |   5.65063 |         0 |
+----------------------+-------------+-----------+-----------+
| carpool (intercept): |  -0.798405  |  -1.73446 |         0 |
+----------------------+-------------+-----------+-----------+
| rail (intercept):    |   0.403236  |   1.14461 |         0 |
+----------------------+-------------+-----------+-----------+
| cost:                |  -0.977843  |  -4.91527 |         0 |
+----------------------+-------------+-----------+-----------+
| time:                |  -0.0877071 |  -5.65922 |         0 |
+----------------------+-------------+-----------+-----------+
