# Parametric multinomial choice

In [None]:
import numpy as np
import os
import pandas as pd
import string as str
import math
import sys
import time

from scipy import optimize, special
import gurobipy as grb

from sklearn.preprocessing import LabelEncoder
from IPython.display import display, HTML

In [None]:
# Keep everything of the current working directory that precedes the
# "veteran_students_mec_optim/Pauline" folder. The marker is assembled with
# os.path.join so it uses the separator of the running platform (on Windows
# this is exactly the former hard-coded "veteran_students_mec_optim\\Pauline").
thePath = os.getcwd().split(os.path.join("veteran_students_mec_optim", "Pauline"))[0]
# Read the travel-mode data set; the path is joined component by component so
# no separator is hard-coded and a missing trailing separator is tolerated.
travelmode = pd.read_csv(
    os.path.join(thePath, 'data_mec_optim', 'demand_travelmode', 'travelmodedata.csv'),
    sep=',',
)

In [None]:
# Replace the 'choice' column by integer labels: the encoder is fitted on the
# column and the same column is then transformed.
# The 'mode' column is deliberately not encoded here.
lb = LabelEncoder()
lb.fit(travelmode['choice'])
travelmode['choice'] = lb.transform(travelmode['choice'])

In [None]:
# Dimensions of the data set: rows and columns of the frame, the number of
# alternatives per individual, and the implied number of individuals.
nobs, ncols = travelmode.shape
nbchoices = 4
ninds = int(nobs / nbchoices)

In [None]:
# Observed choices: view the encoded 'choice' column as (ninds, nbchoices),
# transpose it to (nbchoices, ninds), then flatten that row by row into a
# single vector.
muhat_i_y = np.transpose(np.reshape(travelmode['choice'].values, (ninds, nbchoices)))
muhat_iy = muhat_i_y.flatten()

In [None]:
# Order in which the travel modes must appear after sorting.
sorter = ['air', 'train', 'bus', 'car']
travelmode['mode'] = travelmode['mode'].astype("category")
# set_categories returns a new Series; the `inplace=True` form used previously
# is no longer available in current pandas, so the result is assigned back.
travelmode['mode'] = travelmode['mode'].cat.set_categories(sorter)
travelmode.columns = travelmode.columns.str.strip()
# Sorting a categorical column follows the category order given in `sorter`,
# so rows end up grouped by mode and, within a mode, ordered by individual.
travelmode.sort_values(['mode','individual'], inplace = True)

In [None]:
# Preview the first five rows of the sorted data set.
travelmode.head(5)

## Log likelihood estimation

In [None]:
# Regressor matrix with one row per (alternative, individual) pair.
# Columns:
#   * one dummy per alternative except the first one (identity matrix with its
#     first column removed, each row repeated ninds times via the Kronecker
#     product) -- sized with nbchoices instead of a literal 4;
#   * minus the 'travel' column;
#   * minus 'travel' times 'income';
#   * minus the 'gcost' column.
Phi_iy_k = np.column_stack((
    np.kron(np.identity(nbchoices)[:, 1:], np.ones((ninds, 1))),
    -travelmode['travel'].values,
    -(travelmode['travel'] * travelmode['income']).values,
    -travelmode['gcost'].values,
))

In [None]:
# Number of regressors, then column-wise standardisation of the regressor
# matrix: subtract each column's mean and divide by its sample standard
# deviation (ddof=1). Broadcasting applies both operations column by column.
nbK = Phi_iy_k.shape[1]
phi_mean = np.mean(Phi_iy_k, axis=0)
phi_stdev = np.std(Phi_iy_k, axis=0, ddof=1)
Phi_iy_k = (Phi_iy_k - phi_mean) / phi_stdev

In [None]:
def log_likelihood(theta):
    """Return minus the (sigma-scaled) logit log-likelihood at ``theta``.

    Reads the module-level ``Phi_iy_k``, ``muhat_iy``, ``sigma``,
    ``nbchoices`` and ``ninds``.

    The value is ``-(sum(Phi.theta * muhat) - sigma * sum_i logsumexp_y(Phi.theta / sigma))``.
    Every term is scaled by ``sigma`` consistently, so the result is the
    function whose gradient ``grad_log_likelihood`` returns; for ``sigma == 1``
    it coincides with the previous implementation.

    Parameters
    ----------
    theta : array-like of length ``Phi_iy_k.shape[1]``
        Parameter vector.

    Returns
    -------
    float
        The negated objective, suitable for a minimiser.
    """
    Xtheta = Phi_iy_k.dot(theta) / sigma
    # One row per individual, one column per alternative.
    Xthetamat_iy = Xtheta.reshape(nbchoices, ninds).T
    # Subtract the row maximum before exponentiating to avoid overflow.
    max_i = np.amax(Xthetamat_iy, axis=1)
    expPhi_iy = np.exp(Xthetamat_iy - max_i[:, None])
    d_i = np.sum(expPhi_iy, axis=1)

    val = sigma * (np.sum(np.multiply(Xtheta, muhat_iy)) - np.sum(max_i) - np.sum(np.log(d_i)))

    return -val

In [None]:
def grad_log_likelihood(theta):
    """Return minus the gradient of the logit objective at ``theta``.

    Reads the module-level ``Phi_iy_k``, ``muhat_iy``, ``sigma``,
    ``nbchoices`` and ``ninds``.

    The gradient is ``Phi' (muhat - p)`` where ``p`` holds the softmax choice
    probabilities of each individual computed from ``Phi.theta / sigma``.

    Parameters
    ----------
    theta : array-like of length ``Phi_iy_k.shape[1]``
        Parameter vector.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``Phi_iy_k.shape[1]`` with the negated gradient.
    """
    Xtheta = Phi_iy_k.dot(theta) / sigma
    # One row per individual, one column per alternative.
    Xthetamat_iy = Xtheta.reshape(nbchoices, ninds).T
    # Subtract the row maximum before exponentiating to avoid overflow.
    max_i = np.amax(Xthetamat_iy, axis=1)
    expPhi_iy = np.exp(Xthetamat_iy - max_i[:, None])
    d_i = np.sum(expPhi_iy, axis=1)

    # Choice probabilities, flattened back to the row order of Phi_iy_k
    # (all individuals for the first alternative, then the second, ...).
    prob_iy = (expPhi_iy / d_i[:, None]).T.flatten()

    thegrad = Phi_iy_k.T.dot(muhat_iy - prob_iy)

    return -thegrad

In [None]:
# Unconstrained maximum-likelihood fit: conjugate-gradient minimisation of
# the negated log-likelihood, started from the zero vector and using the
# analytical gradient. The scale parameter is fixed to one.
sigma = 1
theta0 = np.zeros(nbK, dtype=int)
outcome = optimize.minimize(
    log_likelihood,
    theta0,
    method='CG',
    jac=grad_log_likelihood,
)

In [None]:
# Display the result object returned by optimize.minimize for the MLE fit.
outcome

In [None]:
# The temperature is the inverse of the first estimated coefficient; the whole
# coefficient vector is then rescaled by it, so its first entry becomes one.
temp_mle = 1 / outcome.x[0]
theta_mle = temp_mle * outcome.x

## Fixed temperature log likelihood estimation

In [None]:
# Variable indices of the LP: nbK parameters followed by ninds further
# variables, one per individual.
objList = list(range(nbK + ninds))
lenobj = len(objList)
# Objective coefficients: muhat' Phi on the parameters, -1 on the rest.
c = np.concatenate((
    muhat_iy.reshape(1, nbchoices*ninds).dot(Phi_iy_k).flatten(),
    np.full(ninds, -1),
))

# Maximisation problem with free (unbounded below) variables.
m = grb.Model('lp')
m.ModelSense = grb.GRB.MAXIMIZE
x = m.addVars(objList, lb=-math.inf, obj=c, name='x')

In [None]:
rhs = np.repeat(0,ninds*nbchoices)
id_ind = np.identity(ninds)
# The ordered list of decision variables does not change inside the loops,
# so it is extracted once.
lp_vars = x.select('*')

# One constraint per row of Phi_iy_k:
#   -Phi_iy_k[i, :] . theta + u[i mod ninds] >= 0
# The row of the identity matrix is selected with `i % ninds` (previously a
# literal 210), so the code follows the actual number of individuals.
for i in range(ninds*nbchoices):
    L = grb.LinExpr(np.concatenate((-Phi_iy_k[i,:],id_ind[i % ninds,:])),lp_vars)
    m.addConstr(L,'>',rhs[i])

# Normalisation: the first decision variable is fixed to one.
Last = grb.LinExpr(np.concatenate(([1],np.repeat(0,lenobj-1))),lp_vars)
m.addConstr(Last, '=', 1)

In [None]:
m.optimize()

# When the solver reports optimality, print the objective value and collect
# the values of all decision variables, in variable order, into opt_x.
if m.status == grb.GRB.OPTIMAL:
    print("Value of the problem (Gurobi) =", m.ObjVal)
    opt_x = m.getAttr('X', x).select('*')

In [None]:
# Parameter part of the LP solution (first nbK variables).
theta_lp = np.asarray(opt_x[:nbK])
# Temperature grid settings: indMax steps up to the MLE temperature, and a
# result table with one row per grid point plus one and nbK-1 columns.
indMax = 100
tempMax = temp_mle
outcomemat = np.zeros(shape=(indMax + 1, nbK - 1))

In [None]:
def log_likelihood_fixedtemp(subsetoftheta, *temp):
    """Evaluate ``log_likelihood`` with the first coefficient pinned.

    The full parameter vector is formed by putting ``1 / temp[0]`` in front of
    ``subsetoftheta``; the value of ``log_likelihood`` at that vector is
    returned unchanged.

    Parameters
    ----------
    subsetoftheta : array-like
        The free coefficients (all but the first).
    *temp :
        Extra positional arguments; only ``temp[0]``, the temperature, is used.
    """
    pinned_first = 1 / temp[0]
    full_theta = np.concatenate((np.ravel(pinned_first), np.ravel(subsetoftheta)))
    return log_likelihood(full_theta)

In [None]:
def grad_log_likelihood_fixedtemp(subsetoftheta,*temp):
    """Gradient companion of ``log_likelihood_fixedtemp``.

    Calls ``grad_log_likelihood`` at the vector made of ``1 / temp[0]``
    followed by ``subsetoftheta`` and returns every component except the
    first one, i.e. the derivatives with respect to the free coefficients.

    Parameters
    ----------
    subsetoftheta : array-like
        The free coefficients (all but the first).
    *temp :
        Extra positional arguments; only ``temp[0]``, the temperature, is used.
    """
    full_theta = np.append(1 / temp[0], subsetoftheta)
    full_gradient = grad_log_likelihood(full_theta)
    return np.delete(full_gradient, 0)

In [None]:
# Row 0 holds the LP solution without its first (normalised) component.
outcomemat[0, :] = theta_lp[1:]
iterMax = indMax + 1
# Rows 1..indMax: re-estimate the free coefficients at temperatures
# tempMax/indMax, 2*tempMax/indMax, ..., tempMax, each time starting from
# zeros, and store the estimates multiplied by the temperature.
for k in range(2, iterMax + 1):
    thetemp = tempMax * (k - 1) / indMax
    outcomeFixedTemp = optimize.minimize(
        log_likelihood_fixedtemp,
        theta0[:-1],
        args=(thetemp,),
        method='CG',
        jac=grad_log_likelihood_fixedtemp,
    )
    outcomemat[k - 1, :] = thetemp * outcomeFixedTemp.x

The zero-temperature estimator is:

In [None]:
# Row 0 of outcomemat is the LP solution, i.e. the zero-temperature case;
# row 1 already corresponds to the first positive temperature of the grid.
print(outcomemat[0,:])

The maximum-likelihood estimator (MLE) is:

In [None]:
# The last row was estimated at temperature tempMax, the MLE temperature.
print(outcomemat[indMax, :])

## Adding heterogeneities

In [None]:
# nbB multiplies the number of random draws and LP variables created in the
# following cells (presumably the number of simulated draws per individual);
# the temperature is reset to one.
nbB, thetemp = 50, 1

In [None]:
# Variable indices of the simulated LP: nbK parameters followed by ninds*nbB
# further variables.
objListnew = list(range(ninds*nbB + nbK))
lenobj = len(objListnew)

# Objective coefficients: muhat' Phi on the parameters and -1/nbB on each of
# the remaining variables.
newc = np.concatenate((
    muhat_iy.reshape(1, nbchoices*ninds).dot(Phi_iy_k).flatten(),
    np.full(ninds*nbB, -1/nbB),
))
# Maximisation problem with free (unbounded below) variables.
newm = grb.Model('new_lp')
newm.ModelSense = grb.GRB.MAXIMIZE
newx = newm.addVars(objListnew, lb=-math.inf, obj=newc, name='newx')

In [None]:
# Dense identity matrix with one row per additional LP variable.
id_ind = np.eye(ninds*nbB)
# Random right-hand sides: -log(-log(U)) with U uniform on (0, 1) is a
# standard Gumbel draw; adding digamma(1) (minus Euler's constant) centres it.
# No seed is set, so the draws differ from run to run.
epsilon_biy = special.digamma(1) - np.log(-np.log(np.random.uniform(size=ninds*nbchoices*nbB)))

In [None]:
ptm = time.time()
# Split the decision variables once: the first nbK are the parameters, the
# remaining ninds*nbB are the additional per-draw variables.
newx_vars = newx.select('*')
theta_vars = newx_vars[:nbK]
u_vars = newx_vars[nbK:]
neg_phi_rows = (-Phi_iy_k).tolist()
nb_u = ninds*nbB

# Constraint i reads
#   -Phi_iy_k[i // nbB, :] . theta + u[i mod (ninds*nbB)] >= epsilon_biy[i].
# Only these nbK + 1 non-zero terms are passed to the solver; the previous
# version concatenated a full row of a (ninds*nbB)-sized identity matrix and
# therefore built an expression over every variable for every constraint.
for i in range(ninds*nbchoices*nbB):
    L = grb.LinExpr(neg_phi_rows[i // nbB] + [1.0], theta_vars + [u_vars[i % nb_u]])
    newm.addConstr(L,'>',epsilon_biy[i])

diff = time.time() - ptm
print('Time elapsed = ', diff, 's.')

In [None]:
newm.optimize()

# When the solver reports optimality, print the objective value and collect
# the solution. The values must be queried for `newx`, the variables that
# belong to `newm`; `x` belongs to the earlier model `m`.
if newm.status == grb.GRB.Status.OPTIMAL:
    print("Value of the problem (Gurobi) =", newm.objval)
    opt_x = newm.getAttr('x',newx).select('*')

In [None]:
# Parameter part of the simulated-LP solution, normalised by its first entry.
# All nbK parameters are kept (slice [:nbK], as done for theta_lp); the former
# slice [0:nbK-1] silently dropped the last coefficient.
newtheta_lp = np.asarray(opt_x[:nbK])/opt_x[0]

The simulated LP estimator is:

In [None]:
# Show the normalised estimates without their first component.
newtheta_lp[1:]