Loading the data

id: household

Choices:  
- heinz41  
- heinz32  
- heinz28  
- hunts32  

Variables:  
- disp (1/0)  
- feat (1/0)  
- price (Log?)  

In [1]:
import numpy as np
import pandas as pd
# Load the pickled DataFrame "data.pkl" from the current working directory.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
df = pd.read_pickle("data.pkl") 
# Preview the first rows (shown as the cell output when run in a notebook).
df.head()

Unnamed: 0,id,disp.heinz41,disp.heinz32,disp.heinz28,disp.hunts32,feat.heinz41,feat.heinz32,feat.heinz28,feat.hunts32,price.heinz41,price.heinz32,price.heinz28,price.hunts32,choice,choiceindex
0,1,0,0,0,0,0,0,0,0,4.6,3.7,5.2,3.4,heinz28,2
1,1,0,0,0,0,0,0,0,0,4.6,4.3,5.2,4.4,heinz28,2
2,1,0,0,0,0,0,1,0,0,4.6,2.5,4.6,4.8,heinz28,2
3,1,0,0,0,0,0,0,0,0,4.6,3.7,5.2,3.4,heinz28,2
4,1,0,0,0,0,0,0,1,0,4.6,3.0,4.6,4.8,heinz28,2


Defining the likelihood function using numba and numpy arrays and multiple draws

In [2]:
import numpy as np
import pandas as pd
import datetime
import time
from scipy.optimize import minimize
from numba import jit, prange

@jit(nopython=True, parallel=True)
def likelihood(c, data, draws):
    """Return the negative simulated log-likelihood of the mixed logit model.

    Parameters
    ----------
    c : 1-D float array of 9 coefficients
        ``c[0:3]`` are the alternative-specific constants of the first three
        alternatives (the fourth one has no constant), ``c[3:6]`` are the
        means (mu) and ``c[6:9]`` the standard deviations (sigma) of the three
        random coefficients.
    data : 2-D float array, one row per observation
        Column 0 holds the household id, which is matched against ``q + 1``
        (ids are therefore expected to run from 1 to ``n_q``).  Columns
        ``1 + 4*k + j`` hold attribute ``k`` (0..2) of alternative ``j``
        (0..3) and column 13 holds the index of the chosen alternative.
    draws : 3-D float array of shape ``(n_r, n_q, 3)``
        Random draws, one vector of three values per draw and household.

    Returns
    -------
    float
        ``-sum_q log(mean_r prod_t P(choice_t | draw_r))``.

    Notes
    -----
    Everything is accumulated in log space (log-sum-exp).  Multiplying the raw
    choice probabilities of a household can underflow to 0 (giving ``-inf``
    after the log), and exponentiating large utilities can overflow; both
    derail the optimizer.  Only the outer household loop uses ``prange``:
    numba parallelises the outermost ``prange`` only, nested ones run
    sequentially anyway.
    """
    n_r = draws.shape[0]
    n_q = draws.shape[1]

    # Loop-invariant views on the coefficient vector.
    mu = c[3:6]
    sigma = c[6:]

    # Simulated log-likelihood contribution of every household.
    log_estimates = np.zeros(n_q)

    # Iterate over households (parallel).
    for q in prange(n_q):
        rows = data[np.where(data[:, 0] == q + 1)]
        n_rows = len(rows)

        # Scratch buffers private to this household.
        utilities = np.zeros(4)
        log_sims = np.zeros(n_r)

        # Iterate over the draws of this household.
        for r in range(n_r):
            log_sequence = 0.0

            # Iterate over the observations of this household.
            for t in range(n_rows):

                # Utility of every alternative for this observation.
                for j in range(4):
                    utility = 0.0
                    if j < 3:
                        utility = c[j]  # alternative-specific constant
                    for k in range(3):
                        x = rows[t, 1 + 4 * k + j]
                        utility += mu[k] * x + sigma[k] * draws[r, q, k] * x
                    utilities[j] = utility

                # Log of the logit probability of the chosen alternative,
                # using a max-shifted (stable) log-sum-exp denominator.
                u_max = np.max(utilities)
                log_denominator = u_max + np.log(np.sum(np.exp(utilities - u_max)))
                log_sequence += utilities[int(rows[t, 13])] - log_denominator

            log_sims[r] = log_sequence

        # log(mean_r exp(log_sims[r])) computed without leaving log space.
        s_max = np.max(log_sims)
        log_estimates[q] = (s_max
                            + np.log(np.sum(np.exp(log_sims - s_max)))
                            - np.log(float(n_r)))

    return -np.sum(log_estimates)

# Iteration counter used by the optimizer callback inside mixedlogit().
# It is module-level state: every call to mixedlogit() resets it to 1.
iterations = 1


def mixedlogit(data, drawtype, n_draws, c_0=False, method='BFGS', verbose=False):
    """Estimate the mixed logit model by minimising the simulated likelihood.

    The coefficient vector has nine entries, in this order: three
    alternative-specific constants (heinz41, heinz32, heinz28), three means
    (display, feat, price) and three standard deviations (display, feat,
    price).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``id`` column (household) and a ``choice`` column;
        ``choice`` is dropped and the remaining values are passed to
        ``likelihood`` as a plain array.
    drawtype : str
        Kind of random draws; only ``'pseudo'`` (standard normal draws from
        ``np.random.randn``) is implemented.
    n_draws : int
        Number of draws per household.
    c_0 : sequence of 9 floats, optional
        Starting coefficients.  ``False`` (the default) or ``None`` means
        "draw random starting values with ``np.random.rand``".
    method : str
        Optimizer name forwarded to ``scipy.optimize.minimize``.
    verbose : bool
        If true, print the coefficients after every optimizer iteration.

    Returns
    -------
    The result object of ``scipy.optimize.minimize`` with an additional
    ``'duration'`` entry holding the elapsed time in seconds.

    Raises
    ------
    ValueError
        If ``c_0`` does not have 9 entries or ``drawtype`` is unknown.
    """
    global iterations

    n_q = len(data['id'].unique())

    # Generate initial values if necessary.  An identity test is required
    # here: ``c_0 == False`` is element-wise for numpy arrays and cannot be
    # used as a condition.
    if c_0 is None or c_0 is False:
        c_0 = np.random.rand(9)
    elif len(c_0) != 9:
        raise ValueError("Incorrect initial coefficients")

    # Generate the draws: one vector of 3 values per draw and household.
    if drawtype == 'pseudo':
        draws = np.random.randn(n_draws, n_q, 3)
    else:
        raise ValueError("Incorrect Drawtype")

    iterations = 1

    def log_iteration(xk):
        """Optimizer callback: report the current coefficients if verbose."""
        global iterations
        if verbose:
            print("Iteration %d, coefficients:" % (iterations))
            print(xk)
            print('\n')
            iterations += 1

    observations = data.drop(columns='choice').values

    # perf_counter is monotonic, so the duration cannot be distorted by
    # system clock adjustments.
    start = time.perf_counter()
    res = minimize(likelihood, c_0, args=(observations, draws),
                   method=method, callback=log_iteration)
    duration = time.perf_counter() - start

    print("Optimization done, time elapsed: %s" % str(datetime.timedelta(seconds=round(duration))))

    res['duration'] = duration
    return res

Benchmarking and comparing the pandas and numba implementations

In [None]:
# Fix the seed so the random starting values and draws are reproducible.
np.random.seed(1234)

# Example runs: first with BFGS, then with Nelder-Mead.
for optimizer in ('BFGS', 'Nelder-Mead'):
    result = mixedlogit(df, drawtype="pseudo", n_draws=100, method=optimizer, verbose=False)
    print(result)