In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from mmnl import MMNL
from numba import njit

In [2]:
# Load the catsup choice data set from CSV into a DataFrame and preview the
# first ten rows.
d = pd.read_csv('./data/catsup_trainformat.csv', delimiter=',')
d.head(10)

Unnamed: 0,chosen,id,alt,display,feature,price,chid
0,0,1,0,0,0,4.6,0
1,0,1,1,0,0,3.7,0
2,1,1,2,0,0,5.2,0
3,0,1,3,0,0,3.4,0
4,0,1,0,0,0,4.6,1
5,0,1,1,0,0,4.3,1
6,1,1,2,0,0,5.2,1
7,0,1,3,0,0,4.4,1
8,0,1,0,0,0,4.6,2
9,0,1,1,0,1,2.5,2


In [3]:
# Keep every column of d except the first and the last, as a plain NumPy array.
# probs() below reads column 0 of X as the individual id and columns 2 onward
# as the explanatory variables.
X = d.values[:,1:-1]
X.shape

(11192, 5)

In [4]:
def summarize_data(model):
    """Collect the pickled results of every stored run of one model.

    Scans the directory ``./resultaten/<model>`` for files whose name ends in
    ``<model>_timgdp_results.p``, unpickles each of them (each file is expected
    to contain an iterable of results) and concatenates their items into a
    single flat list.

    Parameters
    ----------
    model : str
        Model label in ``method(draws)`` format, e.g. ``'SMC(250)'``.

    Returns
    -------
    list
        All items found in the matching result files; empty if no file matches.

    Raises
    ------
    OSError
        If the result directory does not exist or a file cannot be read.
    """
    # Build the path with os.path.join instead of a hard-coded
    # '.\\resultaten\\...' literal so the lookup also works outside Windows.
    result_dir = os.path.join('.', 'resultaten', '%s' % (model))
    suffix = '%s_timgdp_results.p' % (model)

    result = []
    # os.listdir returns entries in arbitrary order; sorting makes the order of
    # the combined list reproducible between runs and machines.
    for file in sorted(os.listdir(result_dir)):
        if not file.endswith(suffix):
            continue
        # NOTE(security): pickle.load can execute arbitrary code; only use this
        # on result files that were produced by trusted code.
        with open(os.path.join(result_dir, file), 'rb') as infile:
            result.extend(pickle.load(infile))

#     pickle.dump( result, open( "./resultaten/%s/mcdgp%s_utsdgp_all_results.p" %(model,model), "wb" ) )
    return result
        

In [15]:
# @njit
def probs(X,model,theta,n_individuals=300,n_alts=4):
    """Simulate mixed-logit choice probabilities for every row of X.

    Each individual gets R random coefficient vectors
    ``beta = theta[:3] + delta * theta[3:]``. For every draw the logit
    probabilities are computed within each choice situation, and the result
    is the average of those probabilities over the R draws.

    Parameters
    ----------
    X : array-like, shape (n_rows, 5)
        Column 0 holds the 1-based individual id, column 1 is not used here,
        and columns 2-4 hold the three explanatory variables
        (display, feature, price). Rows must be ordered so that every
        consecutive block of ``n_alts`` rows is one choice situation.
    model : str
        Label in ``method(draws)`` format, e.g. ``'SMC(1000)'``. ``method`` is
        ``'SMC'`` (standard normal pseudo-random draws) or ``'QMC'``.
    theta : array-like, shape (6,)
        First three entries are the coefficient means, last three the scale
        factors applied to the draws.
    n_individuals : int, optional
        Number of individuals for which draws are generated (default 300).
    n_alts : int, optional
        Number of alternatives per choice situation (default 4).

    Returns
    -------
    numpy.ndarray, shape (n_rows,)
        Simulated probability of each row's alternative; the values of every
        choice situation sum to one.

    Raises
    ------
    ValueError
        If the method is unknown, the number of rows is not a multiple of
        ``n_alts``, or an individual id lies outside ``1..n_individuals``.
    """
    method, _, draws = model.partition('(')
    R = int(draws.rstrip(')'))

    if method == 'QMC':
        # QMC is not defined in this part of the notebook; it has to be
        # available in the kernel namespace when this branch is used.
        delta = QMC(n_individuals,3,R)
    elif method == 'SMC':
        delta = np.random.standard_normal((n_individuals,3,R))
    else:
        # Without this branch `delta` would simply be unbound further down.
        raise ValueError("Unknown simulation method %r (expected 'QMC' or 'SMC')" % (method,))

    theta = np.asarray(theta, dtype=float)
    X = np.asarray(X, dtype=float)
    n_rows = X.shape[0]
    if n_rows % n_alts != 0:
        raise ValueError('Number of rows (%d) is not a multiple of n_alts (%d)' % (n_rows, n_alts))

    ids = X[:,0].astype(int)
    if ids.size and (ids.min() < 1 or ids.max() > n_individuals):
        raise ValueError('Individual ids must lie in 1..%d' % (n_individuals,))

    # Individual-specific coefficients, shape (n_individuals, 3, R).
    beta = theta[:3].reshape(-1,1) + delta * theta[3:].reshape(-1,1)

    # Utility of every row under the coefficients of *its own* individual:
    # V[n, r] = sum_k X[n, 2+k] * beta[id_n - 1, k, r]. A plain matrix product
    # would pair every row with every other row's coefficients instead.
    V = np.einsum('nk,nkr->nr', X[:,2:], beta[ids-1])

    # Logit within each choice situation; subtracting the per-situation maximum
    # leaves the probabilities unchanged but keeps exp() from overflowing.
    V = V.reshape(n_rows // n_alts, n_alts, R)
    V = V - V.max(axis=1, keepdims=True)
    P = np.exp(V)
    P = P / P.sum(axis=1, keepdims=True)

    # Average over the draws only (axis 2). Averaging over the alternatives as
    # well would always give exactly 1/n_alts.
    Y = P.mean(axis=2).reshape(n_rows)
    return Y


In [None]:
# Simulate the reference choice probabilities with 1000 standard Monte Carlo
# draws at the true parameter values and store them on disk.
Y_true = probs(X,'SMC(1000)',np.array([1.5,  1.,  -1.1,  0.4,  0.1,  0.6]))
# Use a context manager so the file is flushed and closed right after dumping.
with open('Y_true_SMC(1000)', 'wb') as outfile:
    pickle.dump(Y_true, outfile)

In [16]:
def get_results(model_data,model,Y_true=None,theta_true=None):
    """Average the parameter estimates of all runs and score them.

    For every run the first three entries of ``run.x`` are used as they are,
    while the remaining entries are passed through ``exp`` before averaging.
    The averaged vector is then compared with the true parameter vector.

    Parameters
    ----------
    model_data : sequence
        Results of the individual runs; each item must expose the parameter
        vector as attribute ``x`` (e.g. ``scipy.optimize.OptimizeResult``).
    model : str
        Model label, copied unchanged into the returned dictionary.
    Y_true : optional
        Accepted for compatibility with existing calls; currently not used.
    theta_true : array-like, optional
        True parameter vector to compare against. Defaults to
        ``[1.5, 1., -1.1, 0.4, 0.1, 0.6]``.

    Returns
    -------
    dict
        ``{'method': model, 'theta': <averaged estimate>,
        'Parameter': {'mape': ..., 'rmse': ...}}`` where ``mape`` is the mean
        absolute percentage error and ``rmse`` the root mean squared error of
        the averaged estimate relative to ``theta_true``.

    Raises
    ------
    ValueError
        If ``model_data`` is empty, because no average can be formed.
    """
    if theta_true is None:
        theta_true = np.array([1.5,  1.,  -1.1,  0.4,  0.1,  0.6])
    else:
        theta_true = np.asarray(theta_true, dtype=float)

    D = len(model_data)
    if D == 0:
        # An empty list would otherwise yield an all-zero "estimate" and
        # error metrics that look valid but mean nothing.
        raise ValueError('model_data is empty for model %s' % (model,))
    print(D)

    # One row per run, one column per parameter.
    estimates = np.array([np.asarray(run.x, dtype=float) for run in model_data])
    theta_est = np.concatenate((estimates[:, :3].mean(axis=0),
                                np.exp(estimates[:, 3:]).mean(axis=0)))

    n_par = theta_true.size
    mape_par = 100*np.sum(np.abs((theta_true-theta_est)/theta_true))/n_par
    rmse_par = np.sqrt(np.sum((theta_true-theta_est)**2)/n_par)

    results = { 'method': model,
                'theta': theta_est,
               'Parameter': {'mape': mape_par,
                             'rmse': rmse_par
                           }
            }
    return results

Get the true probabilities based on a large QMC model.

In [7]:
# Gather all stored results for the SMC(250) model and count them.
smc250 = summarize_data('SMC(250)')
len(smc250)

100

In [17]:
# For every estimated model: load its stored runs and print the averaged
# parameter estimates together with their MAPE/RMSE against the true values.
models = ['SMC(250)','SMC(500)','SMC(1000)','QMC(25)','QMC(50)','QMC(75)','QMC(100)']
for m in models:
    m_data = summarize_data(m)
    res = get_results(m_data,m)
    print(res)

100
{'method': 'SMC(250)', 'theta': array([ 1.51833337,  0.99785936, -1.10529436,  0.35301856,  0.1878494 ,
        0.59768373]), 'Parameter': {'mape': 16.98306647124815, 'rmse': 0.04143038907196735}}
100
{'method': 'SMC(500)', 'theta': array([ 1.52048865,  1.00006172, -1.10530069,  0.32775663,  0.19806889,
        0.5964789 ]), 'Parameter': {'mape': 19.76175676704424, 'rmse': 0.050492399831148206}}
100
{'method': 'SMC(1000)', 'theta': array([ 1.52079287,  0.99890776, -1.10471063,  0.33855024,  0.18828259,
        0.59808493]), 'Parameter': {'mape': 17.647976921695697, 'rmse': 0.04477589070760512}}
90
{'method': 'QMC(25)', 'theta': array([ 1.51521008,  0.989352  , -1.09963449,  0.29983564,  0.16738908,
        0.60582723]), 'Parameter': {'mape': 15.91890229172809, 'rmse': 0.049921613790112736}}
100
{'method': 'QMC(50)', 'theta': array([ 1.52051922,  0.99775082, -1.10304376,  0.33435333,  0.17390309,
        0.60433061]), 'Parameter': {'mape': 15.484350287148771, 'rmse': 0.0412820960200