In [2]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from mmnl import MMNL
from numba import njit
from sklearn import metrics
from qmc import QMC

In [3]:
def load_data(path):
    dat = np.load(path)
    X = dat[:, :-1]
    Y = np.reshape(dat[:, -1], (-1, 1))
    return X, Y
X, Y = load_data('data/data.npy')

In [13]:
def summarize_data(model): 
    #model should be in method(draws) format
    result = []
    for file in os.listdir('.\\resultaten\\%s'%(model)):
        if file.endswith('%s_dgp_results.p'%(model)) and len(result)<200:
            infile = open(os.path.join('.\\resultaten\\%s'%(model), file), 'rb')
            opt_list = pickle.load(infile)
            [result.append(i) for i in opt_list]
    
#     pickle.dump( result, open( "./resultaten/%s/mcdgp%s_utsdgp_all_results.p" %(model,model), "wb" ) )
    return result      
# m_data = summarize_data('QMC(25)')
# res = get_results(m_data,'QMC(25)',all_probs)
# df = pd.DataFrame.from_dict(res)
# df

In [6]:
# @njit
def probs(X,model,theta):
    #performs matrix product to obtain the probability of every row
    #X should be in format [display, feature, price]
    method = model.split('(')[0]
    R = int(model.split('(')[1][:-1])
    if method == 'QMC':
        delta = QMC(300,3,R)
    elif method == 'SMC':
        delta = np.random.standard_normal((300,3,R))
    elif method == 'BQMC':
        delta = QMC(300,3,R)

    
    beta = theta[:3].reshape(-1,1) + delta * theta[3:].reshape(-1,1)
    P = []
    
    for i in range(X.shape[0]):
        if method == 'BQMC':
            kernel_mean, C_inv, det = MMNL.kernel_gauss(beta[int(X[i,0])-1,:,:],np.ones(4), theta,None)
        for j in range(4):
            if method == 'QMC' or method == 'SMC':
                P.append(np.mean(MMNL.softmax(X,beta[int(X[i,0])-1,:,:],i,j)))
            elif method == 'BQMC':
                f = MMNL.softmax(X, beta[int(X[i,0])-1,:,:], i, j)
                mean = kernel_mean.reshape(1,-1) @ C_inv @ f.T
                P.append(float(mean))

        

    return np.array(P)


In [7]:
# all_probs = probs(X,'QMC(20000)',np.array([1.5,  1.,  -1.1,  0.4,  0.1,  0.6]))
# pickle.dump(all_probs,open('all_probs_QMC(20000)','wb'))

In [8]:
all_probs = pickle.load(open('all_probs_QMC(20000)', 'rb'))

In [9]:
all_probs.shape

(11192,)

In [14]:
def get_results(model_data,model,probs_true=None):
    #model data should be in list format containing OptimizeResult types
    theta_true = np.array([1.5,  1.,  -1.1,  0.4,  0.1,  0.6])
    results ={}
    theta_est = np.zeros((6,))
    mape = 0
    D = len(model_data)
    print(D)
    for run in model_data:
        theta_est[:3] += run.x[:3]/D
        t = np.exp(run.x[3:])/D
        theta_est[3:] += t
    P = probs(X,model,theta_true)
    mape_choice = 100*np.mean(np.abs((probs_true - P)/ probs_true))
    rmse_choice = np.sqrt(metrics.mean_squared_error(probs_true,P))
    mape_par = np.mean(np.abs((theta_true - theta_est) / theta_true)) * 100
    rmse_par = np.sqrt(metrics.mean_squared_error(theta_true,theta_est))
    mape_par_adj = np.mean(np.abs((theta_true - theta_est) / ((theta_true+theta_est)/2))) * 100

    results = { 'method': model,
                'theta': theta_est,
                'mape_choice': mape_choice,
                'rmse_choice': rmse_choice,
                'mape_par': mape_par,
                'rmse_par': rmse_par,
                'mape_par_adj': mape_par_adj
                    
            }
    return results

Get true probabilities based on big QMC model

In [15]:
models = ['SMC(25)','SMC(250)','SMC(500)','SMC(1000)','SMC(2000)','SMC(3000)','QMC(25)','QMC(50)','QMC(75)','QMC(100)',"QMC(125)",'BQMC(10)','BQMC(15)','BQMC(20)','BQMC(25)','BQMC(30)', 'BQMC(50)',"BQMC(75)"]
allres = {}
for m in models:
    m_data = summarize_data(m)
    res = get_results(m_data,m,all_probs)
    print(res)
    allres[res['method']] = res

200
{'method': 'SMC(25)', 'theta': array([ 1.49481943,  0.99895101, -1.07903579,  0.2102621 ,  0.12201239,
        0.59921227]), 'mape_choice': 7.278632626827668, 'rmse_choice': 0.014970935689673379, 'mape_par': 11.989043457317472, 'rmse_par': 0.07847831093806867, 'mape_par_adj': 14.086462351165343}
200
{'method': 'SMC(250)', 'theta': array([ 1.5117817 ,  1.00506381, -1.10560798,  0.34002675,  0.17902321,
        0.60034943]), 'mape_choice': 2.408101937314859, 'rmse_choice': 0.005113476092625508, 'mape_par': 15.979401296163012, 'rmse_par': 0.04090126484268389, 'mape_par_adj': 12.450894858212374}
200
{'method': 'SMC(500)', 'theta': array([ 1.51480633,  1.00700921, -1.10500041,  0.34174011,  0.17654853,
        0.59960052]), 'mape_choice': 1.645927440462804, 'rmse_choice': 0.003363973049286718, 'mape_par': 15.553778582351347, 'rmse_par': 0.039890290333324266, 'mape_par_adj': 12.211628053411316}
200
{'method': 'SMC(1000)', 'theta': array([ 1.51594188,  1.00626154, -1.10489827,  0.35822338

In [12]:
df = pd.DataFrame.from_dict(allres)
df

Unnamed: 0,SMC(25),SMC(250),SMC(500),SMC(1000),SMC(2000),SMC(3000),QMC(25),QMC(50),QMC(75),QMC(100),QMC(125),BQMC(10),BQMC(15),BQMC(20),BQMC(25),BQMC(30),BQMC(50),BQMC(75)
mape_choice,7.32196,2.40491,1.65857,1.14964,0.865231,0.671229,2.1608,1.55173,1.06741,0.688507,0.575586,48.9476,43.3191,36.5395,32.5221,30.953,27.4858,25.9759
mape_par,100,14.0681,16.1988,16.8073,15.1478,15.8625,15.9253,15.1471,17.9322,16.1081,16.2965,100,100,100,100,100,100,100
mape_par_adj,200,12.485,13.5871,13.6167,12.7377,13.0904,14.975,13.2643,14.499,13.156,13.3552,200,200,200,200,200,200,200
method,SMC(25),SMC(250),SMC(500),SMC(1000),SMC(2000),SMC(3000),QMC(25),QMC(50),QMC(75),QMC(100),QMC(125),BQMC(10),BQMC(15),BQMC(20),BQMC(25),BQMC(30),BQMC(50),BQMC(75)
rmse_choice,0.0153539,0.00487949,0.00343074,0.00243338,0.00183043,0.00140395,0.00364563,0.00272578,0.00187727,0.00120827,0.000947892,0.176554,0.162994,0.145154,0.132418,0.126233,0.108052,0.0925536
rmse_par,0.911958,0.0468711,0.0486198,0.0475398,0.0448591,0.0455515,0.0621963,0.0497001,0.0516576,0.0456346,0.0467303,0.911958,0.911958,0.911958,0.911958,0.911958,0.911958,0.911958
theta,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.5130442689171848, 1.0048606614730489, -1.10...","[1.5151441489415702, 1.0058307311794001, -1.10...","[1.5161542557062289, 1.0053924551348175, -1.10...","[1.516960587931211, 1.005985885474773, -1.1034...","[1.5162790965658175, 1.004921884146282, -1.103...","[1.5112117661737274, 1.0051910563311917, -1.09...","[1.5153339294150618, 1.0044123008916435, -1.10...","[1.5160631945241185, 1.0054878961014666, -1.10...","[1.5166179241194382, 1.0058246098256434, -1.10...","[1.516272944649789, 1.0053004447097544, -1.103...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
