In [2]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from mmnl import MMNL
from numba import njit
from sklearn import metrics
from qmc import QMC

In [3]:
def load_data(path):
    dat = np.load(path)
    X = dat[:, :-1]
    Y = np.reshape(dat[:, -1], (-1, 1))
    return X, Y
X, Y = load_data('data/data.npy')

In [5]:
def summarize_data(model): 
    #model should be in method(draws) format
    result = []
    for file in os.listdir('.\\resultaten\\%s'%(model)):
        if file.endswith('%s_dgptest_results.p'%(model)) and len(result)<200:
            infile = open(os.path.join('.\\resultaten\\%s'%(model), file), 'rb')
            opt_list = pickle.load(infile)
            [result.append(i) for i in opt_list]
    
#     pickle.dump( result, open( "./resultaten/%s/mcdgp%s_utsdgp_all_results.p" %(model,model), "wb" ) )
    return result      
# m_data = summarize_data('QMC(25)')
# res = get_results(m_data,'QMC(25)',all_probs)
# df = pd.DataFrame.from_dict(res)
# df

In [6]:
# @njit
def probs(X,model,theta):
    #performs matrix product to obtain the probability of every row
    #X should be in format [display, feature, price]
    method = model.split('(')[0]
    R = int(model.split('(')[1][:-1])
    if method == 'QMC':
        delta = QMC(300,3,R)
    elif method == 'SMC':
        delta = np.random.standard_normal((300,3,R))
    elif method == 'BQMC':
        delta = QMC(300,3,R)

    
    beta = theta[:3].reshape(-1,1) + delta * theta[3:].reshape(-1,1)
    P = []
    
    for i in range(X.shape[0]):
        if method == 'BQMC':
            kernel_mean, C_inv, det = MMNL.kernel_gauss(beta[int(X[i,0])-1,:,:],np.ones(4), theta,None)
        for j in range(4):
            if method == 'QMC' or method == 'SMC':
                P.append(np.mean(MMNL.softmax(X,beta[int(X[i,0])-1,:,:],i,j)))
            elif method == 'BQMC':
                f = MMNL.softmax(X, beta[int(X[i,0])-1,:,:], i, j)
                mean = kernel_mean.reshape(1,-1) @ C_inv @ f.T
                P.append(float(mean))

        

    return np.array(P)


In [7]:
# all_probs = probs(X,'QMC(20000)',np.array([1.5,  1.,  -1.1,  0.4,  0.1,  0.6]))
# pickle.dump(all_probs,open('all_probs_QMC(20000)','wb'))

In [8]:
all_probs = pickle.load(open('all_probs_QMC(20000)', 'rb'))

In [9]:
all_probs.shape

(11192,)

In [10]:
def get_results(model_data,model,probs_true=None):
    #model data should be in list format containing OptimizeResult types
    theta_true = np.array([1.5,  1.,  -1.1,  0.4,  0.1,  0.6])
    results ={}
    theta_est = np.zeros((6,))
    mape = 0
    D = len(model_data)
    print(D)
    for run in model_data:
        theta_est[:3] += run.x[:3]/D
        t = np.exp(run.x[3:])/D
        theta_est[3:] += t
    P = probs(X,model,theta_true)
    mape_choice = 100*np.mean(np.abs((probs_true - P)/ probs_true))
    rmse_choice = np.sqrt(metrics.mean_squared_error(probs_true,P))
    mape_par = np.mean(np.abs((theta_true - theta_est) / theta_true)) * 100
    rmse_par = np.sqrt(metrics.mean_squared_error(theta_true,theta_est))
    mape_par_adj = np.mean(np.abs((theta_true - theta_est) / ((theta_true+theta_est)/2))) * 100

    results = { 'method': model,
                'theta': theta_est,
                'mape_choice': mape_choice,
                'rmse_choice': rmse_choice,
                'mape_par': mape_par,
                'rmse_par': rmse_par,
                'mape_par_adj': mape_par_adj
                    
            }
    return results

Get true probabilities based on big QMC model

In [11]:
models = ['SMC(25)','SMC(250)','SMC(500)','SMC(1000)','SMC(2000)','SMC(3000)','QMC(25)','QMC(50)','QMC(75)','QMC(100)',"QMC(125)",'BQMC(10)','BQMC(15)','BQMC(20)','BQMC(25)','BQMC(30)', 'BQMC(50)',"BQMC(75)"]
allres = {}
for m in models:
    m_data = summarize_data(m)
    res = get_results(m_data,m,all_probs)
    print(res)
    allres[res['method']] = res

0
{'method': 'SMC(25)', 'theta': array([0., 0., 0., 0., 0., 0.]), 'mape_choice': 7.32196141580759, 'rmse_choice': 0.015353926165974988, 'mape_par': 100.0, 'rmse_par': 0.9119576013536301, 'mape_par_adj': 200.0}
200
{'method': 'SMC(250)', 'theta': array([ 1.51304427,  1.00486066, -1.10282877,  0.30182849,  0.15772183,
        0.59681436]), 'mape_choice': 2.404909564654521, 'rmse_choice': 0.004879489108645996, 'mape_par': 14.068082872351212, 'rmse_par': 0.046871072125671334, 'mape_par_adj': 12.484959050192817}
200
{'method': 'SMC(500)', 'theta': array([ 1.51514415,  1.00583073, -1.10248947,  0.30615546,  0.17139585,
        0.59689965]), 'mape_choice': 1.6585658996242214, 'rmse_choice': 0.003430736081134607, 'mape_par': 16.198783936766215, 'rmse_par': 0.04861981250435495, 'mape_par_adj': 13.587141950541806}
200
{'method': 'SMC(1000)', 'theta': array([ 1.51615426,  1.00539246, -1.10218672,  0.31485846,  0.17755669,
        0.59887929]), 'mape_choice': 1.1496412806793963, 'rmse_choice': 0.0

In [12]:
df = pd.DataFrame.from_dict(allres)
df

Unnamed: 0,SMC(25),SMC(250),SMC(500),SMC(1000),SMC(2000),SMC(3000),QMC(25),QMC(50),QMC(75),QMC(100),QMC(125),BQMC(10),BQMC(15),BQMC(20),BQMC(25),BQMC(30),BQMC(50),BQMC(75)
mape_choice,7.32196,2.40491,1.65857,1.14964,0.865231,0.671229,2.1608,1.55173,1.06741,0.688507,0.575586,48.9476,43.3191,36.5395,32.5221,30.953,27.4858,25.9759
mape_par,100,14.0681,16.1988,16.8073,15.1478,15.8625,15.9253,15.1471,17.9322,16.1081,16.2965,100,100,100,100,100,100,100
mape_par_adj,200,12.485,13.5871,13.6167,12.7377,13.0904,14.975,13.2643,14.499,13.156,13.3552,200,200,200,200,200,200,200
method,SMC(25),SMC(250),SMC(500),SMC(1000),SMC(2000),SMC(3000),QMC(25),QMC(50),QMC(75),QMC(100),QMC(125),BQMC(10),BQMC(15),BQMC(20),BQMC(25),BQMC(30),BQMC(50),BQMC(75)
rmse_choice,0.0153539,0.00487949,0.00343074,0.00243338,0.00183043,0.00140395,0.00364563,0.00272578,0.00187727,0.00120827,0.000947892,0.176554,0.162994,0.145154,0.132418,0.126233,0.108052,0.0925536
rmse_par,0.911958,0.0468711,0.0486198,0.0475398,0.0448591,0.0455515,0.0621963,0.0497001,0.0516576,0.0456346,0.0467303,0.911958,0.911958,0.911958,0.911958,0.911958,0.911958,0.911958
theta,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.5130442689171848, 1.0048606614730489, -1.10...","[1.5151441489415702, 1.0058307311794001, -1.10...","[1.5161542557062289, 1.0053924551348175, -1.10...","[1.516960587931211, 1.005985885474773, -1.1034...","[1.5162790965658175, 1.004921884146282, -1.103...","[1.5112117661737274, 1.0051910563311917, -1.09...","[1.5153339294150618, 1.0044123008916435, -1.10...","[1.5160631945241185, 1.0054878961014666, -1.10...","[1.5166179241194382, 1.0058246098256434, -1.10...","[1.516272944649789, 1.0053004447097544, -1.103...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"


In [10]:
df = pd.DataFrame.from_dict(allres)
df

Unnamed: 0,SMC(25),SMC(250),SMC(500),SMC(1000),SMC(2000),SMC(3000),QMC(25),QMC(50),QMC(75),QMC(100),QMC(125),BQMC(10),BQMC(15),BQMC(20),BQMC(25),BQMC(30),BQMC(50),BQMC(75)
mape_choice,7.51502,2.28141,1.66479,1.1723,0.893245,0.746915,2.1608,1.55173,1.06741,0.688507,0.575586,48.9476,43.3191,36.5395,32.5221,30.953,27.4858,25.9759
mape_par,11.989,15.9794,15.5538,14.7908,12.7657,13.4221,12.5061,14.4483,16.4124,13.7825,11.4913,54.6094,36.7214,50.8889,49.7998,36.2172,58.27,115.448
mape_par_adj,14.0865,12.4509,12.2116,11.397,10.236,10.3504,11.4053,11.8663,12.4065,10.9164,9.38344,80.9015,58.2089,63.0457,56.8034,41.9265,47.8296,48.8149
method,SMC(25),SMC(250),SMC(500),SMC(1000),SMC(2000),SMC(3000),QMC(25),QMC(50),QMC(75),QMC(100),QMC(125),BQMC(10),BQMC(15),BQMC(20),BQMC(25),BQMC(30),BQMC(50),BQMC(75)
rmse_choice,0.0154065,0.00476235,0.00352809,0.00248625,0.00181565,0.00155958,0.00364563,0.00272578,0.00187727,0.00120827,0.000947892,0.176554,0.162994,0.145154,0.132418,0.126233,0.108052,0.0925536
rmse_par,0.0784783,0.0409013,0.0398903,0.0361482,0.0316712,0.032234,0.0432644,0.0391499,0.0401477,0.0342092,0.0284587,0.411355,0.267927,0.267037,0.258557,0.218939,0.283795,0.310526
theta,"[1.494819434474842, 0.9989510052014902, -1.079...","[1.51178169673906, 1.0050638090977702, -1.1056...","[1.5148063329060164, 1.0070092141952791, -1.10...","[1.5159418791393937, 1.006261543813022, -1.104...","[1.5171064780894166, 1.0067743573014045, -1.10...","[1.5170804030290201, 1.0054916708132966, -1.10...","[1.507030789812405, 1.0056444992991307, -1.100...","[1.5145184050789082, 1.0046447229220143, -1.10...","[1.5160030358592522, 1.0066038390544645, -1.10...","[1.5165651172936356, 1.0062042712362975, -1.10...","[1.5167757938262643, 1.0060453446990365, -1.10...","[2.14533570532408, 1.376277316604454, -0.96376...","[1.37552835093776, 0.9704884534932602, -1.0042...","[1.430651206566372, 0.9918760050436678, -0.995...","[1.3725426238257055, 1.0493037675755321, -1.03...","[1.633876779026274, 1.2306623011514815, -1.022...","[1.79091999707328, 1.4183487729395123, -1.0001...","[1.4886183492482308, 1.3657401215569174, -1.03..."


In [24]:
with open('mytable.tex', 'w') as tf:
     tf.write(df.to_latex())

In [19]:
models = ['SMC(250)','SMC(500)','SMC(1000)','SMC(2000)','QMC(25)','QMC(50)','QMC(75)','QMC(100)',"QMC(125)",'BQMC(10)','BQMC(15)','BQMC(20)','BQMC(25)','BQMC(30)', 'BQMC(50)',"BQMC(75)"]
for model in models:
    for file in os.listdir('.\\resultaten\\%s'%(model)):
        if file.endswith('%s_timgdp_results.p'%(model)):
            old_name = os.path.join('.\\resultaten\\%s'%(model), file)
            new_name = old_name[:-16]+'dgp_results.p'
            os.rename(old_name,new_name)