In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from matplotlib import pyplot as plt
import numpy as np

In [3]:
import GPy, pickle

In [4]:
import pandas as pd

In [5]:
import time

In [6]:
import sys
sys.path.insert(0, '../source')

In [7]:
from CPoE_script_real import SCRIPT1, meanPD, sdPD, sdmPD

This notebook shows how to run the CPoE on the real-world datasets with deterministic optimization.

IMPORTANT: We do not provide the datasets directly here because we are not their authors. You therefore have to run download_data.ipynb first, so that the datasets are available in the folder datasets.

# set parameters

In [8]:
# Number of repeated training/test data splits.
# The paper used Nrep = 10, which takes quite some time to run.
Nrep = 10

In [9]:
# Available datasets; `name` selects the one used in the rest of the notebook.
dataset_names = [
    'concrete',
    'mg',
    'abalone',
    'space_ga',
    'kin8nm',
]
name = dataset_names[0]

In [10]:
# Folder holding the datasets (created by running download_data.ipynb beforehand).
path = 'datasets/'
# Folder in which the results are stored.
path_results = 'results/'

In [11]:
# set parameters for each dataset

# sparsity parameter
p = 1

# degree of correlation
PPs = [0, 1, 2, 3]

# Per-dataset settings, keyed by dataset name:
#   K0:    number of experts
#   MMs:   number of inducing points for sparse GPs
#   extra: additional keyword arguments merged into `args` (optional)
dataset_settings = {
    'concrete': {'K0': 2**2, 'MMs': [25, 50, 100]},
    'mg':       {'K0': 2**3, 'MMs': [25, 50, 100]},
    'abalone':  {'K0': 2**4, 'MMs': [20, 50, 100]},
    'space_ga': {'K0': 2**2, 'MMs': [50, 100, 150]},
    'kin8nm':   {'K0': 2**4, 'MMs': [50, 100, 200, 300],
                 'extra': {'NtestFIX': True, 'Ntestmax': 3000}},
}

# Fail early on an unknown dataset name: otherwise K0 and MMs would stay
# undefined (or keep stale values from a previous run of this cell).
if name not in dataset_settings:
    raise ValueError(
        'unknown dataset name %r; expected one of %s'
        % (name, sorted(dataset_settings))
    )

settings = dataset_settings[name]
K0 = settings['K0']
MMs = np.array(settings['MMs'])

# keyword arguments passed on when the experiment script is created
args = {'path_results': path_results}
args.update(settings.get('extra', {}))

In [12]:
# Build the experiment object for the chosen dataset; the CSV file is
# expected at <path>DAT<name>.csv.
SCRIPT = SCRIPT1('{}DAT{}.csv'.format(path, name), Nreps=Nrep, name=name, **args)

datasets/DATconcrete.csv
concrete
D= 8
Ntrain= 927
Ntest= 103


# run full GP, CPoE, PoE, SGP

In [13]:
# the results are automatically stored in folder results

In [23]:
# run full GP
# The call returns two values, unpacked as resFull and pathFull
# (presumably the computed results and where they were stored - TODO confirm).
resFull, pathFull = SCRIPT.runfullGP()

In [24]:
# run correlated PoE
# Arguments: K0 (number of experts), PPs (degrees of correlation) and
# p (sparsity parameter), as set in the parameter cell.
# The call returns two values, unpacked as resCPoE and pathCPoE
# (presumably the computed results and where they were stored - TODO confirm).
resCPoE, pathCPoE = SCRIPT.runCPoE(K0, PPs, p)

In [25]:
# run independent PoEs
# Argument: K0 (number of experts), as set in the parameter cell.
# The call returns two values, unpacked as resPoE and pathPoE
# (presumably the computed results and where they were stored - TODO confirm).
resPoE, pathPoE = SCRIPT.runPoE(K0)

In [26]:
# run sparse GP
# Argument: MMs (numbers of inducing points), as set in the parameter cell.
# The call returns two values, unpacked as resSGP and pathSGP
# (presumably the computed results and where they were stored - TODO confirm).
resSGP, pathSGP = SCRIPT.runSparseGP(MMs)

# reload results

In [18]:
# reload computed results
# NOTE(review): pickle.load can execute arbitrary code; only load result files
# that come from a trusted source (e.g. produced by the run cells above).
def _load_result(filename):
    """Unpickle the result file `filename` located in `path_results`.

    The file is opened in a `with` block so that the handle is closed as soon
    as loading finishes or fails, instead of being left open.
    """
    with open(path_results + filename, 'rb') as fh:
        return pickle.load(fh)


resFull_load = _load_result(name + '_fullGP')
# one CPoE result file per degree of correlation P
# NOTE(review): the '_p1' suffix is hard-coded; presumably it encodes the
# sparsity parameter p = 1 - confirm the naming scheme before changing p.
resCPoE_loads = [_load_result(name + '_CPoE_K' + str(K0) + '_P' + str(P) + '_p1') for P in PPs]
resPoE_load = _load_result(name + '_PoE_K' + str(K0))
resSGP_load = _load_result(name + '_SGP')

In [19]:
# Aggregate the reloaded results over the repetitions.
# sdPD yields three objects per method, unpacked as M*, SD* and SDM*.
Mfull, SDfull, SDMfull = sdPD(resFull_load)
Mindep, SDindep, SDMindep = sdPD(resPoE_load)
Msparse, SDsparse, SDMsparse = sdPD(resSGP_load)

# For the CPoE runs, stack the per-run tables produced by meanPD and sdmPD.
Mcpoe = pd.concat(list(map(meanPD, resCPoE_loads)))
SDMcpoe = pd.concat(list(map(sdmPD, resCPoE_loads)))

In [39]:
# make nice output of all results
# rows are stacked in the order: full GP, sparse GPs, independent PoEs, CPoE
MMM = pd.concat([Mfull, Msparse, Mindep, Mcpoe])
SDMMM = pd.concat([SDMfull, SDMsparse, SDMindep, SDMcpoe])

# rename and round
MMM.columns = SDMMM.columns = np.array(['time', 'LML', 'KL', 'ERR', 'CRPS', 'RMSE', 'ABSE', 'NLP', 'COV'])
# number of decimals kept for each column
dictA = {'time': 1, 'LML': 1, 'KL': 1, 'ERR': 3, 'CRPS': 3, 'RMSE': 3, 'ABSE': 3, 'NLP': 2, 'COV': 2}
MMMr = MMM.round(dictA)
SDMMMr = SDMMM.round(dictA)

# combine results into "<value from MMM> $\pm$ <value from SDMMM>" strings
# The separator is a raw string: '\p' is not a valid escape sequence and
# triggers a DeprecationWarning/SyntaxWarning in a normal string literal.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map; it is kept here so the cell also runs on older pandas.
FF = MMMr.applymap(str) + r' $\pm$ ' + SDMMMr.applymap(str)
FF

Unnamed: 0,time,LML,KL,ERR,CRPS,RMSE,ABSE,NLP,COV
fullGP,13.7 $\pm$ 1.1,-314.2 $\pm$ 5.1,0.0 $\pm$ 0.0,0.0 $\pm$ 0.0,0.162 $\pm$ 0.004,0.311 $\pm$ 0.011,0.218 $\pm$ 0.005,0.47 $\pm$ 0.12,0.92 $\pm$ 0.01
sparse25,6.0 $\pm$ 0.6,-595.8 $\pm$ 10.7,439.2 $\pm$ 20.0,0.313 $\pm$ 0.009,0.234 $\pm$ 0.005,0.422 $\pm$ 0.01,0.324 $\pm$ 0.005,1.11 $\pm$ 0.04,0.96 $\pm$ 0.01
sparse50,8.7 $\pm$ 1.1,-539.7 $\pm$ 10.1,405.4 $\pm$ 31.2,0.291 $\pm$ 0.012,0.222 $\pm$ 0.004,0.402 $\pm$ 0.008,0.308 $\pm$ 0.005,1.01 $\pm$ 0.03,0.95 $\pm$ 0.01
sparse100,15.2 $\pm$ 1.5,-494.5 $\pm$ 7.9,352.6 $\pm$ 29.6,0.264 $\pm$ 0.011,0.211 $\pm$ 0.004,0.385 $\pm$ 0.007,0.292 $\pm$ 0.006,0.92 $\pm$ 0.03,0.95 $\pm$ 0.01
minVar,2.8 $\pm$ 0.1,-389.8 $\pm$ 2.9,122.2 $\pm$ 13.1,0.156 $\pm$ 0.012,0.175 $\pm$ 0.004,0.335 $\pm$ 0.011,0.236 $\pm$ 0.005,0.61 $\pm$ 0.09,0.92 $\pm$ 0.01
GPoE-scaled,2.7 $\pm$ 0.1,-389.8 $\pm$ 2.9,174.4 $\pm$ 9.4,0.166 $\pm$ 0.01,0.186 $\pm$ 0.004,0.342 $\pm$ 0.01,0.255 $\pm$ 0.007,0.68 $\pm$ 0.05,0.96 $\pm$ 0.01
BCM,2.7 $\pm$ 0.1,-389.8 $\pm$ 2.9,338.1 $\pm$ 32.7,0.185 $\pm$ 0.012,0.195 $\pm$ 0.005,0.354 $\pm$ 0.01,0.265 $\pm$ 0.007,1.16 $\pm$ 0.12,0.82 $\pm$ 0.01
RBCM,2.7 $\pm$ 0.1,-389.8 $\pm$ 2.9,427.9 $\pm$ 35.0,0.166 $\pm$ 0.013,0.187 $\pm$ 0.005,0.342 $\pm$ 0.011,0.249 $\pm$ 0.006,1.43 $\pm$ 0.21,0.79 $\pm$ 0.01
GRBCM,3.2 $\pm$ 0.1,-465.0 $\pm$ 3.1,224.6 $\pm$ 30.3,0.202 $\pm$ 0.011,0.19 $\pm$ 0.004,0.352 $\pm$ 0.01,0.262 $\pm$ 0.006,0.71 $\pm$ 0.05,0.92 $\pm$ 0.01
CPoE(1),2.2 $\pm$ 0.2,-397.0 $\pm$ 2.8,113.0 $\pm$ 12.1,0.149 $\pm$ 0.011,0.175 $\pm$ 0.004,0.333 $\pm$ 0.011,0.236 $\pm$ 0.006,0.59 $\pm$ 0.09,0.92 $\pm$ 0.01


In [21]:
# save .tex results

In [22]:
# NOTE: uncomment to export the formatted table FF (built above) as LaTeX.
#with open(path_results+name+'_s.tex','w') as file1:
#    file1.write(FF.to_latex(escape=False))