In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before
# each cell is executed, so edits to imported source files are picked up.
%load_ext autoreload
%autoreload 2

In [2]:
from matplotlib import pyplot as plt
import numpy as np

In [3]:
import GPy, pickle

In [4]:
import pandas as pd

In [5]:
import time

In [6]:
import sys
# Put '../source' at the front of the module search path
# (presumably where CPoE_script_real lives -- verify against the repo layout).
sys.path.insert(0, '../source')

In [7]:
from CPoE_script_real import SCRIPT1, meanPD, sdPD, sdmPD

This notebook shows how to run the CPoE for the real-world datasets with stochastic optimization.

In [None]:
# IMPORTANT: We do not provide the datasets directly here because we are not their authors.
# This means you first have to run download_data.ipynb so that the datasets are available
# in the folder datasets.

In [None]:
# Execute the download notebook inside this kernel; it is expected to place
# the datasets in the datasets folder.
%run ./download_data.ipynb

# set parameters

In [8]:
# How many random training/test splits every experiment is repeated on.
# The paper used Nrep = 5, which takes quite some time to run.
Nrep = 2

In [9]:
# Available datasets; change the index below to select a different one.
dataset_names = [
    'kin8nm',
    'cadata',
    'sarcos',
    'casp',
]
name = dataset_names[0]

In [10]:
# Folder the datasets are read from (run download_data.ipynb beforehand).
path = 'datasets/'
# Folder the results are stored in.
path_results = 'results/'

In [11]:
# Experiment parameters; the dataset-specific ones are looked up by `name`.

# sparsity parameter
p = 1

# degree of correlation
PPs = [0, 1, 2]

# MMs: number of inducing points for sparse GPs
MMs = np.array([250, 500, 1000])

# Per-dataset tuple (K0, Nepochs, gamma) where
#   K0:      number of experts
#   Nepochs: number of epochs of stochastic optimization
#   gamma:   learning rate in SGD (adam) optimization
_dataset_settings = {
    'kin8nm': (2**4, 15, 0.03),
    'cadata': (2**5, 15, 0.01),
    'sarcos': (2**7, 10, 0.01),
    'casp':   (2**7, 10, 0.01),
}

# An unknown dataset name leaves K0 / Nepochs / gamma untouched.
if name in _dataset_settings:
    K0, Nepochs, gamma = _dataset_settings[name]

args = dict(path_results=path_results)

In [12]:
# Build the experiment driver for the chosen dataset file.
SCRIPT = SCRIPT1(
    ''.join([path, 'DAT', name, '.csv']),
    Nreps=Nrep,
    name=name,
    FULL=False,
    **args
)

datasets/DATkin8nm.csv
kin8nm
D= 8
Ntrain= 7373
Ntest= 819


# run CPoE, PoE, SGP

In [13]:
# Correlated PoEs (CPoE) with stochastic hyperparameter optimization.
_ = SCRIPT.runCPoE(
    K0,
    PPs,
    p,
    HYPERS='STOCH',
    TRACE=False,
    gamma=gamma,
    E=Nepochs,
)

'Epoch 14 likelihood: -3418.0319727373358 rel: 0.0014058051673002507 stop?: False'

In [14]:
# Independent PoEs with stochastic hyperparameter optimization.
_ = SCRIPT.runPoE(
    K0,
    HYPERS='STOCH',
    TRACE=False,
    gamma=gamma,
    E=Nepochs,
)

'Epoch 14 likelihood: -3418.0319727373358 rel: 0.0014058051673002507 stop?: False'

In [17]:
# Stochastic sparse GP, using the inducing-point counts listed in MMs.
_ = SCRIPT.runSparseGPfact(
    MMs,
    K0,
    HYPERS='STOCH',
    TRACE=False,
    gamma=gamma,
    E=Nepochs,
    rec=True,
)

# reload results

In [14]:
# Reload the results that the run cells above stored in `path_results`.
# NOTE: pickle can execute arbitrary code while loading -- only open result
# files that were produced by this notebook / a trusted source.
def _load_pickle(file_path):
    """Return the object pickled at `file_path`, closing the file afterwards."""
    with open(file_path, 'rb') as handle:
        return pickle.load(handle)

resPoE_stoch_load = _load_pickle(path_results+name+ '_PoE_K'+str(K0)+'stoch')
# NOTE(review): the doubled 'stochstoch' suffix is kept as in the original;
# confirm it matches the file name written by runSparseGPfact.
resSGP_stoch_load = _load_pickle(path_results+name+ '_SGP'+'stochstoch')
# NOTE(review): '_p1' is hard-coded and only matches runs with p = 1;
# confirm the naming scheme before changing `p`.
resCPoE_stoch_loads = [
    _load_pickle(path_results+name+'_CPoE_K'+str(K0)+'_P'+str(P)+'_p1'+'stoch')
    for P in PPs
]

In [15]:
# Aggregate over the repetitions: mean and std for every method.
(Mindep, SDindep, SDMindep) = sdPD(resPoE_stoch_load)
(Msparse, SDsparse, SDMsparse) = sdPD(resSGP_stoch_load)

# The CPoE results come as one entry per loaded file; stack them row-wise.
Mcpoe = pd.concat(list(map(meanPD, resCPoE_stoch_loads)))
SDMcpoe = pd.concat(list(map(sdmPD, resCPoE_stoch_loads)))

In [16]:
# Build one summary table with a "value $\pm$ spread" string per cell.
# Row order: sparse GPs, independent PoEs, correlated PoEs.
MMM = pd.concat([Msparse, Mindep, Mcpoe])
SDMMM = pd.concat([SDMsparse, SDMindep, SDMcpoe])

# rename and round
MMM.columns = SDMMM.columns = np.array(['time', 'LML', 'CRPS', 'RMSE', 'ABSE', 'NLP', 'COV'])
# number of decimals kept per column
dictA = {'time': 1, 'LML': 1, 'CRPS':3, 'RMSE':3, 'ABSE':3, 'NLP':2, 'COV':2}
MMMr = MMM.round(dictA)
SDMMMr = SDMMM.round(dictA)

# combine both together
# - Column-wise Series.map(str) replaces DataFrame.applymap, which is
#   deprecated since pandas 2.1; the element-wise result is the same.
# - The separator is a raw string: '\p' is an invalid escape sequence in a
#   normal string literal (the resulting text is unchanged).
FF = (
    MMMr.apply(lambda col: col.map(str))
    + r' $\pm$ '
    + SDMMMr.apply(lambda col: col.map(str))
)
FF

Unnamed: 0,time,LML,CRPS,RMSE,ABSE,NLP,COV
sparseFactstoch,40.7 $\pm$ 0.9,-4162.3 $\pm$ 30.7,0.213 $\pm$ 0.003,0.377 $\pm$ 0.006,0.292 $\pm$ 0.003,0.96 $\pm$ 0.02,0.98 $\pm$ 0.0
sparseFactstoch,99.7 $\pm$ 2.5,-3232.8 $\pm$ 12.1,0.188 $\pm$ 0.001,0.334 $\pm$ 0.003,0.26 $\pm$ 0.001,0.7 $\pm$ 0.01,0.98 $\pm$ 0.0
sparseFactstoch,333.9 $\pm$ 16.4,-2533.0 $\pm$ 13.9,0.171 $\pm$ 0.001,0.303 $\pm$ 0.002,0.236 $\pm$ 0.001,0.48 $\pm$ 0.01,0.98 $\pm$ 0.0
minVar,22.1 $\pm$ 0.4,-3401.1 $\pm$ 5.3,0.171 $\pm$ 0.001,0.309 $\pm$ 0.002,0.239 $\pm$ 0.002,0.45 $\pm$ 0.01,0.95 $\pm$ 0.0
GPoE-scaled,22.1 $\pm$ 0.4,-3401.1 $\pm$ 5.3,0.194 $\pm$ 0.001,0.342 $\pm$ 0.003,0.267 $\pm$ 0.002,0.76 $\pm$ 0.01,0.99 $\pm$ 0.0
BCM,22.0 $\pm$ 0.5,-3401.1 $\pm$ 5.3,0.21 $\pm$ 0.001,0.352 $\pm$ 0.003,0.267 $\pm$ 0.002,3.67 $\pm$ 0.1,0.63 $\pm$ 0.0
RBCM,21.9 $\pm$ 0.5,-3401.1 $\pm$ 5.3,0.188 $\pm$ 0.002,0.319 $\pm$ 0.003,0.244 $\pm$ 0.001,2.42 $\pm$ 0.11,0.69 $\pm$ 0.0
GRBCM,26.1 $\pm$ 0.5,0.0 $\pm$ 0.0,0.163 $\pm$ 0.001,0.293 $\pm$ 0.003,0.228 $\pm$ 0.002,0.36 $\pm$ 0.02,0.94 $\pm$ 0.0
CPoE(1),21.7 $\pm$ 1.2,-3407.3 $\pm$ 5.4,0.162 $\pm$ 0.001,0.291 $\pm$ 0.003,0.225 $\pm$ 0.002,0.36 $\pm$ 0.01,0.96 $\pm$ 0.0
CPoE(2),29.3 $\pm$ 1.3,-2106.2 $\pm$ 13.2,0.155 $\pm$ 0.001,0.278 $\pm$ 0.002,0.217 $\pm$ 0.002,0.26 $\pm$ 0.01,0.95 $\pm$ 0.0
