In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from matplotlib import pyplot as plt
import numpy as np

In [3]:
import GPy, pickle

In [4]:
import pandas as pd

In [None]:
import time

In [None]:
import sys
sys.path.insert(0, '../source')

In [None]:
from CPoE_script_real import SCRIPT1, meanPD, sdPD, sdmPD

This notebook shows how to run CPoE on the real-world datasets with stochastic optimization.

IMPORTANT: We do not provide the datasets directly here, since we are not the authors of these datasets. This means you first have to run download_data.ipynb so that the datasets are available in the folder datasets.

# set parameters

In [None]:
# Nrep: how many training/test data splits are repeated
# (in the paper, Nrep = 10 was used, which takes quite some time)
Nrep = 2

In [None]:
# available datasets; select one of them by its position in the list
dataset_names = [
    'kin8nm',
    'cadata',
    'sarcos',
    'casp',
]
name = dataset_names[0]

In [None]:
# folder holding the datasets (run download_data.ipynb before)
path = 'datasets/'
# folder in which the results are to be stored
path_results = 'results/'

In [None]:
# set parameters for each dataset

# sparsity parameter
p = 1

# degree of correlation
PPs = [0, 1, 2]

# MMs: number of inducing points for sparse GPs
MMs = np.array([250, 500, 1000])

# Per-dataset settings:
#   K0: number of experts
#   Nepochs: number of epochs of stochastic optimization
#   gamma: learning rate in SGD (adam) optimization
_dataset_settings = {
    'kin8nm': {'K0': 2**4, 'Nepochs': 15, 'gamma': 0.03},
    'cadata': {'K0': 2**5, 'Nepochs': 15, 'gamma': 0.01},
    'sarcos': {'K0': 2**7, 'Nepochs': 10, 'gamma': 0.01},
    'casp': {'K0': 2**7, 'Nepochs': 10, 'gamma': 0.01},
}

# Fail loudly on an unknown dataset name; otherwise K0/Nepochs/gamma would be
# left undefined (or keep stale values from an earlier run of this cell).
if name not in _dataset_settings:
    raise ValueError(
        'unknown dataset name ' + repr(name)
        + '; expected one of ' + repr(sorted(_dataset_settings))
    )

K0 = _dataset_settings[name]['K0']
Nepochs = _dataset_settings[name]['Nepochs']
gamma = _dataset_settings[name]['gamma']

In [None]:
# Build the experiment object for the chosen dataset. The number of
# repetitions is taken from the Nrep setting instead of a hard-coded value,
# so that changing Nrep in the parameter section actually takes effect.
SCRIPT = SCRIPT1(path + 'DAT' + name + '.csv', Nreps=Nrep, name=name, FULL=False)

datasets/DATkin8nm.csv
kin8nm
D= 8
Ntrain= 7373
Ntest= 819


# run CPoE, PoE, SGP

In [None]:
# correlated PoEs: one run configuration with HYPERS='STOCH'
# (the return value is discarded)
_ = SCRIPT.runCPoE(
    K0,
    PPs,
    p,
    HYPERS='STOCH',
    TRACE=False,
    gamma=gamma,
    E=Nepochs,
)

'Epoch 14 likelihood: -3418.0319727373358 rel: 0.0014058051673002507 stop?: False'

In [None]:
# independent PoEs, same optimizer settings (the return value is discarded)
_ = SCRIPT.runPoE(
    K0,
    HYPERS='STOCH',
    TRACE=False,
    gamma=gamma,
    E=Nepochs,
)

'Epoch 14 likelihood: -3418.0319727373358 rel: 0.0014058051673002507 stop?: False'

In [None]:
# stochastic sparse GP for every number of inducing points in MMs
# (the return value is discarded)
_ = SCRIPT.runSparseGPfact(
    MMs,
    K0,
    HYPERS='STOCH',
    TRACE=False,
    gamma=gamma,
    E=Nepochs,
    rec=True,
)

'Epoch 10 likelihood: [[-5892.50955622]] rel: [[0.0002844]] stop?: [[False]]'

# reload results

In [None]:
# reload computed results
# NOTE: pickle.load can execute arbitrary code; only load result files that
# you produced yourself.
# NOTE(review): the files are opened relative to the working directory, not
# relative to path_results -- confirm where the run methods write their output.
def _load_result(filename):
    """Unpickle one result file and close the file handle afterwards."""
    with open(filename, 'rb') as handle:
        return pickle.load(handle)


# the original cell referenced an undefined variable `nam`; the dataset name
# is stored in `name`
resPoE_stoch_load = _load_result(name + '_PoE_K' + str(K0) + 'stoch')
resSGP_stoch_load = _load_result(name + '_SGP' + 'factstoch')
resCPoE_stoch_loads = [
    _load_result(name + '_CPoE_K' + str(K0) + '_P' + str(P) + '_p1' + 'stoch')
    for P in PPs
]

In [None]:
# aggregate the loaded results over the repetitions (mean and std)
Mindep, SDindep, SDMindep = sdPD(resPoE_stoch_load)
Msparse, SDsparse, SDMsparse = sdPD(resSGP_stoch_load)

# one frame per CPoE configuration, stacked in the order of the loaded results
Mcpoe = pd.concat(list(map(meanPD, resCPoE_stoch_loads)))
SDMcpoe = pd.concat(list(map(sdmPD, resCPoE_stoch_loads)))

In [None]:
# make nice output of all results: stack sparse GP, independent PoE and CPoE rows
MMM = pd.concat([Msparse, Mindep, Mcpoe])
SDMMM = pd.concat([SDMsparse, SDMindep, SDMcpoe])

# rename and round (dictA maps each column to its number of decimals)
MMM.columns = SDMMM.columns = np.array(['time', 'LML', 'CRPS', 'RMSE', 'ABSE', 'NLP', 'COV'])
dictA = {'time': 1, 'LML': 1, 'CRPS': 3, 'RMSE': 3, 'ABSE': 3, 'NLP': 2, 'COV': 2}
MMMr = MMM.round(dictA)
SDMMMr = SDMMM.round(dictA)

# combine both together as "<value> $\pm$ <value>" strings.
# A raw string avoids the invalid "\p" escape sequence, and astype(str)
# replaces the deprecated DataFrame.applymap(str).
FF = MMMr.astype(str) + r' $\pm$ ' + SDMMMr.astype(str)
FF