In [1]:
from prettytable import PrettyTable
import numpy as np
import scipy.stats
from sklearn.metrics import mean_squared_error, r2_score
import emukit.examples.multi_fidelity_dgp

In [2]:
from emukit.core import ContinuousParameter, ParameterSpace
from emukit.core.initial_designs import LatinDesign
from emukit.examples.multi_fidelity_dgp.multi_fidelity_deep_gp import MultiFidelityDeepGP

from emukit.test_functions.multi_fidelity import (multi_fidelity_borehole_function,multi_fidelity_park_function,multi_fidelity_hartmann_3d)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  from ._conv import register_converters as _register_converters


In [3]:
from collections import namedtuple

Function = namedtuple('Function', ['name', 'y_scale', 'noise_level', 'do_x_scaling', 'num_data', 'fcn'])

borehole = Function(name='borehole', y_scale=100, noise_level=[0.05, 0.1], do_x_scaling=True, num_data=[60, 5], 
                    fcn=multi_fidelity_borehole_function)

park = Function(name='park', y_scale=1, noise_level=[0., 0.], do_x_scaling=False, num_data=[30, 5], 
                    fcn=multi_fidelity_park_function)

hartmann_3d = Function(name='hartmann', y_scale=1, noise_level=[0., 0., 0.], do_x_scaling=False, num_data=[80, 40, 20], 
                    fcn=multi_fidelity_hartmann_3d)



In [4]:
def generate_data(fcn_tuple, n_test_points):
    """
    Generates train and test data for
    """
    
    # A different definition of the parameter space for the branin function was used in the paper
    if fcn_tuple.name == 'branin':
        fcn, space = fcn_tuple.fcn()
        new_space = ParameterSpace([ContinuousParameter('x1', -5., 0.), ContinuousParameter('x2', 10., 15.)])
    else:
        fcn, space = fcn_tuple.fcn()
        new_space = ParameterSpace(space._parameters[:-1])
    
    do_x_scaling = fcn_tuple.do_x_scaling
    
    
    # Generate training data
    
    latin = LatinDesign(new_space)
    X = [latin.get_samples(n) for n in fcn_tuple.num_data]
    
    # Scale X if required
    if do_x_scaling:
        scalings = X[0].std(axis=0)
    else:
        scalings = np.ones(X[0].shape[1])
        
    for x in X:
        x /= scalings
    
    Y = []
    for i, x in enumerate(X):
        Y.append(fcn.f[i](x * scalings))
    
    y_scale = fcn_tuple.y_scale
    
    # scale y and add noise if required
    noise_levels = fcn_tuple.noise_level
    if any([n > 0 for n in noise_levels]):
        for y, std_noise in zip(Y, noise_levels):
            y /= y_scale + std_noise * np.random.randn(y.shape[0], 1)
    
    # Generate test data
    x_test = latin.get_samples(n_test_points)
    x_test /= scalings
    y_test = fcn.f[-1](x_test * scalings)
    y_test /= y_scale

    i_highest_fidelity = (len(fcn_tuple.num_data) - 1) * np.ones((x_test.shape[0], 1))
    x_test = np.concatenate([x_test, i_highest_fidelity], axis=1)
    print(X[1].shape)
    return x_test, y_test, X, Y

In [5]:
np.random.seed(123)

x_test_p, y_test_p, Xp, Yp = generate_data(park, 1000)

x_test_b, y_test_b, Xb, Yb = generate_data(borehole, 1000)

x_test_h, y_test_h, Xh, Yh = generate_data(hartmann_3d, 1000)

(5, 4)
(5, 8)
(40, 3)


In [6]:
mf_dgp_fix_lf_p = MultiFidelityDeepGP(Xp, Yp, n_iter=5000)
mf_dgp_fix_lf_p.optimize()

In [7]:
mf_dgp_fix_lf_b = MultiFidelityDeepGP(Xb, Yb, n_iter=5000)
mf_dgp_fix_lf_b.optimize()

In [8]:
mf_dgp_fix_lf_h = MultiFidelityDeepGP(Xh, Yh, n_iter=5000)
mf_dgp_fix_lf_h.optimize()

In [9]:
y_mean_p, y_var_p = mf_dgp_fix_lf_p.predict(x_test_p)

In [10]:
y_mean_b, y_var_b = mf_dgp_fix_lf_b.predict(x_test_b)

In [11]:
y_mean_h, y_var_h = mf_dgp_fix_lf_h.predict(x_test_h)

In [12]:
def calculate_metrics(y_test, y_mean_prediction, y_var_prediction):
    # R2
    r2 = r2_score(y_test, y_mean_prediction)
    # RMSE
    rmse = np.sqrt(mean_squared_error(y_test, y_mean_prediction))
    # Test log likelihood
    mnll = -np.sum(scipy.stats.norm.logpdf(y_test, loc=y_mean_prediction, scale=np.sqrt(y_var_prediction)))/len(y_test)
    return {'r2': r2, 'rmse': rmse, 'mnll': mnll}

In [13]:
print('Park dataset')
print(calculate_metrics(y_test_p, y_mean_p, y_var_p))

Park dataset
{'r2': 0.9875121534687764, 'rmse': 0.544717781326374, 'mnll': 1.2517090226412366}


In [14]:
print('Borehole dataset')
print(calculate_metrics(y_test_b, y_mean_b, y_var_b))

Borehole dataset
{'r2': 0.9989646030741597, 'rmse': 0.014891265047316136, 'mnll': -1.8715485258500661}


In [15]:
print('Hartmann3D dataset')
print(calculate_metrics(y_test_h, y_mean_h, y_var_h))

Hartmann3D dataset
{'r2': 0.9898548201294145, 'rmse': 0.0951690730336623, 'mnll': -0.7745369397081976}
