In [None]:
import numpy as np
import scipy.stats
import GPy
import matplotlib.pyplot as plt
%matplotlib inline

import sys
sys.path.append("..")
from data_generator_2d import TARGET_SIGNALS
sys.path.append("../../PyDeepGP")
import deepgp

## Showing signal to optimize

In [None]:
# Number of entries in TARGET_SIGNALS (the cells below pick signals by integer index)
len(TARGET_SIGNALS)

In [None]:
# Regular 100 x 100 evaluation grid covering the unit square [0, 1] x [0, 1]
x1 = np.linspace(0, 1, 100)
x2 = np.linspace(0, 1, 100)
X1, X2 = np.meshgrid(x1, x2)

# Flattened view of the grid: one (x1, x2) pair per row, shape (10000, 2)
grid_flat = np.column_stack((X1.ravel(), X2.ravel()))

In [None]:
# Index of the benchmark signal to visualise
f_pick = 37
f = TARGET_SIGNALS[f_pick].fun

# Show the signal's x_opt and desc attributes.
# The parenthesised single-argument form prints the same text on Python 2 and Python 3.
print(TARGET_SIGNALS[f_pick].x_opt)
print(TARGET_SIGNALS[f_pick].desc)

# Evaluate the signal at every grid point (one row of grid_flat at a time)
# and draw it as a heat map on the same mesh that produced grid_flat.
z = np.array([f(point) for point in grid_flat])
plt.pcolor(X1, X2, z.reshape(X1.shape), cmap='RdBu')
plt.colorbar()
plt.show()

## Fetching DeepGP and using it for Bayesian optimization (example)

In [None]:
# 50 design points drawn uniformly at random from the unit square
sample_x = np.random.random(size=(50, 2))

# Evaluate target signal 1 on each row of sample_x, then show the spread of the values
sample_y = np.apply_along_axis(TARGET_SIGNALS[1].fun, axis=1, arr=sample_x)
np.std(sample_y)

In [None]:
# One ARD RBF + bias kernel per layer: kern1 works on 5 inputs, kern2 on 2 inputs
kern1 = GPy.kern.RBF(5, ARD=True) + GPy.kern.Bias(5)
kern2 = GPy.kern.RBF(2, ARD=True) + GPy.kern.Bias(2)

# nDims=[1, 5, 2]: Y is passed as a single column and X has 2 columns, so the list
# presumably runs from the observed output, through a 5-dimensional hidden layer,
# to the 2-dimensional input, with kernels given in the same order
# (TODO confirm against the PyDeepGP documentation).
model = deepgp.DeepGP(nDims=[1, 5, 2],
                      Y=sample_y.reshape(-1, 1),
                      X=sample_x,
                      kernels=[kern1,kern2], 
                      num_inducing=10, 
                      back_constraint=False
                     )

In [None]:
# Fit the model: at most 500 optimiser iterations, with optimiser messages enabled
model.optimize(max_iters=500, messages=True)

In [None]:
pred[0]

In [None]:
pred = model.predict(grid_flat)
pred[0].std()

In [None]:
# Heat map of the model's first predictive output, laid back out on the 100 x 100 mesh
pred_surface = pred[0].reshape((100, 100))
plt.pcolor(X1, X2, pred_surface, cmap='RdBu')
plt.colorbar()
plt.show()

In [None]:
# NOTE: select_next_point is defined further down in this notebook (in the
# "function form" section); that cell must be run before this one on a fresh kernel.
next_x, ei_std = select_next_point(model, min(sample_y), eps=0.1)
# Formatting into a single string prints "next_x ei_std" identically on Python 2 and 3
# (the bare `print a, b` statement is a syntax error on Python 3).
print("{} {}".format(next_x, ei_std))

In [None]:
# Draw one noisy observation (noise scale 0.1) of signal 1 at the suggested point.
# NOTE: sample_obs is defined further down in this notebook; run that cell first on a fresh kernel.
sample_obs(TARGET_SIGNALS[1], next_x, 0.1)

In [None]:
# Plot the ARD parameters of the kernels attached to the model's obslayer and layer_1
# (presumably the per-dimension relevance of each layer's inputs — confirm against GPy's plot_ARD)
model.obslayer.kern.plot_ARD()
model.layer_1.kern.plot_ARD()

## Fetching DeepGP and using it for Bayesian optimization (function form)

In [None]:
def sample_obs(data_generator, x, sigma_obs):
    """Return noisy observations of `data_generator` at the rows of `x`.

    `data_generator.sample(x)` supplies the noise-free values; independent
    Gaussian noise with scale `sigma_obs` is added, one draw per row of `x`.
    Assumes `sample` returns one value per row so the noise vector lines up
    with it — TODO confirm against data_generator_2d.
    """
    clean_values = data_generator.sample(x)
    n_points = x.shape[0]
    noise = sigma_obs * np.random.randn(n_points)
    return clean_values + noise

In [None]:
def train_deepgp_model(sample_x, sample_y, n_hidden, hidden_width, num_inducing, max_iters):
    """Build and optimise a deep Gaussian process on (sample_x, sample_y).

    The model has `n_hidden` hidden layers of width `hidden_width`, a single
    output column and a 2-dimensional input. Every layer gets an ARD RBF kernel
    plus a bias kernel. Optimisation runs for at most `max_iters` iterations
    and the fitted model is returned.
    """
    def make_kernel(input_dim):
        # ARD RBF + bias kernel over `input_dim` inputs
        return GPy.kern.RBF(input_dim, ARD=True) + GPy.kern.Bias(input_dim)

    # Layer widths: 1 output column, then the hidden layers, then the 2-D input domain
    layer_dims = [1] + [hidden_width] * n_hidden + [2]
    # One kernel per entry after the first, sized by that entry
    layer_kernels = [make_kernel(dim) for dim in layer_dims[1:]]

    deep_model = deepgp.DeepGP(
        nDims=layer_dims,
        Y=sample_y.reshape(-1, 1),
        X=sample_x,
        kernels=layer_kernels,
        num_inducing=num_inducing,
        back_constraint=False,
    )
    deep_model.optimize(max_iters=max_iters, messages=True)

    return deep_model

In [None]:
def _shuffled_grid(x1_bounds, x2_bounds, n_grid):
    # Row-shuffled (n_grid**2, 2) array of points on a regular grid over the given rectangle.
    x1 = np.linspace(x1_bounds[0], x1_bounds[1], n_grid)
    x2 = np.linspace(x2_bounds[0], x2_bounds[1], n_grid)
    X1, X2 = np.meshgrid(x1, x2)
    grid = np.vstack([X1.ravel(), X2.ravel()]).transpose()
    # Shuffling avoids always picking the same corner when the surface is flat up to noise
    np.random.shuffle(grid)
    return grid


def _expected_improvement(model, points, best_score):
    # Expected improvement (for minimisation) of `model` at each row of `points`, as a 1-D array.
    mu, var = model.predict(points)
    # GPy-style predict returns the predictive *variance*; convert it to a standard
    # deviation and floor it so that z stays finite where the model is certain.
    sigma = np.sqrt(np.maximum(np.ravel(var), 1e-12))
    z = (best_score - np.ravel(mu)) / sigma
    return sigma * (scipy.stats.norm.pdf(z) + z * scipy.stats.norm.cdf(z))


def select_next_point(model, best_score, eps=0.1, n_grid=100):
    """Pick the next point to evaluate by maximising expected improvement.

    The search is a two-stage exhaustive grid search on [0, 1] x [0, 1]:
    a coarse `n_grid` x `n_grid` grid over the whole square, followed by a
    second `n_grid` x `n_grid` grid over the box of half-width `eps` centred
    on the coarse optimum (clipped to the unit square).

    Parameters
    ----------
    model : object with ``predict(X) -> (mean, variance)`` for X of shape (N, 2)
    best_score : lowest objective value observed so far (the problem is a minimisation)
    eps : half-width of the refinement box around the coarse optimum
    n_grid : number of grid points per axis in both stages

    Returns
    -------
    (next_x, ei_std) : `next_x` is a (1, 2) array holding the selected point;
        `ei_std` is the standard deviation of the expected improvement over the
        coarse grid, kept as a sanity metric of how much EI varies.
    """
    # Stage 1: coarse search over the whole unit square
    coarse_grid = _shuffled_grid((0.0, 1.0), (0.0, 1.0), n_grid)
    coarse_ei = _expected_improvement(model, coarse_grid, best_score)
    ei_std = coarse_ei.std()
    x1_center, x2_center = coarse_grid[np.argmax(coarse_ei)]

    # Stage 2: finer search in a box around the coarse optimum, clipped to [0, 1]
    fine_grid = _shuffled_grid(
        (max(x1_center - eps, 0), min(x1_center + eps, 1)),
        (max(x2_center - eps, 0), min(x2_center + eps, 1)),
        n_grid,
    )
    fine_ei = _expected_improvement(model, fine_grid, best_score)

    return fine_grid[np.argmax(fine_ei)].reshape(1, -1), ei_std
    

In [None]:
def deep_bayes_opt_run(data_generator, n_samples_total, n_samples_init,
                       sigma_obs, n_hidden, hidden_width, num_inducing, max_iters,
                       seed=123456):
    """Run one Bayesian-optimisation experiment with a deep GP surrogate.

    Starts from `n_samples_init` uniformly random points in the unit square and
    then acquires points one at a time until `n_samples_total` observations
    have been made. Each step retrains a deep GP on all data so far, picks the
    next point with `select_next_point` and observes it with `sample_obs`.

    Parameters
    ----------
    data_generator : target passed through to `sample_obs`
    n_samples_total : total number of observations, initial design included
    n_samples_init : size of the random initial design
    sigma_obs : observation noise scale passed to `sample_obs`
    n_hidden, hidden_width, num_inducing, max_iters : forwarded to `train_deepgp_model`
    seed : value passed to ``np.random.seed`` before sampling, so that runs are
        comparable across configurations; defaults to the original fixed seed

    Returns
    -------
    (sample_y, ei_std_list) : `sample_y` is an (n, 1) array of all observations
        in acquisition order; `ei_std_list` holds the EI spread reported by
        `select_next_point` at each step.
    """
    # Fixed seed so that benchmark runs start from the same initial design
    np.random.seed(seed)
    sample_x = np.random.random((n_samples_init, 2))
    sample_y = sample_obs(data_generator, sample_x, sigma_obs).reshape(-1, 1)
    # Kept as a plain float throughout; the builtin min() on arrays used to turn
    # this into a 1-element array after the first update
    best_score = float(np.min(sample_y))

    ei_std_list = []

    for _ in range(n_samples_total - n_samples_init):
        deepgp_model = train_deepgp_model(sample_x, sample_y, n_hidden, hidden_width, num_inducing, max_iters)
        next_x, ei_std = select_next_point(deepgp_model, best_score)
        ei_std_list.append(ei_std)

        # Observe the suggested point and shape it as a row so it stacks onto sample_y
        next_y = np.asarray(sample_obs(data_generator, next_x, sigma_obs)).reshape(-1, 1)

        sample_x = np.vstack([sample_x, next_x])
        sample_y = np.vstack([sample_y, next_y])

        best_score = min(best_score, float(np.min(next_y)))

    return sample_y, ei_std_list

In [None]:
# Full optimisation run on signal 3; arguments are spelled out by keyword for readability
y, ei_std = deep_bayes_opt_run(
    TARGET_SIGNALS[3],
    n_samples_total=150,
    n_samples_init=25,
    sigma_obs=0.01,
    n_hidden=3,
    hidden_width=3,
    num_inducing=25,
    max_iters=100,
)

In [None]:
# Observations returned by the run, in acquisition order: the first 25 rows come from
# the random initial design, the remaining rows were acquired one at a time by the loop
y