In [None]:
import numpy as np
import scipy.stats
import GPy

import json
import matplotlib.pyplot as plt
%matplotlib inline

import sys
sys.path.append("..")
from data_generator_2d import TARGET_SIGNALS
sys.path.append("../../PyDeepGP")
import deepgp

## Showing signal to optimize

In [None]:
# Number of entries in TARGET_SIGNALS (imported from data_generator_2d).
len(TARGET_SIGNALS)

In [None]:
# Regular 100 x 100 evaluation grid covering the unit square [0, 1] x [0, 1].
x1, x2 = np.linspace(0, 1, 100), np.linspace(0, 1, 100)

# X1 / X2 are the coordinate matrices; every row of grid_flat is one
# (x1, x2) grid point, which is the layout the signals and models consume.
X1, X2 = np.meshgrid(x1, x2)
grid_flat = np.vstack((X1.ravel(), X2.ravel())).T

In [None]:
# Pick one benchmark signal, then print its known optimum and its description.
f_pick = 3
f = TARGET_SIGNALS[f_pick].fun
print(TARGET_SIGNALS[f_pick].x_opt)
print(TARGET_SIGNALS[f_pick].desc)

# Evaluate the signal at every grid point (one row of grid_flat per point)
# and draw it as a heat map on the 100 x 100 grid.
z = np.array([f(point) for point in grid_flat])
plt.pcolor(X1, X2, z.reshape((100, 100)), cmap='RdBu')
plt.colorbar()
plt.show()

In [None]:
# Indices 0, 3, ..., 33 into TARGET_SIGNALS (12 values).
# NOTE: the next cell assigns the identical value to `picks` again.
picks = range(0, 36, 3)


In [None]:
# Every third signal (indices 0, 3, ..., 33): 12 signals, one per panel
# of the 3 x 4 figure below.
picks = range(0, 36, 3)

fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(16, 9))

# Pair each axis with its signal index directly instead of maintaining a
# separate counter by hand. As before, f_pick is left holding the last
# plotted index once the loop finishes.
for ax, f_pick in zip(axes.flat, picks):
    f = TARGET_SIGNALS[f_pick].fun

    z = np.array([f(grid_flat[i, :]) for i in range(grid_flat.shape[0])])

    ax.set_axis_off()
    im = ax.pcolor(X1, X2, z.reshape((100, 100)), cmap='RdBu')

# The shared colorbar is built from `im`, i.e. the image of the last panel.
fig.colorbar(im, ax=axes.ravel().tolist())

plt.suptitle("Examples of test functions", fontsize=24)
plt.show()

## Fetching DeepGP and using it for Bayesian optimization (example)

In [None]:
# Draw 50 random points in the unit square (no seed is set here, so the
# sample changes from run to run).
sample_x = np.random.random((50, 2))

# Evaluate the selected signal row by row.
# NOTE: f_pick is whatever the last executed cell left it as; after the
# multi-panel plotting loop above it is the last entry of `picks`, not the
# value 3 assigned in the single-signal cell.
sample_y = np.apply_along_axis(TARGET_SIGNALS[f_pick].fun, 1, sample_x)
# Spread of the sampled signal values.
sample_y.std()

In [None]:
# One ARD RBF + bias kernel per layer: kern1 acts on 5 dimensions,
# kern2 on 2 dimensions (sample_x has two columns).
kern1 = GPy.kern.RBF(5, ARD=True) + GPy.kern.Bias(5)
kern2 = GPy.kern.RBF(2, ARD=True) + GPy.kern.Bias(2)

# nDims lists the layer widths: 1 matches the single-column Y, 2 matches the
# two columns of X, 5 is the hidden layer in between.
# NOTE(review): presumably nDims/kernels are ordered from the observed output
# towards the input - confirm against the PyDeepGP documentation.
model = deepgp.DeepGP(nDims=[1, 5, 2],
                      Y=sample_y.reshape(-1, 1),
                      X=sample_x,
                      kernels=[kern1,kern2], 
                      num_inducing=25, 
                      back_constraint=False
                     )

In [None]:
# Fit the deep GP: at most 500 optimiser iterations, with progress messages on.
model.optimize(max_iters=500, messages=True)

In [None]:
# Compute the grid prediction here so that this cell does not depend on
# `pred` being assigned by the cell below (which raises NameError when the
# notebook is executed top to bottom on a fresh kernel).
pred = model.predict(grid_flat)
pred[0]

In [None]:
# Predict over the full evaluation grid; pred[0] is what the following
# cell plots as the predicted surface.
pred = model.predict(grid_flat)
# Spread of the first element of the prediction across the grid.
pred[0].std()

In [None]:
# Heat map of the model's prediction on the 100 x 100 grid, using the same
# colour map as the plots of the true signals.
plt.pcolor(X1, X2, pred[0].reshape((100, 100)), cmap='RdBu')
plt.colorbar()
plt.show()

In [None]:
# NOTE: select_next_point is defined in a later cell of this notebook
# ("function form" section); that cell has to be executed before this one.
# The smallest sampled value is passed as the best score found so far.
next_x, ei_std = select_next_point(model, min(sample_y), eps=0.1)
print next_x, ei_std

In [None]:
# NOTE: sample_obs is defined in a later cell of this notebook; that cell has
# to be executed before this one.
# NOTE(review): this queries TARGET_SIGNALS[1], whereas sample_y above was
# drawn from TARGET_SIGNALS[f_pick] - confirm the index is intended.
sample_obs(TARGET_SIGNALS[1], next_x, 0.1)

In [None]:
# Plot the ARD summary of the kernel attached to each of the model's two
# layers (`obslayer` and `layer_1`).
model.obslayer.kern.plot_ARD()
model.layer_1.kern.plot_ARD()

In [None]:
# Fit a plain GP regression with an ARD RBF kernel on the same sample.
# NOTE: this rebinds `model`, replacing the deep GP fitted in the cells above.
kernel = GPy.kern.RBF(input_dim=2, ARD=True)
model = GPy.models.GPRegression(sample_x, sample_y.reshape(-1, 1), kernel)
model.optimize()

# Show the fitted model using GPy's built-in plot.
model.plot()
plt.show()

## Fetching DeepGP and using it for Bayesian optimization (function form)

In [None]:
def sample_obs(data_generator, x, sigma_obs):
    """Draw noisy observations of ``data_generator`` at the rows of ``x``.

    Parameters
    ----------
    data_generator : object
        Anything exposing ``sample(x)``.
    x : numpy.ndarray
        Query points, one per row.
    sigma_obs : float
        Scale applied to the standard-normal observation noise.

    Returns
    -------
    ``data_generator.sample(x)`` plus one independent noise draw per row of
    ``x``, each scaled by ``sigma_obs``.
    """
    clean_values = data_generator.sample(x)
    n_points = x.shape[0]
    noise = sigma_obs * np.random.randn(n_points)
    return clean_values + noise

In [None]:
def train_deepgp_model(sample_x, sample_y, n_hidden, hidden_width, num_inducing, max_iters):
    """Build and optimise a deep Gaussian process regression model.

    Parameters
    ----------
    sample_x : numpy.ndarray
        Training inputs, one point per row. The input dimension of the last
        kernel is taken from ``sample_x.shape[1]`` (previously hard-coded to
        2), so 2-D inputs behave exactly as before.
    sample_y : numpy.ndarray
        Training targets; reshaped to a single column.
    n_hidden : int
        Number of hidden layers.
    hidden_width : int
        Width of every hidden layer.
    num_inducing : int
        Number of inducing points handed to ``deepgp.DeepGP``.
    max_iters : int
        Maximum number of optimiser iterations.

    Returns
    -------
    The optimised ``deepgp.DeepGP`` model.
    """
    input_dim = sample_x.shape[1]

    def make_kernel(dim):
        # Same kernel family for every layer: ARD RBF plus a bias term.
        return GPy.kern.RBF(dim, ARD=True) + GPy.kern.Bias(dim)

    # One kernel per layer; each kernel acts on the width listed right after
    # the leading 1 in nDims (hidden layers first, the input layer last).
    kernel_dims = [hidden_width] * n_hidden + [input_dim]
    kernels = [make_kernel(dim) for dim in kernel_dims]
    nDims = [1] + kernel_dims

    model = deepgp.DeepGP(nDims=nDims,
                          Y=sample_y.reshape(-1, 1),
                          X=sample_x,
                          kernels=kernels,
                          num_inducing=num_inducing,
                          back_constraint=False)

    model.optimize(max_iters=max_iters, messages=False)

    return model

In [None]:
def train_sparsegp_model(sample_x, sample_y, num_inducing, max_iters):
    """Build and optimise a sparse Gaussian process regression model.

    Parameters
    ----------
    sample_x : numpy.ndarray
        Training inputs, one point per row. The kernel's input dimension is
        taken from ``sample_x.shape[1]`` (previously hard-coded to 2).
    sample_y : numpy.ndarray
        Training targets. Reshaped to a single column, as
        ``train_deepgp_model`` does, so both 1-D and column-shaped targets
        are accepted.
    num_inducing : int
        Number of inducing points.
    max_iters : int
        Maximum number of optimiser iterations.

    Returns
    -------
    The optimised ``GPy.models.SparseGPRegression`` model.
    """
    input_dim = sample_x.shape[1]
    kern = GPy.kern.RBF(input_dim, ARD=True) + GPy.kern.Bias(input_dim)

    model = GPy.models.SparseGPRegression(X=sample_x,
                                          Y=sample_y.reshape(-1, 1),
                                          kernel=kern,
                                          num_inducing=num_inducing)

    model.optimize(max_iters=max_iters, messages=False)

    return model

In [None]:
def _ei_grid_search(model, best_score, x1_bounds, x2_bounds, n_grid=100):
    """Evaluate expected improvement (minimisation) on a shuffled regular grid.

    Returns ``(best_point, ei)``: the grid point with the highest expected
    improvement (a length-2 array) and the expected improvement of every
    grid point.
    """
    x1 = np.linspace(x1_bounds[0], x1_bounds[1], n_grid)
    x2 = np.linspace(x2_bounds[0], x2_bounds[1], n_grid)

    X1, X2 = np.meshgrid(x1, x2)
    grid_flat = np.vstack([X1.ravel(), X2.ravel()]).transpose()

    # Shuffling randomises which point wins a tie, so a flat (constant plus
    # noise) surface does not always send the search to the same corner.
    np.random.shuffle(grid_flat)

    # GPy-style predict() returns the predictive mean and *variance*; the
    # expected-improvement formula needs the standard deviation. The small
    # floor guards against division by zero.
    mu, var = model.predict(grid_flat)
    std = np.sqrt(np.maximum(var, 1e-12))

    z = (best_score - mu) / std
    ei = std * scipy.stats.norm.pdf(z) + z * std * scipy.stats.norm.cdf(z)

    return grid_flat[int(np.argmax(ei))], ei


def select_next_point(model, best_score, eps=0.1):
    """Choose the next point to evaluate in Bayesian optimisation.

    The criterion is expected improvement over ``best_score`` (lower scores
    are better). The search is a two-stage exhaustive grid search: first a
    coarse 100 x 100 grid over [0, 1] x [0, 1], then a second 100 x 100 grid
    spanning ``+/- eps`` around the best coarse point, clipped to the unit
    square.

    Parameters
    ----------
    model : object
        Fitted model whose ``predict(X)`` returns ``(mean, variance)``.
    best_score : float
        Best (lowest) score observed so far.
    eps : float
        Half-width of the refinement window.

    Returns
    -------
    next_x : numpy.ndarray of shape (1, 2)
        The selected point.
    ei_std : float
        Standard deviation of the expected improvement over the coarse grid,
        kept as a sanity metric of how much the criterion varies.
    """
    coarse_best, coarse_ei = _ei_grid_search(model, best_score, (0, 1), (0, 1))
    ei_std = coarse_ei.std()

    x1_center, x2_center = coarse_best
    fine_best, _ = _ei_grid_search(
        model,
        best_score,
        (max(x1_center - eps, 0), min(x1_center + eps, 1)),
        (max(x2_center - eps, 0), min(x2_center + eps, 1)),
    )

    return fine_best.reshape(1, -1), ei_std
    

In [None]:
def select_x_opt(model, eps=0.1):
    """Locate the minimiser of the model's predicted mean on the unit square.

    Used at the end of Bayesian optimisation. A coarse 100 x 100 grid over
    [0, 1] x [0, 1] is searched first; a second 100 x 100 grid spanning
    ``+/- eps`` around the coarse minimiser (clipped to the unit square) is
    then searched to refine it.

    Returns a flat array holding the two coordinates of the refined minimiser.
    """

    def argmin_on_grid(lo1, hi1, lo2, hi2):
        # Build the grid, shuffle its rows, and return the row with the
        # lowest predicted mean.
        axis1 = np.linspace(lo1, hi1, 100)
        axis2 = np.linspace(lo2, hi2, 100)
        mesh1, mesh2 = np.meshgrid(axis1, axis2)
        candidates = np.vstack([mesh1.ravel(), mesh2.ravel()]).transpose()

        np.random.shuffle(candidates)
        mean, _ = model.predict(candidates)

        return candidates[np.argmin(mean, axis=0), :][0]

    # Coarse pass over the whole domain.
    center1, center2 = argmin_on_grid(0, 1, 0, 1)

    # Refinement pass in a window around the coarse minimiser.
    refined = argmin_on_grid(max(center1 - eps, 0), min(center1 + eps, 1),
                             max(center2 - eps, 0), min(center2 + eps, 1))

    return refined.reshape(-1, )

In [None]:
def bayes_opt_run(data_generator, n_samples_total, n_samples_init, is_deep,
                       sigma_obs, n_hidden, hidden_width, num_inducing, max_iters):
    """Run one Bayesian optimisation (minimisation) of ``data_generator``.

    Parameters
    ----------
    data_generator : object
        Signal to optimise; passed through to ``sample_obs``.
    n_samples_total : int
        Total number of observations, initial random sample included.
    n_samples_init : int
        Size of the initial random sample.
    is_deep : bool or int
        Truthy to fit a deep GP each step, falsy for a sparse GP.
    sigma_obs : float
        Observation noise scale.
    n_hidden, hidden_width : int
        Deep GP architecture (ignored for the sparse GP).
    num_inducing : int
        Number of inducing points for either model.
    max_iters : int
        Maximum optimiser iterations per model fit.

    Returns
    -------
    best_x : numpy.ndarray
        Optimum proposed by ``select_x_opt`` on the last fitted model.
    sample_y : numpy.ndarray of shape (n, 1)
        All observed scores, in acquisition order.
    ei_std_list : list
        Sanity metric returned by ``select_next_point`` at every step.
    """

    def fit(xs, ys):
        # Single place that decides which surrogate model is trained.
        if is_deep:
            return train_deepgp_model(xs, ys, n_hidden, hidden_width, num_inducing, max_iters)
        return train_sparsegp_model(xs, ys, num_inducing, max_iters)

    # Start from a random sample to iterate further.
    # The seed is fixed so that different models are benchmarked on the same
    # initial points (note that this resets NumPy's global random state).
    np.random.seed(123456)
    sample_x = np.random.random((n_samples_init, 2))
    sample_y = sample_obs(data_generator, sample_x, sigma_obs).reshape(-1, 1)
    # Kept as a plain float throughout, instead of turning into a length-1
    # array after the first update.
    best_score = float(np.min(sample_y))

    ei_std_list = []
    model = None

    for _ in range(n_samples_total - n_samples_init):
        model = fit(sample_x, sample_y)

        next_x, ei_std = select_next_point(model, best_score)
        ei_std_list.append(ei_std)

        next_y = sample_obs(data_generator, next_x, sigma_obs)

        sample_x = np.vstack([sample_x, next_x])
        sample_y = np.vstack([sample_y, next_y])

        best_score = min(best_score, float(np.min(next_y)))

    # With no acquisition steps (n_samples_total <= n_samples_init) the loop
    # never fits a model; fit one on the initial sample so the optimum can
    # still be selected instead of failing on an unbound `model`.
    if model is None:
        model = fit(sample_x, sample_y)

    best_x = select_x_opt(model)

    return best_x, sample_y, ei_std_list

In [None]:
# Short smoke run: sparse GP baseline on signal 3, 25 initial points plus
# 10 acquisition steps.
x, y, ei_std = bayes_opt_run(
    data_generator=TARGET_SIGNALS[3],
    n_samples_total=35,
    n_samples_init=25,
    is_deep=0,
    sigma_obs=0.01,
    n_hidden=3,
    hidden_width=3,
    num_inducing=25,
    max_iters=100,
)

## Running simulations for all signals

In [None]:
# Settings shared by every signal in this sweep (deep GP, one hidden layer
# of width 5).
n_samples_init = 25
n_samples_total = 200
n_hidden = 1
hidden_width = 5
sigma_obs = 1e-8

# Results are appended, one JSON record per line, so re-running this cell
# adds further records to the same file.
with open("deepGP_1_5_opt_results.txt", "a") as f:
    for signal in TARGET_SIGNALS:
        print("Optimizing " + signal.desc)

        best_x, y, ei_std = bayes_opt_run(signal, n_samples_total, n_samples_init, 1, sigma_obs,
                                          n_hidden, hidden_width, 25, 100)

        print("Best x: ")
        print(best_x)

        # Euclidean distance between the proposed and the known optimum.
        x_opt_distance = np.sqrt(
            (best_x[0] - signal.x_opt[0]) ** 2 + (best_x[1] - signal.x_opt[1]) ** 2
        )

        sim_results = {
            "model": signal.desc,
            "n_samples_init": n_samples_init,
            "n_samples_total": n_samples_total,
            "sigma_obs": sigma_obs,
            "n_hidden": n_hidden,
            "hidden_width": hidden_width,
            "x_opt_distance": x_opt_distance,
            "scores": list(y.reshape(-1,)),
        }
        sim_report = json.dumps(sim_results)

        f.write(sim_report + "\n")


## Comparing deep and sparse models

In [None]:
# Load the scores of every simulation run: one JSON record per line, one
# row of scores per run. The array is sized from the file itself, so a
# missing run no longer leaves an all-zero row behind (which would bias the
# averages) and an extra run no longer overflows a preallocated buffer.
# Blank lines are skipped.
with open("deepGP_1_5_opt_results.txt", "r") as f:
    deep_1_5_rewards = np.array(
        [json.loads(line)["scores"] for line in f if line.strip()],
        dtype=float,
    )

In [None]:
# Load the scores of every simulation run: one JSON record per line, one
# row of scores per run. The array is sized from the file itself, so a
# missing run no longer leaves an all-zero row behind (which would bias the
# averages) and an extra run no longer overflows a preallocated buffer.
# Blank lines are skipped.
with open("deepGP_1_2_opt_results.txt", "r") as f:
    deep_1_2_rewards = np.array(
        [json.loads(line)["scores"] for line in f if line.strip()],
        dtype=float,
    )

In [None]:
# Load the scores of every simulation run: one JSON record per line, one
# row of scores per run. The array is sized from the file itself, so a
# missing run no longer leaves an all-zero row behind (which would bias the
# averages) and an extra run no longer overflows a preallocated buffer.
# Blank lines are skipped.
with open("deepGP_opt_results.txt", "r") as f:
    deep_5_2_rewards = np.array(
        [json.loads(line)["scores"] for line in f if line.strip()],
        dtype=float,
    )

In [None]:
# Load the scores of every simulation run: one JSON record per line, one
# row of scores per run. The array is sized from the file itself, so a
# missing run no longer leaves an all-zero row behind (which would bias the
# averages) and an extra run no longer overflows a preallocated buffer.
# Blank lines are skipped.
with open("sparseGP_opt_results.txt", "r") as f:
    sparse_rewards = np.array(
        [json.loads(line)["scores"] for line in f if line.strip()],
        dtype=float,
    )

In [None]:
def moving_average(a, n=3):
    """Return the simple moving average of ``a`` over windows of length ``n``.

    Only complete windows are produced, so for a 1-D input of length ``L``
    the result has ``L - n + 1`` entries (none when ``n > L``). The
    computation uses a float cumulative sum.
    """
    totals = np.cumsum(a, dtype=float)
    # The first complete window is the running total at index n - 1; each
    # later window is the difference of two running totals n positions apart.
    first_window = totals[n - 1:n]
    later_windows = totals[n:] - totals[:-n]
    return np.concatenate([first_window, later_windows]) / n

# One curve per model: the scores averaged over all runs at each step, then
# smoothed with a 10-step moving average.
plt.title("Average regrets during Bayesian optimization")
for rewards, label in [
    (sparse_rewards, "baseline (ma)"),
    (deep_1_2_rewards, "deep_1h_2w (ma)"),
    (deep_1_5_rewards, "deep_1h_5w (ma)"),
    (deep_5_2_rewards, "deep_5h_2w (ma)"),
]:
    plt.plot(moving_average(rewards.mean(axis=0), 10), label=label)
plt.xlabel("timestamp")
plt.legend()
plt.show()

In [None]:
# `deep_rewards` is not defined anywhere in this notebook, so the original
# call raised NameError. deep_5_2_rewards is used instead: it is loaded from
# "deepGP_opt_results.txt", the file whose name mirrors
# "sparseGP_opt_results.txt".
# NOTE(review): confirm this is the deep model the comparison was meant for.
plt.scatter(deep_5_2_rewards.mean(axis=1), sparse_rewards.mean(axis=1))
plt.xlabel("deep GP: mean score per run")
plt.ylabel("sparse GP: mean score per run")
plt.show()

In [None]:
# Rebuild the regular 100 x 100 evaluation grid over the unit square.
x1, x2 = np.linspace(0, 1, 100), np.linspace(0, 1, 100)

# X1 / X2 are the coordinate matrices; every row of grid_flat is one
# (x1, x2) grid point.
X1, X2 = np.meshgrid(x1, x2)
grid_flat = np.vstack((X1.ravel(), X2.ravel())).T

In [None]:
# Select signal 3 again, then print its known optimum and its description.
f_pick = 3
f = TARGET_SIGNALS[f_pick].fun
print(TARGET_SIGNALS[f_pick].x_opt)
print(TARGET_SIGNALS[f_pick].desc)

# Evaluate the signal at every grid point (one row of grid_flat per point)
# and draw it as a heat map on the 100 x 100 grid.
z = np.array([f(point) for point in grid_flat])
plt.pcolor(X1, X2, z.reshape((100, 100)), cmap='RdBu')
plt.colorbar()
plt.show()

In [None]:
# Settings for the side-by-side regression comparison.
N_SAMPLES = 5000
NUM_INDUCING = 50
MAX_ITERS = 1000
f_pick = 3

# Fixed seed so that both models are fitted on the same random sample.
np.random.seed(123456)
sample_x = np.random.random((N_SAMPLES, 2))
sample_y = np.apply_along_axis(TARGET_SIGNALS[f_pick].fun, 1, sample_x)

# True signal on the evaluation grid, for reference.
z = np.array([TARGET_SIGNALS[f_pick].fun(grid_flat[i, :]) for i in range(grid_flat.shape[0])])

# Three panels: true signal, sparse GP prediction, deep GP prediction.
# Each panel is titled so the figure can be read on its own.
plt.figure(figsize=(12, 5))
plt.subplot(1, 3, 1)
plt.title("Target signal")
plt.pcolor(X1, X2, z.reshape((100, 100)), cmap='RdBu')

model = train_sparsegp_model(sample_x, sample_y.reshape(-1, 1),
                             num_inducing=NUM_INDUCING, max_iters=MAX_ITERS)
pred = model.predict(grid_flat)

plt.subplot(1, 3, 2)
plt.title("Sparse GP prediction")
plt.pcolor(X1, X2, pred[0].reshape((100, 100)), cmap='RdBu')

model = train_deepgp_model(sample_x, sample_y.reshape(-1, 1), n_hidden=1, hidden_width=2,
                           num_inducing=NUM_INDUCING, max_iters=MAX_ITERS)
pred = model.predict(grid_flat)

plt.subplot(1, 3, 3)
plt.title("Deep GP prediction")
plt.pcolor(X1, X2, pred[0].reshape((100, 100)), cmap='RdBu')
# The colorbar is attached to the last panel only, as before.
plt.colorbar()

plt.show()