In [None]:
# to automatically reload modules whose content has changed
%load_ext autoreload
%autoreload 2

# configure matplotlib
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'

In [None]:
import time
import numpy as np
import GPy
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

import ipywidgets as widgets
from IPython.display import display
from IPython.core.debugger import set_trace

In [None]:
import function_bo as fbo
from function_bo_plotting import *

import sys
sys.path.append('../')
import maze

In [None]:
# Single simulator instance shared by the whole notebook: `objective` calls
# `sim.run(...)` and reads `sim.h` / `sim.w`, and `plot_walls` calls `sim.get_world(...)`.
sim = maze.Simulation()

In [None]:
# The function being optimised maps a normalised height to a normalised x-position:
# `objective` feeds it `height/sim.h` and multiplies its output by `sim.w`.
# `domain_bounds` is unpacked in `objective` as (name, lower bound, upper bound).
domain_bounds = ('h', 0, 1) # y-position (height)
range_bounds = (0, 1)   # x-position

# Passed to both `sim.run(..., world_num=...)` and `sim.get_world(...)`.
world_num = 2

def objective(f):
    """Run one maze simulation steered by ``f`` and convert the outcome into rewards.

    Parameters
    ----------
    f : callable
        Maps a normalised height (``height/sim.h``) to a normalised x-position;
        the result is scaled by ``sim.w`` before being handed to the simulator.

    Returns
    -------
    (R_ls, R_g)
        ``R_ls`` is a list of ``(h, reward)`` tuples sampled on a 40-point grid over
        the domain: reward ``1.0`` for every grid height up to the height that was
        reached, followed by a single ``(h, 0.0)`` entry for the first grid height
        beyond it (omitted only when the whole grid was reached).
        ``R_g`` is the first value returned by ``sim.run`` and is passed through unchanged.
    """
    def control(height):
        # the simulator hands over heights in its own units; f works on [0, 1]
        return f(height/sim.h)*sim.w
    R_g, trail = sim.run(fps=400, ball_control=control, quiet=True, world_num=world_num)
    # NOTE(review): assumes trail[-1][1] is the final y-coordinate, measured from the
    # top so that `sim.h - y` is the height climbed; confirm against maze.Simulation.
    reached_h = (sim.h-trail[-1][1])/sim.h
    print(reached_h)
    _, xmin, xmax = domain_bounds
    reward_hs = np.linspace(xmin, xmax, num=40)
    #TODO: idea, reward for staying close to the center?

    # need to reward for surviving and also important is to inform the surrogate where the bad regions are
    # and so negative examples have to be added
    R_ls = []
    for h in reward_hs:
        if h > reached_h:
            # First grid height that was not reached: record it as the negative
            # example here, at the break point. Checking `h != reward_hs[-1]` after
            # the loop would drop the negative when the break happens on the last sample.
            R_ls.append((h, 0.0))
            break
        R_ls.append((h, 1.0))
    return R_ls, R_g

def plot_walls(ax):
    """Overlay the walls of world ``world_num`` and the range limits on ``ax``.

    Each wall's ``rect`` is normalised by ``sim.w`` / ``sim.h`` and drawn as a
    translucent green rectangle with the axes swapped: the flipped, normalised
    y-coordinate goes on the horizontal axis and the normalised x-coordinate on
    the vertical axis. Dashed horizontal lines are then drawn at 0 and 1.
    """
    for wall in sim.get_world(world_num):
        box = wall.rect
        norm_x = box.x/sim.w
        norm_y = 1-box.y/sim.h     # flipped so it grows in the opposite direction to rect.y
        norm_w = box.w/sim.w
        norm_h = -box.h/sim.h      # negative extent: the rectangle spans backwards from norm_y
        patch = mpl.patches.Rectangle((norm_y, norm_x), norm_h, norm_w, facecolor='green', alpha=0.4)
        ax.add_patch(patch)
    for level in (0,1):
        ax.axhline(y=level, linestyle='--', color='green', alpha=0.4)

In [None]:
class Coordinator(fbo.Coordinator):
    """Notebook-specific coordinator supplying the per-trial selection configs.

    Overrides the two config hooks of ``fbo.Coordinator``; both build their
    config from ``self.domain_bounds`` and ignore ``trial_num``.
    """

    def get_pre_phase_config(self, trial_num):
        """Return an ``fbo.RandomSelectConfig`` over the domain, left at its defaults."""
        # Tweaks that were tried on the config and are currently disabled:
        #   c.mu = lambda x: 0.5 # bias
        #   c.kernel = GPy.kern.RBF(input_dim=1, variance=0.1, lengthscale=0.15)
        return fbo.RandomSelectConfig(self.domain_bounds)

    def get_bayes_config(self, trial_num):
        """Return an ``fbo.BayesSelectConfig`` with the surrogate settings for this experiment."""
        config = fbo.BayesSelectConfig(self.domain_bounds)
        # Isotropic (ARD off) RBF kernel over two input dimensions.
        config.surrogate_model_params = {
            'kernel': GPy.kern.RBF(input_dim=2, ARD=False),
        }
        config.surrogate_optimise_params = {
            'num_restarts': 4,
            'parallel': True,
            'verbose': True,
        }
        # NOTE(review): meaning of tracking_l is defined in function_bo; 0.4 is kept as-is.
        config.tracking_l = 0.4
        return config
    
# NOTE(review): 20 and 25 are passed positionally to the fbo.Coordinator constructor;
# presumably the trial counts for the pre-phase and the Bayesian phase. Confirm against function_bo.
coordinator = Coordinator(domain_bounds, 20, 25)

In [None]:
# Seed numpy's global RNG before the optimiser is built and run.
np.random.seed(0)
# Maximise `objective` using the trial schedule supplied by `coordinator`.
op = fbo.Optimiser(objective, domain_bounds, range_bounds, desired_extremum='max', coordinator=coordinator)
op.run()
# NOTE(review): sim.h is passed as the best achievable global reward; confirm that
# the R_g returned by sim.run is on the same scale.
plot_convergence(op, best_R_g=sim.h)
plot_trials(op, op.trials, color_by_reward=True)
# -1 is presumably the index of the last trial; the walls are overlaid on the figure's first axes.
fig = plot_surrogate_with_trials(op, -1)
plot_walls(fig.axes[0])

# Report the incumbent trial and plot it on its own.
inc_i, inc = op.get_incumbent()
print('incumbent = trial {}'.format(inc_i))
plot_trials(op, [inc], color_by_reward=True)
#plot_trial_area(op, inc, to_fit)


In [None]:
# Same call as in the optimisation cell; kept in its own cell so the figure can be
# regenerated without re-running the optimiser.
plot_trials(op, op.trials, color_by_reward=True)

In [None]:
# Same two calls as in the optimisation cell: surrogate plot for index -1 with the
# maze walls overlaid on the figure's first axes.
fig = plot_surrogate_with_trials(op, -1)
plot_walls(fig.axes[0])

In [None]:
# Plot the surrogate stored on the last trial (op.trials[-1]); presumably a 3D view, going by the helper's name.
plot_surrogate_3D(op, op.trials[-1].surrogate)