# Experiments to show the effect of querying a GP one point at a time or all together

In [None]:
# to automatically reload modules whose content has changed
%load_ext autoreload
%autoreload 2

# configure matplotlib
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'

In [None]:
from task_utils import *

In [None]:
import funbo as fb
import funbo.plotting as fp
import distributed_gp as dgp

In [None]:
class ExampleFunction1D:
    """A 1D test function bundled with a randomly sampled, optionally noisy data set.

    The function is ``sin(2x) * 0.2 * x**2 + 4 * cos(x)``.

    Attributes:
        noise: standard deviation of the Gaussian noise added to evaluations
            (0 disables the noise).
        xmin, xmax: the ends of ``x_range``.
        X: sampled inputs as a column, shape ``(num_kept, 1)``.
        y: noisy function values at ``X``.
        xs: ``predict_num`` evenly spaced inputs spanning ``x_range``.
        ys: noise-free function values at ``xs``.
    """

    def __init__(self, noise, num_samples, x_range, exclude_ranges, predict_num):
        """Sample the data set.

        Args:
            noise: standard deviation of the noise added to ``y``.
            num_samples: number of inputs drawn uniformly from ``x_range``
                (before any exclusion is applied).
            x_range: ``(xmin, xmax)``.
            exclude_ranges: ``None`` or a sequence of ``(a, b)`` pairs; samples
                with ``a < x < b`` for any pair are dropped. An empty sequence
                excludes nothing.
            predict_num: number of points in the evaluation grid ``xs``.
        """
        self.noise = noise

        # fixed seed so that the same data set is generated every time
        np.random.seed(0)
        self.xmin, self.xmax = x_range
        X = np.random.uniform(self.xmin, self.xmax, size=(num_samples, 1))
        # An empty sequence must behave like None: reducing over no masks
        # collapses to a scalar, which cannot be used to select samples.
        if exclude_ranges is not None and len(exclude_ranges) > 0:
            excluded = np.logical_or.reduce([np.logical_and(a < X, X < b) for a, b in exclude_ranges])
            # boolean indexing flattens the result, so restore the column shape
            X = X[np.logical_not(excluded)].reshape(-1, 1)
        self.X = X
        self.y = self.__call__(X)
        self.xs = np.linspace(self.xmin, self.xmax, num=predict_num)
        self.ys = self.__call__(self.xs, apply_noise=False)

    def __call__(self, x, apply_noise=True):
        """Evaluate the function at ``x`` (scalar or array).

        Gaussian noise with standard deviation ``self.noise`` is added when
        ``apply_noise`` is true and ``self.noise`` is non-zero.
        """
        v = np.sin(x*2) * 0.2*x**2 + 4*np.cos(x)
        if apply_noise and self.noise != 0:
            v += np.random.normal(loc=0, scale=self.noise, size=np.asarray(x).shape)
        return v

    def plot(self, ax=None, show_data_points=True):
        """Draw the noise-free curve (dashed grey) and optionally the samples.

        Args:
            ax: axes to draw on; a new 20x8 figure is created when ``None``.
            show_data_points: whether to scatter the sampled ``(X, y)`` points.
        """
        if ax is None:
            fig, ax = plt.subplots(figsize=(20, 8))
        ax.plot(self.xs, self.ys, '--', color='grey')
        if show_data_points:
            ax.scatter(self.X, self.y, marker='.', color='blue', alpha=0.2, zorder=5)

In [None]:
# Build the example function with 1000 noisy samples (noise=2) drawn from [0, 10]
# and a 100-point evaluation grid, then plot the noise-free curve with the samples.
f = ExampleFunction1D(noise=2, num_samples=1000, x_range=(0, 10), exclude_ranges=None, predict_num=100)
f.plot(show_data_points=True)

In [None]:
def test_query(model, f, n, batch_size, repeats=5, gradient=False):
    """Time how long `model` takes to be queried at `n` points in batches.

    Args:
        model: object providing `predict(x)` and `predict_gradients(x)`.
        f: object providing `xmin` and `xmax`; the query points are drawn
            uniformly from that interval.
        n: total number of query points.
        batch_size: number of points passed to the model per call. When it does
            not divide `n`, the final batch holds the remaining points.
        repeats: number of times the whole set of batches is queried.
        gradient: query `predict_gradients` instead of `predict`.

    Returns:
        The value of the timer's `stop()` divided by `repeats`, i.e. the
        average duration of one pass over all `n` points (presumably seconds).

    Raises:
        ValueError: if `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1, got {}'.format(batch_size))
    # fixed seed so every batch size is timed on the same query points
    np.random.seed(0)
    X = np.random.uniform(f.xmin, f.xmax, size=(n, 1))
    # Split at multiples of batch_size. Asking array_split for n//batch_size
    # equal sections instead gives the wrong batch size whenever batch_size
    # does not divide n, and fails when batch_size > n.
    xs = np.array_split(X, list(range(batch_size, n, batch_size)))
    query = model.predict_gradients if gradient else model.predict
    t = fb.utils.Timer()
    for i in range(repeats):
        for x in xs:
            query(x)
        print('{}/{}'.format(i+1, repeats))
    return t.stop()/repeats

In [None]:
def plot_results(sizes, times, n):
    """Plot the per-point query time for each batch size.

    The left axes show a bar chart with one evenly spaced bar per batch size;
    the right axes show the same values as a line plot against the batch size.

    Args:
        sizes: the batch sizes which were timed.
        times: total time for querying `n` points, one entry per batch size
            (converted with `/n*1000`, so expected to be in seconds).
        n: number of points queried in each timing.
    """
    ms_per_point = np.array(times)/n*1000

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

    fig.suptitle('The effect of batch size when querying at {} points'.format(n))

    ax1.bar(np.arange(len(times)), ms_per_point, tick_label=[str(s) for s in sizes])
    ax1.set_ylabel('computation time per point (ms)')
    # label the bar chart's own x axis (previously ax2 was labelled twice)
    ax1.set_xlabel('batch size')

    ax2.plot(sizes, ms_per_point, 'o-')
    ax2.set_ylabel('computation time per point (ms)')
    ax2.set_xlabel('batch size')
    # leave room at the top for the suptitle
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

In [None]:
# Build a surrogate model using a GPy RBF kernel over the single input dimension
# and fit it to the sampled data of `f`, reporting how long the fit took.
model = fb.GPySurrogate(
    init_params=dict(
        kernel=GPy.kern.RBF(input_dim=1, ARD=False),
        normalizer=True
    ), optimise_params=dict(
        num_restarts=1
))
t = fb.utils.Timer()
# reseed before fitting (presumably so the hyperparameter optimisation is repeatable — TODO confirm)
np.random.seed(0)
model.fit(f.X, f.y, initial_hyper_params=None)
print('fit in {}'.format(t.stop()))

In [None]:
# Time querying the fitted model at n points for a range of batch sizes.
n = 10_000
sizes = (1, 2, 10, 100, 200, 500, 1000, 4000, 8000, 10_000)
# quick variant comparing only the two extremes:
#sizes = (1, 10_000)
times = []  # times[i] is the average time for one pass using batches of sizes[i]
for batch_size in sizes:
    # NOTE(review): the name `time` would hide the stdlib `time` module if it is
    # present in this namespace — confirm
    time = test_query(model, f, n, batch_size, gradient=False)
    print('batch size {}: {:.3f} seconds'.format(batch_size, time))
    times.append(time)

In [None]:
# Visualise the per-point query time measured above for each batch size.
plot_results(sizes, times, n)

In [None]:
# Indices of the slowest and fastest batch sizes, and the speed-up between them.
worst, best = np.argmax(times), np.argmin(times)
print('sampling in batches of {} is {} times faster than batches of {}'.format(sizes[best], times[worst]/times[best], sizes[worst]))

In [None]:
# Compare the cost of predicting function values with the cost of predicting
# gradients, querying all n2 points in a single batch in both cases.
n2 = 10_000
f_time = test_query(model, f, n2, n2, gradient=False)
grad_time = test_query(model, f, n2, n2, gradient=True)
print('computing the function is {}x faster than computing the gradient'.format(grad_time/f_time))

In [None]:
def _():
    """Count the function evaluations used by a finite-difference gradient.

    Wraps a dummy function in `derivative`, evaluates the approximate gradient
    at a single 5-dimensional point and prints every input passed to the dummy
    function followed by the total evaluation count.
    """
    from scipy.optimize import approx_fprime

    def derivative(func):
        """Return a function approximating the gradient of `func` at each row of X."""
        def scalar_func(x):
            # np.asscalar was deprecated and then removed from NumPy;
            # ndarray.item() is the documented replacement.
            return np.asarray(func(np.array([x]))).item()

        def df(X):
            step = np.sqrt(np.finfo(float).eps)
            dfs = [approx_fprime(x, scalar_func, epsilon=step) for x in X]
            return np.array(dfs).reshape(-1, 1)
        return df

    eval_count = [0] # must be mutable, so use list
    def f(x):
        # dummy function: records how many rows it was asked to evaluate
        print(x)
        eval_count[0] += x.shape[0]
        return np.array(1)
    df = derivative(f)
    df(np.array([[0,1,2,5,6]]))
    print(eval_count)
_()

Approximating the gradient at x in D dimensions requires D+1 evaluations: one at x and D more at x + perturbation in each dimension.

# More Realistic Scenario (using real optimisation data set)

In [None]:
# Bounds of the two inputs; each entry looks like (name, lower, upper) — the
# objective below drops the first element and keeps the remaining pair.
domain_bounds = [('x1', 0, 6), ('x2', 0, 6)]
# Bounds passed to the optimiser alongside domain_bounds
# (presumably the range of values the optimised function may take — TODO confirm).
range_bounds = (-1, 2.5)

In [None]:
noisy = False


def to_fit(X):
    """Evaluate the 2D target function at every row of X.

    The function comes from
    https://github.com/fmfn/BayesianOptimization/issues/18 and is a sum of
    Gaussian-shaped bumps plus a sine ripple along the first input.

    X is indexed as `X[:, 0]` and `X[:, 1]`; the result is 1-dimensional with
    one value per row. When the module-level flag `noisy` is true, Gaussian
    noise with standard deviation 0.2 is added to every value.
    """
    x = X[:, 0]
    y = X[:, 1]

    bump_a = np.exp(-((x - 2)**2/0.7 + (y - 4)**2/1.2) + (x - 2)*(y - 4)/1.6)
    bump_b = np.exp(-((x - 4)**2/3 + (y - 2)**2/2.))
    bump_c = np.exp(-((x - 4)**2/0.5 + (y - 4)**2/0.5) + (x - 4)*(y - 4)/0.5)
    ripple = np.sin(3.1415 * x)
    bump_e = np.exp(-((x - 5.5)**2/0.5 + (y - 5.5)**2/.5))

    val = 2*bump_a + bump_b - bump_c + 0.17 * ripple + 2*bump_e
    if not noisy:
        return val

    noise_shape = None if isinstance(x, float) else x.shape
    return val + np.random.normal(0, 0.2, size=noise_shape)

In [None]:
def make_objective(to_fit, sample_num):
    """Build an objective scoring how closely a function matches `to_fit`.

    The returned `objective(f)` walks over `fb.utils.RegularGrid(sample_num,
    bounds)`, where `bounds` is every entry of the module-level
    `domain_bounds` with its first element removed. At each grid point it
    computes the squared difference between `f` and `to_fit`.

    Returns:
        A function `objective(f)` returning `(local_rewards, global_reward)`:
        a list of `(point, squared_error)` pairs and the sum of all the
        squared errors.
    """
    def objective(f):
        def squared_error(point):
            return (f(point) - to_fit(point))**2

        grid_bounds = [bound[1:] for bound in domain_bounds]
        local_rewards = []
        global_reward = 0.0

        print('calculating the local rewards')
        for point in fb.utils.RegularGrid(sample_num, grid_bounds):
            reward = squared_error(point)
            local_rewards.append((point, reward))
            global_reward += reward
        return local_rewards, global_reward

    return objective

In [None]:
class Coordinator(fb.Coordinator):
    """Coordinator choosing which selection config each phase of the run uses."""

    def get_pre_phase_config(self, trial_num):
        """Return a `fb.RandomCPSelectConfig` bound to this coordinator's optimiser."""
        # fb.GPPriorSelectConfig(self.optimiser) is the alternative that was tried here
        return fb.RandomCPSelectConfig(self.optimiser)

    def get_bayes_config(self, trial_num):
        """Return a `fb.BayesSelectConfig` bound to this coordinator's optimiser."""
        return fb.BayesSelectConfig(self.optimiser)

In [None]:
# Run the optimiser on the squared-error objective (15 grid samples, minimising)
# to gather a realistic data set.
np.random.seed(0)
objective = make_objective(to_fit, sample_num=15)
opt = fb.Optimiser(objective, domain_bounds, range_bounds, desired_extremum='min')
# NOTE(review): the two Coordinator arguments are presumably the number of
# pre-phase and Bayesian trials — confirm against fb.Coordinator
opt.run(Coordinator(5, 5))

In [None]:
# Extract the training inputs and targets gathered by the optimiser run above.
X, y = opt.get_training_data(fb.BayesSelectConfig(opt))

In [None]:
#TODO: do same timing tests using the dataset gathered above