# This notebook illustrates Gaussian Process Regression in a 2D search region.

In [1]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from matplotlib import pyplot as plt
from matplotlib.animation import FuncAnimation
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from jax import numpy as jnp
from jax import jit, grad,vmap
from InfoGrad import InfoGrad
from time import time
import pickle as pkl

%load_ext autoreload
%autoreload 2

In [2]:
# The underlying function
def f(P):
    """Evaluate the ground-truth surface: the negated sum of three Gaussian-like bumps.

    Parameters
    ----------
    P : array_like, shape (..., 2)
        Query locations. The last axis holds the coordinates; index 0 is x and
        index 1 is y. A single point of shape ``(2,)``, a plain list of points,
        or any number of leading batch axes are all accepted.

    Returns
    -------
    numpy.ndarray
        Function values with shape ``P.shape[:-1]``. Every value is
        non-positive because each bump is an exponential and the sum is negated.
    """
    P = np.asarray(P)
    # Ellipsis indexing keeps the (N, 2) behaviour and also admits a single
    # point or extra leading batch axes.
    X = P[..., 0]
    Y = P[..., 1]

    # The third bump originally carried a `0.*X*Y` cross term, which contributes
    # nothing for finite inputs and is therefore omitted.
    bump_left = np.exp(-2*(X+1.5)**2 - 0.3*(Y+1.5)**2 - 0.3*X*Y + 0.4)
    bump_top = np.exp(-(X-0.5)**2 - (Y-1.2)**2 - 0.6*X*Y)
    bump_right = np.exp(-0.7*(X-1.5)**2 - 0.3*(Y+1.5)**2)
    return -(bump_left + bump_top + bump_right)

# The differentiable kernel function with parameters c,l not filled.
def k(x1,x2,c,l):
    """Squared-exponential kernel ``c * exp(-||x1 - x2||^2 / l)``, differentiable with JAX.

    Parameters
    ----------
    x1, x2 : array_like, shape (..., dim)
        Input locations; they are subtracted with broadcasting and the squared
        distance is reduced over the last axis.
    c : scalar
        Multiplicative amplitude of the kernel.
    l : scalar
        Divisor of the squared distance inside the exponential.
        NOTE(review): sklearn's RBF uses ``exp(-d^2 / (2 * length_scale^2))``,
        so ``l`` presumably corresponds to ``2 * length_scale**2`` -- confirm
        against how InfoGrad fills in ``c`` and ``l``.

    Returns
    -------
    jax array of shape ``broadcast(x1, x2).shape[:-1]``.
    """
    # Summing squares directly is smooth at x1 == x2 (gradient exactly zero),
    # unlike norm(...)**2, which differentiates through a square root. That
    # removes the need for an additive offset, which skewed the distance and
    # made k(x1, x2) differ from k(x2, x1) for tiny separations.
    diff = x1 - x2
    sq_dist = jnp.sum(diff * diff, axis=-1)
    return c * jnp.exp(-sq_dist / l)

def f_sampler(X, noise_std=0.1):
    """Return noisy observations of ``f`` at the locations ``X``.

    Parameters
    ----------
    X : array_like
        Locations forwarded unchanged to ``f``.
    noise_std : float, optional
        Standard deviation of the zero-mean Gaussian noise added to each
        value. Defaults to 0.1, the level that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        ``f(X)`` plus independent noise, with the same shape as ``f(X)``.
        The noise is drawn from NumPy's global random state.
    """
    clean = f(X)
    return clean + np.random.randn(*clean.shape) * noise_std

def random_sample_locs(n_locs,dim_lims):
    '''
        Draw n_locs points uniformly at random from an axis-aligned box.

        dim_lims = [(a1,b1),(a2,b2),...,(a{space_dim},b{space_dim})]

        Row i gives the two end points of the sampling interval along
        dimension i (first and last column). Either order works, because each
        coordinate is computed as a_i + rand * (b_i - a_i). Any array-like is
        accepted, including a plain list of tuples.

        Returns an array of shape (n_locs, space_dim). Samples come from
        NumPy's global random state.
    '''
    # Convert first: the column indexing below would raise on a plain list.
    dim_lims = np.asarray(dim_lims, dtype=float)
    start = dim_lims[:,0]
    span = dim_lims[:,-1] - start
    return np.random.rand(n_locs, len(dim_lims)) * span + start
    

In [5]:
# ---- Experiment configuration ----
space_dim = 2         # dimensionality of the search region
step_size = 0.2       # step length scale shared by all motion strategies
n_test = 100          # candidate locations drawn per robot in 'greedy' mode
D  = 1                # the search region is the square [-D, D] x [-D, D]
dim_lims = np.array([(-D,D),(-D,D)])
var_0  = 0.01         # GPR `alpha`; equals the square of the 0.1 noise std used by f_sampler
N_iteration = 50      # robot moves per trial
N_trials = 10         # independent repetitions per mode
move_noise_std = 0.1  # std of the random jitter added to every InfoGrad step
rng_seed = 0          # seed of NumPy's global RNG, so that Restart & Run All reproduces the run

np.random.seed(rng_seed)

# Dense evaluation grid over the search region, flattened to (100*100, 2).
xlist = np.linspace(-D, D, 100)
ylist = np.linspace(-D, D, 100)
full_X = np.meshgrid(xlist, ylist)
full_X = np.hstack([full_X[0].reshape(-1,1),full_X[1].reshape(-1,1)])

# Initial design: the four corners and the centre of the region. The same
# noisy observations Y_0 are reused by every trial and every mode.
X_0 = np.array([-1,1,
              0,0,
              1,1,
              1,-1,
              -1,-1]).reshape(-1,space_dim)

Y_0 =  f_sampler(X_0)

# One optimizer seed per trial; the same seeds are reused across modes.
random_states = np.random.randint(0,10000,N_trials)

# 'greedy' is implemented below as well; add it to this list to run it.
for mode in ['InfoGrad','randomWalk']:

    # Per-mode results, one list entry per trial.
    data = {}
    data['mu_hist'] = []    # posterior means on full_X, one array per iteration
    data['std_hist'] = []   # posterior stds on full_X, one array per iteration
    data['X'] = []          # all sampled locations (initial design first)
    data['noisy_Y'] = []    # noisy observations matching data['X']
    data['n_0'] = []        # size of the initial design
    data['n_robot'] = []    # despite the key name, this holds the final robot positions

    for trial in range(N_trials):

        X = np.array(X_0)
        noisy_Y = np.array(Y_0)

        kernel = C(1.0, (1e-3, 1e3)) * RBF(100, (1e-6, 1e6))
        model = GPR(kernel,alpha = var_0,n_restarts_optimizer=20,random_state = random_states[trial])
        model.fit(X,noisy_Y)

        # Three robots starting close to the origin.
        curr_x = np.array([0.1,0,-0.1,0,0,-0.1]).reshape(-1,space_dim)

        gradCalc = InfoGrad(k)

        n_0 = len(X)

        mu_hist = []
        std_hist = []

        t = time()
        for n in range(N_iteration):

            # Record the posterior *before* the robots move in this iteration.
            mu,std= model.predict(full_X,return_std=True)
            mu = mu.flatten()

            mu_hist.append(mu)
            std_hist.append(std)

            new_x = []
            if mode == 'InfoGrad':

                t_start = time()
                dx = gradCalc.dIdx(model,curr_x,X.reshape(-1,space_dim)).reshape(curr_x.shape)
                print('Gradient calc time',time()-t_start)

                # dx is normalised by its joint norm over all robots, so
                # step_size is the combined step length. A vanishing gradient
                # has no direction: rely on the jitter alone instead of
                # dividing by zero and filling the positions with NaN.
                dx_norm = np.linalg.norm(dx)
                if dx_norm > 0:
                    direction = dx/dx_norm
                else:
                    direction = np.zeros(curr_x.shape)

                # Add some stochasticity to shake away from the corners.
                new_x = curr_x + direction * step_size + np.random.randn(*curr_x.shape)*move_noise_std

                # Keep the robots inside the search region.
                new_x = np.clip(np.array(new_x),-D,D)

                # Collect new data
                new_y = f_sampler(new_x)

                # sklearn's GPR has no incremental update: refit on all data.
                X = np.vstack([X,new_x])
                noisy_Y = np.hstack([noisy_Y,new_y])

                t_start = time()
                model.fit(X,noisy_Y)
                print('model fit time',time()-t_start)
            elif mode == 'greedy':

                # Robots move one after another; the model is refit after each
                # move so the next robot sees the updated uncertainty.
                for i in range(len(curr_x)):

                    x_test = random_sample_locs(n_test,dim_lims)

                    _mu_test,std_test = model.predict(x_test,return_std = True)

                    # Head towards the most uncertain candidate location.
                    x_dest = x_test[np.argmax(std_test)]

                    x_next = curr_x[i] + step_size * (x_dest-curr_x[i])/np.linalg.norm(x_dest-curr_x[i])
                    x_next = np.clip(x_next,-D,D)

                    y_next = f_sampler(x_next.reshape(-1,space_dim)).flatten()

                    new_x.append(x_next)

                    # sklearn's GPR has no incremental update: refit on all data.
                    X = np.vstack([X,x_next])
                    noisy_Y = np.hstack([noisy_Y,y_next])

                    model.fit(X,noisy_Y)
            elif mode == 'randomWalk':
                # One (low, high) row per coordinate of every robot, so each
                # coordinate is drawn uniformly from [c - step_size, c + step_size].
                # Pairing rows as (x+s, y+s) / (x-s, y-s) would sample x between
                # x+s and y+s instead, which biases the walk.
                walk_lims = np.column_stack([(curr_x-step_size).ravel(),(curr_x+step_size).ravel()])
                new_x = random_sample_locs(1,walk_lims).reshape(curr_x.shape)

                # Keep the robots inside the search region.
                new_x = np.clip(np.array(new_x),-D,D)

                # Collect new data
                new_y = f_sampler(new_x)

                # sklearn's GPR has no incremental update: refit on all data.
                X = np.vstack([X,new_x])
                noisy_Y = np.hstack([noisy_Y,new_y])

                model.fit(X,noisy_Y)
            else:
                print('Mode {} does not exist.'.format(mode))
                break

            # Update curr_x
            curr_x = np.array(new_x)

        print(mode,n,'time:',time()-t)
        data['mu_hist'].append(mu_hist)
        data['std_hist'].append(std_hist)
        data['X'].append(X)
        data['noisy_Y'].append(noisy_Y)
        data['n_0'].append(n_0)
        data['n_robot'].append(curr_x)

    # Persist the results of this mode next to the notebook.
    with open('{}2D-REP.pkl'.format(mode),'wb') as file:
        pkl.dump(data,file)




Gradient calc time 0.7297458648681641
model fit time 0.3482632637023926
Gradient calc time 0.40169644355773926
model fit time 0.2989614009857178
Gradient calc time 0.3077123165130615
model fit time 0.33503031730651855
Gradient calc time 0.32546019554138184
model fit time 0.38565683364868164
Gradient calc time 0.38106560707092285
model fit time 0.3753962516784668
Gradient calc time 0.4149439334869385
model fit time 0.35867857933044434
Gradient calc time 0.5149393081665039
model fit time 0.2997457981109619
Gradient calc time 0.3747680187225342
model fit time 0.3049602508544922
Gradient calc time 0.4459233283996582
model fit time 0.28008008003234863
Gradient calc time 0.33914613723754883
model fit time 0.3429591655731201
Gradient calc time 0.4314994812011719
model fit time 0.3582456111907959
Gradient calc time 0.46437692642211914
model fit time 0.42131972312927246
Gradient calc time 0.3666725158691406
model fit time 0.4370710849761963
Gradient calc time 0.5462911128997803
model fit time 0

KeyboardInterrupt: 