In [1]:
# NOTE(review): hard-coded absolute path, so this cell only runs on a machine
# that has this directory. Presumably the `src.*` imports below rely on it
# being the working directory; confirm, and prefer a path derived from a
# configurable project root.
%cd /home/q123/Desktop/explo

### local imports 
from src.environment import EnvironmentObjective
from src.optim import step
from src.policy import MLP

### botorch
from botorch.fit import fit_gpytorch_model
from botorch.models import SingleTaskGP
from gpytorch.mlls import ExactMarginalLogLikelihood

from botorch.acquisition import ExpectedImprovement
from botorch.optim import optimize_acqf

### general imports
import numpy as np
import gpytorch
import torch
import gym

### Logging 
import logging
# Logger used by the kernels defined below for their shape traces.
# The threshold is CRITICAL, so those `logger.warning(...)` calls emit nothing
# unless the level is lowered (e.g. to logging.WARNING).
logger = logging.getLogger('__main__')
logger.setLevel(logging.CRITICAL)

/home/q123/Desktop/explo


  from .autonotebook import tqdm as notebook_tqdm


# Imports and kernels


In [2]:
### Toy kernel for debugging: an RBF kernel that logs the shapes it receives and returns

class MyKernel(gpytorch.kernels.RBFKernel):
    """RBF kernel that logs the shapes of its inputs and of its result.

    The covariance is computed entirely by ``RBFKernel.forward``; this
    subclass only wraps that call in ``logger.warning`` shape traces.
    """

    def forward(self, x1, x2, **params):
        """Log input shapes, delegate to the parent kernel, log the result shape.

        Args:
            x1: first set of inputs, passed to ``RBFKernel.forward`` unchanged.
            x2: second set of inputs, passed to ``RBFKernel.forward`` unchanged.
            **params: extra keyword arguments, forwarded as-is.

        Returns:
            Whatever ``RBFKernel.forward`` returns for these arguments.
        """
        logger.warning(f'x1 {x1.shape} / x2 {x2.shape}')

        covariance = super().forward(x1, x2, **params)

        logger.warning(f'pair kernel {covariance.shape}')
        return covariance

In [3]:
from gpytorch.kernels import *
from gpytorch.priors.torch_priors import GammaPrior

class GridKernel(Kernel):
    """Kernel that compares policy parameters through the actions they produce.

    Every parameter vector is first turned into actions by
    ``PolicyTester(mlp, states)``; the covariance is then a scaled RBF kernel
    evaluated on those actions instead of on the raw parameters.

    Args:
        mlp: policy callable; the tester invokes it as ``mlp(params_batch, states)``.
        states: states the policy is evaluated on. ``states.shape[0]`` is used
            as ``ard_num_dims`` of the inner RBF kernel.
    """

    def __init__(self,
                 mlp, states):

        super().__init__()

        self.tester = PolicyTester(mlp, states)

        # Plain attribute assignment goes through ``Module.__setattr__``, which
        # registers the inner kernel as a sub-module: its lengthscale and
        # outputscale then appear in ``parameters()`` / ``state_dict()`` and
        # follow ``.to(...)``. Do not replace this with
        # ``self.__dict__.update(locals())``: that skips the registration (the
        # hyperparameters would never be fitted) and also stores ``self`` and
        # ``__class__`` on the instance.
        self.rbf_module = ScaleKernel(RBFKernel(ard_num_dims=states.shape[0]))

        # ``mlp`` and ``states`` are inputs, not hyperparameters of this kernel.
        # They are written straight into the instance dict so that, should
        # either be a Module/Parameter, it is NOT registered and picked up by
        # hyperparameter fitting.
        self.__dict__.update(mlp=mlp, states=states)

    def forward(self, x1, x2, **params):
        """Covariance between two batches of policy parameters.

        Args:
            x1: first batch of policy parameters.
            x2: second batch of policy parameters.
            **params: extra keyword arguments, forwarded to the inner kernel.

        Returns:
            The result of calling the scaled RBF kernel on the actions that
            ``x1`` and ``x2`` produce.
        """
        logger.warning(f'x1 {x1.shape} / x2 {x2.shape}')

        # Turn each parameter vector into the actions its policy takes.
        actions1 = self.tester(x1)
        actions2 = self.tester(x2)

        logger.warning(f'actions1 {actions1.shape} actions2 {actions2.shape} ')

        # Pairwise kernel between the two sets of actions.
        kernel = self.rbf_module(actions1, actions2, **params)
        logger.warning(f'pair kernel {kernel.shape}')

        return kernel
        
        
class PolicyTester():
    """Evaluate a policy on a fixed set of states for a batch of parameters.

    Args:
        mlp: callable invoked as ``mlp(params_batch, states)``.
        states: passed unchanged as the second argument of every ``mlp`` call.
    """

    def __init__(self, mlp, states):
        # Explicit assignments rather than ``self.__dict__.update(locals())``:
        # the latter also stores ``self`` on the instance (a reference cycle)
        # and hides which attributes the object actually has.
        self.mlp = mlp
        self.states = states

    def __call__(self, params_batch):
        """Run the policy for every parameter vector in ``params_batch``.

        Args:
            params_batch: batch of parameter vectors; all dimensions except the
                last are treated as batch dimensions.

        Returns:
            The policy output reshaped to
            ``(*params_batch.shape[:-1], actions.shape[-1])``.
        """
        actions = self.mlp(params_batch, self.states)

        # Keep the batch dimensions of the parameters and the last dimension
        # of the policy output; everything in between is folded away.
        first_dims = params_batch.shape[:-1]
        last_dim = actions.shape[-1]
        return actions.reshape(*first_dims, last_dim)
                             


In [4]:
from gpytorch.models import ExactGP
from botorch.models.gpytorch import GPyTorchModel
# We will use the simplest form of GP model, exact inference

class GridGPModel(ExactGP, GPyTorchModel):
    """Exact-inference GP with a constant mean, usable as a BoTorch model.

    ``mlp`` and ``states`` are accepted but currently unused: the covariance
    is the shape-logging ``MyKernel``, and the ``GridKernel(mlp, states)``
    alternative is left commented out in ``__init__``.

    Args:
        train_x: training inputs, handed to ``ExactGP.__init__``.
        train_y: training targets, handed to ``ExactGP.__init__``.
        likelihood: likelihood, handed to ``ExactGP.__init__``.
        mlp: policy; only needed by the commented-out ``GridKernel``.
        states: state grid; only needed by the commented-out ``GridKernel``.
    """

    # Number of outputs of the model (BoTorch custom-model convention).
    _num_outputs = 1

    def __init__(self, train_x, train_y, likelihood,
                 mlp, states):

        ExactGP.__init__(self, train_x, train_y, likelihood)

        # Swap the next two lines to use the action-based kernel instead.
        # self.covar_module = GridKernel(mlp, states)
        self.covar_module = MyKernel()
        self.mean_module = gpytorch.means.ConstantMean()

    def forward(self, x):
        """Return the GP distribution at ``x`` as a ``MultivariateNormal``."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

In [5]:
### Policy
mlp = MLP([3, 1], add_bias=True)

### Environment objective wrapping Pendulum-v1
objective_env = EnvironmentObjective(
    env=gym.make("Pendulum-v1"),
    mlp=mlp,
    manipulate_state=None,
    manipulate_reward=None,
)

### Initial training data: 100 parameter vectors drawn uniformly from [0, 1)
# NOTE(review): no seed is set in the visible cells, so train_x (and every
# result below) differs between runs.
train_x = torch.rand(100, mlp.len_params)  ## [n_trials, n_params]
train_y = torch.Tensor(
    [objective_env.run(p) for p in train_x]
).reshape(-1)  ## [n_trials] (flattened to 1-D by reshape(-1))

### Likelihood and GP model
states = objective_env.get_grid()
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GridGPModel(train_x, train_y, likelihood,
                    mlp, states)

observation box low : [-1. -1. -8.]/ high [1. 1. 8.]


In [6]:
### Optimisation loop: one `step` per iteration, progress report every 100 iterations.
# Judging by the printed shapes below, each `step` call appends one training point.
max_iter = 1000

for i in range(max_iter):

    step(model, objective_env)

    # Only report on iterations 0, 100, 200, ...
    if i % 100 != 0:
        continue

    best_val = model.train_targets.max()
    curr_val = model.train_targets[-1]
    print(f'curr {curr_val} max {best_val}')
    print(f'model.train_inputs.shape{model.train_inputs[0].shape}')

curr -1363.0400390625 max -1175.70458984375
model.train_inputs.shapetorch.Size([101, 4])
curr -1508.8712158203125 max -1060.53076171875
model.train_inputs.shapetorch.Size([201, 4])




curr -1567.2501220703125 max -1060.53076171875
model.train_inputs.shapetorch.Size([301, 4])
curr -1632.0350341796875 max -1060.53076171875
model.train_inputs.shapetorch.Size([401, 4])
curr -1510.19091796875 max -1060.53076171875
model.train_inputs.shapetorch.Size([501, 4])




curr -1601.305419921875 max -675.6351318359375
model.train_inputs.shapetorch.Size([601, 4])




curr -1447.5928955078125 max -675.6351318359375
model.train_inputs.shapetorch.Size([701, 4])
curr -1396.5802001953125 max -675.6351318359375
model.train_inputs.shapetorch.Size([801, 4])
curr -1671.491943359375 max -675.6351318359375
model.train_inputs.shapetorch.Size([901, 4])
curr -1176.1405029296875 max -675.6351318359375
model.train_inputs.shapetorch.Size([1001, 4])


# Manually fitting GP (maximizing likelihood)

In [None]:
# training_iter = 100 

# # Find optimal model hyperparameters
# model.train()
# likelihood.train()

# # Use the SGD optimizer
# optimizer = torch.optim.SGD(model.parameters(), lr=0.25)  # Includes GaussianLikelihood parameters

# # "Loss" for GPs - the marginal log likelihood
# mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

# for i in range(3):
#     # Zero gradients from previous iteration
#     optimizer.zero_grad()
#     # Output from model
#     output = model(train_x)
#     # Calc loss and backprop gradients
#     loss = -mll(output, train_y)
#     logger.warning(f'Loss {loss.shape}')
#     loss.backward()
#     print('Iter %d/%d - Loss: %.3f noise: %.3f' % 
#         (
#         i + 1, training_iter, loss.item(),
#         model.likelihood.noise.item())
#         )
#     optimizer.step()

In [None]:
# max_iter = 1
# for i in range(max_iter):
      
    
#   ### fit hypers of GP
#   mll = ExactMarginalLogLikelihood(model.likelihood, model)
#   fit_gpytorch_model(mll)
  
#   # logger.setLevel(logging.WARNING)
#   # logger.warning("Optimized hypers")

#   ### optimize acqf
  
#   best_value = model.train_targets.max()
#   len_params = objective_env.policy.len_params
#   EI = ExpectedImprovement(model=model, best_f=best_value)
  
#   new_x, _ = optimize_acqf(
#     acq_function=EI,
#     bounds=torch.tensor([[0.0] * len_params, [1.0] * len_params]),
#     q=1,
#     num_restarts=3,
#     raw_samples=5,
#     options={},
#   )
  
#   logger.setLevel(logging.WARNING)
#   logger.warning("Acquisition function finished")

#   new_y = objective_env(new_x)

#   ### Update training points.
#   train_x = torch.cat([model.train_inputs[0], new_x])
#   train_y = torch.cat([model.train_targets, new_y])
#   model.set_train_data(inputs=train_x, targets=train_y, strict=False)

#   if i % 10 == 0:

#     best_val = model.train_targets.max()
#     curr_val = model.train_targets[-1]
#     print(f'curr {curr_val} max {best_val}')