In [1]:
%cd /home/q123/Desktop/explo

### local imports 
from src.environment import EnvironmentObjective
from src.vanillagp import step
from src.policy import MLP

### botorch
from botorch.fit import fit_gpytorch_model
from botorch.models import SingleTaskGP
from gpytorch.mlls import ExactMarginalLogLikelihood

from botorch.acquisition import ExpectedImprovement
from botorch.optim import optimize_acqf

### general imports
import numpy as np
import gpytorch
import torch
import gym

### Logging 
import logging
logger = logging.getLogger('__main__')
logger.setLevel(logging.CRITICAL)

/home/q123/Desktop/explo


  from .autonotebook import tqdm as notebook_tqdm


# Imports and kernels


In [2]:
### Toy kernel for warningging

class MyKernel(gpytorch.kernels.RBFKernel):
   
    def forward(self,x1,x2,**params):
        
        logger.warning(f'x1 {x1.shape} / x2 {x2.shape}')
        kernel = super().forward(x1,x2,**params)
        logger.warning(f'pair kernel {kernel.shape}')
        return kernel

In [3]:
class GridKernel(gpytorch.kernels.Kernel):
    
    def __init__(self,mlp,actions_metric,
                 states,states_w=None):
        
        super().__init__()
        
        if states_w is None:
            states_w = torch.ones(states.size(0))
            
        rbf_module =  gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

        ## save params to instance
        self.__dict__.update(locals())
    
    def compute_actions(self,states,params_batch):
        
        
        logger.warning(f'params_batch {params_batch.shape}')
        
        rslt = [self.mlp(states,p).squeeze() 
                for p in params_batch.flatten(end_dim=-2)
                #for p in params_batch.squeeze()
                ]
        
        
        #### WARNING THIS MIGHT BE A SOURCE OF ERROR
        first_dims = params_batch.size()[:-1]
        last_dim = rslt[0].size(-1)
        rslt = torch.stack(rslt).reshape(*first_dims,last_dim) ## hotfix
        ###############################################""""
        
        return rslt
            

    def forward(self,x1,x2,**params):
        
        states,states_w = self.states,self.states_w
        
        logger.warning(f'x1 {x1.shape} / x2 {x2.shape}')
        #Evaluate current parameters
        actions1 = self.compute_actions(states,x1)
        actions2 = self.compute_actions(states,x2)
        logger.warning(f'actions1 {actions1.shape} actions2 {actions2.shape} ')
        
        # Compute pairwise pairwise kernel 
        kernel = self.rbf_module(actions1, actions2, **params)
        logger.warning(f'pair kernel {kernel.shape}')
        
        return kernel

In [4]:
from gpytorch.models import ExactGP
from botorch.models.gpytorch import GPyTorchModel
# We will use the simplest form of GP model, exact inference

class GridGPModel(ExactGP,GPyTorchModel):
    
    _num_outputs = 1
    
    def __init__(self, train_x, train_y, likelihood,
                 mlp,actions_metric,states):
        
        ExactGP.__init__(self, train_x, train_y, likelihood)
        self.covar_module = GridKernel(mlp,actions_metric,states)
        #self.covar_module = MyKernel()
        self.mean_module = gpytorch.means.ConstantMean()

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [9]:
### initialize policy
mlp = MLP(*[3,1])
mlp.requires_grad = True

# Initialize environment

objective_env = EnvironmentObjective(
  env=gym.make("Pendulum-v1"),
  policy=mlp,
  manipulate_state=None,
  manipulate_reward=None,
)

### initialize train_x, train_y
train_x = torch.rand(100,mlp.len_params) ## [n_trials,n_params]
train_y = [objective_env.run(p) for p in train_x]
train_y = torch.Tensor(train_y).reshape(-1)  ## [n_trials,1]

# initialize likelihood and model

states = objective_env.get_grid()
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GridGPModel(train_x, train_y, likelihood,
                    mlp,torch.linalg.norm,states)

AttributeError: 'MLP' object has no attribute 'set_grad_enabled'

In [10]:
mlp.grad_fn

AttributeError: 'MLP' object has no attribute 'grad_fn'

In [6]:
### fit hypers of GP
mll = ExactMarginalLogLikelihood(model.likelihood, model)
fit_gpytorch_model(mll)

logger.setLevel(logging.WARNING)
logger.warning("Optimized hypers")

### optimize acqf

best_value = model.train_targets.max()
len_params = objective_env.policy.len_params
EI = ExpectedImprovement(model=model, best_f=best_value)

new_x, _ = optimize_acqf(
  acq_function=EI,
  bounds=torch.tensor([[0.0] * len_params, [1.0] * len_params]),
  q=1,
  num_restarts=3,
  raw_samples=5,
  options={},
)


new_y = objective_env(new_x)

### Update training points.
train_x = torch.cat([model.train_inputs[0], new_x])
train_y = torch.cat([model.train_targets, new_y])
model.set_train_data(inputs=train_x, targets=train_y, strict=False)


Optimized hypers
x1 torch.Size([5, 1, 3]) / x2 torch.Size([5, 101, 3])
params_batch torch.Size([5, 1, 3])
params_batch torch.Size([5, 101, 3])
actions1 torch.Size([5, 1, 1000]) actions2 torch.Size([5, 101, 1000]) 
pair kernel torch.Size([5, 1, 101])
x1 torch.Size([100, 3]) / x2 torch.Size([100, 3])
params_batch torch.Size([100, 3])
params_batch torch.Size([100, 3])
actions1 torch.Size([100, 1000]) actions2 torch.Size([100, 1000]) 
pair kernel torch.Size([100, 100])
x1 torch.Size([3, 1, 3]) / x2 torch.Size([3, 101, 3])
params_batch torch.Size([3, 1, 3])
params_batch torch.Size([3, 101, 3])
actions1 torch.Size([3, 1, 1000]) actions2 torch.Size([3, 101, 1000]) 
pair kernel torch.Size([3, 1, 101])


RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.

In [8]:
for param in model.parameters():
    print(param)

Parameter containing:
tensor([42849.5586], requires_grad=True)
Parameter containing:
tensor([-1531.3634], requires_grad=True)


# Manually fitting GP (maximizing likelihood)

In [None]:
# training_iter = 100 

# # Find optimal model hyperparameters
# model.train()
# likelihood.train()

# # Use the adam optimizer
# optimizer = torch.optim.SGD(model.parameters(), lr=0.25)  # Includes GaussianLikelihood parameters

# # "Loss" for GPs - the marginal log likelihood
# mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

# for i in range(3):
#     # Zero gradients from previous iteration
#     optimizer.zero_grad()
#     # Output from model
#     output = model(train_x)
#     # Calc loss and backprop gradients
#     loss = -mll(output, train_y)
#     logger.warning(f'Loss {loss.shape}')
#     loss.backward()
#     print('Iter %d/%d - Loss: %.3f noise: %.3f' % 
#         (
#         i + 1, training_iter, loss.item(),
#         model.likelihood.noise.item())
#         )
#     optimizer.step()

In [None]:
max_iter = 1
for i in range(max_iter):
      
    
  ### fit hypers of GP
  mll = ExactMarginalLogLikelihood(model.likelihood, model)
  fit_gpytorch_model(mll)
  
  # logger.setLevel(logging.WARNING)
  # logger.warning("Optimized hypers")

  ### optimize acqf
  
  best_value = model.train_targets.max()
  len_params = objective_env.policy.len_params
  EI = ExpectedImprovement(model=model, best_f=best_value)
  
  new_x, _ = optimize_acqf(
    acq_function=EI,
    bounds=torch.tensor([[0.0] * len_params, [1.0] * len_params]),
    q=1,
    num_restarts=3,
    raw_samples=5,
    options={},
  )
  
  logger.setLevel(logging.WARNING)
  logger.warning("Acquisition function finished")

  new_y = objective_env(new_x)

  ### Update training points.
  train_x = torch.cat([model.train_inputs[0], new_x])
  train_y = torch.cat([model.train_targets, new_y])
  model.set_train_data(inputs=train_x, targets=train_y, strict=False)

  if i % 10 == 0:

    best_val = model.train_targets.max()
    curr_val = model.train_targets[-1]
    print(f'curr {curr_val} max {best_val}')

x1 torch.Size([101, 3]) / x2 torch.Size([101, 3])
pair kernel torch.Size([101, 101])
x1 torch.Size([101, 3]) / x2 torch.Size([101, 3])
pair kernel torch.Size([101, 101])
x1 torch.Size([5, 1, 3]) / x2 torch.Size([5, 102, 3])
pair kernel torch.Size([5, 1, 102])
x1 torch.Size([101, 3]) / x2 torch.Size([101, 3])
pair kernel torch.Size([101, 101])
x1 torch.Size([3, 1, 3]) / x2 torch.Size([3, 102, 3])
pair kernel torch.Size([3, 1, 102])
x1 torch.Size([3, 1, 3]) / x2 torch.Size([3, 102, 3])
pair kernel torch.Size([3, 1, 102])
Acquisition function finished


curr -1590.959228515625 max -768.005859375
