In [1]:
%cd /home/q123/Desktop/explo

### local imports 
from src.environment import EnvironmentObjective
from src.vanillagp import step
from src.policy import MLP

### botorch
from botorch.fit import fit_gpytorch_model
from botorch.models import SingleTaskGP
from gpytorch.mlls import ExactMarginalLogLikelihood

from botorch.acquisition import ExpectedImprovement
from botorch.optim import optimize_acqf

### general imports
import numpy as np
import gpytorch
import torch
import gym

### Logging 
import logging
logger = logging.getLogger('output shapes')
logger.setLevel(logging.DEBUG)

/home/q123/Desktop/explo


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
### Toy kernel for debugging

class MyKernel(gpytorch.kernels.RBFKernel):
   
    def forward(self,x1,x2,**params):
        
        logger.debug(f'x1 {x1.shape}')
        kernel = super().forward(x1,x2,**params)
        logger.debug(f'pair kernel {kernel.shape}')
        return kernel

In [3]:
class GridKernel(gpytorch.kernels.Kernel):
    
    def __init__(self,mlp,actions_metric,
                 states,states_w=None):
        
        super().__init__()
        
        if states_w is None:
            states_w = torch.ones(states.size(0))
            
        rbf_module =  gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

        ## save params to instance
        self.__dict__.update(locals())
    
    def compute_actions(self,states,params_batch):
        
        rslt = [self.mlp(states,p).squeeze() 
                for p in params_batch.squeeze()]
        
        
        #### WARNING THIS MIGHT BE A SOURCE OF ERROR
        first_dims = params_batch.size()[:-1]
        last_dim = rslt[0].size(-1)
        rslt = torch.stack(rslt).reshape(*first_dims,last_dim) ## hotfix
        ###############################################""""
        
        return rslt
            

    def forward(self,x1,x2,**params):
        
        logger.debug(f'x1 {x1.shape}')
        states,states_w = self.states,self.states_w
    
        #Evaluate current parameters
        actions1 = self.compute_actions(states,x1).squeeze()
        actions2 = self.compute_actions(states,x2).squeeze()
        logger.debug(f'actions size {actions1.shape} ')
        
        
        # Compute pairwise pairwise kernel 
        #distances = self.covar_dist(actions1, actions2, **params)
        kernel = self.rbf_module(actions1, actions2, **params)
        logger.debug(f'pair kernel {kernel.shape}')
        
        return kernel

In [10]:
# We will use the simplest form of GP model, exact inference
class GridGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood,
                 mlp,actions_metric,states):
        
        
        super(GridGPModel, self).__init__(train_x, train_y, likelihood)
        self.covar_module = GridKernel(mlp,actions_metric,states)
        #self.covar_module = MyKernel()
        self.mean_module = gpytorch.means.ConstantMean()

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [13]:
### initialize policy
mlp = MLP(*[3,1])
mlp.requires_grad = False

# Initialize environment

objective_env = EnvironmentObjective(
  env=gym.make("Pendulum-v1"),
  policy=mlp,
  manipulate_state=None,
  manipulate_reward=None,
)

### initialize train_x, train_y
train_x = torch.rand(100,mlp.len_params) ## [n_trials,n_params]
train_y = [objective_env.run(p) for p in train_x]
train_y = torch.Tensor(train_y).reshape(-1)  ## [n_trials,1]

# initialize likelihood and model

states = objective_env.get_grid()
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GridGPModel(train_x, train_y, likelihood,
                    mlp,torch.linalg.norm,states)

# Further test

In [14]:
training_iter = 100 

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.25)  # Includes GaussianLikelihood parameters

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

for i in range(100):
    # Zero gradients from previous iteration
    optimizer.zero_grad()
    # Output from model
    output = model(train_x)
    # Calc loss and backprop gradients
    loss = -mll(output, train_y)
    logger.debug(f'Loss {loss.shape}')
    loss.backward()
    print('Iter %d/%d - Loss: %.3f noise: %.3f' % 
        (
        i + 1, training_iter, loss.item(),
        model.likelihood.noise.item())
        )
    optimizer.step()

Iter 1/100 - Loss: 844349.688 noise: 0.693
Iter 2/100 - Loss: 16.964 noise: 76119.500
Iter 3/100 - Loss: 16.964 noise: 76119.500
Iter 4/100 - Loss: 16.964 noise: 76119.500
Iter 5/100 - Loss: 16.964 noise: 76119.500
Iter 6/100 - Loss: 16.964 noise: 76119.500
Iter 7/100 - Loss: 16.964 noise: 76119.500
Iter 8/100 - Loss: 16.964 noise: 76119.500
Iter 9/100 - Loss: 16.964 noise: 76119.500
Iter 10/100 - Loss: 16.964 noise: 76119.500
Iter 11/100 - Loss: 16.964 noise: 76119.500
Iter 12/100 - Loss: 16.964 noise: 76119.500
Iter 13/100 - Loss: 16.964 noise: 76119.500
Iter 14/100 - Loss: 16.963 noise: 76119.500
Iter 15/100 - Loss: 16.963 noise: 76119.500
Iter 16/100 - Loss: 16.963 noise: 76119.500
Iter 17/100 - Loss: 16.963 noise: 76119.500
Iter 18/100 - Loss: 16.963 noise: 76119.500
Iter 19/100 - Loss: 16.963 noise: 76119.500
Iter 20/100 - Loss: 16.963 noise: 76119.500
Iter 21/100 - Loss: 16.963 noise: 76119.500
Iter 22/100 - Loss: 16.963 noise: 76119.500
Iter 23/100 - Loss: 16.963 noise: 76119.5

In [7]:
# states_grid = objective_env.get_grid()
# kernel = GridKernel(mlp,torch.linalg.norm,states_grid)
# kernel.forward(torch.rand(3),torch.rand(3))