In [7]:
%cd /home/q123/Desktop/explo

### local imports 
from src.environment import EnvironmentObjective
from src.optim import step
from src.policy import MLP

### botorch
from botorch.fit import fit_gpytorch_model
from botorch.models import SingleTaskGP
from botorch.acquisition import ExpectedImprovement
from botorch.optim import optimize_acqf
from botorch.models.gpytorch import GPyTorchModel

### gpytorch 
from gpytorch.mlls import ExactMarginalLogLikelihood
from gpytorch.kernels import RBFKernel,ScaleKernel,Kernel
from gpytorch.models import ExactGP

### general imports
import numpy as np
import gpytorch
import torch
import gym

### Logging 
import logging
logger = logging.getLogger('__main__')
logger.setLevel(logging.INFO)

/home/q123/Desktop/explo


# Imports and kernels


In [8]:
### Toy kernel for warningging

class MyKernel(gpytorch.kernels.RBFKernel):
   
    def forward(self,x1,x2,**params):
        
        logger.warning(f'x1 {x1.shape} / x2 {x2.shape}')
        kernel = super().forward(x1,x2,**params)
        logger.warning(f'pair kernel {kernel.shape}')
        return kernel

In [9]:
class MyGP(ExactGP,GPyTorchModel):
    
    
    def __init__(self, train_x, train_y,train_s, likelihood,
                 kernel=None,mlp=None):
        
        ExactGP.__init__(self,train_x, train_y, likelihood)
        
        self.mean_module = gpytorch.means.ConstantMean()
        
        if kernel is None:
            self.covar_module = MyKernel()
        else :
            self.covar_module = kernel(mlp,train_s)
            
        ### necessary attribute for gpytorch to function
        #self.num_outputs = 1
    
    def set_train_data(train_x, train_y,train_s,strict=False):
        
        super().set_train_data(self,inputs=train_x,targets=train_y,strict=strict)
        ### update state kernels with new states
        if isinstance(self.kernel,StateKernel):
            self.kernel.update(train_s)
        
    
    def forward(self, x):
        
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [10]:
class StateKernel(gpytorch.kernels.Kernel):
    
    """Abstract class for a kernel that uses state action pairs metric
    """
    
    def __init__(self,mlp,train_s):
        
        super().__init__()
        
        self.update_states(train_s)
        self.mlp = mlp
        self.rbf_module = ScaleKernel(RBFKernel(ard_num_dims=train_s.shape[0]))
        

    def test_policy(self,params_batch,states):
        
        actions = self.mlp(params_batch,states)
        first_dims = params_batch.shape[:-1]
        last_dim = actions.shape[-1]
        return actions.reshape(*first_dims,last_dim)
        
        
    def forward(self,x1,x2,**params):
        
        logger.warning(f'x1 {x1.shape} / x2 {x2.shape}')
        
        #Evaluate current parameters
        actions1 = self.test_policy(x1,self.states)
        actions2 = self.test_policy(x2,self.states)
        logger.warning(f'actions1 {actions1.shape} actions2 {actions2.shape} ')
        # Compute pairwise pairwise kernel 
        kernel = self.rbf_module(actions1, actions2, **params)
        logger.warning(f'pair kernel {kernel.shape}')
        
        return kernel 
    
    def update(self,new_s):
        
        raise NotImplementedError
    
        
class GridKernel(StateKernel):
    
    
    def get_grid(self,low,high,samples_per_dim):
        
        
        state_dims = low.shape[0]
        points = [torch.linspace(low[i],high[i],samples_per_dim) 
                    for i in range(state_dims)]
        grid = torch.meshgrid(*points)
        grid = torch.stack(grid)
        grid = torch.flatten(grid,start_dim=1).T ## [n_states,state_dim]
        
        return grid
    
    def update_states(self,new_s):
        
        self.high= torch.amax(new_s,dim=(0,1))
        self.low= torch.amin(new_s,dim=(0,1))
        print(f'observation box : \n low {self.low} \n high :{self.high}')
        self.states = self.get_grid(self.low,self.high,
                                    samples_per_dim=10)
    
    def update(self,new_s):
        
        
        tmp_buff = torch.cat([self.states, new_s])
        print(f'BUffer shape {tmp_buff.shape}')
        high= torch.amax(tmp_buff,dim=(0,1))
        low= torch.amin(tmp_buff,dim=(0,1))
        
        ### update only if be
        if any(high>self.high) or any(low<self.low):
            self.update_states(tmp_buff)

# Experiment 

In [11]:
### initialize policy

mlp = MLP([3,1],add_bias=True) ## pendulum
#mlp = MLP([8,2],add_bias=True)

# Initialize environment

objective_env = EnvironmentObjective(
  #env=gym.make("MountainCarContinuous-v0"),
  env=gym.make("Pendulum-v1"),
  #env=gym.make("Swimmer-v3"),
  mlp=mlp,
  manipulate_state=None,
  manipulate_reward=None,
)

### initialize train_x, train_y
train_x = torch.rand(50,mlp.len_params) ## [n_trials,n_params]
train_data = [objective_env.run(p) for p in train_x]
train_y = torch.Tensor([d[0] for d in train_data]).reshape(-1)  ## [n_trials,1]
train_s = torch.stack( [d[1] for d in train_data])  ## [n_trials,max_len,state_dim]


# initialize likelihood and model
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = MyGP(train_x, train_y,train_s,likelihood,
                    kernel=GridKernel,mlp=mlp)

observation box : 
 low tensor([-0.9998, -0.9996, -5.0630]) 
 high :tensor([1.0000, 0.9998, 8.0000])


In [12]:
### now we loop :
max_iter = 10

for i in range(max_iter):

  step(model,objective_env)

  if i % 1 == 0 and i> 100:

    max = model.train_targets.max()
    batch_mean = model.train_targets[i-100:i].mean()
    batch_max = model.train_targets[i-100:i].max()
    curr = model.train_targets[-1]
    print(f'current {curr} / max {max} /batch_mean {batch_mean} /batch_max {batch_max} ')
    #print(f'model.train_inputs.shape{model.train_inputs[0].shape}')

x1 torch.Size([50, 4]) / x2 torch.Size([50, 4])
actions1 torch.Size([50, 1000]) actions2 torch.Size([50, 1000]) 


RuntimeError: The size of tensor a (1000) must match the size of tensor b (50) at non-singleton dimension 1

# Manually fitting GP (maximizing likelihood)

In [None]:
# training_iter = 100 

# # Find optimal model hyperparameters
# model.train()
# likelihood.train()

# # Use the adam optimizer
# optimizer = torch.optim.SGD(model.parameters(), lr=0.25)  # Includes GaussianLikelihood parameters

# # "Loss" for GPs - the marginal log likelihood
# mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

# for i in range(3):
#     # Zero gradients from previous iteration
#     optimizer.zero_grad()
#     # Output from model
#     output = model(train_x)
#     # Calc loss and backprop gradients
#     loss = -mll(output, train_y)
#     logger.warning(f'Loss {loss.shape}')
#     loss.backward()
#     print('Iter %d/%d - Loss: %.3f noise: %.3f' % 
#         (
#         i + 1, training_iter, loss.item(),
#         model.likelihood.noise.item())
#         )
#     optimizer.step()