In [1]:
%cd /home/q123/Desktop/explo

### local imports 
from src.environment import EnvironmentObjective
from src.optim import Optimizer
from src.policy import MLP
from src.gp import MyGP
from src.kernels import *
### botorch
from botorch.fit import fit_gpytorch_model
from botorch.models import SingleTaskGP
from botorch.acquisition import ExpectedImprovement
from botorch.optim import optimize_acqf


### gpytorch 
from gpytorch.mlls import ExactMarginalLogLikelihood
from gpytorch.kernels import MaternKernel,RBFKernel,ScaleKernel,Kernel
from gpytorch.priors.torch_priors import GammaPrior


### general imports
import numpy as np
import gpytorch
import torch
import gym

# ### Logging 
# import logging
# logger = logging.getLogger('__main__')
# logger.setLevel(logging.CRITICAL)

# Logging setup for the notebook.
# The configuration is read from the file 'logging.conf'; the path is relative,
# so it is resolved against the current working directory (changed by the
# `%cd` magic at the top of this cell).
import logging
import logging.config

logging.config.fileConfig('logging.conf')

# Root logger (getLogger() called without a name); the GridKernel class
# defined below logs its grid/buffer shapes through it.
logger = logging.getLogger()

/home/q123/Desktop/explo


  from .autonotebook import tqdm as notebook_tqdm


# Imports and kernels


In [2]:
class GridKernel(StateKernel):
    """State kernel whose reference states are a regular grid.

    The grid covers the axis-aligned bounding box (`self.low` .. `self.high`)
    of the states passed to `update_states`, and is rebuilt by `update`
    whenever newly observed states fall outside that box.

    `update_states` must be called at least once before `update`, since
    `update` reads `self.states`, `self.low` and `self.high`.
    """

    def get_grid(self,low,high,samples_per_dim):
        """Build a regular grid over the box delimited by `low` and `high`.

        Args:
            low: 1-D tensor with the lower bound of every state dimension.
            high: 1-D tensor with the upper bound of every state dimension.
            samples_per_dim: number of evenly spaced points per dimension.

        Returns:
            Tensor of shape [n_states, state_dim] holding every grid point,
            where n_states = samples_per_dim ** state_dim.
        """
        state_dims = low.shape[0]
        points = [torch.linspace(low[i],high[i],samples_per_dim)
                    for i in range(state_dims)]
        # "ij" is what torch.meshgrid did by default; passing it explicitly
        # keeps the result unchanged and silences the deprecation warning
        # raised when the argument is omitted.
        grid = torch.meshgrid(*points,indexing="ij")
        grid = torch.stack(grid)  ## [state_dim, samples_per_dim, ..., samples_per_dim]
        grid = torch.flatten(grid,start_dim=1).T ## [n_states,state_dim]

        logger.info(f' grid shape {grid.shape}')

        return grid

    def update_states(self,new_s,samples_per_dim=5):
        """Recompute the bounding box from `new_s` and rebuild the grid.

        Args:
            new_s: tensor of states, reduced over dim 0 to get the per-dimension
                min and max (so rows are states, columns are state dimensions).
            samples_per_dim: grid resolution per dimension (default 5, the
                value that used to be hard-coded).

        Returns:
            The number of grid states now stored in `self.states`.
        """
        self.high,_= torch.max(new_s,dim=0)
        self.low,_= torch.min(new_s,dim=0)
        states = self.get_grid(self.low,self.high,
                                    samples_per_dim=samples_per_dim)

        n_states = states.shape[0]
        self.states = states
        return n_states

    def update(self,new_s,samples_per_dim=5):
        """Grow the grid if `new_s` contains states outside the current box.

        Args:
            new_s: tensor of newly observed states, concatenated with
                `self.states` along dim 0.
            samples_per_dim: grid resolution used if the grid is rebuilt
                (default 5).
        """
        tmp_buff = torch.cat([self.states, new_s])
        high,_= torch.max(tmp_buff,dim=0)
        low,_= torch.min(tmp_buff,dim=0)

        logger.info(f'Buffer shape {tmp_buff.shape}')

        # Rebuild only if the bounding box grew in at least one dimension.
        # torch.any reduces the comparison in one call instead of iterating
        # over the tensor element by element with the Python builtin.
        if torch.any(high>self.high) or torch.any(low<self.low):
            self.update_states(tmp_buff,samples_per_dim=samples_per_dim)

# Experiment 

In [3]:
# Experiment setup: choose ONE environment/policy pair below (the others are
# kept commented out as ready-to-use alternatives), wrap it in an
# EnvironmentObjective, collect an initial random data set and build the GP
# model plus its optimizer.

### pendulum
# mlp = MLP([3,1],add_bias=True) 
# env = gym.make("Pendulum-v1")

### Swimmer 
# mlp = MLP([8,2],add_bias=True) 
# env = gym.make("Swimmer-v3")

### Inverted pendulum (active configuration)

mlp = MLP([4,1],add_bias=True) 
env = gym.make("InvertedPendulum-v2")

### CartPole

# mlp = MLP([4,1],add_bias=True)
# env = gym.make("CartPole-v1")

# Initialize environment
# (no state/reward manipulation hooks are installed: both are passed as None)

objective_env = EnvironmentObjective(
  env=env,
  mlp=mlp,
  manipulate_state=None,
  manipulate_reward=None,
)

### initialize train_x, train_y, train_s
# 10 random policy parameter vectors drawn by torch.rand.
# NOTE(review): no random seed is set anywhere in view, so these initial
# trials differ between runs.
train_x = torch.rand(10,mlp.len_params) ## [n_trials,n_params]
# One call of objective_env.run per parameter vector; element [0] of each
# result is used as the target and element [1] as the visited states
# (presumably episode return and state trajectory -- confirm in
# src.environment).
train_data = [objective_env.run(p) for p in train_x]
train_y = torch.Tensor([d[0] for d in train_data]).reshape(-1)  ## [n_trials] (reshape(-1) flattens to 1-D)
# torch.stack requires every d[1] to have the same shape.
train_s = torch.stack( [d[1] for d in train_data])  ## [n_trials,max_len,state_dim]
train_s = torch.flatten(train_s,start_dim=0,end_dim=1) ## [n_trials*max_len,state_dim]

# initialize likelihood and model
likelihood = gpytorch.likelihoods.GaussianLikelihood()
# kernel=None is passed explicitly; which kernel MyGP then uses is decided
# inside src.gp and is not visible here.
model = MyGP(train_x, train_y,train_s,likelihood,
                    kernel=None,mlp=mlp)
opt = Optimizer()


In [4]:
### now we loop :
# Run `max_iter` optimizer steps and print a progress report every
# `report_every` steps.
max_iter = 1
report_every = 1


for i in range(max_iter):

  opt.step(model,objective_env)

  # Report after every `report_every`-th step, counting steps from 1 so a
  # single-iteration run still prints its result.
  if (i + 1) % report_every == 0:

    targets = model.train_targets
    # Named `best` rather than `max` so the builtin max() is not shadowed
    # for the rest of the kernel session.
    best = targets.max()
    # Statistics over the most recent `report_every` targets.
    # Assumes opt.step appends one new observation at the end of
    # model.train_targets -- the same assumption the `targets[-1]`
    # "current" read-out relies on. TODO confirm in src.optim.
    # Slicing from the end is never empty and never picks up the initial
    # random trials stored at the front of the tensor.
    batch = targets[-report_every:]
    batch_mean = batch.mean()
    batch_max = batch.max()
    curr = targets[-1]
    print(f'current {curr} / max {best} /batch_mean {batch_mean} /batch_max {batch_max} ')
    opt.print_hypers(model)

    #print(f'model.train_inputs.shape{model.train_inputs[0].shape}')

# Manually fitting GP (maximizing likelihood)

In [5]:
# training_iter = 100 

# # Find optimal model hyperparameters
# model.train()
# likelihood.train()

# # Use the SGD optimizer
# optimizer = torch.optim.SGD(model.parameters(), lr=0.25)  # Includes GaussianLikelihood parameters

# # "Loss" for GPs - the marginal log likelihood
# mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

# for i in range(3):
#     # Zero gradients from previous iteration
#     optimizer.zero_grad()
#     # Output from model
#     output = model(train_x)
#     # Calc loss and backprop gradients
#     loss = -mll(output, train_y)
#     logger.warning(f'Loss {loss.shape}')
#     loss.backward()
#     print('Iter %d/%d - Loss: %.3f noise: %.3f' % 
#         (
#         i + 1, training_iter, loss.item(),
#         model.likelihood.noise.item())
#         )
#     optimizer.step()