# Import Libraries

In [41]:
from src.capstone.evaluation import Evaluator
from src.capstone.cbf import CBF
from src.capstone.settings import Env

from bound_propagation.linear import FixedLinear
from bound_propagation.polynomial import MultivariateMonomial, UnivariateMonomial

import numpy as np
import gymnasium as gym

import torch
import torch.nn as nn

# Gym wrapper for dynamics

In [2]:
class PendulumNoise(gym.Wrapper):
    """Gym wrapper that replaces the wrapped env's dynamics with a noisy model.

    Each call to :meth:`step` overwrites the wrapped environment's state with

        theta_dot' = theta_dot + sin(theta) * dt + u * dt + N(0, 0.025)
        theta'     = theta + theta_dot * dt + N(0, 0.005)

    where ``u`` is the action clipped to ``[-max_torque, max_torque]``.
    The observation returned is the raw state ``[theta, theta_dot]``.
    """

    def __init__(self, env):
        """Wrap ``env`` and fix the integration step ``dt`` to 0.01."""
        super().__init__(env)
        self.dt = 0.01

    def step(self, action):
        """Advance the state by one noisy step.

        Args:
            action: indexable action; its first element (after clipping) is
                used as the torque.

        Returns:
            ``(state, reward, terminated, truncated, info)`` where reward is
            always ``0.``, terminated is always ``False`` and info is empty.
        """
        # Read the state before stepping the wrapped env.
        th, thdot = self.env.unwrapped.state
        # The wrapped step is only used for its `truncated` flag; any state
        # update it performs is overwritten below.
        _, _, _, truncated, _ = self.env.step(action)

        max_torque = self.env.unwrapped.max_torque
        u = np.clip(action, -max_torque, max_torque)[0]

        newthdot = thdot + np.sin(th) * self.dt + u * self.dt + np.random.normal(0., 0.025)
        newth = th + thdot * self.dt + np.random.normal(0., 0.005)

        self.env.unwrapped.state = np.array([newth, newthdot])

        return self.env.unwrapped.state, 0., False, truncated, {}

    def reset(self, state=None, *, seed=None, options=None):
        """Reset the wrapped env, optionally forcing a specific initial state.

        Args:
            state: if given, the wrapped env's state is set to
                ``np.array(state)`` after the reset; otherwise the state
                produced by the wrapped env's own reset is kept.
            seed: forwarded to the wrapped env's ``reset``.
            options: forwarded to the wrapped env's ``reset``.

        Returns:
            ``(state, info)`` with an empty info dict.
        """
        # Forward seed/options so the standard `reset(seed=..., options=...)`
        # call works on this wrapper too.
        # NOTE: the noise in `step` is drawn from NumPy's global RNG, which
        # this seed does not touch.
        self.env.reset(seed=seed, options=options)

        if state is not None:
            self.env.unwrapped.state = np.array(state)
        return self.env.unwrapped.state, {}

# NNDM

We assume the dynamics are known, so that our results can be compared directly with those in the paper. The NNDM outputs the EXPECTED next state, i.e. the zero-mean Gaussian noise terms drop out.

In [3]:
class NNDM(nn.Module):
    """Analytic dynamics model returning the noise-free next state.

    Computes, with ``dt = 0.01``:

        theta_dot' = theta_dot + sin(theta) * dt + u * dt
        theta'     = theta + theta_dot * dt
    """

    def __init__(self):
        super().__init__()
        self.dt = 0.01

    def forward(self, x):
        """Return the next state for the input ``(theta, theta_dot, u)``.

        Args:
            x: a tensor (or other iterable of tensors) that unpacks along its
                first dimension into exactly three components
                ``theta, theta_dot, u``.

        Returns:
            Tensor ``[theta', theta_dot']`` stacked along a new first
            dimension.
        """
        th, thdot, u = x

        newthdot = thdot + torch.sin(th) * self.dt + u * self.dt
        newth = th + thdot * self.dt

        # torch.stack (rather than torch.tensor([...])) keeps the result in
        # the autograd graph and on the inputs' device, and also works when
        # the components are not scalars.
        return torch.stack([newth, newthdot])

In [4]:
# Dynamics-model instance; passed to CBF further down as its second argument.
nndm = NNDM()

# Agent

In [5]:
class Agent(nn.Module):
    """Placeholder linear policy, a = c1 * s1 + c2 * s2 + c3.

    All coefficients are initialised to zero, so the policy outputs u = 0
    for every state until the parameters are changed.
    """

    def __init__(self):
        super().__init__()

        affine = nn.Linear(2, 1)
        # Overwrite the default initialisation with zeros (dummy output u=0).
        affine.weight = nn.Parameter(torch.zeros(1, 2))
        affine.bias = nn.Parameter(torch.zeros(1))
        self.layer = affine

    def forward(self, x):
        """Apply the linear layer to the state ``x``."""
        return self.layer(x)

    def select_action(self, x, exploration=False):
        """Return the policy output for ``x``.

        Raises:
            ValueError: if ``exploration`` is truthy; exploration is not
                supported by this agent.
        """
        if not exploration:
            return self.forward(x)
        raise ValueError('This model is not implemented for exploration')

In [6]:
# Policy instance; passed to CBF further down as its third argument.
policy = Agent()

# Env settings

In [7]:
class Pendulum(Env):
    """Settings object for the noisy pendulum experiment.

    Sets the attributes ``is_discrete``, ``h_function``, ``h_ids``, ``std``
    and ``env`` (a ``PendulumNoise`` wrapper around ``Pendulum-v1``).

    NOTE(review): ``Env.__init__`` is not called here, and how the ``Env``
    base class consumes these attributes is not visible in this notebook;
    confirm against ``src.capstone.settings``.
    """
    def __init__(self):
        env = gym.make('Pendulum-v1')
        self.is_discrete = False
        
        # h as defined in the paper
        # NOTE(review): the `...` below is a literal placeholder; the actual
        # layers of h still have to be filled in before this class is usable.
        self.h_function = nn.Sequential(
            ...
        )
        
        # presumably the indices of the state dimensions that h takes as
        # input; TODO confirm
        self.h_ids = [0, 1]
        # same values as the noise standard deviations in PendulumNoise.step
        # (theta: 0.005, theta_dot: 0.025)
        self.std = [0.005, 0.025]
        self.env = PendulumNoise(env)

In [43]:
# Scratch check of UnivariateMonomial on a 2-element input.
# NOTE(review): as written, this cell fails with the TypeError recorded in its
# output (pow() receives a list as exponent). The constructor argument format
# should be checked against the bound_propagation documentation.
test = torch.tensor([2., 3.])

t = nn.Sequential(
    UnivariateMonomial(
        [
            (1, 3)  # presumably (input index, power); TODO confirm
        ]
    )
)

t(test)

TypeError: pow() received an invalid combination of arguments - got (list), but expected one of:
 * (Tensor exponent)
      didn't match because some of the arguments have invalid types: (!list of [int]!)
 * (Number exponent)
      didn't match because some of the arguments have invalid types: (!list of [int]!)


# CBF

In [ ]:
# Build the environment settings and the CBF object from the dynamics model
# and policy created above.
env = Pendulum()

# NOTE(review): alpha and delta are passed as empty lists, presumably
# placeholders that still need real values; confirm what CBF expects.
cbf = CBF(env, nndm, policy,
          alpha=[],
          delta=[],
          no_action_partitions=2,
          no_noise_partitions=2,
          stochastic=True)