In [1]:
from src.capstone.evaluation import Evaluator
from src.capstone.cbf import CBF
from src.capstone.settings import Env

from double_integrator import DoubleIntegratorEnv
import gymnasium as gym

from bound_propagation.polynomial import Pow
from bound_propagation.linear import FixedLinear

import numpy as np

import torch
import torch.nn as nn

import matplotlib.pyplot as plt

$$B = \begin{bmatrix}
\frac{1}{2}\Delta t^2 & 0\\
0 & \frac{1}{2}\Delta t^2\\
\Delta t & 0\\
0 & \Delta t
\end{bmatrix}$$

$$Q = B B^T = \begin{bmatrix}
\frac{1}{2}\Delta t^2 & 0\\
0 & \frac{1}{2}\Delta t^2\\
\Delta t & 0\\
0 & \Delta t
\end{bmatrix}
\begin{bmatrix}
\frac{1}{2}\Delta t^2 & 0 & \Delta t & 0\\
0 & \frac{1}{2}\Delta t^2 & 0 & \Delta t
\end{bmatrix}
= \begin{bmatrix}
\frac{1}{4}\Delta t^4 & 0 & \frac{1}{2}\Delta t^3 & 0\\
0 & \frac{1}{4}\Delta t^4 & 0 & \frac{1}{2}\Delta t^3\\
\frac{1}{2}\Delta t^3 & 0 & \Delta t^2 & 0\\
0 & \frac{1}{2}\Delta t^3 & 0 & \Delta t^2
\end{bmatrix}$$

In [4]:
class IntegratorNoise(gym.Wrapper):
    """Wrapper that adds zero-mean Gaussian noise to the state returned by ``step``.

    The noise covariance is ``Q = B B^T`` with
    ``B = [[dt^2/2, 0], [0, dt^2/2], [dt, 0], [0, dt]]``, written out entry by
    entry in ``self.cov``.

    Args:
        env: environment to wrap; its ``step`` must return a 5-tuple whose first
            element is a length-4 state.
        dt: time step used to build the covariance. Defaults to 0.05, the value
            that was previously hard-coded.
    """

    def __init__(self, env, dt=0.05):
        super().__init__(env)

        # Q = B B^T; rank 2, so position and velocity noise are fully correlated
        # along each axis.
        self.cov = np.array([[0.25 * dt**4, 0., 0.5 * dt**3, 0.],
                             [0., 0.25 * dt**4, 0., 0.5 * dt**3],
                             [0.5 * dt**3, 0., dt**2, 0.],
                             [0., 0.5 * dt**3, 0., dt**2]])

    def step(self, action):
        """Step the wrapped env and return its state perturbed by a noise sample.

        The reward, ``terminated`` flag and info dict of the wrapped environment
        are discarded and replaced by ``0.``, ``False`` and ``{}``; only
        ``truncated`` is passed through.

        Returns:
            (noisy_state as float32, 0., False, truncated, {})
        """
        state, _, _, truncated, _ = self.env.step(action)
        # Drawn from NumPy's global RNG, so reproducibility depends on np.random.seed.
        noise = np.random.multivariate_normal(mean=np.zeros(4), cov=self.cov)

        return (state + noise).astype(np.float32), 0., False, truncated, {}

    def reset(self, seed=None, options=None):
        """Reset the wrapped environment and return its result unchanged.

        ``options`` is accepted for compatibility with the Gymnasium reset
        signature. It is only forwarded when given, so wrapped environments
        whose ``reset`` does not take ``options`` keep working.
        """
        if options is None:
            return self.env.reset(seed=seed)
        return self.env.reset(seed=seed, options=options)

In [6]:
class NNDM(nn.Sequential):
    """Double-integrator dynamics written as one bias-free linear layer.

    Input  (6 values): [x pos, y pos, x speed, y speed, ux, uy]
    Output (4 values): [x pos, y pos, x speed, y speed] after one step of
    length ``dt``:

        pos'   = pos + dt * speed + dt^2 / 2 * u
        speed' = speed + dt * u

    Args:
        dt: time step. Defaults to 0.05, the value that was previously
            hard-coded.
    """

    def __init__(self, dt=0.05):
        half_dt_sq = (dt ** 2) / 2

        layer = nn.Linear(6, 4, bias=False)
        # Overwrite the random initialisation with the known dynamics matrix.
        weights = torch.tensor([[1., 0., dt, 0., half_dt_sq, 0.],
                                [0., 1., 0., dt, 0., half_dt_sq],
                                [0., 0., 1., 0., dt, 0.],
                                [0., 0., 0., 1., 0., dt]])
        layer.weight = nn.Parameter(weights)

        # Initialise the Sequential once, with the layer; a second
        # super().__init__ would re-initialise the module's internal state.
        super().__init__(layer)
        self.dt = dt


nndm = NNDM()

In [20]:
class Agent(nn.Module):
    """Placeholder policy that outputs the same action for every (finite) state.

    The single linear layer has an all-zero 2x4 weight, so its output equals
    the bias ``[50., 0.]`` regardless of the 4-dimensional input.
    """

    def __init__(self):
        super().__init__()

        # Declared as 4 inputs -> 2 outputs so in_features agrees with the
        # 2x4 weight assigned below (it was declared as Linear(2, 2)).
        self.layer = nn.Linear(4, 2)

        # constant output u = [50, 0]: zero weight, so only the bias contributes
        self.layer.weight = nn.Parameter(torch.zeros(2, 4))
        self.layer.bias = nn.Parameter(torch.tensor([50., 0.]))

    def forward(self, x):
        """Return the layer output for ``x`` (last dimension must be 4)."""
        return self.layer(x)

    def select_action(self, x, exploration=False):
        """Return the action for state ``x``.

        Raises:
            ValueError: if ``exploration`` is truthy; this policy has no
                exploration mode.
        """
        if exploration:
            raise ValueError('This model is not implemented for exploration')
        return self.forward(x)

In [21]:
# Instantiate the constant-action policy; it is passed to both CBF(...) and
# Evaluator.mc_simulate(...) further down.
policy = Agent()

In [46]:
# Register DoubleIntegratorEnv under the id 'DoubleIntegrator' so that
# gym.make('DoubleIntegrator') can construct it.
# 100 steps for dt = 0.05 yields 5 second episodes
gym.register('DoubleIntegrator', DoubleIntegratorEnv, max_episode_steps=100)

In [47]:
class DoubleIntegrator(Env):
    """Experiment settings for the noisy double integrator.

    Attributes set in ``__init__``:
        is_discrete: ``False``.
        settings: ``{'noise': [], 'max_frames': 999}``.
        h_function: maps a 4-dim state to two values
            ``h_i = 1 - (p_i / 0.5)^2`` for the first two state components,
            so each ``h_i`` is positive exactly when ``|p_i| < 0.5``.
        h_ids: ``[0, 1]`` (presumably the state indices h depends on; confirm
            against the CBF implementation).
        std: ``[dt^2 / 2, dt^2 / 2]`` with ``dt = 0.05``, i.e. the square root
            of the position variances in ``IntegratorNoise.cov``.
        env: the registered 'DoubleIntegrator' environment wrapped in
            ``IntegratorNoise``.
    """

    def __init__(self):
        base_env = gym.make('DoubleIntegrator')

        dt = 0.05        # time step, same value as used by IntegratorNoise / NNDM
        half_width = 0.5  # h_i crosses zero at |p_i| = half_width
        neg_inv_sq = -1 / half_width ** 2

        self.is_discrete = False

        self.settings = {
            'noise': [],
            'max_frames': 999
        }

        # Step 1: pick the first two state components (rows of a 2x4 identity).
        pick_positions = FixedLinear(torch.eye(2, 4), torch.zeros(2))
        # Step 3: affine map z -> 1 - z / half_width^2 applied per component.
        one_minus_scaled = FixedLinear(
            torch.tensor([[neg_inv_sq, 0.],
                          [0., neg_inv_sq]]),
            torch.ones(2)
        )
        # Step 2 (Pow(2)) squares each selected component in between.
        self.h_function = nn.Sequential(pick_positions, Pow(2), one_minus_scaled)

        self.h_ids = [0, 1]
        self.std = [dt ** 2 / 2] * 2
        self.env = IntegratorNoise(base_env)

In [48]:
# Build the experiment settings object (wrapped noisy environment, barrier
# function h and noise std) that the CBF and the Evaluator below consume.
env = DoubleIntegrator()

In [49]:
# Construct the control barrier function from the settings object, the
# dynamics model and the policy.
# NOTE(review): alpha and delta have one entry per component of h (two here);
# their exact meaning, and that of the partition counts, is defined by
# src.capstone.cbf.CBF - confirm there before tuning.
cbf = CBF(env, nndm, policy,
          alpha=[0.9, 0.9],
          delta=[0., 0.],
          no_action_partitions=2,
          no_noise_partitions=2,
          stochastic=True)

In [50]:
# Run the evaluator's Monte Carlo simulation with the CBF enabled.
# NOTE(review): 100 appears to be the number of simulated runs (the progress
# bar counts to 100) and seed looks like a fixed 4-dim initial state rather
# than an RNG seed - confirm against Evaluator.mc_simulate.
evaluator = Evaluator(env, cbf)
f, h = evaluator.mc_simulate(policy, 100, cbf_enabled=True, seed=[0., 0., 0., 0.])

  0%|          | 0/100 [00:00<?, ?it/s]

In [51]:
# Display the first value returned by mc_simulate.
# NOTE(review): its meaning is defined by Evaluator.mc_simulate - TODO describe it here.
f

[80, 53, 76, 78, 70]