# Import Libraries

In [34]:
from src.capstone.evaluation import Evaluator
from src.capstone.cbf import CBF

import numpy as np
import gymnasium as gym

import torch
import torch.nn as nn
import torch.optim as optim

# Gym wrapper for dynamics

In [40]:
class Pendulum(gym.Wrapper):
    """Pendulum wrapper that replaces the dynamics with a simple noisy model.

    The wrapped environment is still stepped so that its own bookkeeping
    (in particular the ``truncated`` flag) keeps working, but the state it
    computes is then overwritten with one Euler step of::

        theta_dot' = theta_dot + sin(theta) * dt + u * dt + noise(std=0.025)
        theta'     = theta + theta_dot * dt + noise(std=0.005)

    where ``u`` is the action clipped to ``[-max_torque, max_torque]``.

    Both ``reset`` and ``step`` return the raw ``[theta, theta_dot]`` state
    as the observation. The reward is always ``0.`` and ``terminated`` is
    always ``False``.

    NOTE(review): ``observation_space`` is inherited from the wrapped env
    and is not updated to describe the 2-element state - confirm whether any
    downstream code relies on it.
    """

    def __init__(self, env):
        super().__init__(env)
        # Integration time step of the replacement dynamics.
        self.dt = 0.01

    def reset(self, *, seed=None, options=None):
        """Reset the wrapped env and return ``(state, info)``.

        The observation produced by the wrapped env is discarded in favour
        of the raw ``[theta, theta_dot]`` state so that ``reset`` and
        ``step`` hand out observations with the same layout.

        NOTE: ``seed`` only reaches the wrapped env; the process noise in
        ``step`` is drawn from the global ``np.random`` state.
        """
        _, info = self.env.reset(seed=seed, options=options)
        return self.env.unwrapped.state, info

    def step(self, action):
        """Advance the noisy dynamics by one ``dt``.

        Returns ``(state, 0., False, truncated, {})`` where ``truncated`` is
        whatever the wrapped environment reported for this step.
        """
        # Read the state *before* stepping the wrapped env, because that
        # call overwrites it with the env's own dynamics.
        th, thdot = self.env.unwrapped.state

        # Step the wrapped env only for its side effects (step counting /
        # truncation); everything else it returns is discarded.
        _, _, _, truncated, _ = self.env.step(action)

        u = np.clip(action, -self.env.unwrapped.max_torque,
                    self.env.unwrapped.max_torque)[0]

        newthdot = thdot + np.sin(th) * self.dt + u * self.dt + np.random.normal(0., 0.025)
        newth = th + thdot * self.dt + np.random.normal(0., 0.005)

        # Replace the state computed by the wrapped env with our own.
        self.env.unwrapped.state = np.array([newth, newthdot])

        return self.env.unwrapped.state, 0., False, truncated, {}

In [41]:
# In the paper the MC simulation runs for 1 second; with dt = 0.01 that is
# 100 steps. The limit is passed to gym.make so the time-limit wrapper is
# configured through the public API rather than by writing to a private
# attribute of whichever wrapper happens to be outermost.
env = gym.make('Pendulum-v1', max_episode_steps=100)
env = Pendulum(env)

In [42]:
# Roll out one episode while applying a constant torque of 2.
# The environment must be reset before the first step(), and `done` must
# exist before the loop condition is evaluated.
env.reset()
done = False
while not done:
    state, reward, terminated, truncated, _ = env.step(np.array([2.]))
    done = (terminated or truncated)
env.close()

# NNDM

A neural network dynamic model (NNDM) must be learned because the pendulum dynamics are non-linear (they contain a sin(θ) term), so we cannot hand-craft a neural network that reproduces them exactly.

In [53]:
class NNDM(nn.Sequential):
    """Neural network dynamics model: a 3 -> 64 -> 2 MLP with a tanh layer.

    Presumably the 3 inputs are the 2-element state concatenated with the
    1-element action and the 2 outputs are the predicted next state -
    TODO confirm once ``update`` is implemented.

    Attributes:
        criterion: ``nn.MSELoss`` instance intended for training.
        optimizer: Adam optimizer (lr=1e-3) over the network parameters.
    """

    def __init__(self):
        layers = [
            nn.Linear(3, 64),
            nn.Tanh(),
            nn.Linear(64, 2)
        ]

        super(NNDM, self).__init__(*layers)

        # A plain `self.criterion = nn.MSELoss()` would go through
        # nn.Module.__setattr__, which registers every Module-valued
        # attribute as a child. nn.Sequential.forward runs all registered
        # children in order, so the loss would become a 4th "layer" and
        # forward() would fail when calling it with a single argument.
        # Bypass the registration so the loss is an ordinary attribute.
        object.__setattr__(self, 'criterion', nn.MSELoss())
        self.optimizer = optim.Adam(self.parameters(), lr=1e-3)

    def update(self, state, action, next_state):
        """Placeholder for one training step on a single transition.

        Currently a no-op: no loss is computed and no parameters change.
        """
        pass

In [54]:
# Dynamics model instance that the training loop below calls update() on.
nndm = NNDM()

In [55]:
# 100 runs of 100 frames to train the NNDM
for _ in range(100):
    env.reset()
    # Start from the raw (theta, theta_dot) state - the same quantity that
    # step() returns - instead of relying on the layout of the observation
    # handed out by reset().
    state = env.unwrapped.state
    done = False

    while not done:
        action = env.action_space.sample()
        next_state, _, terminated, truncated, _ = env.step(action)

        done = (terminated or truncated)
        nndm.update(state, action, next_state)

        # Advance the transition; without this every update would pair the
        # episode's initial state with later next_states.
        state = next_state

env.close()

# Dummy controller

In [25]:
class Agent(nn.Module):
    """Dummy controller: a 2-input, 1-output linear layer that outputs u = 0."""

    def __init__(self):
        super().__init__()

        linear = nn.Linear(2, 1)
        # Overwrite the default initialisation with zeros so the controller
        # outputs u = 0 for every input.
        linear.weight = nn.Parameter(torch.zeros(1, 2))
        linear.bias = nn.Parameter(torch.zeros(1))
        self.layer = linear

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        control = self.layer(x)
        return control

In [27]:
# Instantiate the dummy controller (weights and bias are all zero).
policy = Agent()

# CBF