In [2]:
import src.capstone as capstone
import numpy as np
import gymnasium as gym

In [11]:
class Pendulum(gym.Wrapper):
    """Wrapper that replaces the wrapped env's dynamics with a noisy Euler update.

    The wrapped environment is still stepped on every call, but only its
    ``truncated`` flag is kept; the state it computes is overwritten with::

        thdot' = thdot + sin(th) * dt + u * dt + N(0, thdot_noise_std)
        th'    = th + thdot * dt       + N(0, th_noise_std)

    where ``u`` is the action clipped to ``[-max_torque, max_torque]``.

    Args:
        env: Environment to wrap. Its unwrapped instance must expose ``state``
            (unpackable as ``th, thdot``) and ``max_torque``.
        dt: Integration step used by the Euler update.
        thdot_noise_std: Standard deviation of the Gaussian noise added to the
            angular-velocity update.
        th_noise_std: Standard deviation of the Gaussian noise added to the
            angle update.

    NOTE(review): ``step`` returns the raw ``(th, thdot)`` state, while
    ``reset`` is not overridden and returns whatever the wrapped env returns.
    Confirm callers do not rely on both having the same layout.
    """

    def __init__(self, env, dt=0.01, thdot_noise_std=0.025, th_noise_std=0.005):
        super().__init__(env)
        self.dt = dt
        self.thdot_noise_std = thdot_noise_std
        self.th_noise_std = th_noise_std

    def step(self, action):
        """Advance the pendulum by one noisy Euler step.

        Args:
            action: Array-like torque command; only element ``[0]`` is used
                after clipping to the env's torque limits.

        Returns:
            Tuple ``(state, reward, terminated, truncated, info)`` where
            ``state`` is ``np.array([th, thdot])``, ``reward`` is always
            ``0.``, ``terminated`` is always ``False``, ``truncated`` comes
            from the wrapped env, and ``info`` is an empty dict.
        """
        base = self.env.unwrapped

        # Read the state *before* stepping: the wrapped step mutates it.
        th, thdot = base.state
        # Step the wrapped env only for its truncation signal; its own
        # state update is discarded below.
        _, _, _, truncated, _ = self.env.step(action)

        u = np.clip(action, -base.max_torque, base.max_torque)[0]

        # Draw noise from the env's own generator so that
        # ``reset(seed=...)`` makes the rollout reproducible (the global
        # ``np.random`` state is not affected by the env seed).
        rng = base.np_random
        newthdot = (thdot + np.sin(th) * self.dt + u * self.dt
                    + rng.normal(0., self.thdot_noise_std))
        newth = th + thdot * self.dt + rng.normal(0., self.th_noise_std)

        base.state = np.array([newth, newthdot])

        return base.state, 0., False, truncated, {}

In [13]:
# In the paper, the MC simulation runs for 1 second; with dt = 0.01 that is
# 100 steps. The limit is passed through gym.make's public
# `max_episode_steps` argument rather than by writing the private
# `_max_episode_steps` attribute on the returned wrapper.
env = gym.make('Pendulum-v1', render_mode='human', max_episode_steps=100)
env = Pendulum(env)

In [9]:
# Start a new episode: clear the loop flag, then reset the env with a fixed seed.
done = False
state, _ = env.reset(seed=42)

In [10]:
# Roll the episode out under a constant torque command until the env reports
# termination or truncation.
constant_torque = np.array([2.])
try:
    while not done:
        state, reward, terminated, truncated, _ = env.step(constant_torque)
        done = terminated or truncated
finally:
    # Always release the env, even if a step raises part-way through.
    env.close()