In [None]:
from framework import StandardScaler
from dql import DQLAgent, DQLTrainer
from ddpg import DDPGAgent, DDPGTrainer, LichtenbergAgent

from PIL import Image
import gym
import numpy as np

def to_gif(matrices, filepath, duration=25):
    """Write a sequence of image arrays to ``filepath`` as an animated GIF.

    Args:
        matrices: Iterable of arrays accepted by ``PIL.Image.fromarray``,
            one per frame, in playback order.
        filepath: Destination handed to ``Image.save``.
        duration: Per-frame display time forwarded to ``Image.save``
            (milliseconds for GIF output).

    Raises:
        ValueError: If ``matrices`` yields no frames.
    """
    frames = [Image.fromarray(matrix) for matrix in matrices]
    if not frames:
        # Without this guard the failure is an opaque IndexError on frames[0].
        raise ValueError("to_gif needs at least one frame")
    first, *rest = frames
    # loop=0 is passed through unchanged so the animation repeats indefinitely.
    first.save(filepath, save_all=True, append_images=rest, duration=duration, loop=0)

## Submarine

In [None]:
from environments.submarine import ContinuousSubmarine
import time

# Per-run results; the benchmark loop appends one entry to each list per run.
episodes, time_taken = [], []

env = ContinuousSubmarine(delta_t=1, randomize=False)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]

# Keep the featurizer scaling constant so that training runs stay comparable:
# the scaler is built with learn=False and given hard-coded mean/var values.
featurizer = StandardScaler(state_dim, learn=False)
featurizer.mean = np.array([7.73739964, 11.54841878, 1.6379028, 1.92430292])
featurizer.var = np.array([580.12531577, 1460.05771442, 36.57866531, 37.08183991])

In [None]:
# Benchmark: 30 independent training runs, recording for each one the number
# of episodes played and the wall-clock seconds spent.
for _ in range(30):
    # Alternative agent kept for reference:
    # agent = DDPGAgent(state_dim, action_dim, batch_size=32, tau=0.05)
    agent = LichtenbergAgent(
        state_dim,
        action_dim,
        "figure2d.npy",
        batch_size=32,
        tau=0.05,
        n_iter=3,
        pop=15,
    )
    trainer = DDPGTrainer(
        env,
        agent,
        featurizer,
        until_convergence=True,
        convergence_reward=100,
    )

    start = time.perf_counter()
    trainer.train(episodes=50000)
    # Keep training in blocks of 100 episodes until the trainer reports
    # convergence at the 100 threshold.
    while not trainer.has_converged(100):
        trainer.train(episodes=100)

    n_episodes = len(trainer.episode_rewards)
    elapsed_s = int(round(time.perf_counter() - start))
    episodes.append(n_episodes)
    time_taken.append(elapsed_s)
    print(n_episodes, elapsed_s)

Visualize Action-Value Function alongside policy

In [None]:
import matplotlib.pyplot as plt

# Roll out one greedy episode with a saved agent and, at every step, render
# both the environment frame and a 3-D surface of the Q-approximator over a
# 50x50 grid spanning its first two bounds. Both frame sequences are written
# out as GIFs at the end.
env.delta_t = 0.1
env.randomize = False
env.initial_state = [1, 25, 2, -5]

trainer = DDPGTrainer(env, "lichtenberg_agents/sub.pkl", featurizer)
state, info = trainer.env.reset()
state = trainer.featurizer.transform_state(state, info)
frames = []        # environment renders, one per step
func = []          # rendered Q-surface images, one per step
actions = []       # actions actually sent to the environment
total_reward = 0.0
it = 0

done = False
while not done:
    it += 1
    action = trainer.agent.act(state, explore=False)
    a = trainer.featurizer.transform_action(action, trainer.env.action_space.low, trainer.env.action_space.high)
    actions.append(a)
    next_state, reward, terminated, truncated, info = trainer.env.step(a)

    done = terminated or truncated
    total_reward += reward
    state = trainer.featurizer.transform_state(next_state, info)
    # The approximator is evaluated below for the state reached by this step.
    q_approx = trainer.agent.q_approximator
    q_approx.state = state

    frames.append(trainer.env.render())
    x = np.linspace(q_approx.lower_bound[0], q_approx.upper_bound[0], 50)
    y = np.linspace(q_approx.lower_bound[1], q_approx.upper_bound[1], 50)
    X, Y = np.meshgrid(x, y)
    # Distinct loop names keep the grid axes x/y from being visually shadowed.
    Z = np.fromiter(
        (q_approx.evaluate(np.array([gx, gy])) for gx, gy in zip(np.ravel(X), np.ravel(Y))),
        dtype=float,
        count=X.size,
    ).reshape(X.shape)

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # ax.set_zlim(70, 130)
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='viridis', alpha=0.7)
    # Raw string: '\p' is an invalid escape sequence in a normal literal.
    ax.set_xlabel(r'angle $\phi$')
    ax.set_ylabel('magnitude')
    ax.set_zlabel('expected value')
    ax.set_title(f'$Q(s_t, a)$ for t={it}')
    fig.canvas.draw()
    # buffer_rgba() replaces tostring_rgb(), which recent Matplotlib releases
    # no longer provide. Drop the alpha channel and copy so the image does not
    # alias the canvas buffer once the figure is closed.
    img = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()
    func.append(img)
    # One figure is created per step; close it so long episodes do not
    # accumulate open figures (this also stops them all being shown inline).
    plt.close(fig)

to_gif(frames, 'cow.gif')
to_gif(func, 'cat.gif')

## Parking

In [None]:
import torch
class ParkingFeaturizer(StandardScaler):
    """Featurizer that flattens a dict observation into a single state tensor.

    The base scaler is initialised for 19 features.
    NOTE(review): 19 presumably equals three 6-feature vectors
    (observation, achieved_goal, desired_goal) plus one speed value --
    confirm against the environment configuration.
    """

    def __init__(self):
        super().__init__(19)

    def transform_state(self, state, info=None):
        """Concatenate the observation parts and speed into a (1, N) float tensor.

        Args:
            state: Mapping with ``'observation'``, ``'achieved_goal'`` and
                ``'desired_goal'`` entries, each a 1-D array.
            info: Optional mapping; ``info['speed']`` is appended as the last
                feature. When ``info`` is missing or empty, 0.0 is used.

        Returns:
            ``torch.float32`` tensor with a leading batch dimension of 1.
            No scaling is applied in this override.
        """
        speed = info['speed'] if info else 0.0
        parts = [
            state['observation'],
            state['achieved_goal'],
            state['desired_goal'],
            np.array([speed]),
        ]
        # np.concatenate is available in every NumPy release, whereas
        # np.concat only exists as an alias from NumPy 2.0 onwards.
        flat = np.concatenate(parts)
        return torch.tensor(flat, dtype=torch.float32).unsqueeze(0)


In [None]:
import highway_env.envs.parking_env as parking_env

# Configuration dictionary handed to ParkingEnv.
parking_config = {
    "observation": {
        "type": "KinematicsGoal",
        "features": ['x', 'y', 'vx', 'vy', 'cos_h', 'sin_h'],
        "scales": [100, 100, 5, 5, 1, 1],
        "normalize": True,
    },
    "action": {"type": "ContinuousAction"},
    "simulation_frequency": 15,
    "policy_frequency": 3,
    "screen_width": 600,
    "screen_height": 300,
    "centering_position": [0.5, 0.5],
    "scaling": 7,
    "show_trajectories": False,
    "render_agent": True,
    "offscreen_rendering": True,
}

env = parking_env.ParkingEnv(parking_config)
# The render mode is assigned on the instance after construction.
env.render_mode = 'rgb_array'

featurizer = ParkingFeaturizer()
# 19 state features and 2 action dimensions are passed to the agent.
agent = LichtenbergAgent(
    19,
    2,
    "figure2d.npy",
    hidden_layers=3,
    tau=0.05,
    batch_size=64,
    n_iter=3,
    pop=15,
)
trainer = DDPGTrainer(env, agent, featurizer)

In [None]:
# Train the parking agent for 10000 episodes. Nothing is persisted by this
# cell: the save() call below is intentionally left disabled.
trainer.train(episodes=10000)
# trainer.save()

## Inverted Double Pendulum

In [None]:
env = gym.make('InvertedDoublePendulum-v4', render_mode='rgb_array')

# Discretize the action space: replace it with 21 evenly spaced values
# between the original lower and upper bounds.
action_low, action_high = env.action_space.low, env.action_space.high
env.action_space = np.linspace(action_low, action_high, 21)
state_dim = env.observation_space.shape[0]

# Apply adaptive scaling to state vectors.
featurizer = StandardScaler(state_dim)

agent = DQLAgent(
    # neural net params
    input_dim=state_dim,
    output_dim=env.action_space.shape[0],
    hidden_dim=128,
    hidden_layers=5,
    batch_size=256,
    # discount factor
    gamma=0.99,
    # exploration rate and decay
    min_epsilon=0.1,
    epsilon_decay=0.999,
    # update rate of target net
    tau=0.005,
)

trainer = DQLTrainer(env, agent, featurizer)
trainer.train(episodes=300)

# Run one more episode after training and report its summary.
info = trainer.run_episode(False)
print(f"cumulative reward: {info['reward']:.2f}, steps: {info['steps']}")
trainer.plot_losses()

In [None]:
# Continuous-action counterpart of the DQL experiment: train a DDPG agent on
# the same environment, using its native action space.
env = gym.make('InvertedDoublePendulum-v4', render_mode='rgb_array')
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]

# A single scaler instance is built and handed to the trainer.
featurizer = StandardScaler(state_dim)
agent = DDPGAgent(state_dim, action_dim, hidden_layers=2, tau=0.01, batch_size=256)

trainer = DDPGTrainer(env, agent, featurizer)
trainer.train(episodes=10000)
trainer.plot_losses()

## Ball and Beam Problem

In [None]:
import ballbeam_gym.envs
import warnings
# Suppress every warning from here on in this kernel session.
warnings.filterwarnings('ignore')

env = ballbeam_gym.envs.BallBeamSetpointEnv(
    timestep=0.02,
    setpoint=-0.8,
    beam_length=2.0,
    max_angle=0.5,
    max_timesteps=500,
    action_mode='discrete',
)
# Replace the action space with the three integer action indices 0, 1, 2.
env.action_space = np.arange(3)
state_dim = env.observation_space.shape[0]

featurizer = StandardScaler(state_dim)
agent = DQLAgent(
    state_dim,
    env.action_space.shape[0],
    batch_size=128,
    epsilon_decay=0.9995,
)

In [None]:
# Train the ball-and-beam DQL agent for 1000 episodes using the env, agent and
# featurizer built in the setup cell, then plot the recorded losses.
trainer = DQLTrainer(env, agent, featurizer)
trainer.train(episodes=1000)
trainer.plot_losses()

In [None]:
# Run one episode with the trained agent and export the frames found under
# the 'rgb_arrays' key of the returned mapping as an animated GIF.
info = trainer.run_episode()
to_gif(info['rgb_arrays'], 'ball_and_beam.gif', duration=25)