In [36]:
import os
import shutil

import numpy as np
import gymnasium as gym
from gymnasium import Wrapper, Env
from gymnasium.wrappers import RecordVideo
from stable_baselines3 import PPO


In [37]:
def obs_to_state(obs):
    """Recover joint angles from a (cos, sin)-encoded observation.

    Args:
        obs: Sequence of six values ``(c1, s1, c2, s2, o1, o2)`` where
            ``(c1, s1)`` and ``(c2, s2)`` are the cosine/sine of the two
            angles. ``o1`` and ``o2`` are passed through unchanged
            (presumably the angular velocities of Acrobot -- confirm
            against the environment's observation layout).

    Returns:
        ``np.ndarray`` ``[t1, t2, o1, o2]`` with both angles in ``[-pi, pi]``.
    """
    c1, s1, c2, s2, o1, o2 = obs
    # arctan2 uses both components, so the sign comes for free and the result
    # stays finite even if rounding pushes a cosine marginally outside
    # [-1, 1] (np.arccos would return NaN there).
    t1, t2 = np.arctan2(s1, c1), np.arctan2(s2, c2)
    return np.array([t1, t2, o1, o2])


# TODO: Get this to work; we need episode lengths to be constant. See:
# https://imitation.readthedocs.io/en/latest/main-concepts/variable_horizon.html
class FixEpisodeLength(Wrapper):
    """Wrapper that hides early termination so episodes only end by truncation.

    After the wrapped env first reports termination, every later ``step`` of
    that episode returns the observation and reward of the terminating step.
    The terminated flag handed to the caller is always ``False``; ``truncated``
    and ``info`` are forwarded as-is from the wrapped env, which is still
    stepped on every call (its internal state is not frozen).
    """

    def __init__(self, env: Env):
        super().__init__(env)
        # freeze: True once the wrapped env has terminated in this episode.
        # last_obs / success_reward: values replayed while frozen.
        self.freeze, self.last_obs, self.success_reward = False, None, None

    def step(self, action):
        """Step the wrapped env, replaying the terminal transition once frozen."""
        next_obs, step_reward, terminated, truncated, info = self.env.step(action)

        if self.freeze:
            # Already terminated earlier: ignore the fresh obs/reward.
            # self.env.state = obs_to_state(obs)  # TODO: Acrobot-specific
            return self.last_obs, self.success_reward, False, truncated, info

        if terminated:
            # First terminating step: remember its reward for later replay.
            self.freeze, self.success_reward = True, step_reward
        self.last_obs = next_obs
        return next_obs, step_reward, False, truncated, info

    def reset(self, **kwargs):
        """Clear the freeze bookkeeping, then reset the wrapped env."""
        self.freeze, self.last_obs, self.success_reward = False, None, None
        return self.env.reset(**kwargs)


# Single Acrobot env used to train the oracle policy; episodes are capped at
# 512 steps and render_mode="rgb_array" lets the video recorder grab frames.
env = gym.make("Acrobot-v1", max_episode_steps=512, render_mode="rgb_array")
# Disabled until the FixEpisodeLength wrapper works (see its TODO):
# env = FixEpisodeLength(env)

In [39]:
def wrap_around_video_recorder(env, folder):
    """Wrap ``env`` in a ``RecordVideo`` that writes every episode to ``folder``."""

    def record_every_episode(episode_id):
        # The trigger ignores the episode index: all episodes are recorded.
        return True

    return RecordVideo(env, folder, episode_trigger=record_every_episode)


# Video output lives under <cwd>/gym-results/oracle/train.
output_folder = os.path.join(os.getcwd(), 'gym-results', 'oracle')
train_output_folder = os.path.join(output_folder, 'train')

# Start from an empty train/ directory; makedirs also creates output_folder
# (and any other missing parents) on the way.
shutil.rmtree(train_output_folder, ignore_errors=True)
os.makedirs(train_output_folder, exist_ok=True)

# Record every training episode to train_output_folder.
env = wrap_around_video_recorder(env, train_output_folder)

# Train the oracle policy with PPO, then release the env and persist the model.
model = PPO("MlpPolicy", env, verbose=1, seed=0)
model.learn(total_timesteps=16384)
env.close()
model.save('my-model')

  logger.warn(


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-0.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-0.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-0.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-1.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-1.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-1.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-2.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-2.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-2.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-3.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-3.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-3.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 480      |
|    ep_rew_mean     | -480     |
| time/              |          |
|    fps             | 170      |
|    iterations      | 1        |
|    time_elapsed    | 12       |
|    total_timesteps | 2048     |
---------------------------------
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-4.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-4.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-4.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-5.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-5.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-5.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-6.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-6.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-6.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-7.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-7.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-7.mp4
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 496         |
|    ep_rew_mean          | -496        |
| time/                   |             |
|    fps                  | 149         |
|    iterations           | 2           |
|    time_elapsed         | 27          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.003556368 |
|    clip_fraction        | 0.00444     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.1        |
|    explained_variance   | -0.0304     |
|    learning_rate        | 0.0003      |
|    loss                 | 19.2        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00257    |
|    value_loss           | 144         |
----

                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-8.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-9.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-9.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-9.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-10.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-10.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-10.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-11.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-11.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-11.mp4
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 502        |
|    ep_rew_mean          | -501       |
| time/                   |            |
|    fps                  | 147        |
|    iterations           | 3          |
|    time_elapsed         | 41         |
|    total_timesteps      | 6144       |
| train/                  |            |
|    approx_kl            | 0.00415776 |
|    clip_fraction        | 0.00229    |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.09      |
|    explained_variance   | 0.00241    |
|    learning_rate        | 0.0003     |
|    loss                 | 21.7       |
|    n_updates            | 20         |
|    policy_gradient_loss | -0.00096   |
|    value_loss           | 109        |
-----------------------

                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-12.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-13.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-13.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-13.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-14.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-14.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-14.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-15.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-15.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-15.mp4
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 504         |
|    ep_rew_mean          | -504        |
| time/                   |             |
|    fps                  | 148         |
|    iterations           | 4           |
|    time_elapsed         | 55          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.010258327 |
|    clip_fraction        | 0.0551      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | 0.0025      |
|    learning_rate        | 0.0003      |
|    loss                 | 20.8        |
|    n_updates            | 30          |
|    policy_gradient_loss | -0.00454    |
|    value_loss           | 98.8        |
---

                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-16.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-17.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-17.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-17.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-18.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-18.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-18.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-19.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-19.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-19.mp4
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 506         |
|    ep_rew_mean          | -506        |
| time/                   |             |
|    fps                  | 144         |
|    iterations           | 5           |
|    time_elapsed         | 70          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.009465017 |
|    clip_fraction        | 0.0242      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.00254    |
|    learning_rate        | 0.0003      |
|    loss                 | 15.7        |
|    n_updates            | 40          |
|    policy_gradient_loss | -0.00291    |
|    value_loss           | 79.4        |
---

                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-20.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-21.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-21.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-21.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-22.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-22.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-22.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-23.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-23.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-23.mp4
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 507          |
|    ep_rew_mean          | -507         |
| time/                   |              |
|    fps                  | 147          |
|    iterations           | 6            |
|    time_elapsed         | 83           |
|    total_timesteps      | 12288        |
| train/                  |              |
|    approx_kl            | 0.0054701674 |
|    clip_fraction        | 0.0214       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.08        |
|    explained_variance   | 0.00941      |
|    learning_rate        | 0.0003       |
|    loss                 | 11.2         |
|    n_updates            | 50           |
|    policy_gradient_loss | -0.00271     |
|    value_loss           

                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-24.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-25.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-25.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-25.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-26.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-26.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-26.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-27.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-27.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-27.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-28.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-28.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-28.mp4
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 490          |
|    ep_rew_mean          | -489         |
| time/                   |              |
|    fps                  | 147          |
|    iterations           | 7            |
|    time_elapsed         | 97           |
|    total_timesteps      | 14336        |
| train/                  |              |
|    approx_kl            | 0.0072997697 |
|    clip_fraction        | 0.029        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.08        |
|    explained_variance   | -0.0522      |
|    learning_rate        | 0.0003       |
|    loss                 | 13.7         |
|    n_updates            | 60           |
|    policy_gradient_loss | -0.0036      |
|    value_loss           

                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-29.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-30.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-30.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-30.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-31.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-31.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-31.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-32.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-32.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-32.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-33.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-33.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-33.mp4
Moviepy - Building video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-34.mp4.
Moviepy - Writing video /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-34.mp4



                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-34.mp4
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 462         |
|    ep_rew_mean          | -462        |
| time/                   |             |
|    fps                  | 148         |
|    iterations           | 8           |
|    time_elapsed         | 110         |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.008670125 |
|    clip_fraction        | 0.104       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.05       |
|    explained_variance   | 0.00981     |
|    learning_rate        | 0.0003      |
|    loss                 | 10.9        |
|    n_updates            | 70          |
|    policy_gradient_loss | -0.0107     |
|    value_loss           | 65          |
---

                                                                                                     

Moviepy - Done !
Moviepy - video ready /Users/dk/Documents/Academic/UT2023/CS395T_Fall/cs395t-final-project/gym-results/oracle/train/rl-video-episode-35.mp4


We now generate the oracle dataset using the trained policy model.

In [None]:
# # Generate oracle data using the trained policy
# test_output_folder = os.path.join(output_folder, 'test')
# shutil.rmtree(test_output_folder, ignore_errors=True)
# os.makedirs(test_output_folder, exist_ok=True)

# env = wrap_around_video_recorder(
#     get_env(),
#     test_output_folder,
# )

# trajectories = []  # trajectories is a list of lists
# seed = 1
# trajectories.append([])
# obs, info = env.reset(seed=seed)
# T = 10000
# for i in range(T):
#     trajectories[-1].append(obs)
#     action, _state = model.predict(obs, deterministic=True)
#     trajectories[-1].append(action)
#     obs, reward, done, truncated, info = env.step(action)
#     trajectories[-1].append(reward)
#     if done or truncated:
#         if i + 1 == T:
#             # will close outside the loop, no need to reset
#             continue
#         trajectories[-1].append(obs)
#         seed += 1  # try a new seed
#         trajectories.append([])
#         obs, info = env.reset(seed=seed)
# trajectories[-1].append(obs)
# env.close()

In [None]:
# data_output_folder = os.path.join(output_folder, 'data')
# shutil.rmtree(data_output_folder, ignore_errors=True)
# os.makedirs(data_output_folder, exist_ok=True)

# for e_idx, trajectory in enumerate(trajectories):
#     assert len(trajectory) % 3 == 1
#     states, actions, rewards = [], [], []
#     for idx, elem in enumerate(trajectory):
#         if idx % 3 == 0:    # state
#             states.append(elem)
#         elif idx % 3 == 1:  # action
#             actions.append(elem)
#         else:               # reward
#             rewards.append(elem)
#     data = {
#         'states': np.stack(states),
#         'actions': np.stack(actions),
#         'rewards': np.array(rewards),
#     }
#     filename = f'episode-{e_idx}.npz'
#     filepath = os.path.join(data_output_folder, filename)
#     np.savez_compressed(filepath, **data)


### Imitation Learning Using AIRL

In [None]:
# model.save('my-model')

# from stable_baselines3.ppo import MlpPolicy
# newmodel = PPO.load('my-model.zip')
# type(newmodel)

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

from imitation.algorithms.adversarial.airl import AIRL
from imitation.data import rollout
from imitation.data.wrappers import RolloutInfoWrapper
from imitation.policies.serialize import load_policy
from imitation.rewards.reward_nets import BasicShapedRewardNet
from imitation.util.networks import RunningNorm
from imitation.util.util import make_vec_env

In [None]:
# Shared seed: used for the vectorized env RNG, the expert rollout RNG, the
# learner PPO, and env.seed() before each evaluation.
SEED = 42

In [None]:
# Vectorized Acrobot env (8 copies) used for expert rollouts and AIRL training.
# NOTE: this rebinds `env`, replacing the single recorded env used to train the
# oracle policy above.
env = make_vec_env(
    "Acrobot-v1",
    rng=np.random.default_rng(SEED),
    n_envs=8,
    post_wrappers=[
        # FixEpisodeLength,  # disabled until the wrapper works
        lambda env, _: RolloutInfoWrapper(env),  # to compute rollouts
    ],
)

In [None]:
# The policy of the PPO model trained above acts as the expert demonstrator.
expert = model.policy
expert

In [None]:
# Collect expert demonstrations in the vectorized env, sampling until at least
# 60 episodes have been gathered.
rollouts = rollout.rollout(
    expert,
    env,
    rollout.make_sample_until(min_episodes=60),
    rng=np.random.default_rng(SEED),
)

In [None]:
# PPO learner that AIRL trains as its generator (passed as gen_algo below).
learner = PPO(
    env=env,
    policy="MlpPolicy",
    batch_size=64,
    ent_coef=0.0,
    learning_rate=0.0005,
    gamma=0.95,
    clip_range=0.1,
    vf_coef=0.1,
    n_epochs=5,
    seed=SEED,
)
# Note: same as PPO2.
# Multi-agent learning / parallelism (vectorized envs): https://stable-baselines.readthedocs.io/en/master/guide/vec_envs.html

In [None]:
# Reward network handed to the AIRL trainer below; built from the env's
# observation/action spaces, with RunningNorm as the input normalization layer.
reward_net = BasicShapedRewardNet(
    observation_space=env.observation_space,
    action_space=env.action_space,
    normalize_input_layer=RunningNorm,
)

In [None]:
# AIRL trainer: uses the expert rollouts as demonstrations, `learner` as the
# generator algorithm, and `reward_net` as the reward model, all in `env`.
airl_trainer = AIRL(
    demonstrations=rollouts,
    demo_batch_size=2048,
    gen_replay_buffer_capacity=512,
    n_disc_updates_per_round=16,
    venv=env,
    gen_algo=learner,
    reward_net=reward_net,
    allow_variable_horizon=True,  # TODO: Implement FixEpisodeLength() wrapper and disable this
)

In [None]:
# Baseline: re-seed the env and evaluate the learner for 100 episodes before
# any AIRL training. Per-episode rewards are kept; the second return value is
# discarded.
env.seed(SEED)
learner_rewards_before_training, _ = evaluate_policy(
    learner, env, 100, return_episode_rewards=True,
)


In [None]:
# Run AIRL, then repeat the same seeded 100-episode evaluation for comparison
# with the pre-training baseline.
airl_trainer.train(20000)  # Train for 2_000_000 steps to match expert.
env.seed(SEED)
learner_rewards_after_training, _ = evaluate_policy(
    learner, env, 100, return_episode_rewards=True,
)
# TODO: check whether evaluate_policy has a deterministic=True option


In [None]:
# Compare the learner's mean episode reward after vs. before AIRL training.
print("mean reward after training:", np.mean(learner_rewards_after_training))
print("mean reward before training:", np.mean(learner_rewards_before_training))

In [None]:
#EvalCallback for time series of learning curve
#RL evaluation source: https://arxiv.org/abs/1709.06560