# Testing DDPG

## Imports

In [12]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
import matplotlib.pyplot as plt
import numpy as np
import os

In [None]:


log_dire = "/Users/rashad/Documents/Uni/Year 4/BIOE70077 - Reinforcement Learning/Coursework/Coursework 2/gym"
ids = ["Walker2d-v5", "Hopper-v5"]
log_dirs = []
for i in range(len(ids)):
    log_dir = log_dire + "/" + str(ids[i])
    os.makedirs(log_dir, exist_ok=True)
    env = gym.make(ids[i], render_mode="rgb_array")
    env = Monitor(env, log_dir)
    model = PPO.load

    log_dirs.append(log_dir)
    Train the model
    model.learn(total_timesteps=1000000, progress_bar=False)  # Set large timesteps but rely on episodes to stop
    env.close()
    #model.save(log_dir + "/final_model")

Code from stable_baselines results_plotter modified for prettiness

In [5]:
from typing import Callable, List, Optional, Tuple

import numpy as np
import pandas as pd

# import matplotlib
# matplotlib.use('TkAgg')  # Can change to 'Agg' for non-interactive mode
from matplotlib import pyplot as plt

from stable_baselines3.common.monitor import load_results

X_TIMESTEPS = "timesteps"
X_EPISODES = "episodes"
X_WALLTIME = "walltime_hrs"
POSSIBLE_X_AXES = [X_TIMESTEPS, X_EPISODES, X_WALLTIME]
EPISODES_WINDOW = 100


def rolling_window(array: np.ndarray, window: int) -> np.ndarray:
    """
    Apply a rolling window to a np.ndarray

    :param array: the input Array
    :param window: length of the rolling window
    :return: rolling window on the input array
    """
    shape = array.shape[:-1] + (array.shape[-1] - window + 1, window)
    strides = (*array.strides, array.strides[-1])
    return np.lib.stride_tricks.as_strided(array, shape=shape, strides=strides)


def window_func(var_1: np.ndarray, var_2: np.ndarray, window: int, func: Callable) -> Tuple[np.ndarray, np.ndarray]:
    """
    Apply a function to the rolling window of 2 arrays

    :param var_1: variable 1
    :param var_2: variable 2
    :param window: length of the rolling window
    :param func: function to apply on the rolling window on variable 2 (such as np.mean)
    :return:  the rolling output with applied function
    """
    var_2_window = rolling_window(var_2, window)
    function_on_var2 = func(var_2_window, axis=-1)
    return var_1[window - 1 :], function_on_var2


def ts2xy(data_frame: pd.DataFrame, x_axis: str) -> Tuple[np.ndarray, np.ndarray]:
    """
    Decompose a data frame variable to x and ys

    :param data_frame: the input data
    :param x_axis: the axis for the x and y output
        (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
    :return: the x and y output
    """
    if x_axis == X_TIMESTEPS:
        x_var = np.cumsum(data_frame.l.values)
        y_var = data_frame.r.values
    elif x_axis == X_EPISODES:
        x_var = np.arange(len(data_frame))
        y_var = data_frame.r.values
    elif x_axis == X_WALLTIME:
        # Convert to hours
        x_var = data_frame.t.values / 3600.0
        y_var = data_frame.r.values
    else:
        raise NotImplementedError
    return x_var, y_var


def plot_curves(
    xy_list: List[Tuple[np.ndarray, np.ndarray]], x_axis: str, title: str, figsize: Tuple[int, int] = (8, 2)
) -> None:
    """
    plot the curves

    :param xy_list: the x and y coordinates to plot
    :param x_axis: the axis for the x and y output
        (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
    :param title: the title of the plot
    :param figsize: Size of the figure (width, height)
    """

    plt.figure(title, figsize=figsize, facecolor='#121212', dpi=300)
    max_x = max(xy[0][-1] for xy in xy_list)
    min_x = 0
    for _, (x, y) in enumerate(xy_list):
        plt.scatter(x, y, s=2, c=y, cmap="RdYlGn")
        # Do not plot the smoothed curve at all if the timeseries is shorter than window size.

        # Compute and plot rolling mean with window of size EPISODE_WINDOW
        x, y_mean = window_func(x, y, EPISODES_WINDOW, np.mean)
        plt.plot(x, y_mean, label="mean")
        plt.rcParams['axes.facecolor'] = '#121212'
        plt.rcParams['axes.edgecolor'] = 'white'
        plt.rcParams['axes.spines.top'] = False
        plt.rcParams['axes.spines.right'] = False
    plt.xlim(min_x, max_x)
    plt.title(title, color='white')
    plt.yticks(np.arange(round(min(y) / 500.0) * 500.0, max(y), step=500), color='white')
    plt.xticks(color='white')
    plt.xlabel("Timesteps (1e7)", color='white')
    plt.ylabel("Episode Rewards", color='white')
    plt.tight_layout()


def plot_results(
    dirs: List[str], num_timesteps: Optional[int], x_axis: str, task_name: str, figsize: Tuple[int, int] = (8, 2), 
) -> None:
    """
    Plot the results using csv files from ``Monitor`` wrapper.

    :param dirs: the save location of the results to plot
    :param num_timesteps: only plot the points below this value
    :param x_axis: the axis for the x and y output
        (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
    :param task_name: the title of the task to plot
    :param figsize: Size of the figure (width, height)
    """
    
    data_frames = []
    for folder in dirs:
        data_frame = load_results(folder)
        if num_timesteps is not None:
            data_frame = data_frame[data_frame.l.cumsum() <= num_timesteps]
        data_frames.append(data_frame)
    xy_list = [ts2xy(data_frame, x_axis) for data_frame in data_frames]
    plot_curves(xy_list, x_axis, task_name, figsize)


In [None]:
from stable_baselines3.common import results_plotter
titles = ["Walker 2D", "Hopper"]
ids = ["Walker2d-v5", "Hopper-v5"]
for i in range(len(ids)):
    
    title = f"{titles[i]} using PPO over 1m timesteps"
    plot_results(
    [  log_dirs[i]], 1e7, results_plotter.X_TIMESTEPS, title, figsize=(8,5),
)
    

IndexError: index -1 is out of bounds for axis 0 with size 0

<Figure size 2400x1500 with 0 Axes>

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
import os

# Define the list of environments
env_ids = ["Walker2d-v5", "Hopper-v5"]

for env_id in env_ids:
    # Paths for the saved model and videos
    model_path = f"/Users/rashad/Documents/Uni/Year 4/BIOE70077 - Reinforcement Learning/Coursework/Coursework 2/gym/{env_id}/final_model"
    video_dir = f"/Users/rashad/Documents/Uni/Year 4/BIOE70077 - Reinforcement Learning/Coursework/Coursework 2/gym/{env_id}/videos"
    
    # Create the video directory if it doesn't exist
    os.makedirs(video_dir, exist_ok=True)
    
    # Create the environment with the RecordVideo wrapper
    env = gym.make(env_id, render_mode="rgb_array")
    env = gym.wrappers.RecordVideo(env, video_folder=video_dir, video_length=1000, fps=30)
    
    # Load the saved PPO model
    model = PPO.load(model_path)
    
    # Run the policy in the environment and record the video
    obs, _ = env.reset()
    done = False
    max_steps = 1000  # Limit to 1000 steps
    step_count = 0
    while not done and step_count < max_steps:
        # Predict the action using the trained model
        action, _ = model.predict(obs)
        obs, reward, done, _, _ = env.step(action)
        step_count += 1
    
    env.close()
    print(f"Video saved to {video_dir}")


  logger.warn(


Video saved to /Users/rashad/Documents/Uni/Year 4/BIOE70077 - Reinforcement Learning/Coursework/Coursework 2/gym/HalfCheetah-v5/videos


  logger.warn(
Exception: code() takes at most 16 arguments (18 given)
Exception: code() takes at most 16 arguments (18 given)


Video saved to /Users/rashad/Documents/Uni/Year 4/BIOE70077 - Reinforcement Learning/Coursework/Coursework 2/gym/Walker2d-v5/videos


  logger.warn(
Exception: code() takes at most 16 arguments (18 given)
Exception: code() takes at most 16 arguments (18 given)


Video saved to /Users/rashad/Documents/Uni/Year 4/BIOE70077 - Reinforcement Learning/Coursework/Coursework 2/gym/Hopper-v5/videos


: 