In [1]:
import os
import matplotlib.pyplot as plt
import numpy as np
import ale_py
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack, VecVideoRecorder
from stable_baselines3.common.atari_wrappers import AtariWrapper
from datetime import datetime

In [2]:
# Step 1: Create Test Run Folder
def create_test_folder(base_folder="test_ppo"):
    """
    Create a timestamped output directory for one test run.

    The resulting layout is
    ``<base_folder>/test_run_<YYYYmmdd-HHMMSS>/videos``. Directories that
    already exist are left untouched.

    Args:
        base_folder: Parent directory that collects all test runs.

    Returns:
        Path of the run directory (the parent of ``videos``).
    """
    run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_folder, "test_run_" + run_stamp)
    # Create the levels from the outside in: base folder, run folder, videos.
    for directory in (base_folder, run_dir, os.path.join(run_dir, "videos")):
        os.makedirs(directory, exist_ok=True)
    return run_dir

# Step 2: Save Reward Plot
def save_plot(rewards, moving_avg, output_folder, filename="rewards_plot.png", title=None):
    """
    Save a line plot of per-episode rewards, optionally with a second curve.

    Args:
        rewards: Sequence with one reward value per episode.
        moving_avg: Optional sequence drawn as a second curve; pass ``None``
            to omit it. The legend labels it "Moving Average (window=10)",
            so the caller is expected to have computed it with that window.
        output_folder: Directory the image is written into. It is not
            created here.
        filename: Name of the image file inside ``output_folder``. Calls that
            share a folder and a file name overwrite each other, so give
            each plot of a run its own name.
        title: Plot title. When omitted, the title states the actual number
            of plotted episodes.

    Returns:
        None. The figure is written to disk and then closed.
    """
    if title is None:
        # Derive the episode count from the data; a fixed "100" is wrong for
        # any other number of episodes.
        title = f"Reward Progression over {len(rewards)} Episodes"

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        ax.plot(rewards, label="Score", color="indigo", alpha=0.6)
        if moving_avg is not None:
            ax.plot(moving_avg, label="Moving Average (window=10)", color="blue")
        ax.set_xlabel("Episode")
        ax.set_ylabel("Rewards")
        ax.set_title(title)
        ax.legend()
        ax.grid(True)
        fig.savefig(os.path.join(output_folder, filename))
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)

# Step 3: Record Video
def record_video(model, env, video_folder, video_length=1000, dones_per_game=5):
    """
    Record the agent playing and return the reward collected before each `done`.

    The environment is wrapped in a ``VecVideoRecorder`` that starts recording
    at step 0 and writes files prefixed ``test_performance`` into
    ``video_folder``. The model then acts deterministically until either
    ``dones_per_game`` `done` signals have been seen or ``video_length`` steps
    have been taken, whichever comes first.

    Args:
        model: Object with ``predict(obs, deterministic=True)`` returning
            ``(action, state)``.
        env: Vectorised environment to record. The `done` flag is used
            directly as a truth value, so this assumes a single
            sub-environment.
        video_folder: Directory the recorder writes the video into.
        video_length: Maximum number of steps to play and record.
        dones_per_game: Number of `done` signals after which playing stops.
            NOTE(review): with the Atari wrapper used in this notebook each
            `done` is presumably one lost life, making 5 one full Breakout
            game; confirm against the wrapper configuration.

    Returns:
        List with one accumulated reward per observed `done`, in order.
        Reward collected after the last `done` is discarded when the step
        limit ends the run.
    """
    # NOTE(review): this reset looks redundant with recorder.reset() below;
    # kept so the sequence of environment resets is unchanged.
    env.reset()
    # Use a separate name for the wrapper instead of shadowing the `env` argument.
    recorder = VecVideoRecorder(
        env,
        video_folder,
        record_video_trigger=lambda step: step == 0,
        video_length=video_length,
        name_prefix="test_performance",
    )
    segment_rewards = []
    try:
        obs = recorder.reset()
        running_reward = 0
        for _ in range(video_length):
            action, _state = model.predict(obs, deterministic=True)
            obs, reward, done, _info = recorder.step(action)
            running_reward += reward
            if done:
                segment_rewards.append(running_reward)
                running_reward = 0
                if len(segment_rewards) >= dones_per_game:
                    break
    finally:
        # Close the recorder even when prediction or stepping fails, so the
        # video file is not left unfinished.
        # NOTE(review): closing the wrapper presumably closes the wrapped env
        # as well; confirm before reusing `env` after this call.
        recorder.close()
    return segment_rewards

# Helper: run the model for n evaluation episodes and collect the rewards
def run_episodes(model, env, n_episodes=100, dones_per_game=5):
    """
    Play ``n_episodes`` games with the model and collect the rewards.

    One "episode" here is a run of ``dones_per_game`` consecutive `done`
    signals. The environment is reset at the start of every episode and the
    model acts deterministically.

    Args:
        model: Object with ``predict(obs, deterministic=True)`` returning
            ``(action, state)``.
        env: Vectorised environment. The `done` flag is used directly as a
            truth value, so this assumes a single sub-environment.
        n_episodes: Number of episodes to play.
        dones_per_game: Number of `done` signals that make up one episode.
            NOTE(review): with the Atari wrapper used in this notebook each
            `done` is presumably one lost life, making 5 one full Breakout
            game; confirm against the wrapper configuration.

    Returns:
        List with one entry per episode; each entry is a list holding the
        reward accumulated before each `done` of that episode. Sum an entry
        to get the total score of the episode.

    Note:
        The inner loop only ends on `done` signals; an environment that
        never reports `done` makes this call run forever.
    """
    all_games = []
    for _ in range(n_episodes):
        obs = env.reset()
        game_rewards = []
        running_reward = 0
        # The number of collected segments doubles as the done counter.
        while len(game_rewards) < dones_per_game:
            action, _state = model.predict(obs, deterministic=True)
            obs, reward, done, _info = env.step(action)
            running_reward += reward
            if done:
                game_rewards.append(running_reward)
                running_reward = 0
        all_games.append(game_rewards)
    return all_games


# Step 4: Create the preprocessed, frame-stacked Atari environment
def create_env(env_name="BreakoutNoFrameskip-v4", frame_stack=4):
    """
    Build the vectorised, frame-stacked Atari environment used for testing.

    A single environment is created with ``render_mode="rgb_array"``, wrapped
    in ``AtariWrapper`` with reward clipping disabled, placed in a
    ``DummyVecEnv`` and finally wrapped in ``VecFrameStack``.

    Args:
        env_name: Gymnasium environment id to create.
        frame_stack: Number of frames passed to ``VecFrameStack`` as ``n_stack``.

    Returns:
        The ``VecFrameStack``-wrapped vectorised environment.
    """
    def _build_single_env():
        # Register the ALE environments right before gym.make looks the id up.
        gym.register_envs(ale_py)
        raw_env = gym.make(env_name, render_mode="rgb_array")
        return AtariWrapper(raw_env, clip_reward=False)

    return VecFrameStack(DummyVecEnv([_build_single_env]), n_stack=frame_stack)

In [4]:
# Create the timestamped output folder for this run (default base folder: "test_ppo")
test_folder = create_test_folder()
# Build the evaluation environment with create_env's defaults
# (BreakoutNoFrameskip-v4, 4 stacked frames)
env = create_env()
# Load the saved PPO model and attach the evaluation environment to it.
# The path is relative to the notebook's working directory.
model_path = "models/ppo_breakout.zip"
model = PPO.load(model_path, env=env)

Wrapping the env in a VecTransposeImage.


[W NNPACK.cpp:64] Could not initialize NNPACK! Reason: Unsupported hardware.


In [18]:
video_folder = os.path.join(test_folder, "videos")

# Upper bound on the recorded steps. It is deliberately far larger than any
# game, so record_video stops on its own done count rather than on this cap.
MAX_VIDEO_STEPS = 10_000_000
video_rewards = record_video(model, env, video_folder, video_length=MAX_VIDEO_STEPS)

# Save the plot next to the video. Called like this, save_plot writes
# "rewards_plot.png"; storing it in test_folder would let the evaluation plot
# saved by the next cell overwrite it.
save_plot(video_rewards, None, video_folder)

print(f"Video rewards: {np.sum(video_rewards)}")
print(f"Gameplay video saved in: {video_folder}")

MoviePy - Building video /Users/martinsssssss/Desktop/7th Semester/Paradigms of ML/Project/GIT/part2/test_ppo/test_run_20241217-002134/videos/test_performance-step-0-to-step-10000000.mp4.
MoviePy - Writing video /Users/martinsssssss/Desktop/7th Semester/Paradigms of ML/Project/GIT/part2/test_ppo/test_run_20241217-002134/videos/test_performance-step-0-to-step-10000000.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready /Users/martinsssssss/Desktop/7th Semester/Paradigms of ML/Project/GIT/part2/test_ppo/test_run_20241217-002134/videos/test_performance-step-0-to-step-10000000.mp4
Video rewards: 32.0
Gameplay video saved in: test_ppo/test_run_20241217-002134/videos


In [19]:
# Number of evaluation games; the message below reports the count actually played.
N_EVAL_EPISODES = 100
rewards = run_episodes(model, env, n_episodes=N_EVAL_EPISODES)
# One total score per game. float(...) converts the NumPy sum (presumably
# float32) to a Python float, so the mean is computed and printed in double
# precision instead of showing single-precision noise.
ep_rewards = [float(np.sum(episode_rewards)) for episode_rewards in rewards]
print(f"Average reward over {len(ep_rewards)} episodes: {np.mean(ep_rewards)}")

save_plot(ep_rewards, None, test_folder)

Average reward over 100 episodes: 22.68000030517578
