## Plotting after Training

Evaluate saved checkpoints with a separate evaluation script, then plot the average episode reward against the number of training steps.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3 import TD3

def evaluate_model(model_path, eval_env, n_episodes=20):
    """Return the mean episode return of a saved TD3 model.

    The model stored at ``model_path`` is loaded with ``eval_env`` attached
    and rolled out for ``n_episodes`` full episodes using deterministic
    actions. Each episode's rewards are summed without discounting.

    Args:
        model_path: Path of the saved model, passed to ``TD3.load``.
        eval_env: Environment whose ``reset()`` returns ``(obs, info)`` and
            whose ``step(action)`` returns a 5-tuple
            ``(obs, reward, done, truncated, info)``.
        n_episodes: Number of episodes to average over.

    Returns:
        ``np.mean`` of the per-episode reward sums.
    """
    agent = TD3.load(model_path, env=eval_env)
    returns = []

    for _episode in range(n_episodes):
        observation, _info = eval_env.reset()
        episode_return = 0.0
        finished = False

        while not finished:
            action, _ = agent.predict(observation, deterministic=True)
            observation, reward, terminated, truncated, _info = eval_env.step(action)
            episode_return += reward
            # An episode ends on either signal from the environment.
            finished = terminated or truncated
        returns.append(episode_return)

    return np.mean(returns)

# Suppose you have a list of checkpoints.
# Each file name is expected to end in "_<steps>_steps.zip"; the step count
# is parsed from that suffix below.
checkpoints = [
    "./checkpoints/td3_hockey_100000_steps.zip",
    "./checkpoints/td3_hockey_200000_steps.zip",
    "./checkpoints/td3_hockey_300000_steps.zip",
    # ...
]

eval_env = ...  # placeholder: re-create the evaluation environment here
timesteps = []
avg_rewards = []

for ckpt in checkpoints:
    # Splitting on "_" leaves "steps.zip" as the last token and the bare step
    # count as the second-to-last one, so no further stripping is needed.
    steps = int(ckpt.split("_")[-2])
    timesteps.append(steps)

    mean_return = evaluate_model(ckpt, eval_env, n_episodes=20)
    avg_rewards.append(mean_return)

# Plot the mean evaluation return of each checkpoint against its step count.
plt.figure()
plt.plot(timesteps, avg_rewards, marker='o')
plt.xlabel("Training Steps")
plt.ylabel("Average Episode Reward")
plt.title("TD3 Performance Over Time")
plt.show()
