In [8]:
import gymnasium
import flappy_bird_gymnasium
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.monitor import Monitor

# Checkpoint identity. Both the policy archive and the VecNormalize statistics
# file are derived from `model_name`, so switching checkpoints cannot pair a
# policy with normalization statistics saved from a different run.
total_timesteps = 2_000_000
model_name = f"ppo_flappy_{total_timesteps}"
vecnormalize_path = f"{model_name}_vecnormalize.pkl"


def make_env():
    """Build one Monitor-wrapped FlappyBird environment for evaluation.

    Returns:
        The "FlappyBird-v0" environment created with ``render_mode="rgb_array"``
        and ``use_lidar=True``, wrapped in ``Monitor``.
    """
    return Monitor(
        gymnasium.make("FlappyBird-v0", render_mode="rgb_array", use_lidar=True)
    )


# Recreate environment.
# A named factory is passed instead of `lambda: env`: the lambda closed over a
# name that is rebound on the very same line and only worked because
# DummyVecEnv invokes its factories immediately.
env = DummyVecEnv([make_env])
env = VecNormalize.load(vecnormalize_path, env)
# Evaluation mode: keep the loaded statistics fixed and report raw
# (un-normalized) rewards.
env.training = False
env.norm_reward = False

# Load model
model = PPO.load(model_name, env=env)




In [9]:
import numpy as np
from moviepy import ImageSequenceClip
import os

# Evaluation settings. `n_episodes` is defined first so the banner below always
# reports the number of episodes that will actually be recorded.
n_episodes = 5
video_output_dir = "videos"

print(f"🎥 Recording {n_episodes} episodes of the trained agent...")

# Per-episode results, filled in by the rollout loop.
reward_queue = []
time_queue = []

# Make sure the output directory exists before any video is written.
os.makedirs(video_output_dir, exist_ok=True)

# Roll out the policy for `n_episodes` episodes, recording one video and one
# (return, length) pair per episode.
for episode in range(1, n_episodes + 1):
    obs = env.reset()
    done = [False]
    frames = []
    total_reward = 0.0
    steps = 0

    while not done[0]:
        # Render BEFORE stepping. DummyVecEnv resets a finished sub-environment
        # inside step(), so a render issued after the terminal step would show
        # the first frame of the *next* episode. Rendering first also records
        # the initial state; the frame count still equals the step count.
        frame = env.envs[0].render()
        if isinstance(frame, np.ndarray) and frame.shape[-1] == 3:
            frames.append(frame)
        else:
            # `steps` here is the number of steps already taken in this episode.
            print(f"⚠️ Episode {episode}: Skipping a malformed frame at step {steps}.")

        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        # Accumulate as a Python float rather than in the array's native
        # precision, to avoid drift over long episodes.
        total_reward += float(reward[0])
        steps += 1

    # Track stats
    reward_queue.append(total_reward)
    time_queue.append(steps)

    # Save video
    if frames:
        video_path = f"{video_output_dir}/{model_name}_episode_{episode}.mp4"
        clip = ImageSequenceClip(frames, fps=30)
        try:
            clip.write_videofile(video_path, fps=30)
        finally:
            # Release the clip's resources even if encoding fails.
            clip.close()
        print(f"✅ Saved video for Episode {episode} - Reward: {total_reward}, Steps: {steps}")
    else:
        print(f"⚠️ Episode {episode}: No valid frames captured.")

# Final report: one line per recorded episode, then mean ± std of the
# episode returns and of the episode lengths.
print("\n📊 Summary of Evaluation Episodes:")
episode_stats = list(zip(reward_queue, time_queue))
for episode_idx, stats in enumerate(episode_stats, start=1):
    ep_reward, ep_steps = stats
    print(f"Episode {episode_idx}: Reward = {ep_reward:.2f}, Steps = {ep_steps}")

reward_mean = np.mean(reward_queue)
reward_std = np.std(reward_queue)
print(f"\n✅ Average Reward: {reward_mean:.2f} ± {reward_std:.2f}")

steps_mean = np.mean(time_queue)
steps_std = np.std(time_queue)
print(f"✅ Average Steps: {steps_mean:.2f} ± {steps_std:.2f}")


🎥 Recording 5 episodes of the trained agent...
MoviePy - Building video videos/ppo_flappy_2000000_episode_1.mp4.
MoviePy - Writing video videos/ppo_flappy_2000000_episode_1.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready videos/ppo_flappy_2000000_episode_1.mp4
✅ Saved video for Episode 1 - Reward: -1.5000016689300537, Steps: 50
MoviePy - Building video videos/ppo_flappy_2000000_episode_2.mp4.
MoviePy - Writing video videos/ppo_flappy_2000000_episode_2.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready videos/ppo_flappy_2000000_episode_2.mp4
✅ Saved video for Episode 2 - Reward: -1.5000016689300537, Steps: 50
MoviePy - Building video videos/ppo_flappy_2000000_episode_3.mp4.
MoviePy - Writing video videos/ppo_flappy_2000000_episode_3.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready videos/ppo_flappy_2000000_episode_3.mp4
✅ Saved video for Episode 3 - Reward: -1.5000016689300537, Steps: 50
MoviePy - Building video videos/ppo_flappy_2000000_episode_4.mp4.
MoviePy - Writing video videos/ppo_flappy_2000000_episode_4.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready videos/ppo_flappy_2000000_episode_4.mp4
✅ Saved video for Episode 4 - Reward: 4.299995422363281, Steps: 102
MoviePy - Building video videos/ppo_flappy_2000000_episode_5.mp4.
MoviePy - Writing video videos/ppo_flappy_2000000_episode_5.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready videos/ppo_flappy_2000000_episode_5.mp4
✅ Saved video for Episode 5 - Reward: 170.90122985839844, Steps: 1990

📊 Summary of Evaluation Episodes:
Episode 1: Reward = -1.50, Steps = 50
Episode 2: Reward = -1.50, Steps = 50
Episode 3: Reward = -1.50, Steps = 50
Episode 4: Reward = 4.30, Steps = 102
Episode 5: Reward = 170.90, Steps = 1990

✅ Average Reward: 34.14 ± 68.42
✅ Average Steps: 448.40 ± 771.06


