In [1]:
# !pip install swing
# !pip install gymnasium[box2d]
# !pip install --upgrade moviepy
# !pip install stable-baselines3
# !sudo apt-get update
# !sudo apt-get install -y swig
# !sudo apt-get install -y python3-dev
# !pip install "gymnasium[box2d]"
# !pip install flappy-bird-gymnasium

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
# # %% Install required packages
# !pip install flappy-bird-gymnasium stable-baselines3 moviepy gymnasium[box2d]

# %% Imports
import os
import gymnasium
import flappy_bird_gymnasium
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.evaluation import evaluate_policy
from moviepy import ImageSequenceClip



  from pkg_resources import resource_stream, resource_exists


In [3]:
# %% Create directories
log_dir = "./ppo_logs/"
checkpoint_dir = "./checkpoints/"
os.makedirs(log_dir, exist_ok=True)
os.makedirs(checkpoint_dir, exist_ok=True)

# %% Create and wrap the environment
env = gymnasium.make("FlappyBird-v0", render_mode="rgb_array", use_lidar=True)
env = Monitor(env)
env = DummyVecEnv([lambda: env])
env = VecNormalize(env, norm_obs=True, norm_reward=True)

# %% Define the PPO model
model = PPO(
    "MlpPolicy",
    env,
    # verbose=1,
    device="cuda",  # 👈 Forces use of GPU
    tensorboard_log=log_dir,
    n_steps=1024,
    batch_size=64,
    gae_lambda=0.95,
    gamma=0.99,
    n_epochs=10,
    learning_rate=2.5e-4,
    clip_range=0.2
)

# %% Add checkpoint callback
checkpoint_callback = CheckpointCallback(
    save_freq=100_000,
    save_path=checkpoint_dir,
    name_prefix="ppo_flappy"
)

# %% Train the model
total_timesteps = 2_000_000
model.learn(total_timesteps=total_timesteps, progress_bar=True, callback=checkpoint_callback)



Output()

  logger.warn(f"{pre} is not within the observation space.")


<stable_baselines3.ppo.ppo.PPO at 0x2d3a6807eb0>

In [4]:
# %% Save model and VecNormalize stats
model_name = f"ppo_flappy_{total_timesteps}"
model.save(model_name)
env.save(f"{model_name}_vecnormalize.pkl")



In [12]:
# %% Evaluate the model
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"✅ Evaluation Result: Mean Reward = {mean_reward:.2f} ± {std_reward:.2f}")

env.close()

✅ Evaluation Result: Mean Reward = 15.70 ± 31.00


In [13]:
env = gymnasium.make("FlappyBird-v0", render_mode="rgb_array", use_lidar=True)
env = Monitor(env)
env = DummyVecEnv([lambda: env])
env = VecNormalize(env, norm_obs=True, norm_reward=True)


In [19]:
import numpy as np
from moviepy import ImageSequenceClip
import os

print("🎥 Recording 5 episodes of the trained agent...")

reward_queue = []
time_queue = []
video_output_dir = "videos"
os.makedirs(video_output_dir, exist_ok=True)

n_episodes = 5

for episode in range(1, n_episodes + 1):
    obs = env.reset()
    done = [False]
    frames = []
    total_reward = 0
    steps = 0

    while not done[0]:
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        total_reward += reward[0]
        steps += 1

        frame = env.envs[0].render()
        if isinstance(frame, np.ndarray) and frame.shape[-1] == 3:
            frames.append(frame)
        else:
            print(f"⚠️ Episode {episode}: Skipping a malformed frame at step {steps}.")

    # Track stats
    reward_queue.append(total_reward)
    time_queue.append(steps)

    # Save video
    if frames:
        clip = ImageSequenceClip(frames, fps=30)
        video_path = f"{video_output_dir}/{model_name}_episode_{episode}.mp4"
        clip.write_videofile(video_path, fps=30)
        print(f"✅ Saved video for Episode {episode} - Reward: {total_reward}, Steps: {steps}")
    else:
        print(f"⚠️ Episode {episode}: No valid frames captured.")

# After all episodes
print("\n📊 Summary of Evaluation Episodes:")
for i, (r, t) in enumerate(zip(reward_queue, time_queue), start=1):
    print(f"Episode {i}: Reward = {r:.2f}, Steps = {t}")

print(f"\n✅ Average Reward: {np.mean(reward_queue):.2f} ± {np.std(reward_queue):.2f}")
print(f"✅ Average Steps: {np.mean(time_queue):.2f} ± {np.std(time_queue):.2f}")


🎥 Recording 5 episodes of the trained agent...
MoviePy - Building video videos/ppo_flappy_2000000_episode_1.mp4.
MoviePy - Writing video videos/ppo_flappy_2000000_episode_1.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready videos/ppo_flappy_2000000_episode_1.mp4
✅ Saved video for Episode 1 - Reward: 11.18974781036377, Steps: 631
MoviePy - Building video videos/ppo_flappy_2000000_episode_2.mp4.
MoviePy - Writing video videos/ppo_flappy_2000000_episode_2.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready videos/ppo_flappy_2000000_episode_2.mp4
✅ Saved video for Episode 2 - Reward: -2.7562689781188965, Steps: 50
MoviePy - Building video videos/ppo_flappy_2000000_episode_3.mp4.
MoviePy - Writing video videos/ppo_flappy_2000000_episode_3.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready videos/ppo_flappy_2000000_episode_3.mp4
✅ Saved video for Episode 3 - Reward: -0.27411776781082153, Steps: 50
MoviePy - Building video videos/ppo_flappy_2000000_episode_4.mp4.
MoviePy - Writing video videos/ppo_flappy_2000000_episode_4.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready videos/ppo_flappy_2000000_episode_4.mp4
✅ Saved video for Episode 4 - Reward: -2.387612819671631, Steps: 50
MoviePy - Building video videos/ppo_flappy_2000000_episode_5.mp4.
MoviePy - Writing video videos/ppo_flappy_2000000_episode_5.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready videos/ppo_flappy_2000000_episode_5.mp4
✅ Saved video for Episode 5 - Reward: 33.7279052734375, Steps: 1457

📊 Summary of Evaluation Episodes:
Episode 1: Reward = 11.19, Steps = 631
Episode 2: Reward = -2.76, Steps = 50
Episode 3: Reward = -0.27, Steps = 50
Episode 4: Reward = -2.39, Steps = 50
Episode 5: Reward = 33.73, Steps = 1457

✅ Average Reward: 7.90 ± 13.89
✅ Average Steps: 447.60 ± 552.59
