<a href="https://colab.research.google.com/github/kuds/rl-unity-soccer/blob/main/%5BAtari%20Tennis%5D%20Reinforcement%20Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install swig

Collecting swig
  Downloading swig-4.2.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (3.6 kB)
Downloading swig-4.2.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: swig
Successfully installed swig-4.2.1


In [None]:
!pip install gymnasium gymnasium[atari] stable_baselines3 pettingzoo multi-agent-ale-py

Collecting multi-agent-ale-py
  Downloading multi-agent-ale-py-0.1.11.tar.gz (551 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m552.0/552.0 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: multi-agent-ale-py


In [None]:
"""
To run this code, make sure you have the necessary packages installed:

pip install "gymnasium[atari,accept-rom-license]"
pip install "stable-baselines3[extra]"

This code trains an agent using PPO to play Atari Tennis.
"""

import gymnasium as gym
from pettingzoo.atari import tennis_v3
from stable_baselines3 import PPO
from stable_baselines3.common.atari_wrappers import (
    NoopResetEnv, MaxAndSkipEnv, EpisodicLifeEnv,
    FireResetEnv, WarpFrame, ClipRewardEnv
)
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
import time

def make_env(env_id, render_mode="rgb_array"):
    """
    Create a single-agent Atari environment and apply the preprocessing wrappers.

    The environment is built with ``gym.make`` so that it is a regular
    Gymnasium ``Env``; the Stable-Baselines3 Atari wrappers used below expect
    that interface and do not work with PettingZoo's multi-agent AEC
    environments.

    Args:
        env_id: Gymnasium environment id, e.g. ``"ALE/Tennis-v5"``.
        render_mode: Render mode forwarded to ``gym.make``. Defaults to
            ``"rgb_array"`` so that training does not try to open a window
            per environment (which also fails on headless hosts such as Colab).

    Returns:
        The wrapped environment, ready to be placed in a vectorized env.
    """
    env = gym.make(env_id, render_mode=render_mode)

    # Wrapper order matters: no-op starts and frame skipping operate on raw
    # frames, observation warping and reward clipping come last.
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    env = EpisodicLifeEnv(env)
    # Only games that expose a FIRE action need it pressed after reset.
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    env = ClipRewardEnv(env)
    return env

def main():
    """
    Train a PPO agent on Atari Tennis, save it, then run the evaluation loop.

    The training environment is always closed, even if model construction,
    training, or saving raises, so emulator instances are not leaked in a
    long-lived notebook kernel.
    """
    # Environment ID for Atari Tennis
    env_id = "ALE/Tennis-v5"

    # Number of parallel environments (increase for faster training)
    num_envs = 8  # You can adjust this number

    # Create the vectorized environment; each lambda builds its own env copy
    env = DummyVecEnv([lambda: make_env(env_id) for _ in range(num_envs)])

    # Stack frames (for temporal information)
    env = VecFrameStack(env, n_stack=4)

    try:
        # Create the PPO agent with CNN policy (since observations are images)
        model = PPO("CnnPolicy", env, verbose=1)

        # Train the agent
        total_timesteps = 10_000_000  # Adjust as needed
        model.learn(total_timesteps=total_timesteps)

        # Save the model so evaluate_agent() can load it from disk
        model.save("ppo_atari_tennis")
    finally:
        # Release the training environments whether or not training succeeded
        env.close()

    # Evaluate the trained agent (only reached when training and saving succeeded)
    evaluate_agent()

def evaluate_agent(num_episodes=5):
    """
    Load the saved PPO model and watch it play Atari Tennis.

    Args:
        num_episodes: Number of finished episodes to play before returning.
            Pass ``None`` to keep playing until interrupted.

    The evaluation environment is closed on every exit path, including a
    ``KeyboardInterrupt`` raised while the loop is running.
    """
    # Create the environment for evaluation
    env_id = "ALE/Tennis-v5"
    base_env = gym.make(env_id, render_mode='human')

    # Apply the same preprocessing wrappers that were used for training so the
    # observations match what the policy was trained on.
    base_env = NoopResetEnv(base_env, noop_max=30)
    base_env = MaxAndSkipEnv(base_env, skip=4)
    base_env = EpisodicLifeEnv(base_env)
    if 'FIRE' in base_env.unwrapped.get_action_meanings():
        base_env = FireResetEnv(base_env)
    base_env = WarpFrame(base_env)
    base_env = ClipRewardEnv(base_env)

    # Stack frames. The factory closes over `base_env`, a name that is never
    # rebound, so it cannot accidentally return the vectorized wrapper.
    env = DummyVecEnv([lambda: base_env])
    env = VecFrameStack(env, n_stack=4)

    try:
        # Load the trained model
        model = PPO.load("ppo_atari_tennis")

        obs = env.reset()
        episodes_done = 0
        while num_episodes is None or episodes_done < num_episodes:
            action, _ = model.predict(obs)
            # Rendering is handled by the environment when render_mode='human'
            obs, rewards, dones, infos = env.step(action)
            # The vectorized env resets itself when an episode ends and `obs`
            # already holds the first observation of the next episode, so no
            # manual reset is needed here.
            if dones[0]:
                episodes_done += 1
    finally:
        env.close()

# Entry point: call main() when this cell/script is executed directly
# (skipped when the code is imported as a module).
if __name__ == "__main__":
    main()
