## Install Packages

In [4]:
!apt-get install -y swig
!pip install box2d-py pygame gymnasium[box2d] stable-baselines3[extra]

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
swig is already the newest version (4.0.2-1ubuntu1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


## Import Lib

In [5]:
import numpy as np
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack


## Train PPO Agent

In [6]:
env = make_vec_env(
    "CarRacing-v3",
    n_envs=1,
    env_kwargs={"render_mode": "rgb_array"},
)
env = VecFrameStack(env, n_stack=4)

model = PPO("CnnPolicy", env, verbose=1)

model.learn(total_timesteps=100_000)

model.save("ppo_carracing_model")
env.close()

  from pkg_resources import resource_stream, resource_exists
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)


Using cuda device
Wrapping the env in a VecTransposeImage.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -55.9    |
| time/              |          |
|    fps             | 79       |
|    iterations      | 1        |
|    time_elapsed    | 25       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | -53.6       |
| time/                   |             |
|    fps                  | 71          |
|    iterations           | 2           |
|    time_elapsed         | 57          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008728069 |
|    clip_fraction        | 0.0931      |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.26       |
|    explaine

## Test PPO Agent

In [7]:
test_env = make_vec_env(
    "CarRacing-v3",
    n_envs=1,
    env_kwargs={"render_mode": "rgb_array"},
)
test_env = VecFrameStack(test_env, n_stack=4)

model = PPO.load("ppo_carracing_model", env=test_env)

n_episodes = 5
scores = []

for ep in range(n_episodes):
    obs = test_env.reset()
    done = False
    total_reward = 0

    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, info = test_env.step(action)
        total_reward += reward[0]

    scores.append(total_reward)
    print(f"🎯 Episode {ep+1}: Score = {total_reward:.2f}")

test_env.close()

mean_score = np.mean(scores)
print(f"\n✅ Average Score over {n_episodes} episodes: {mean_score:.2f}")

Wrapping the env in a VecTransposeImage.
🎯 Episode 1: Score = 826.74
🎯 Episode 2: Score = 523.29
🎯 Episode 3: Score = 615.66
🎯 Episode 4: Score = 305.90
🎯 Episode 5: Score = 252.74

✅ Average Score over 5 episodes: 504.86
