<a href="https://colab.research.google.com/github/kuds/rl-car-racing/blob/main/%5BCar%20Racing%5D%20Proximal%20Policy%20Optimization%20(PPO).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Extra packages installed ahead of the main RL stack.
# NOTE(review): swig is presumably needed to build the Box2D bindings pulled in by
# `gymnasium[box2d]` in the next cell, and moviepy to write the recorded video — confirm.
!pip install swig moviepy



In [2]:
!pip install gymnasium gymnasium[box2d] stable_baselines3 rl_zoo3



In [3]:
import gymnasium
from gymnasium.wrappers import RecordVideo

import stable_baselines3
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecFrameStack, VecVideoRecorder
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from stable_baselines3.common.vec_env import VecTransposeImage
from stable_baselines3.common.evaluation import evaluate_policy

import platform

import rl_zoo3
import torch
import time
# Let cuDNN benchmark its convolution algorithms and keep the fastest one.
# NOTE(review): this trades some run-to-run determinism for speed — confirm that is intended.
torch.backends.cudnn.benchmark = True

import warnings
# Hide DeprecationWarning messages so they do not clutter the cell outputs.
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [4]:
# Report the versions of the interpreter and key libraries, one per line,
# so the environment this notebook ran in is recorded alongside its outputs.
version_report = (
    ("Python Version: {}", platform.python_version()),
    ("Is Cuda Available: {}", torch.cuda.is_available()),
    ("Torch Version: {}", torch.__version__),
    ("Cuda Version: {}", torch.version.cuda),
    ("Stable Baseline Version: {}", stable_baselines3.__version__),
    ("RL Zoo Version: {}", rl_zoo3.__version__),
    ("Gymnasium Version: {}", gymnasium.__version__),
)
for template, value in version_report:
    print(template.format(value))

Python Version: 3.10.12
Is Cuda Available: True
Torch Version: 2.3.1+cu121
Cuda Version: 12.1
Stable Baseline Version: 2.3.2
RL Zoo Version: 2.3.0
Gymnasium Version: 0.29.1


In [None]:
# Train a PPO agent on CarRacing-v2 from stacked image frames, keep the best
# checkpoint found during periodic evaluation, then report a final score.

# --- Settings a reader might want to tune -----------------------------------
ENV_ID = "CarRacing-v2"
SEED = 0                       # seeds env creation and PPO so runs are more repeatable
N_TRAIN_ENVS = 32              # parallel environments used to collect rollouts
N_STACK = 4                    # consecutive frames stacked into one observation
TOTAL_TIMESTEPS = 500_000
EVAL_FREQ_TIMESTEPS = 16_000   # run the evaluation callback about this often
N_FINAL_EVAL_EPISODES = 10     # more than one episode so the reported std is meaningful
LOG_DIR = "./logs/"            # EvalCallback writes best_model.zip and its logs here

# Training environment: N_TRAIN_ENVS copies with frame stacking.
# No explicit VecTransposeImage here — PPO adds that wrapper itself for image
# observations when it wraps the environment.
env = make_vec_env(ENV_ID, n_envs=N_TRAIN_ENVS, seed=SEED)
env = VecFrameStack(env, n_stack=N_STACK)

# Separate single-env evaluation environment. It is transposed explicitly so its
# wrapper stack matches the one the model trains on. The seed is offset past the
# training envs so evaluation does not reuse a training env's seed.
env_val = make_vec_env(ENV_ID, n_envs=1, seed=SEED + N_TRAIN_ENVS)
env_val = VecFrameStack(env_val, n_stack=N_STACK)
env_val = VecTransposeImage(env_val)

try:
    # eval_freq is counted in callback calls, and each call advances every
    # training env by one step, so divide the timestep budget by the env count
    # (16_000 // 32 == 500 calls).
    eval_callback = EvalCallback(
        env_val,
        best_model_save_path=LOG_DIR,
        log_path=LOG_DIR,
        eval_freq=max(EVAL_FREQ_TIMESTEPS // N_TRAIN_ENVS, 1),
        render=False,
    )

    # Initialize PPO with a CNN policy for the image observations
    model = PPO("CnnPolicy", env, verbose=1, seed=SEED)

    # Train the model
    model.learn(total_timesteps=TOTAL_TIMESTEPS, progress_bar=True, callback=eval_callback)

    # Save the final model (the best-scoring checkpoint is saved separately by EvalCallback)
    model.save("ppo_car_racing")

    # Final score on the held-out evaluation env. Using the 32-env training env
    # for a single episode would step 31 idle envs and always report a std of 0.
    mean_reward, std_reward = evaluate_policy(
        model, env_val, n_eval_episodes=N_FINAL_EVAL_EPISODES
    )
    print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")
finally:
    # Release the simulator instances even if training is interrupted; the next
    # cell rebinds `env`, which would otherwise leave these open.
    env.close()
    env_val.close()

In [None]:
# Load the best checkpoint saved during training and record it driving CarRacing.
# The rollout is written by VecVideoRecorder as a video file (not a GIF) under ./videos/.

VIDEO_LENGTH = 1000  # number of environment steps to record

# The environment must be created with render_mode="rgb_array": VecVideoRecorder
# builds the video from rendered frames and refuses any other render mode.
# The frame stacking and transpose wrappers mirror the evaluation env used in training.
env = make_vec_env("CarRacing-v2", n_envs=1, env_kwargs={"render_mode": "rgb_array"})
env = VecFrameStack(env, n_stack=4)
env = VecTransposeImage(env)

try:
    # Load the best model
    best_model_path = "./logs/best_model.zip"
    best_model = PPO.load(best_model_path, env=env)

    # Start recording at step 0 and stop after VIDEO_LENGTH steps
    env = VecVideoRecorder(
        env,
        "./videos/",
        record_video_trigger=lambda step: step == 0,
        video_length=VIDEO_LENGTH,
        name_prefix="best_model_car_racing",
    )

    obs = env.reset()
    for _ in range(VIDEO_LENGTH):
        action, _states = best_model.predict(obs, deterministic=True)
        # The recorder captures a frame on every step, so no explicit render() call is needed.
        obs, rewards, dones, info = env.step(action)
finally:
    # Closing finalizes the video file and releases the simulator, even if the
    # rollout raised part-way through.
    env.close()