In [1]:
import os
import gymnasium as gym
import numpy as np
import cv2
from gymnasium.wrappers import TransformObservation
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from gymnasium.spaces import Box

# ===============================================
# Environment Setup
# ===============================================

# Grayscale + Resize function to preprocess the observation
def preprocess_obs(obs):
    """
    Converts a raw RGB observation from the environment to a single-channel,
    resized grayscale image.
    """
    gray = cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY)
    resized = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)
    return np.expand_dims(resized, axis=-1).astype(np.uint8)

# Define the new observation space
new_obs_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)

# Factory function to create the wrapped environment
def make_env():
    """
    Creates and returns a single instance of the CarRacing-v3 environment
    with the necessary wrappers for preprocessing and monitoring.
    """
    env = gym.make("CarRacing-v3", render_mode="rgb_array", continuous=True)
    env = TransformObservation(env, func=preprocess_obs, observation_space=new_obs_space)
    env = Monitor(env)
    return env

# Create a vectorized environment from the factory function
env = DummyVecEnv([make_env])

# Stack 4 frames together to capture motion over time
env = VecFrameStack(env, n_stack=4, channels_order="last")

  from pkg_resources import resource_stream, resource_exists


In [2]:
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3 import PPO
import optuna

def objective(trial: optuna.Trial) -> float:
    """
    Objective function for Optuna to optimize PPO hyperparameters.
    """
    # Define the search space for PPO hyperparameters
    # You can choose which hyperparameters you want to tune
    # and what their search ranges should be.
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)
    n_steps = trial.suggest_int('n_steps', 2048, 8192, log=True)
    gamma = trial.suggest_uniform('gamma', 0.9, 0.999)
    ent_coef = trial.suggest_uniform('ent_coef', 0.0, 0.1)
    clip_range = trial.suggest_uniform('clip_range', 0.1, 0.4)
    batch_size = trial.suggest_int('batch_size', 64, 512, log=True)
    n_epochs = trial.suggest_int('n_epochs', 5, 20)

    # Note: For CarRacing, a CNN policy is used. You can also tune network architecture,
    # but let's start with the standard PPO hyperparameters first.
    
    # Create the PPO model with the suggested hyperparameters
    # Ensure you are using the correct policy (CnnPolicy) for the CarRacing environment
    model = PPO("CnnPolicy", 
                env, 
                learning_rate=learning_rate,
                n_steps=n_steps,
                gamma=gamma,
                ent_coef=ent_coef,
                clip_range=clip_range,
                batch_size=batch_size,
                n_epochs=n_epochs,
                verbose=0) # Set verbose to 0 to avoid printing too much info

    # Set up evaluation and pruning
    # It's crucial to evaluate the model periodically and report the score to Optuna.
    # Optuna can then use a pruner to stop unpromising trials early.
    eval_env = DummyVecEnv([make_env])
    eval_env = VecFrameStack(eval_env, n_stack=4, channels_order="last")
    
    # A custom callback is often used for this. Stable-Baselines3 provides an EvalCallback.
    # We will use this to evaluate the policy and report to Optuna.
    # You can also use a custom callback that integrates with Optuna's pruning.
    
    # A simple approach is to use evaluate_policy at the end of the training
    try:
        model.learn(total_timesteps=20_000, progress_bar=True) # Train for a set number of timesteps per trial
        mean_reward, _ = evaluate_policy(model, eval_env, n_eval_episodes=5, deterministic=True)
    finally:
        eval_env.close()

    return mean_reward

In [3]:
# Create the Optuna study with pruning enabled
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=1)
)

# Optimize the objective
study.optimize(objective, n_trials=50, n_jobs=-1)

# %%
# Print best result
print("Number of finished trials:", len(study.trials))
print("Best trial:", study.best_trial.params)
print(f"Best mean reward: {study.best_value}")

# Visualize the results
optuna.visualization.plot_optimization_history(study).show()
optuna.visualization.plot_param_importances(study).show()

[I 2025-08-02 13:32:01,967] A new study created in memory with name: no-name-7afb64d3-8264-412d-8ad4-1956afc44e5d
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)
  gamma = trial.suggest_uniform('gamma', 0.9, 0.999)
  ent_coef = trial.suggest_uniform('ent_coef', 0.0, 0.1)
  clip_range = trial.suggest_uniform('clip_range', 0.1, 0.4)
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7186 and n_envs=1)
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2806 and n_envs=1)
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=3356 and n_envs=1)
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2292 and n_envs=1)


: 

In [None]:
# Create the Optuna study. We want to maximize the mean reward.
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))

# Run the optimization
# n_trials is the number of hyperparameter combinations to try.
# You can increase this for a more thorough search.
study.optimize(objective, n_trials=50, n_jobs=-1) # n_jobs=-1 to use all cores

print("Number of finished trials:", len(study.trials))
print("Best trial:", study.best_trial.params)
print(f"Best mean reward: {study.best_value}")

# Visualize the results
optuna.visualization.plot_optimization_history(study).show()
optuna.visualization.plot_param_importances(study).show()