<a href="https://colab.research.google.com/github/shubhamt2897/Gymnasium_Robotics_Tutorial/blob/main/Fetch_Slide_SAC_hyperparameters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# 1. Pin NumPy to our standard, stable version FIRST for maximum compatibility
!pip install "numpy==1.26.4"

# 2. Install Optuna for tuning and all our other required libraries
!pip install -U gymnasium gymnasium-robotics stable-baselines3["extra"] mujoco optuna plotly

Collecting gymnasium-robotics
  Using cached gymnasium_robotics-1.3.1-py3-none-any.whl.metadata (8.7 kB)
Collecting mujoco
  Using cached mujoco-3.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (44 kB)
Collecting optuna
  Using cached optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting plotly
  Using cached plotly-6.1.2-py3-none-any.whl.metadata (6.9 kB)
Collecting stable-baselines3[extra]
  Using cached stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting mujoco
  Using cached mujoco-3.1.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (44 kB)
Collecting PettingZoo>=1.23.0 (from gymnasium-robotics)
  Using cached pettingzoo-1.25.0-py3-none-any.whl.metadata (8.9 kB)
Collecting glfw (from mujoco)
  Using cached glfw-2.9.0-py2.py27.py3.py30.py31.py32.py33.py34.py35.py36.py37.py38.p39.p310.p311.p312.p313-none-manylinux_2_28_x86_64.whl.metadata (5.4 kB)
Collecting alembic>=1.5.0 (from optuna)
  Using cached alembic-1.16.2

In [2]:
# Import all necessary libraries
import gymnasium as gym
import gymnasium_robotics
from stable_baselines3 import SAC, HerReplayBuffer
import optuna
import torch as th
import numpy as np
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

# --- Configuration ---
ENV_ID = "FetchSlide-v3"
# Let's increase the number of trials since we have a GPU
N_TRIALS = 50
# Training timesteps for EACH trial. Keep this relatively low to finish faster.
N_TIMESTEPS = 25000
# Number of episodes to evaluate EACH trained model
N_EVAL_EPISODES = 25


# --- The Objective Function for Optuna ---
def objective(trial: optuna.Trial) -> float:
    """
    Trains and evaluates an SAC model with hyperparameters suggested by Optuna.
    """
    print(f"\n--- Starting Trial #{trial.number} ---")

    # 1. Suggest Hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True)
    net_arch_str = trial.suggest_categorical("net_arch", ["small", "medium", "big"])
    net_arch_map = {"small": [64, 64], "medium": [128, 128], "big": [256, 256]}
    net_arch = net_arch_map[net_arch_str]
    policy_kwargs = dict(net_arch=net_arch)

    # 2. Create and Train the Model
    train_env = gym.make(ENV_ID)
    replay_buffer_class = HerReplayBuffer
    replay_buffer_kwargs = dict(n_sampled_goal=4, goal_selection_strategy="future")

    model = SAC(
        "MultiInputPolicy",
        train_env,
        learning_rate=learning_rate,
        policy_kwargs=policy_kwargs,
        replay_buffer_class=replay_buffer_class,
        replay_buffer_kwargs=replay_buffer_kwargs,
        verbose=0,
        device="cuda"
    )

    model.learn(total_timesteps=N_TIMESTEPS)

    # 3. Evaluate the Model
    eval_env = gym.make(ENV_ID)
    successful_episodes = 0
    for _ in range(N_EVAL_EPISODES):
        obs, _ = eval_env.reset()
        done = False
        while not done:
            action, _ = model.predict(obs, deterministic=True)
            obs, _, terminated, truncated, info = eval_env.step(action)
            done = terminated or truncated
            if done and info.get('is_success'):
                successful_episodes += 1

    eval_env.close()
    success_rate = successful_episodes / N_EVAL_EPISODES
    print(f"Trial #{trial.number} Finished. Success Rate: {success_rate:.2f}")

    # 4. Return the score
    return success_rate

# --- Start the Tuning Process ---
# Create the study and start the optimization
study = optuna.create_study(direction="maximize")
# We can set a timeout to avoid exceeding Colab's limits, e.g., 2 hours (7200s)
study.optimize(objective, n_trials=N_TRIALS, timeout=7200)

# --- Print the Best Results ---
print("\n--- Hyperparameter Tuning Complete ---")
print(f"Number of finished trials: {len(study.trials)}")
print("Best trial:")
best_trial = study.best_trial
print(f"  Value (Success Rate): {best_trial.value:.4f}")
print("  Params: ")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-06-22 10:39:27,965] A new study created in memory with name: no-name-17c26e2e-82de-42e4-878b-b1c5cbca2058



--- Starting Trial #0 ---


[I 2025-06-22 10:46:47,282] Trial 0 finished with value: 0.0 and parameters: {'learning_rate': 0.0003339156398126654, 'net_arch': 'big'}. Best is trial 0 with value: 0.0.


Trial #0 Finished. Success Rate: 0.00

--- Starting Trial #1 ---


[I 2025-06-22 10:53:53,814] Trial 1 finished with value: 0.0 and parameters: {'learning_rate': 3.6799030526689e-05, 'net_arch': 'big'}. Best is trial 0 with value: 0.0.


Trial #1 Finished. Success Rate: 0.00

--- Starting Trial #2 ---


[I 2025-06-22 11:01:25,809] Trial 2 finished with value: 0.0 and parameters: {'learning_rate': 0.000861910787043841, 'net_arch': 'medium'}. Best is trial 0 with value: 0.0.


Trial #2 Finished. Success Rate: 0.00

--- Starting Trial #3 ---


[I 2025-06-22 11:08:45,933] Trial 3 finished with value: 0.0 and parameters: {'learning_rate': 2.008294584526385e-05, 'net_arch': 'medium'}. Best is trial 0 with value: 0.0.


Trial #3 Finished. Success Rate: 0.00

--- Starting Trial #4 ---


[I 2025-06-22 11:16:16,522] Trial 4 finished with value: 0.0 and parameters: {'learning_rate': 0.0002571529659020239, 'net_arch': 'small'}. Best is trial 0 with value: 0.0.


Trial #4 Finished. Success Rate: 0.00

--- Starting Trial #5 ---


[I 2025-06-22 11:23:38,922] Trial 5 finished with value: 0.0 and parameters: {'learning_rate': 2.74906329065569e-05, 'net_arch': 'medium'}. Best is trial 0 with value: 0.0.


Trial #5 Finished. Success Rate: 0.00

--- Starting Trial #6 ---


[I 2025-06-22 11:30:50,598] Trial 6 finished with value: 0.0 and parameters: {'learning_rate': 1.9592461759987165e-05, 'net_arch': 'medium'}. Best is trial 0 with value: 0.0.


Trial #6 Finished. Success Rate: 0.00

--- Starting Trial #7 ---


[I 2025-06-22 11:38:04,198] Trial 7 finished with value: 0.0 and parameters: {'learning_rate': 2.08155614995188e-05, 'net_arch': 'medium'}. Best is trial 0 with value: 0.0.


Trial #7 Finished. Success Rate: 0.00

--- Starting Trial #8 ---


[I 2025-06-22 11:45:21,546] Trial 8 finished with value: 0.0 and parameters: {'learning_rate': 1.9756257627009703e-05, 'net_arch': 'medium'}. Best is trial 0 with value: 0.0.


Trial #8 Finished. Success Rate: 0.00

--- Starting Trial #9 ---


[I 2025-06-22 11:52:33,048] Trial 9 finished with value: 0.0 and parameters: {'learning_rate': 2.702912177563251e-05, 'net_arch': 'medium'}. Best is trial 0 with value: 0.0.


Trial #9 Finished. Success Rate: 0.00

--- Starting Trial #10 ---


[I 2025-06-22 11:59:46,871] Trial 10 finished with value: 0.0 and parameters: {'learning_rate': 0.0001587333066556646, 'net_arch': 'big'}. Best is trial 0 with value: 0.0.


Trial #10 Finished. Success Rate: 0.00

--- Starting Trial #11 ---


[I 2025-06-22 12:07:06,316] Trial 11 finished with value: 0.0 and parameters: {'learning_rate': 7.792065043874267e-05, 'net_arch': 'big'}. Best is trial 0 with value: 0.0.


Trial #11 Finished. Success Rate: 0.00

--- Starting Trial #12 ---


[I 2025-06-22 12:14:34,529] Trial 12 finished with value: 0.0 and parameters: {'learning_rate': 0.00042551219566087223, 'net_arch': 'big'}. Best is trial 0 with value: 0.0.


Trial #12 Finished. Success Rate: 0.00

--- Starting Trial #13 ---


[I 2025-06-22 12:21:53,519] Trial 13 finished with value: 0.0 and parameters: {'learning_rate': 6.686722368638437e-05, 'net_arch': 'big'}. Best is trial 0 with value: 0.0.


Trial #13 Finished. Success Rate: 0.00

--- Starting Trial #14 ---


[I 2025-06-22 12:29:15,816] Trial 14 finished with value: 0.0 and parameters: {'learning_rate': 4.788921778519387e-05, 'net_arch': 'big'}. Best is trial 0 with value: 0.0.


Trial #14 Finished. Success Rate: 0.00

--- Starting Trial #15 ---


[I 2025-06-22 12:36:35,471] Trial 15 finished with value: 0.0 and parameters: {'learning_rate': 1.0070539616642638e-05, 'net_arch': 'small'}. Best is trial 0 with value: 0.0.


Trial #15 Finished. Success Rate: 0.00

--- Starting Trial #16 ---


[I 2025-06-22 12:43:59,605] Trial 16 finished with value: 0.0 and parameters: {'learning_rate': 0.00016026666117837155, 'net_arch': 'big'}. Best is trial 0 with value: 0.0.


Trial #16 Finished. Success Rate: 0.00

--- Hyperparameter Tuning Complete ---
Number of finished trials: 17
Best trial:
  Value (Success Rate): 0.0000
  Params: 
    learning_rate: 0.0003339156398126654
    net_arch: big


In [3]:
# --- Visualize the Results ---
# Requires the 'plotly' library we installed earlier

# Show the optimization history
fig1 = optuna.visualization.plot_optimization_history(study)
fig1.show()

# Show the parameter importance
fig2 = optuna.visualization.plot_param_importances(study)
fig2.show()

# Show slice plots to see how each parameter affects the outcome
fig3 = optuna.visualization.plot_slice(study, params=["learning_rate", "net_arch"])
fig3.show()

RuntimeError: Encountered zero total variance in all trees.