In [None]:
%load_ext dotenv
%dotenv

import os

# Fetch database connection details from environment variables
db_name = os.getenv("DB_NAME")
db_user = os.getenv("DB_USER")
db_password = os.getenv("DB_PASSWORD")
db_host = os.getenv("DB_HOST")
db_port = os.getenv("DB_PORT")

# Construct the PostgreSQL connection URL
mysql_url = f"mysql+pymysql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"

In [4]:
import optuna
study = optuna.study.load_study(study_name='1M_steps', storage=mysql_url)

In [5]:
best_trial = study.best_trial
print(f"Best trial: {best_trial.number}")
print(f"Value: {best_trial.value}")
print("Params: ")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

Best trial: 26
Value: 111.9383544921875
Params: 
    batch_size: 1024
    n_steps: 32
    n_epochs: 5
    gamma: 0.9889492313175872
    gae_lambda: 0.8304325583515066
    ent_coef: 0.004264269589637711


In [6]:
df = study.trials_dataframe(attrs=("number", "value", "params", "state"))
print(df)

    number       value  params_batch_size  params_ent_coef  params_gae_lambda  \
0        0   94.101013               1024         0.003248           0.872390   
1        1 -148.231247               2048         0.004839           0.907993   
2        2 -124.188011               1024         0.000137           0.887406   
3        3         NaN               1024         0.000540           0.807726   
4        4         NaN               1024         0.005629           0.950725   
5        5   36.841469                512         0.000383           0.805506   
6        6 -241.252151               2048         0.001344           0.905305   
7        7   98.746216                512         0.004757           0.932952   
8        8  -20.672592                512         0.006298           0.811259   
9        9 -227.209183               2048         0.006272           0.921644   
10      10  -75.745880               2048         0.005699           0.945049   
11      11  -98.585014      

In [7]:
from optuna.visualization import (
    plot_contour,
    plot_edf,
    plot_intermediate_values,
    plot_parallel_coordinate,
    plot_rank,
    plot_slice,
    plot_timeline,
)

plot_optimization_history(study)



NameError: name 'plot_optimization_history' is not defined

In [None]:
plot_intermediate_values(study)


In [None]:
plot_parallel_coordinate(study)

In [None]:
plot_contour(study)


In [None]:
plot_slice(study)


In [None]:
plot_param_importances(study)


In [None]:
plot_edf(study)


In [None]:
plot_rank(study)


In [None]:
plot_timeline(study)

In [None]:

# Visualize optimization history and parameter importances
optimization_history_figure = plot_optimization_history(study)
optimization_history_figure.write_image("optimization_history.png")
mlflow.log_artifact("optimization_history.png")

param_importance_figure = plot_param_importances(study)
param_importance_figure.write_image("param_importances.png")
mlflow.log_artifact("param_importances.png")


In [None]:
env = gym.make("LunarLander-v2")
env.reset()
print("Observation Space Shape", env.observation_space.shape)
print("Action Space Shape", env.action_space.n)
env.close()

## Benchmark CPU vs GPU

In [None]:
import time

import gymnasium as gym
import torch

TOTAL_TIMESTEPS = 100000  # Short benchmark to measure FPS

def measure_fps(device):
    """
    Measure FPS on the given device (cpu or cuda).
    """
    # Create environment
    env = make_vec_env("LunarLander-v2", n_envs=1)

    # Initialize the model on the specified device
    model = PPO('MlpPolicy', env, device=device)

    # Start timer
    start_time = time.time()

    # Train for a small number of timesteps (benchmark)
    model.learn(total_timesteps=TOTAL_TIMESTEPS)

    # End timer
    end_time = time.time()

    # Calculate FPS (frames per second)
    elapsed_time = end_time - start_time
    fps = TOTAL_TIMESTEPS / elapsed_time

    # Clean up
    env.close()

    return fps

# Check if GPU is available
gpu_available = torch.cuda.is_available()

# Measure FPS on CPU
cpu_fps = measure_fps(device="cpu")
print(f"CPU FPS: {cpu_fps:.2f}")

# Measure FPS on GPU (if available)
if gpu_available:
    gpu_fps = measure_fps(device="cuda")
    print(f"GPU FPS: {gpu_fps:.2f}")
else:
    gpu_fps = 0
    print("GPU is not available.")

# Select the device with the higher FPS
if gpu_fps > cpu_fps:
    print("Using GPU for training.")
    chosen_device = "cuda"
else:
    print("Using CPU for training.")
    chosen_device = "cpu"



## Examples

In [None]:
# TODO: Evaluate the agent with this instead

# Create a new environment for evaluation
eval_env = Monitor(gym.make("LunarLander-v2"))

# Evaluate the model with 10 evaluation episodes and deterministic=True
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)

# Print the results
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

In [None]:
import gymnasium as gym
from huggingface_sb3 import package_to_hub
from stable_baselines3.common.vec_env import DummyVecEnv

# PLACE the variables you've just defined two cells above
# Define the name of the environment
env_id = "LunarLander-v2"

# TODO: Define the model architecture we used
model_architecture = "PPO"

## Define a repo_id
## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2
## CHANGE WITH YOUR REPO ID
repo_id = "ThomasSimonini/ppo-LunarLander-v2"  # Change with your repo id, you can't push with mine 😄

## Define the commit message
commit_message = "Upload PPO LunarLander-v2 trained agent"

# Create the evaluation env and set the render_mode="rgb_array"
eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode="rgb_array"))])

# PLACE the package_to_hub function you've just filled here
package_to_hub(
    model=model,  # Our trained model
    model_name=model_name,  # The name of our trained model
    model_architecture=model_architecture,  # The model architecture we used: in our case PPO
    env_id=env_id,  # Name of the environment
    eval_env=eval_env,  # Evaluation Environment
    repo_id=repo_id,  # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2
    commit_message=commit_message,
)