    # Importing Libraries

In [1]:
import datetime
import multiprocessing
import random
import time

import torch
import wandb
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CallbackList, EvalCallback
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack, VecTransposeImage
from wandb.integration.sb3 import WandbCallback
from feature_extraction.callbacks.wandb_on_training_end_callback import WandbOnTrainingEndCallback
from feature_extraction.feature_extractors.resnet.block_feature_extractor import BlockFeatureExtractor
from feature_extraction.wrappers.vec_feature_extractor import VecFeatureExtractor
from utils import linear_schedule, make_resnet_atari_env
from collections import OrderedDict

from torchvision.models import ResNet50_Weights, resnet50

2024-04-29 13:43:05.944765: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-29 13:43:05.964384: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-29 13:43:05.964404: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-29 13:43:05.964901: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-29 13:43:05.968199: I tensorflow/core/platform/cpu_feature_guar

# Method for running a single experiment with a configuration

In [2]:
def run_experiment(config):
    wandb.login()

    # Initialize the wandb run
    wandb.init(project="ablation_study",
                     name="sweep",
                     config=config, save_code=True,
                     sync_tensorboard=True)
    
    config = wandb.config
    
    log_dir = f"logs/sweep"
    #feature_extractor = StageFeatureExtractor()
    
    model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device: ", device)
    model.to(device)
    feature_extractor = BlockFeatureExtractor(model, 16, log_dir, log_to_wandb=True)
    
    # Create Evaluation Environment
    vec_eval_env = make_resnet_atari_env(
        "BreakoutNoFrameskip-v4",
        n_envs=8,
        seed=14,
    )
    
    print("------- PRINTING WRAPPER OBSERVATION SPACES ------")
    print("original_observation_space", vec_eval_env.observation_space.shape)
    vec_eval_env = VecTransposeImage(vec_eval_env)
    print("vec_transpose_obs_space", vec_eval_env.observation_space.shape)

    vec_eval_env = VecFrameStack(vec_eval_env, n_stack=4)
    print("vec_frame_stack_obs_space", vec_eval_env.observation_space.shape)

    vec_eval_env = VecFeatureExtractor(vec_eval_env, feature_extractor, n_stacks=4)
    print("vec_feature_extractor_obs_space", vec_eval_env.observation_space.shape)
    print("------- FINISHED PRINTING WRAPPER OBSERVATION SPACES ------")

    
    # Create Training Environment    
    vec_train_env = make_resnet_atari_env("BreakoutNoFrameskip-v4", n_envs=8, seed=12)
    vec_train_env = VecTransposeImage(vec_train_env)
    vec_train_env = VecFrameStack(vec_train_env, n_stack=4)
    vec_train_env = VecFeatureExtractor(vec_train_env, feature_extractor, n_stacks=4)

    # Define the keys for PPO-specific hyperparameters
    ppo_params_keys = [
        'batch_size',
        'ent_coef',
        'n_epochs',
        'n_steps',
        'vf_coef',
        'normalize_advantage',
    ]

    # Filter the config dictionary to extract only the PPO hyperparameters
    ppo_hyperparams = {key: config[key] for key in ppo_params_keys if key in config}

    # Additional hyperparameters not in the initial filter that require custom handling
    learning_rate_schedule = linear_schedule(2.5e-4)
    clip_range_schedule = linear_schedule(0.1)

    # Instantiate the PPO model with the specified hyperparameters and environment
    model = PPO(
        policy="MlpPolicy",
        **ppo_hyperparams,
        learning_rate=learning_rate_schedule,
        clip_range=clip_range_schedule,
        env=vec_train_env,
        verbose=1,
        tensorboard_log=f"{log_dir}",
    )


    wandb.agent(sweep_id, model.learn(total_timesteps=10_000, callback=WandbCallback), count=3)

    wandb.finish()

# Method for running multiple experiments with a configuration

# Run experiments

In [3]:
sweep_config = {
    'method': 'bayes',  # Bayesian optimization
    'metric': {
        'name': 'ep_rew_mean',  # Metric to optimize
        'goal': 'maximize'  # Goal: maximize mean reward
    },
    'parameters': {
        'learning_rate': {
            'distribution': 'uniform',
            'min': 0.0001,
            'max': 0.001
        },
        'batch_size': {
            'distribution': 'q_log_uniform_values',
            'q': 1,
            'min': 64,
            'max': 512
        },
        'clip_range': {
            'distribution': 'uniform',
            'min': 0.1,
            'max': 0.2
        },
        'n_steps': {
            'distribution': 'q_log_uniform_values',
            'q': 1,
            'min': 64,
            'max': 2048
        },
        'n_epochs': {
            'values': [3, 4, 5, 6]
        },
        'ent_coef': {
            'distribution': 'log_uniform_values',
            'min': 0.0001,
            'max': 0.1
        },
        'vf_coef': {
            'distribution': 'uniform',
            'min': 0.1,
            'max': 1.0
        }
    }
}

sweep_id = wandb.sweep(sweep_config, project="ablation_study")

run_experiment(sweep_config);

Create sweep with ID: ecrx1yhp
Sweep URL: https://wandb.ai/feature_extraction/ablation_study/sweeps/ecrx1yhp


[34m[1mwandb[0m: Currently logged in as: [33madicreson[0m ([33mfeature_extraction[0m). Use [1m`wandb login --relogin`[0m to force relogin


Device:  cuda
Resnet Device is: cuda


A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


------- PRINTING WRAPPER OBSERVATION SPACES ------
original_observation_space (224, 224, 3)
vec_transpose_obs_space (3, 224, 224)
vec_frame_stack_obs_space (12, 224, 224)
vec_feature_extractor_obs_space (1, 8192)
------- FINISHED PRINTING WRAPPER OBSERVATION SPACES ------
Using cuda device
Logging to logs/sweep/PPO_1


2024-04-29 13:43:14.610002: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-29 13:43:14.632513: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-29 13:43:14.632539: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-29 13:43:14.633100: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-29 13:43:14.636634: I tensorflow/core/platform/cpu_feature_guar

TypeError: expected str, bytes or os.PathLike object, not dict