In [1]:
# ---- Run identification / hardware ----
# Index of this run; it is added to mother_seed and used in output names.
run_no = 0
# CUDA device index handed to torch.cuda.set_device().
gpu_id = 1

# ---- Experiment parameter ----
# Compression ratio applied to each frame.
compress_ratio = 0.0

# ---- Hyperparameters ----
# Number of environments requested from the vectorised env (n_envs).
NUM_ENVS = 16
# Passed as total_timesteps to model.learn().
# Original note: "should result in 12E7 timesteps" -- presumably emulator
# frames at a frame skip of 4; TODO confirm.
TRAIN_STEPS = 3e7

# Base seed; each run uses mother_seed + run_no.
mother_seed = 20230618

In [2]:
import torch

# Make `gpu_id` the current CUDA device for this kernel.
# Guarded so that a machine without CUDA, or with fewer GPUs than the
# hard-coded index expects, gets a readable message instead of a low-level
# CUDA error. Negative ids are still forwarded to set_device(), which
# treats them as a no-op.
if not torch.cuda.is_available():
    print("CUDA is not available; skipping GPU selection.")
elif gpu_id >= torch.cuda.device_count():
    raise ValueError(
        f"gpu_id={gpu_id} is out of range: only "
        f"{torch.cuda.device_count()} CUDA device(s) are visible."
    )
else:
    torch.cuda.set_device(gpu_id)

In [3]:
import os
import sys
import git
import pathlib

# Set TensorFlow's C++ log-level environment variable before anything
# that might load TensorFlow is imported.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Locate the enclosing git repository (searching parent directories) and
# use its working tree as the project root.
_repo = git.Repo('.', search_parent_directories=True)
PROJ_ROOT_PATH = pathlib.Path(_repo.working_tree_dir)
PROJ_ROOT = str(PROJ_ROOT_PATH)

# Put the project root on sys.path once so the `lib` package can be imported.
if PROJ_ROOT not in sys.path:
    sys.path.append(PROJ_ROOT)

print(f"Project Root Directory: {PROJ_ROOT}")

Project Root Directory: /repos/drl_csense


In [4]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import gymnasium as gym
from stable_baselines3 import A2C

In [7]:
from lib.folder_paths import get_exp_name_tag, deconstruct_exp_name, makeget_logging_dir
from lib.env_utils import make_atari_env_Compressed_VecFrameStack, SaveBestModelCallback
from lib.vizresults import plot_single_run_rewards

In [8]:
# Environment to train on
env_id = "BreakoutNoFrameskip-v4"

# The experiment is parameterised by the frame compression ratio
exp_param_type, exp_param_value = "compress", compress_ratio

# Derive the experiment's name, meta-name and tag from those settings
(exp_name,
 exp_metaname,
 exp_tag) = get_exp_name_tag(env_id, exp_param_type, exp_param_value)

In [9]:
# Look up the output directories for this experiment
# (models, logs, gifs, images -- in the order the helper returns them).
_logging_dirs = makeget_logging_dir(exp_name)
models_dir, log_dir, gif_dir, image_dir = _logging_dirs

In [10]:
# Algorithm to use: the A2C class imported from stable_baselines3
ALGO = A2C

In [11]:
print("-------")
print(f"RUN: {run_no}")

# Per-run output locations: the monitor/log directory and the path where
# the best model seen so far is written.
run_log_dir = f"{log_dir}/run_{run_no}"
best_model_path = f"{models_dir}/{exp_name}-run_{run_no}-best"

# One seed per run, shared by the environments and the model so that a
# run can be repeated.
run_seed = mother_seed + run_no

# Create the best-model callback with check_freq=10,000 (written as an int
# because it is a count, not a float).
callback = SaveBestModelCallback(check_freq=10_000,
                                 log_dir=run_log_dir,
                                 save_path=best_model_path,
                                 verbose=0)

# Make the vectorised environment
env = make_atari_env_Compressed_VecFrameStack(env_id,
                                              n_envs=NUM_ENVS,
                                              monitor_dir=run_log_dir,
                                              seed=run_seed,
                                              compress_ratio=compress_ratio)

# Create the RL model from the configured algorithm class (ALGO) rather
# than a hard-coded one, and seed it so the model's own random generators
# are fixed as well.
model = ALGO("CnnPolicy", env, seed=run_seed, verbose=0)

# Train the agent; total_timesteps is a step count, so pass it as an int
model.learn(total_timesteps=int(TRAIN_STEPS),
            progress_bar=False,
            callback=callback)

-------
RUN: 0


A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


Num timesteps: 3200
Best mean reward: -inf - Last mean reward per episode: 0.60
Saving new best model to /repos/drl_csense/models/BreakoutNoFrameskip-v4--compress/BreakoutNoFrameskip-v4--compress_0.0/BreakoutNoFrameskip-v4--compress_0.0-run_0-best.zip
Num timesteps: 4800
Best mean reward: 0.60 - Last mean reward per episode: 0.69
Saving new best model to /repos/drl_csense/models/BreakoutNoFrameskip-v4--compress/BreakoutNoFrameskip-v4--compress_0.0/BreakoutNoFrameskip-v4--compress_0.0-run_0-best.zip
Num timesteps: 6400
Best mean reward: 0.69 - Last mean reward per episode: 0.81
Saving new best model to /repos/drl_csense/models/BreakoutNoFrameskip-v4--compress/BreakoutNoFrameskip-v4--compress_0.0/BreakoutNoFrameskip-v4--compress_0.0-run_0-best.zip
Num timesteps: 8000
Best mean reward: 0.81 - Last mean reward per episode: 0.84
Saving new best model to /repos/drl_csense/models/BreakoutNoFrameskip-v4--compress/BreakoutNoFrameskip-v4--compress_0.0/BreakoutNoFrameskip-v4--compress_0.0-run_0-b

KeyboardInterrupt: 

In [None]:
# Persist the agent as it stands at the end of training
# (kept separate from the "-best" checkpoint path).
final_model_path = f"{models_dir}/{exp_name}-run_{run_no}"
model.save(final_model_path)

In [None]:
# Plot this run's rewards and save the figure to disk.
from lib.vizresults import plot_single_run_rewards

# Third positional argument of the plotting helper
# (presumably a smoothing window length -- TODO confirm).
window = 100

plot_single_run_rewards(
    exp_name,
    run_no,
    window,
    savefig=True,
)