In [1]:
# import argparse
import datetime

# Wall-clock timestamp taken when this cell runs. It is not read again in the
# visible cells; presumably used further down to report total runtime — TODO confirm.
start = datetime.datetime.now()

# NOTE: the argparse-based configuration below is disabled. The variables it
# would populate (env_id, exp_param_type, exp_param_value, run_no, gpu_id,
# NUM_ENVS, TRAIN_STEPS) are assigned literal values in the next cell instead.
# # define arguments
# parser = argparse.ArgumentParser()
# parser.add_argument('--env_id', type=str)
# parser.add_argument('--exp_param_type', type=str) # "compress"
# parser.add_argument('--exp_param_value', type=float)
# parser.add_argument('--run_no', type=int)
# parser.add_argument('--n_envs', type=int)
# parser.add_argument('--run_seed', type=int)
# parser.add_argument('--trainsteps', type=int)
# parser.add_argument('--gpu_id', type=int)

# args= parser.parse_args()

# env_id = args.env_id
# exp_param_type = args.exp_param_type # "compress"
# exp_param_value = args.exp_param_value # compression ratio for each frame
# compress_ratio = exp_param_value
# run_no = args.run_no
# gpu_id = args.gpu_id
# NUM_ENVS = args.n_envs # The different number of processes that will be used
# TRAIN_STEPS = args.trainsteps # TRAIN_STEPS = 3E7 should result in 12E7 timesteps due to VecStack=4
# run_seed = args.run_seed

In [2]:
# Experiment configuration (literal values; the argparse equivalent is disabled).
env_id = 'BipedalWalker-v3'   # Gymnasium environment id
exp_param_type = "vanilla"    # experiment variant; combined with the value below to build the experiment name
exp_param_value = "0" # noise parameter
run_no = 0                    # NOTE: reassigned by the multi-run training loop later in the notebook
gpu_id = 2                    # CUDA device index handed to torch.cuda.set_device
NUM_ENVS = 5 # Number of environment copies in the vectorised env (stepped in-process by DummyVecEnv)
# Total environment timesteps to train for in each run. Kept as an int because
# it is passed on as a timestep count (the previous float literal 1E6 had the
# same numeric value).
TRAIN_STEPS = 1_000_000
# run_seed = 20230912

In [3]:
import torch
# Make the configured GPU the current CUDA device. Guarded so that the notebook
# does not crash on a machine without CUDA: in that case no device is selected
# and PyTorch stays on the CPU.
if torch.cuda.is_available():
    torch.cuda.set_device(gpu_id)
else:
    print(f"CUDA is not available; ignoring gpu_id={gpu_id} and running on the CPU.")

In [4]:
import os
import sys
import git
import pathlib
# Set TensorFlow's C++ minimum log level to '3' through the environment.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '3'})

# Find the git repository that contains the current directory (searching
# parent directories) and use its working tree as the project root.
PROJ_ROOT_PATH = pathlib.Path(
    git.Repo('.', search_parent_directories=True).working_tree_dir
)
PROJ_ROOT = os.fspath(PROJ_ROOT_PATH)

# Put the project root on the import path exactly once, so re-running this
# cell does not add duplicate entries.
if PROJ_ROOT not in sys.path:
    sys.path.append(PROJ_ROOT)

print(f"Project Root Directory: {PROJ_ROOT}")

Project Root Directory: /repos/drl_csense


In [5]:
import numpy as np
import matplotlib.pyplot as plt

import gymnasium as gym
from stable_baselines3 import PPO, SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

from lib.folder_paths import get_exp_name_tag, deconstruct_exp_name, makeget_logging_dir
# from lib.env_utils import AtariWrapper_NoisyFrame, AtariWrapper_Compressed, make_atari_env_Custom_VecFrameStack, SaveBestModelCallback
from lib.env_utils import SaveBestModelCallback
from lib.vizresults import plot_single_run_rewards

In [6]:
# Get names and tags of experiment
exp_name, exp_metaname, exp_tag = get_exp_name_tag(env_id, exp_param_type, exp_param_value)

print(f"Experiment: {exp_name}")
# torch.cuda.current_device() raises on a host without CUDA, so only query it
# when a GPU is actually present.
device_label = torch.cuda.current_device() if torch.cuda.is_available() else "cpu"
print(f"Using device: {device_label}")

# Get directories
models_dir, log_dir, gif_dir, image_dir = makeget_logging_dir(exp_name)

# Algorithm to use
ALGO_TYPE = "PPO"

ALGO_DICT = {"SAC": SAC, "PPO": PPO}
ALGO = ALGO_DICT[ALGO_TYPE]

# Number of independent training runs; run i is seeded with run_mother_seed + i.
N_RUNS = 5
run_mother_seed = 30092023
for run_no in range(N_RUNS):
    run_seed = run_mother_seed + run_no
    # Start Experiment
    print("-------")
    print(f"RUN: {run_no}")

    # Log directory for each run of the experiment
    run_log_dir = f"{log_dir}/run_{run_no}"
    best_model_path = f"{models_dir}/{exp_name}-run_{run_no}-best"

    # # Create the callback: check every TRAIN_STEPS/10 steps
    # callback = SaveBestModelCallback(check_freq=int(TRAIN_STEPS/10),
    #                                 log_dir=run_log_dir,
    #                                 save_path=best_model_path,
    #                                 verbose=1)

    vec_env = make_vec_env(env_id,
                           n_envs=NUM_ENVS,
                           monitor_dir=run_log_dir,
                           seed=run_seed,
                           vec_env_cls=DummyVecEnv)

    # Create RL model. The run seed is passed to the algorithm as well as to
    # the environments, so that weight initialisation and action sampling are
    # reproducible per run, not only the environment dynamics.
    model = ALGO("MlpPolicy", vec_env, seed=run_seed, verbose=0)

    # Train the agent. total_timesteps is a step count, so coerce it to int in
    # case TRAIN_STEPS was written as a float literal such as 1E6.
    model.learn(total_timesteps=int(TRAIN_STEPS),
                progress_bar=True,
                callback=None)

    # Save the final agent
    model.save(f"{models_dir}/{exp_name}-run_{run_no}-{ALGO_TYPE}")

    # Release the environments of every run except the last one, so they do not
    # pile up across runs. The last run's `vec_env` and `model` are left open
    # and bound, in case later cells still use them.
    if run_no < N_RUNS - 1:
        vec_env.close()

Experiment: BipedalWalker-v3--vanilla_0
Using device: 2
-------
RUN: 0


Output()

Output()

-------
RUN: 1


Output()

-------
RUN: 2


Output()

-------
RUN: 3


-------
RUN: 4


Output()