In [None]:
import sys
import os

# Make the directory two levels above the current working directory importable
# so that the "automl" folder can be imported from this notebook. The path is
# resolved relative to the working directory, not to the notebook file.
project_root = os.path.abspath("../..")
if project_root not in sys.path:  # re-running this cell must not add duplicates
    sys.path.append(project_root)


In [None]:
# Base directory handed to the pipeline as "base_directory" for its artifacts.
# Built with os.path.join so the separator matches the host OS instead of
# being hard-coded to the Windows backslash.
PATH_TO_STORE_EXPERIMENTS = os.path.join("data", "rl_training")

## Define RL Pipeline structure

In [None]:
from automl.base_configurations.environment.cart_pole import dqn_sb3

# Start from the base configuration returned by dqn_sb3.config_dict(); the
# cells below modify entries of this object before the pipeline is built.
rl_pipeline_config = dqn_sb3.config_dict()

In [None]:
# Base name of this experiment; later passed to the pipeline as
# "artifact_relative_directory".
experiment_name = "dqn_sb3_cartpole"

### Change Environment

In [None]:
# Look up the "environment" entry of the pipeline config; element 1 of that
# entry is used as the environment's input mapping.
environment = rl_pipeline_config["input"]["environment"]
environment_input = environment[1]

# Uncomment to set the environment's "render_mode" input to "human".
#environment_input["render_mode"] = "human"

## Hyperparameters

### Base Model

In [None]:
# When True, the next cell sets the policy's "model" input to a saved model
# file and adds that model's name to the experiment name.
LOAD_MODEL = True

In [None]:
if LOAD_MODEL:

    # Saved model file to assign to the policy's "model" input. Built with
    # os.path.join so that os.path.basename below can split it on any OS
    # (a hard-coded "\\" is not a path separator on POSIX systems).
    base_model_path = os.path.join(
        'data', 'models', 'sb3_CartPole_dqn', 'sb3_CartPole_dqn_perturbed_0_10.pkl'
    )

    # File name of the model without its directory or extension.
    model_name = os.path.splitext(os.path.basename(base_model_path))[0]

    # Tag the experiment with the model name. The guard keeps the name stable
    # when this cell is executed more than once in the same kernel.
    model_suffix = f"_{model_name}"
    if not experiment_name.endswith(model_suffix):
        experiment_name = f"{experiment_name}{model_suffix}"

    # Walk the config down to the policy's input mapping (element 1 of the
    # "policy" entry).
    rl_pipeline_input = rl_pipeline_config["input"]
    agents_input = rl_pipeline_input["agents_input"]
    policy_tuple = agents_input["policy"]
    policy_input = policy_tuple[1]

    policy_input["model"] = base_model_path


In [None]:
# Walk the pipeline config down to the trainer settings:
# "input" -> "rl_trainer" entry -> element 1 of that entry -> "agents_trainers_input".
rl_pipeline_input = rl_pipeline_config["input"]
rl_trainer_tuple = rl_pipeline_input["rl_trainer"]
rl_Trainer_input = rl_trainer_tuple[1]
agents_trainers_input = rl_Trainer_input["agents_trainers_input"]

In [None]:
# Sets the trainers' "limit_total_steps" input (presumably the cap on total
# training steps — confirm against the trainer).
# NOTE(review): 8e4 is a float (80000.0); confirm a non-int value is accepted here.
agents_trainers_input["limit_total_steps"] = 8e4

In [None]:
# Sets the trainers' "learning_start_step_delay" input (presumably the number
# of steps taken before learning starts — confirm against the trainer).
agents_trainers_input["learning_start_step_delay"] = 5000

## Generate the RL Pipeline

In [None]:
from automl.utils.json_component_utils import gen_component_from
from automl.rl.rl_pipeline import RLPipelineComponent

# Build the pipeline component from the configuration edited in the cells above.
rl_pipeline : RLPipelineComponent = gen_component_from(rl_pipeline_config)

In [None]:
# Tell the pipeline where its artifacts go: the experiments base directory,
# a relative directory named after the experiment, and create_new_directory
# set to True.
artifact_location = {
    "base_directory": PATH_TO_STORE_EXPERIMENTS,
    "artifact_relative_directory": experiment_name,
    "create_new_directory": True,
}
rl_pipeline.pass_input(artifact_location)

# Keep the directory the pipeline reports; the results cells read from it.
experiment_path = rl_pipeline.get_artifact_directory()
print(f"Experiment path: {experiment_path}")

## Do the training

In [None]:
# Run the pipeline; this is the training step of the notebook.
rl_pipeline.run()

### Save configuration

In [None]:
# Save the pipeline's configuration, passing save_exposed_values=True
# (presumably so exposed values are stored as well — TODO confirm).
rl_pipeline.save_configuration(save_exposed_values=True)

## See Results

In [None]:
# Forwarded as aggregate_number to plot_confidence_interval in the plotting
# cell; None here (presumably leaves aggregation at its default — confirm).
AGGREGATE_NUMBER = None

In [None]:

from automl.loggers.result_logger import RESULTS_FILENAME, ResultLogger

# Directory named "RLTrainerComponent" inside the experiment directory, from
# which the results are read. os.path.join picks the separator of the host OS
# instead of a hard-coded Windows backslash.
results_directory = os.path.join(experiment_path, "RLTrainerComponent")

# Logger opened directly on that directory: empty relative directory and
# create_new_directory set to False.
results_logger = ResultLogger(input={
    "results_filename": RESULTS_FILENAME,
    "base_directory": results_directory,
    "artifact_relative_directory": '',
    "create_new_directory": False,
})

In [None]:
# Plot episode reward against episode number, labelled with the experiment
# name. Both calls pass to_show=False.
results_logger.plot_confidence_interval(
    x_axis='episode',
    y_column='episode_reward',
    show_std=False,
    to_show=False,
    y_values_label=experiment_name,
    aggregate_number=AGGREGATE_NUMBER,
)

# Linear regression over the same two columns, labelled "<experiment>_linear".
results_logger.plot_linear_regression(
    x_axis='episode',
    y_axis='episode_reward',
    to_show=False,
    y_label=experiment_name + '_linear',
)