This notebook substitutes some classes in an experiment with "debug" versions of them, which write almost every intermediate step to file, to help detect any incoherence in the code

In [None]:
import os

# Root directory for experiment artifacts; it is handed to the pipeline as "base_directory".
# Built with os.path.join so the separator matches the host OS
# (a literal "data\\rl_training" is a single, oddly named directory on POSIX systems).
PATH_TO_STORE_EXPERIMENTS = os.path.join("data", "rl_training")

In [None]:
# Name of the experiment. A later cell appends a suffix to it, and the result is passed
# to the pipeline as "artifact_relative_directory".
# Alternative names are kept commented out for quick switching.
#experiment_name = "dqn_sb3_cartpole"
#experiment_name = "dqn_sb3_cartpole_ppo"

experiment_name = "dqn_cartpole"

# Preparation before loading experiment

## Change logging system

In [None]:
from automl.loggers.logger_component import LoggerSchema

# Logger defaults overridden for this debugging session, as (parameter name, new default).
# NOTE(review): -1 presumably makes the logger write to file without buffering lines,
# and "INFO" presumably lowers the level needed for a message to be logged — confirm in LoggerSchema.
LOGGER_DEFAULT_OVERRIDES = (
    ("write_to_file_when_text_lines_over", -1),
    ("necessary_logger_level", "INFO"),
)

# Applied in the order listed above.
for parameter_name, new_default in LOGGER_DEFAULT_OVERRIDES:
    LoggerSchema.get_schema_parameter_signature(parameter_name).change_default_value(new_default)

In [None]:
from automl.loggers.component_with_results import ResultLogger


# Change the default of the "save_results_on_log" parameter of ResultLogger to True
# (presumably so results are written out on every log call — confirm in ResultLogger).
# NOTE(review): a later cell imports ResultLogger from automl.loggers.result_logger instead;
# confirm both import paths resolve to the same class.
ResultLogger.get_schema_parameter_signature("save_results_on_log").change_default_value(True)

# The base Experiment

## Base Configuration

In [None]:
# Module providing the base configuration of the experiment.
# Swap the active import with the commented one to start from the ppo_sb3 configuration instead.
from automl.base_configurations.environment.cart_pole import dqn_sb3 as base_rl_configuration
#from automl.base_configurations.environment.cart_pole import ppo_sb3 as base_rl_configuration


# Configuration dictionary of the RL pipeline; the following cells read and edit it
# before the pipeline component is generated from it.
rl_pipeline_config = base_rl_configuration.config_dict()

## Base Configuration Interpretation

In [None]:
# The "input" entry of the pipeline configuration.
# The names below are references into rl_pipeline_config (assuming nested dicts),
# so editing them edits the configuration in place.
rl_pipeline_input = rl_pipeline_config["input"]

# Component entries are indexed with [1] to reach their input
# (presumably each entry is a (component type, input dict) pair — confirm against config_dict()).
rl_trainer_tuple = rl_pipeline_input["rl_trainer"]
rl_trainer_input = rl_trainer_tuple[1]

# The "agents_input" entry of the pipeline input
agents_input = rl_pipeline_input["agents_input"]

policy_tuple = agents_input["policy"]
policy_input = policy_tuple[1]

# The "agents_trainers_input" entry, nested inside the RL trainer input
agents_trainers_input = rl_trainer_input["agents_trainers_input"]

In [None]:
# Learner entry of the agent trainers input; index 1 is taken as its input
# (presumably a (component type, input dict) pair — confirm against config_dict()).
learner_tuple = agents_trainers_input["learner"]
learner_input = learner_tuple[1]

# Optimizer entry nested inside the learner input, accessed the same way
optimizer_tuple = learner_input["optimizer"]
optimizer_input = optimizer_tuple[1]

In [None]:
# Memory entry of the agent trainers input (kept at hand for manual edits).
memory_tuple = agents_trainers_input["memory"]

In [None]:
# Environment entry of the pipeline input; index 1 is taken as its input
# (presumably a (component type, input dict) pair — confirm against config_dict()).
environment = rl_pipeline_config["input"]["environment"]
environment_input = environment[1]

# Changes to the base configuration

## Code to help alter experiment

In [None]:
def substitute_value_in_dict(dict_with_value : dict, key, new_value):
    """Set ``dict_with_value[key]`` to ``new_value``, printing the old and new values first.

    A key that is not present yet is reported with an old value of ``None``
    and is then created. The dictionary is modified in place; nothing is returned.
    """
    previous_value = dict_with_value.get(key)
    print(f"Old value for key '{key}': {previous_value}, new value: {new_value}")

    dict_with_value[key] = new_value

def remove_value_in_dict(dict_with_value : dict, key, new_value=None):
    """Remove ``key`` from ``dict_with_value`` in place, printing the value being dropped.

    A missing key is not an error: it is reported with an old value of ``None``
    and the dictionary is left untouched.

    Args:
        dict_with_value: dictionary to modify in place.
        key: key to remove.
        new_value: unused. It is kept, now optional, only so that existing
            three-argument calls keep working.
    """
    print(f"Old value for key '{key}': {dict_with_value.get(key, None)}, to be removed...")
    dict_with_value.pop(key, None)



def substitute_tuple_value_in_dict(dict_with_tuple : dict, key, tuple_index, new_value):
    """Replace one position of the tuple stored at ``dict_with_tuple[key]``.

    Tuples are immutable, so a new tuple is built and stored back under ``key``.
    The old and new values of the position are printed first.

    Args:
        dict_with_tuple: dictionary holding the tuple; modified in place.
        key: key under which the tuple is stored.
        tuple_index: position to replace. Negative indices count from the end.
        new_value: value to place at ``tuple_index``.

    Raises:
        KeyError: if ``key`` is not in the dictionary.
        IndexError: if ``tuple_index`` is out of range.
    """

    elements = list(dict_with_tuple[key])

    print(f"Old value for tuple pos {tuple_index}: {elements[tuple_index]}, new value: {new_value}")

    # Assigning into the list copy handles negative indices correctly; comparing the index
    # against range(len(...)) would report the old value but silently replace nothing.
    elements[tuple_index] = new_value

    dict_with_tuple[key] = tuple(elements)


## Manual Hyperparameter Tuning

### Experiment

### Base Model

In [None]:
# When True, the next cell sets policy_input["model"] to a saved model path and
# names the experiment after that model; when False, the experiment gets the "original" suffix.
LOAD_MODEL = False

In [None]:
import os

# Select the starting model and derive the experiment name from it.
# Paths are built with os.path.join so the separator matches the host OS: with hard-coded
# backslashes, os.path.basename returns the whole string on POSIX systems and the
# experiment name ends up as a single directory name containing a backslash.
# NOTE: this cell extends experiment_name from its current value, so re-running it
# appends the suffix again; re-run the cell that defines experiment_name first.
if LOAD_MODEL:

    base_model_path = os.path.join('data', 'models', 'sb3_CartPole_dqn', 'sb3_CartPole_dqn_perturbed_0_10')
    #base_model_path = os.path.join('data', 'models', 'sb3_CartPole_dqn', 'sb3_CartPole_dqn_perturbed_5_50')
    #base_model_path = os.path.join('data', 'models', 'sb3_CartPole_ppo', 'sb3_CartPole_ppo_gaussian_0_0.8_0.9')

    #base_model_path = os.path.join('data', 'models', 'FC_CartPole_ppo', 'FC_CartPole_ppo')

    # The last path component names the model and becomes the experiment sub-directory
    model_name = os.path.basename(base_model_path)

    experiment_name = os.path.join(experiment_name, model_name)

    policy_input["model"] = base_model_path

    #learner_input["critic_model"] = os.path.join('data', 'models', 'sb3_CartPole_ppo_critic', 'sb3_CartPole_ppo_critic')

else:
    experiment_name = os.path.join(experiment_name, "original")


### Other value changes

In [None]:
#rl_trainer_input["limit_total_steps"] = 1000

#rl_trainer_input.pop("limit_total_steps", None)

#rl_trainer_input["num_episodes"] = 4000


In [None]:
#agents_trainers_input["learning_start_step_delay"] = 5000
#agents_trainers_input["learning_start_ep_delay"] = 150

In [None]:
#agents_trainers_input["optimization_interval"] = 2048

#substitute_value_in_dict(agents_trainers_input, "times_to_learn", 3)

In [None]:
#optimizer_input["clip_grad_norm"] = 0.1
#substitute_value_in_dict(optimizer_input, "clip_grad_value", 0.1)

#substitute_value_in_dict(optimizer_input, "learning_rate", 0.0012)

# Gen RL Pipeline

In [None]:

from automl.rl.rl_pipeline import RLPipelineComponent
from automl.utils.json_utils.json_component_utils import gen_component_from

# Generate the pipeline component from the configuration dictionary,
# including any edits made to it in the cells above.
rl_pipeline : RLPipelineComponent = gen_component_from(rl_pipeline_config)

In [None]:
# Tell the pipeline where to store its artifacts: the experiment name is given
# relative to the base directory used for all experiments.
artifact_location_input = {
    "base_directory" : PATH_TO_STORE_EXPERIMENTS,
    "artifact_relative_directory" : experiment_name,
    "create_new_directory" : True,
}
rl_pipeline.pass_input(artifact_location_input)

# Directory reported by the pipeline after receiving the input above
experiment_path = rl_pipeline.get_artifact_directory()
print(f"Experiment path: {experiment_path}")

# Do the training

In [None]:
from automl.loggers.global_logger import activate_global_logger

# Activate the global logger, handing it the pipeline's artifact directory
# (presumably where its output is written — confirm in activate_global_logger).
activate_global_logger(rl_pipeline.get_artifact_directory())

In [None]:
# Process the pipeline input before running, unless that was already done
# (as the method name suggests; the behavior is defined in the automl package).
# NOTE(review): the spelling is part of the method name as called here — keep it in sync with the automl API.
rl_pipeline.proccess_input_if_not_proccesd()

In [None]:
# Run the pipeline: this is the training step of the notebook.
rl_pipeline.run()

## Save configuration

In [None]:
# Alternative way of persisting the configuration, kept for reference:
#rl_pipeline.save_configuration(save_exposed_values=True)
from automl.basic_components.state_management import save_state


# Save the state of the pipeline after the run, with save_definition=True
# (presumably so the component definition is stored alongside it — confirm in save_state).
save_state(rl_pipeline, save_definition=True)

## See Results

In [None]:
# Passed as aggregate_number to plot_confidence_interval in the plotting cell
# (presumably the number of consecutive episodes aggregated per plotted point — confirm in ResultLogger).
AGGREGATE_NUMBER = 5

In [None]:

import os

from automl.loggers.result_logger import RESULTS_FILENAME, ResultLogger

# Directory holding the results of the RL trainer component inside the experiment directory.
# Joined with os.path.join so the separator matches the host OS instead of a hard-coded backslash.
results_directory = os.path.join(experiment_path, "RLTrainerComponent")

# Logger pointed at the existing results: the relative directory is empty and
# create_new_directory is False, so the directory above is used as given.
results_logger = ResultLogger(input={
                                        "results_filename" : RESULTS_FILENAME,
                                        "base_directory" : results_directory,
                                        "artifact_relative_directory" : '',
                                        "create_new_directory" : False
                            })

In [None]:
#results_logger.plot_graph(x_axis='episode', y_axis=[('total_reward', name)], to_show=False)

# Plot the episode reward per episode with its confidence interval, then its linear regression.
# Both calls pass to_show=False (presumably so the plots are not displayed
# one by one — confirm in ResultLogger).
results_logger.plot_confidence_interval(
    x_axis='episode',
    y_column='episode_reward',
    show_std=True,
    to_show=False,
    y_values_label=experiment_name,
    aggregate_number=AGGREGATE_NUMBER,
)
results_logger.plot_linear_regression(
    x_axis='episode',
    y_axis='episode_reward',
    to_show=False,
    y_label=f"{experiment_name}_linear",
)
