This notebook replaces some classes in an experiment with "debug" versions of them, which write almost every intermediate step to file, to help detect any incoherence in the code

In [1]:
import os

# Directory handed to the pipeline as its "base_directory" input.
# Built with os.path.join so the separator matches the host OS instead of a
# hard-coded Windows "\\" (the value is unchanged on Windows).
PATH_TO_STORE_EXPERIMENTS = os.path.join("data", "rl_training")

In [2]:
# Name of this experiment: passed to the pipeline as "artifact_relative_directory"
# and reused as the label of the result plots at the end of the notebook.
experiment_name = "dqn_multi_agent"

# Preparation before loading experiment

## Change logging system

In [3]:
from automl.loggers.logger_component import LoggerSchema

# Default values to override on the logger schema, applied in this order.
# NOTE(review): the meaning of -1 for "write_to_file_when_text_lines_over" is not
# visible from this notebook — confirm against LoggerSchema.
for logger_parameter_name, logger_default_value in (
    ("write_to_file_when_text_lines_over", -1),
    ("necessary_logger_level", "INFO"),
):
    logger_parameter_signature = LoggerSchema.get_schema_parameter_signature(logger_parameter_name)
    logger_parameter_signature.change_default_value(logger_default_value)

In [4]:
from automl.loggers.component_with_results import ResultLogger

# NOTE(review): the "See Results" cell near the end of the notebook imports ResultLogger
# from automl.loggers.result_logger instead — confirm both paths resolve to the same class.

# Change the default value of the "save_results_on_log" schema parameter to True.
ResultLogger.get_schema_parameter_signature("save_results_on_log").change_default_value(True)

# The base Experiment

## Base Configuration

In [5]:
from automl.rl.whole_configurations import rl_multi_agent_pipeline as base_rl_configuration


# Configuration dictionary of the base multi-agent pipeline. The following cells read
# its nested entries, and the pipeline component is later generated from it.
rl_pipeline_config = base_rl_configuration.config_dict()

## Base Configuration Interpretation

In [6]:
# Pull the nested sections out of the configuration so later cells can edit them by name.
# NOTE(review): every "*_tuple" entry is indexed with [1] to reach its input mapping —
# presumably a (component type, input dict) pair; confirm against the configuration module.
rl_pipeline_input = rl_pipeline_config["input"]

# RL trainer entry and its input mapping.
rl_trainer_tuple = rl_pipeline_input["rl_trainer"]
rl_trainer_input = rl_trainer_tuple[1]

# Input shared by the agents.
agents_input = rl_pipeline_input["agents_input"]

# Policy entry (nested under the agents input) and its input mapping.
policy_tuple = agents_input["policy"]
policy_input = policy_tuple[1]

# Input for the agents' trainers (nested under the RL trainer input).
agents_trainers_input = rl_trainer_input["agents_trainers_input"]

In [7]:
# Learner entry of the agents' trainers input; element 1 is used as its input mapping.
learner_tuple = agents_trainers_input["learner"]
learner_input = learner_tuple[1]

# Optimizer entry nested inside the learner input; element 1 is used as its input mapping.
optimizer_tuple = learner_input["optimizer"]
optimizer_input = optimizer_tuple[1]

In [8]:
# Memory entry of the agents' trainers input (kept whole; not unpacked in this notebook).
memory_tuple = agents_trainers_input["memory"]

In [9]:
# Environment entry of the pipeline input; element 1 is used as its input mapping
# (presumably the same pair layout as the other entries — TODO confirm).
environment = rl_pipeline_config["input"]["environment"]
environment_input = environment[1]

In [10]:
# Exploration strategy entry of the agents' trainers input; element 1 is used as its input mapping.
exploration_strategy_tuple = agents_trainers_input["exploration_strategy"]
exploration_strategy_input = exploration_strategy_tuple[1]

# Changes to the base configuration

## Code to help alter experiment

In [11]:
def substitute_value_in_dict(dict_with_value : dict, key, new_value):
    """Set ``dict_with_value[key]`` to ``new_value`` in place, reporting the change.

    The previous value (``None`` when the key is absent) and the new value are
    printed before the dictionary is modified. Returns ``None``.
    """
    previous_value = dict_with_value.get(key, None)
    message = f"Old value for key '{key}': {previous_value}, new value: {new_value}"
    print(message)

    dict_with_value[key] = new_value

def remove_value_in_dict(dict_with_value : dict, key, new_value=None):
    """Remove ``key`` from ``dict_with_value`` in place, reporting the removed value.

    The current value (``None`` when the key is absent) is printed first; a
    missing key is not an error. Returns ``None``.

    ``new_value`` is unused. It is kept, now optional, only so existing calls
    that pass a third argument keep working.
    """
    print(f"Old value for key '{key}': {dict_with_value.get(key, None)}, to be removed...")
    dict_with_value.pop(key, None)



def substitute_tuple_value_in_dict(dict_with_tuple : dict, key, tuple_index, new_value):
    """Replace one element of the tuple stored at ``dict_with_tuple[key]``.

    Tuples are immutable, so a new tuple is built with ``new_value`` at
    ``tuple_index`` and stored back under ``key``. The old and new values are
    printed first.

    Negative indexes count from the end, as in normal sequence indexing.

    Raises:
        KeyError: if ``key`` is not in ``dict_with_tuple``.
        IndexError: if ``tuple_index`` is out of range for the stored tuple.
    """

    tuple_value : tuple = dict_with_tuple[key]

    # Indexing here also validates tuple_index (IndexError when out of range).
    print(f"Old value for tuple pos {tuple_index}: {tuple_value[tuple_index]}, new value: {new_value}")

    # Normalise a negative index to its positive position; otherwise it would never
    # match an enumerate() position and the tuple would silently stay unchanged.
    target_position = tuple_index + len(tuple_value) if tuple_index < 0 else tuple_index

    new_tuple_value = tuple(
        new_value if position == target_position else current_value
        for position, current_value in enumerate(tuple_value)
    )

    dict_with_tuple[key] = new_tuple_value


## Manual Hyperparameter Tuning

### Experiment

### Base Model

### Other value changes

In [12]:
#rl_trainer_input["limit_total_steps"] = 1000

#rl_trainer_input.pop("limit_total_steps", None)

#rl_trainer_input["num_episodes"] = 4000


In [13]:
#agents_trainers_input["learning_start_step_delay"] = 5000
#agents_trainers_input["learning_start_ep_delay"] = 150

#substitute_value_in_dict(agents_trainers_input, "learning_start_ep_delay", 2897)

In [14]:
#agents_trainers_input["optimization_interval"] = 450

#substitute_value_in_dict(agents_trainers_input, "times_to_learn", 3)

In [15]:
#optimizer_input["clip_grad_norm"] = 0.1

#substitute_value_in_dict(optimizer_input, "clip_grad_value", 0.2956984463839789)

#substitute_value_in_dict(optimizer_input, "learning_rate", 0.006807860813523758)

In [16]:
#substitute_value_in_dict(agents_trainers_input, "discount_factor", 0.8790365307757482)

In [17]:
#substitute_value_in_dict(learner_input, "target_update_rate", 0.5511208693081078)

In [18]:
#substitute_value_in_dict(exploration_strategy_input, "epsilon_end", 0.009535369612528788)

# Gen RL Pipeline

In [19]:

from automl.rl.rl_pipeline import RLPipelineComponent
from automl.utils.json_utils.json_component_utils import gen_component_from

# Generate the pipeline component from the configuration dictionary prepared above.
rl_pipeline : RLPipelineComponent = gen_component_from(rl_pipeline_config)

In [20]:
# Extra input for the pipeline: where its artifacts go and how the seed is set up.
# NOTE(review): the saved output shows a numbered directory (e.g. "dqn_multi_agent_28"),
# presumably the effect of "create_new_directory" — confirm.
pipeline_directory_input = {
    "base_directory" : PATH_TO_STORE_EXPERIMENTS,
    "artifact_relative_directory" : experiment_name,
    "create_new_directory" : True,
    "do_full_setup_of_seed" : True,
}
rl_pipeline.pass_input(pipeline_directory_input)

# Directory reported by the pipeline for this run; reused by the results cells below.
experiment_path = rl_pipeline.get_artifact_directory()

print(f"Experiment path: {experiment_path}")

Experiment path: data\rl_training\dqn_multi_agent_28


# Do the training

In [21]:
from automl.loggers.global_logger import activate_global_logger

# Activate the global logger in the pipeline's artifact directory, before training starts.
activate_global_logger(rl_pipeline.get_artifact_directory())

Global logger is trying to be activated in directory: data\rl_training\dqn_multi_agent_28
[2025-11-28 15:13:09] Global logger activation as ended, activated in data\rl_training\dqn_multi_agent_28\_global_logger


In [22]:
# Run the pipeline.
# NOTE(review): the output saved with this notebook shows this call failing with
# "AttributeError: 'ImageReverterToSingleChannel' object has no attribute 'device'";
# the results cells below presumably need a successful run — fix that component first.
rl_pipeline.run()

[2025-11-28 15:13:09] <automl.rl.rl_pipeline.RLPipelineComponent object at 0x000002A4A3048050>: Seed is 3572337724
[2025-11-28 15:13:09] <automl.rl.rl_pipeline.RLPipelineComponent object at 0x000002A4A3048050>: Activating full setup of seed
[2025-11-28 15:13:09] Python seed 3572337724
[2025-11-28 15:13:09] Python seed 3572337724
[2025-11-28 15:13:09] Torch seed 976400350
[2025-11-28 15:13:09] Numpy seed 2159222677
[2025-11-28 15:13:10] Component RLPipelineComponent of type <class 'automl.rl.rl_pipeline.RLPipelineComponent'> is dealing with the exception: 'ImageReverterToSingleChannel' object has no attribute 'device'


AttributeError: 'ImageReverterToSingleChannel' object has no attribute 'device'

## Save configuration

In [None]:
#rl_pipeline.save_configuration(save_exposed_values=True)
from automl.basic_components.state_management import save_state


# Save the pipeline's state, asking for its definition to be saved as well.
save_state(rl_pipeline, save_definition=True)

## See Results

In [None]:
# Passed as `aggregate_number` to results_logger.plot_confidence_interval below.
AGGREGATE_NUMBER = 5

In [None]:

import os

from automl.loggers.result_logger import RESULTS_FILENAME, ResultLogger

# NOTE(review): an earlier cell imports ResultLogger from
# automl.loggers.component_with_results — confirm both paths resolve to the same class.

# Sub-directory of the experiment read by the logger below. Joined with os.path.join
# so the separator matches the host OS instead of a hard-coded Windows "\\".
results_directory = os.path.join(str(experiment_path), "RLTrainerComponent")

# Logger pointed at the existing results directory: empty relative directory and
# no new directory requested.
results_logger = ResultLogger(input={
    "results_filename" : RESULTS_FILENAME,
    "base_directory" : results_directory,
    "artifact_relative_directory" : '',
    "create_new_directory" : False,
})

In [None]:
#results_logger.plot_graph(x_axis='episode', y_axis=[('total_reward', name)], to_show=False)
# Plot 'episode_reward' against 'episode' with show_std=True, aggregating with AGGREGATE_NUMBER.
results_logger.plot_confidence_interval(x_axis='episode', y_column='episode_reward',show_std=True, to_show=False, y_values_label=experiment_name, aggregate_number=AGGREGATE_NUMBER)
# Linear regression of the same columns, labelled "<experiment_name>_linear".
# NOTE(review): this call uses y_axis/y_label where the one above uses y_column/y_values_label —
# confirm the keyword names against the ResultLogger API.
results_logger.plot_linear_regression(x_axis='episode', y_axis='episode_reward', to_show=False, y_label=experiment_name + '_linear')
