In [31]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [32]:
import os, sys
import pandas as pd
import yaml

# Put the directory two levels above the current working directory on sys.path
# (presumably the repository root, so the `common.src...` imports below resolve).
# NOTE(review): `root_dir` is rebound later in the notebook to a results folder.
root_dir = os.path.dirname(os.path.dirname(os.path.realpath(".")))
sys.path.append(root_dir)

import numpy as np

import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from matplotlib.legend_handler import HandlerTuple
from scipy.stats import sem, t
import torch
import pickle

from common.src.simple_dqn_agent import AgentDQN
from common.src.distribution_src import (
    make_env,
    StandardizeWrapper,
    RandomStartStateWrapper,
)
from rl_envs_forge.envs.grid_world.grid_world import GridWorld

from common.src.experiment_utils import (
    setup_logger,
    cleanup_file_handlers,
    namespace_to_dict,
)

import seaborn as sns

sns.set_theme()

In [33]:
def tuple_constructor(loader, node):
    """
    Builds a Python tuple from a YAML sequence node.

    Parameters:
    - loader: The YAML loader used to construct the sequence items.
    - node: The sequence node to convert.

    Returns:
    - A tuple holding the items constructed from the node.
    """
    items = loader.construct_sequence(node)
    return tuple(items)


def generic_constructor(loader, tag_suffix, node):
    """
    Fallback constructor that turns a tagged YAML node into plain Python data.

    Parameters:
    - loader: The YAML loader performing the construction.
    - tag_suffix: The part of the tag after the registered prefix (unused).
    - node: The YAML node to convert.

    Returns:
    - A dict for mapping nodes (every key converted to its string form),
      a list for sequence nodes, the result of `loader.construct_scalar`
      for scalar nodes, and None for any other node type.
    """
    if isinstance(node, yaml.MappingNode):
        # Keys are stringified so that unhashable keys (lists, dicts) can be used.
        # They must be built with deep=True: PyYAML otherwise fills collections
        # lazily, so str() would see an empty container and distinct keys
        # would all collapse to "[]" / "{}".
        return {
            str(loader.construct_object(key, deep=True)): loader.construct_object(
                value
            )
            for key, value in node.value
        }
    if isinstance(node, yaml.SequenceNode):
        return [loader.construct_object(child) for child in node.value]
    if isinstance(node, yaml.ScalarNode):
        return loader.construct_scalar(node)
    # Unknown node type: keep the original behaviour of returning None.
    return None


# Register the constructors on yaml.SafeLoader itself, so every later
# yaml.safe_load call in this kernel uses them.
# python/tuple tags are built by tuple_constructor.
yaml.SafeLoader.add_constructor("tag:yaml.org,2002:python/tuple", tuple_constructor)
# The empty prefix matches every tag; presumably PyYAML only falls back to it
# when no exact constructor is registered for the tag — TODO confirm.
yaml.SafeLoader.add_multi_constructor("", generic_constructor)

In [34]:
def find_matching_paths(root_dir):
    """
    Finds the directories located exactly two levels below the root directory,
    i.e. every `root_dir/<child>/<grandchild>` that is a directory.

    The search stops at that depth: nothing deeper can match, so the contents
    of the matched directories are never scanned.

    Parameters:
    - root_dir: The root directory to start searching from.

    Returns:
    - A list of matching directory paths, in os.scandir order.
    """
    matching_paths = []

    with os.scandir(root_dir) as children:
        for child in children:
            if not child.is_dir():
                continue
            with os.scandir(child.path) as grandchildren:
                matching_paths.extend(
                    entry.path for entry in grandchildren if entry.is_dir()
                )

    return matching_paths


# Example usage: collect the run directories of one experiment batch and
# show how many were found.
root_dir = r"D:\Work\repos\phd-research\experiments\dqn\results\2024Jun30-122741_configs"
matching_paths = find_matching_paths(root_dir)
len(matching_paths)

16

In [35]:
def make_env_custom(
    rows,
    cols,
    start_state,
    p_success,
    terminal_states,
    seed,
    walls=None,
    episode_length_limit=None,
    randomize_starting_position=None,
):
    """
    Builds a GridWorld whose listed reward entries are all zero and wraps it
    for use by the agent.

    Parameters:
    - rows, cols, start_state, p_success, terminal_states, seed, walls,
      episode_length_limit: Forwarded unchanged to GridWorld.
    - randomize_starting_position: When truthy, the grid is additionally
      wrapped in RandomStartStateWrapper before standardization.

    Returns:
    - The environment wrapped in StandardizeWrapper.
    """
    zero_rewards = {
        "valid_move": 0,
        "wall_collision": 0,
        "out_of_bounds": 0,
        "default": 0.0,
    }
    grid = GridWorld(
        rows=rows,
        cols=cols,
        start_state=start_state,
        walls=walls,
        p_success=p_success,
        terminal_states=terminal_states,
        seed=seed,
        rewards=zero_rewards,
        episode_length_limit=episode_length_limit,
    )
    # NOTE(review): the meaning of the literal 1 passed to the wrapper is not
    # visible here — confirm against RandomStartStateWrapper.
    wrapped = RandomStartStateWrapper(grid, 1) if randomize_starting_position else grid
    return StandardizeWrapper(wrapped)

In [36]:
def read_yaml_files(paths):
    """
    Reads the post_cfg.yaml file from each directory in the given list of paths.

    Directories without a post_cfg.yaml are reported on stdout and skipped.

    Parameters:
    - paths: List of directories to search for post_cfg.yaml.

    Returns:
    - A dictionary where keys are the directory paths and values are the
      contents of post_cfg.yaml as parsed by yaml.safe_load.
    """
    # Single source of truth for the file name, so the lookup and the
    # "not found" message cannot drift apart again.
    config_filename = "post_cfg.yaml"
    config_data = {}

    for path in paths:
        yaml_file = os.path.join(path, config_filename)
        if os.path.exists(yaml_file):
            with open(yaml_file, "r") as file:
                config_data[path] = yaml.safe_load(file)
        else:
            print(f"{config_filename} not found in {path}")

    return config_data


def flatten_stats(stats_dict):
    """
    Lifts the entries nested under "episode_frames" to top-level keys.

    Parameters:
    - stats_dict: A dictionary with an "episode_frames" sub-dictionary.

    Returns:
    - A new dictionary with one "episode_frames_<key>" entry per nested entry.
    """
    frame_stats = stats_dict["episode_frames"]
    return {f"episode_frames_{name}": stat for name, stat in frame_stats.items()}


def evaluation_experiment(config, resume_training_path=None):
    """
    Rebuilds the environments and the agent of one run and validates it once.

    Parameters:
    - config: The run configuration dictionary. Reads "full_title", "out_dir",
      "rows", "cols", "start_state", "p_success", "terminal_states", "run_id",
      "algorithm" and "experiment", plus the optional "episode_length_limit",
      "walls" and "randomize_starting_position". When "algorithm" is
      "dataset_normed" the dictionary is modified in place
      ("normalize_replay_buffer_freq" is set to True).
    - resume_training_path: Directory passed to AgentDQN as
      `resume_training_path`. When omitted, the module-level `path` variable
      is used, which is what this function did before the parameter existed.

    Returns:
    - The value returned by the agent's validate_epoch().
    """
    if resume_training_path is None:
        # Backward compatibility only: older call sites relied on the loop
        # variable `path` leaking into the function as a global.
        resume_training_path = path  # noqa: F821

    logger = setup_logger(
        config["full_title"],
        log_file=os.path.join(config["out_dir"], "experiment_log.log"),
    )

    rows = config["rows"]
    cols = config["cols"]
    start_state = config["start_state"]
    p_success = config["p_success"]
    terminal_states = config["terminal_states"]
    run_id = config["run_id"]
    episode_length_limit = config.get("episode_length_limit")
    walls = set(config["walls"]) if config.get("walls") else None
    randomize_starting_position = config.get("randomize_starting_position")

    if config["algorithm"] == "dataset_normed":
        config["normalize_replay_buffer_freq"] = True

    ### Setup environments ###
    train_env = make_env_custom(
        rows,
        cols,
        start_state,
        p_success,
        terminal_states,
        run_id,
        walls=walls,
        episode_length_limit=episode_length_limit,
        randomize_starting_position=randomize_starting_position,
    )
    # The validation environment must share the training layout; previously
    # `walls` was omitted here, so validation ran on a wall-free grid.
    validation_env = make_env_custom(
        rows,
        cols,
        start_state,
        p_success,
        terminal_states,
        run_id,
        walls=walls,
        episode_length_limit=episode_length_limit,
        randomize_starting_position=randomize_starting_position,
    )

    ### Setup output and loading paths ###

    experiment_agent = AgentDQN(
        train_env=train_env,
        validation_env=validation_env,
        experiment_output_folder=config["out_dir"],
        experiment_name=config["experiment"],
        resume_training_path=resume_training_path,
        save_checkpoints=False,
        logger=logger,
        config=config,
    )

    ep_validation_stats = experiment_agent.validate_epoch()

    return ep_validation_stats


# Example usage
root_dir = (
    r"D:\Work\repos\phd-research\experiments\dqn\results\2024Jun30-122741_configs"
)
matching_paths = find_matching_paths(root_dir)
config_data = read_yaml_files(matching_paths)

all_data = []
for path, config in config_data.items():
    print(f"Path: {path}")
    print(f"Config: {config}\n")

    # Pass the run directory explicitly instead of relying on the global `path`.
    ep_validation_stats = evaluation_experiment(config, resume_training_path=path)
    ep_lengths = flatten_stats(ep_validation_stats)

    ep_lengths["rb_type"] = config["replay_buffer"]["type"]
    ep_lengths["run_id"] = config["run_id"]

    df = pd.DataFrame([ep_lengths])
    all_data.append(df)

# Concatenate all data into a single DataFrame
if all_data:
    combined_data = pd.concat(all_data, ignore_index=True)
    print(combined_data)
else:
    print("No data collected.")

Path: D:\Work\repos\phd-research\experiments\dqn\results\2024Jun30-122741_configs\0000_replay_buffer.type_ReplayBuffer__algorithm_default\0
Config: {'agent_params': {'agent': 'AgentDQN', 'args': {'batch_size': 32, 'epsilon': {'decay': 80000, 'end': 0.01, 'start': 1.0}, 'gamma': 0.9, 'hidden_size': 16, 'loss_fcn': 'mse_loss', 'replay_start_size': 1000, 'target_model_update_freq': 50, 'train_step_cnt': 4000, 'training_freq': 4, 'validation_enabled': False, 'validation_epsilon': 0.001, 'validation_step_cnt': 500}}, 'algorithm': 'default', 'cfg_id': 0, 'cols': 11, 'episode_length_limit': 100, 'experiment': 'experiment_distributions', 'experiment_arguments': {'algorithm': 'default', 'replay_buffer': {'type': 'ReplayBuffer'}}, 'full_title': '2024Jun30-122741_configs_replay_buffer.type=ReplayBuffer; algorithm=default', 'neural_fit_mode': 'max', 'num_steps': 40000, 'optim': {'args': {'eps': 0.0003125, 'lr': 0.001}, 'name': 'Adam'}, 'out_dir': '.\\results\\2024Jun30-122741_configs\\0000_replay_

  logger.warn(
  logger.warn(
  logger.warn(


2024-06-30 18:05:13,366 - 2024Jun30-122741_configs_replay_buffer.type=ReplayBuffer; algorithm=default - INFO - Loaded previous training status from the following files: {'replay_buffer_file': 'D:\\Work\\repos\\phd-research\\experiments\\dqn\\results\\2024Jun30-122741_configs\\0000_replay_buffer.type_ReplayBuffer__algorithm_default\\0\\experiment_distributions_replay_buffer', 'train_stats_file': 'D:\\Work\\repos\\phd-research\\experiments\\dqn\\results\\2024Jun30-122741_configs\\0000_replay_buffer.type_ReplayBuffer__algorithm_default\\0\\experiment_distributions_train_stats', 'checkpoint_model_file': 'D:\\Work\\repos\\phd-research\\experiments\\dqn\\results\\2024Jun30-122741_configs\\0000_replay_buffer.type_ReplayBuffer__algorithm_default\\0\\model_checkpoints\\mck_60'}
2024-06-30 18:05:13,366 - 2024Jun30-122741_configs_replay_buffer.type=ReplayBuffer; algorithm=default - INFO - Loaded previous training status from the following files: {'replay_buffer_file': 'D:\\Work\\repos\\phd-resear

In [37]:
# Show the evaluation table built by the loop above: one row per evaluated run.
combined_data

Unnamed: 0,episode_frames_min,episode_frames_max,episode_frames_mean,episode_frames_median,episode_frames_std,rb_type
0,9,17,13.289474,13.0,1.985578,ReplayBuffer
1,11,100,25.238095,13.0,30.539443,ReplayBuffer
2,9,18,13.810811,14.0,2.545137,ReplayBuffer
3,9,17,12.725,12.5,2.190748,ReplayBuffer
4,9,19,14.852941,15.5,2.981634,ReplayBuffer
5,10,100,66.75,100.0,42.929448,ReplayBuffer
6,9,18,12.625,12.0,2.243741,ReplayBuffer
7,10,17,13.210526,13.0,2.079447,ReplayBuffer
8,8,16,11.953488,12.0,1.84206,UniqueReplayBuffer
9,6,16,11.043478,11.0,2.873844,UniqueReplayBuffer


In [42]:
# List the column names available for the aggregation below.
combined_data.columns

Index(['episode_frames_min', 'episode_frames_max', 'episode_frames_mean',
       'episode_frames_median', 'episode_frames_std', 'rb_type'],
      dtype='object')

In [43]:
# Average each episode-length statistic over all runs that share a replay-buffer type.
groupby_column = "rb_type"
mean_columns = [
    "episode_frames_min",
    "episode_frames_max",
    "episode_frames_mean",
    "episode_frames_median",
    "episode_frames_std",
]

grouped_df = (
    combined_data
    .groupby(groupby_column)[mean_columns]
    .mean()
)
grouped_df

Unnamed: 0_level_0,episode_frames_min,episode_frames_max,episode_frames_mean,episode_frames_median,episode_frames_std
rb_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ReplayBuffer,9.5,38.25,21.562731,24.125,10.936897
UniqueReplayBuffer,17.875,36.0,25.673269,21.4375,7.231305


In [39]:
# NOTE(review): `config` is the loop variable left over from the evaluation
# loop, so this shows the replay-buffer settings of the last run iterated only.
config["replay_buffer"]

{'max_size': 10000, 'n_step': 0, 'type': 'UniqueReplayBuffer'}