In [105]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


In [106]:
# Palette of named matplotlib colours; currently only referenced by the
# commented-out reward/loss plotting cells further down the notebook.
colours = [
    'red',
    'green',
    'blue',
    'orange',
    'purple',
]

#### HPO Below

In [107]:
# Which Optuna study and which of its trials to analyse.
study_name = 'mlp_hpo'
best_trial = 1

# Environments used for the HPO runs, keyed by environment id.
# NOTE(review): the meaning of the integer values is not visible in this
# notebook - confirm against the training configuration.
hpo_sequence = {
    'MiniGrid-Empty-5x5-v0': 5,
    'MiniGrid-DoorKey-5x5-v0': 8,
    'MiniGrid-Unlock-v0': 18,
    # 'MiniGrid-KeyCorridorS3R1-v0': 12,
    # 'MiniGrid-LavaGapS5-v0': 15
}

# Directory holding one sub-folder per (trial, seed) of the study.
# NOTE(review): absolute, machine-specific path - the notebook will not run
# elsewhere without editing this line.
hpo_path = fr"C:\Users\Logan\Documents\School\Wales\MSc\continual-rl-lnn\src\continual\minigrid\HPO\{study_name}\models"

In [108]:
# Load the performance matrices of the selected HPO trial, one per seed.
seeds = [1001, 2002, 3003]  # HPO Seeds

# Paths are joined with pathlib rather than by embedding "\" in a normal
# f-string: sequences such as "\h" and "\p" are invalid escapes (a
# SyntaxWarning on Python 3.12+), and a literal backslash separator only
# works on Windows.
hpo_trial_dirs = [Path(hpo_path) / f'hpo_trial_{best_trial}_{seed}' for seed in seeds]
perf_mats = [np.load(trial_dir / 'performance_matrix.npy') for trial_dir in hpo_trial_dirs]
perf_std_mats = [np.load(trial_dir / 'performance_std_matrix.npy') for trial_dir in hpo_trial_dirs]

# Element-wise mean over the seed axis (axis 0 of the stacked list).
optuna_mean_perf_mat = np.mean(perf_mats, axis=0)
# Element-wise mean of the per-seed std matrices (not the std across seeds).
optuna_mean_perf_std_mat = np.mean(perf_std_mats, axis=0)

#### Experiments Below

In [174]:
# Environments used for the experiment runs, keyed by environment id.
# NOTE(review): the meaning of the integer values is not visible in this
# notebook - confirm against the training configuration.
# NOTE(review): four environments are listed here, while the matrix shown
# further down is 3x3 - verify this matches the runs being loaded.
exp_sequence = {
    'MiniGrid-Empty-5x5-v0': 5,
    'MiniGrid-DoorKey-5x5-v0': 8,
    'MiniGrid-Unlock-v0': 18,
    'MiniGrid-LavaGap-v0': 15,
}

In [175]:
# Experiment whose runs are analysed below.
experiment_name = 'clear_testing'

# Select exactly one model type; it is matched against run-folder names.
# model_type = 'CfC_A&C'
# model_type = 'CfC_Actor'
# model_type = 'CfC_Critic'
# model_type = 'LSTM'
model_type = 'MLP'

# Directory holding one sub-folder per run of the experiment.
# NOTE(review): absolute, machine-specific path - the notebook will not run
# elsewhere without editing this line.
exp_path = fr"C:\Users\Logan\Documents\School\Wales\MSc\continual-rl-lnn\src\continual\minigrid\experiments\{experiment_name}\models"

In [176]:
from pathlib import Path

def find_model_dirs(base_path, model_type):
    """Return the sub-directories of ``base_path`` whose name contains ``model_type``.

    Parameters
    ----------
    base_path : str or Path
        Directory to scan (non-recursively).
    model_type : str
        Substring that a directory name must contain to be selected. This is a
        plain substring test, so a short value also matches longer names that
        happen to include it.

    Returns
    -------
    list of Path
        Matching directories as full paths, sorted so that the result does not
        depend on the arbitrary order in which the filesystem lists entries.
        Empty when nothing matches.

    Raises
    ------
    OSError
        Propagated from ``Path.iterdir`` when ``base_path`` cannot be listed
        (e.g. ``FileNotFoundError`` if it does not exist).
    """
    base = Path(base_path)
    matches = (
        entry for entry in base.iterdir()
        if entry.is_dir() and model_type in entry.name
    )
    return sorted(matches)

# Collect every run directory of the selected model type for this experiment.
folders = find_model_dirs(base_path=exp_path, model_type=model_type)

In [177]:
# Fail early with a clear message instead of letting np.mean return NaN
# (with only a RuntimeWarning) when no run directory matched.
if not folders:
    raise FileNotFoundError(f"No '{model_type}' model directories found in {exp_path}")

# `folders` already holds full Path objects, so join onto them directly. This
# avoids rebuilding the path with "\" inside a normal f-string, where "\{" and
# "\p" are invalid escape sequences and the separator is Windows-only.
perf_mats = [np.load(run_dir / 'performance_matrix.npy') for run_dir in folders]
perf_std_mats = [np.load(run_dir / 'performance_std_matrix.npy') for run_dir in folders]

# Element-wise mean over the run axis (axis 0 of the stacked list).
mean_perf_mat = np.mean(perf_mats, axis=0)
# Element-wise mean of the per-run std matrices (not the std across runs).
mean_perf_std_mat = np.mean(perf_std_mats, axis=0)

In [178]:
# Inspect the matrix averaged over the matched run folders. When plotted by
# plot_perf_matrix, rows are labelled 'Train Task' and columns 'Evaluation Task'.
mean_perf_mat
# mean_perf_std_mat

array([[0.95500004, 0.        , 0.        ],
       [0.95500004, 0.96176797, 0.        ],
       [0.95500004, 0.67386397, 0.80937498]])

In [None]:
def _short_env_name(env_id):
    """Drop the leading namespace and trailing version from an environment id.

    'MiniGrid-Empty-5x5-v0' -> 'Empty-5x5', 'MiniGrid-Unlock-v0' -> 'Unlock'.
    Ids with fewer than three '-'-separated parts are returned unchanged.
    """
    middle = env_id.split('-')[1:-1]
    return '-'.join(middle) if middle else env_id


def plot_perf_matrix(mean_perf_mat, mean_perf_std_mat, sequence=None, save_path='./performance_matrix', std=False):
    """Draw a performance matrix as an annotated heatmap and save it as an SVG.

    Parameters
    ----------
    mean_perf_mat : 2-D array-like
        Values shown in the heatmap. Rows are labelled 'Train Task' and
        columns 'Evaluation Task'.
    mean_perf_std_mat : 2-D array-like
        Values printed after '±' in every cell. Only read when ``std`` is True,
        in which case it must have the same shape as ``mean_perf_mat``.
    sequence : iterable of str, optional
        Environment ids (iterating a task dict yields its keys). Each id is
        shortened with ``_short_env_name`` and used as the tick label on both
        axes. When None, seaborn's automatic tick labels are used.
    save_path : str
        Output path without extension. '_std' is appended when ``std`` is
        True, then '.svg'. Missing parent directories are created.
    std : bool
        When True, annotate each cell as 'mean\\n±std' instead of the mean only.

    Raises
    ------
    ValueError
        If the number of labels does not match both matrix dimensions, or if
        ``std`` is True and the two matrices differ in shape.
    """
    mean_perf_mat = np.asarray(mean_perf_mat)

    # Axis labels: keep the environment name plus any size suffix (e.g. '5x5').
    if sequence is None:
        tick_labels = 'auto'
    else:
        tick_labels = [_short_env_name(env_id) for env_id in sequence]
        expected_shape = (len(tick_labels), len(tick_labels))
        if mean_perf_mat.shape != expected_shape:
            raise ValueError(
                f"sequence has {len(tick_labels)} tasks but the performance matrix "
                f"has shape {mean_perf_mat.shape}"
            )

    # Cell annotations: either the formatted mean, or 'mean\n±std' strings.
    if std:
        mean_perf_std_mat = np.asarray(mean_perf_std_mat)
        if mean_perf_std_mat.shape != mean_perf_mat.shape:
            raise ValueError(
                f"std matrix shape {mean_perf_std_mat.shape} does not match "
                f"performance matrix shape {mean_perf_mat.shape}"
            )
        annot = np.empty(mean_perf_mat.shape, dtype=object)
        for idx in np.ndindex(mean_perf_mat.shape):
            annot[idx] = f"{mean_perf_mat[idx]:.2f}\n±{mean_perf_std_mat[idx]:.2f}"
        fmt = ''  # annotations are already strings
        save_path = f'{save_path}_std'
    else:
        annot, fmt = True, '.2f'

    out_path = Path(f"{save_path}.svg")
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Apply the style *before* the figure is created and only for this plot,
    # so it affects the whole figure and does not leak into later cells.
    with plt.style.context('fivethirtyeight'):
        fig, ax = plt.subplots(figsize=(6, 5))
        try:
            sns.heatmap(
                mean_perf_mat,
                annot=annot,
                fmt=fmt,
                cmap='GnBu',
                xticklabels=tick_labels,
                yticklabels=tick_labels,
                ax=ax,
            )
            ax.set_title('Performance Matrix')
            ax.set_xlabel('Evaluation Task')
            ax.set_ylabel('Train Task')
            fig.tight_layout()
            fig.savefig(out_path)
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close(fig)

# HPO heatmap, mean values only.
plot_perf_matrix(
    optuna_mean_perf_mat,
    optuna_mean_perf_std_mat,
    hpo_sequence,
    save_path=f'optuna_{study_name}/mean_performance',
    std=False,
)
# HPO heatmap, mean ± std annotations (written alongside with a '_std' suffix).
plot_perf_matrix(
    optuna_mean_perf_mat,
    optuna_mean_perf_std_mat,
    hpo_sequence,
    save_path=f'optuna_{study_name}/mean_performance',
    std=True,
)
# plot_perf_matrix(mean_perf_mat, mean_perf_std_mat, exp_sequence, save_path=f'{model_type}_{experiment_name}/mean_performance')
# plot_perf_matrix(mean_perf_mat, mean_perf_std_mat, exp_sequence, save_path=f'{model_type}_{experiment_name}/mean_performance', std=True)

In [180]:
# def extract_task_names(sequence):
#     return [tasks.split('-')[1] for tasks in sequence]
    

In [181]:
# def plot_reward(path, sequence, save_path='./loss'):

#     sequence = extract_task_names(sequence)
#     # Load the model's associated 'episodes.csv' file
#     df = pd.read_csv(f'{path}/episodes.csv')
    
#     # Extract the timestep
#     training_step = df['global_step'][0] # Extract the first logged global_step

#     # Extract the first global_step value at the start of every task -> Very messy, but good enough for now
#     task_boundaries_steps = [list(df[df['task_index'] == i]['global_step'])[0] for i in range(len(sequence))]

#     print(training_step)
#     print(task_boundaries_steps)

#     # Extract and plot x & y coordinates
#     x, y = df['global_step'], df['episodic_return']
#     plt.plot(x, y)
#     plt.xlabel('Total Timesteps')
#     plt.ylabel('Reward')
    
#     # Add vertical lines across the plot to differentiate task training
#     for indx, task in enumerate(sequence):
#         if indx == 0: continue
#         plt.axvline(x=task_boundaries_steps[indx], color=colours[indx-1], linestyle='dashed', alpha=0.4, label=f'{sequence[indx-1]}→{task}')
    
#     plt.legend()
#     plt.tight_layout()
#     plt.plot()
#     plt.savefig(save_path)
#     plt.close()

# # plot_reward(path, list(sequence.keys()), './rewards.svg')

In [182]:
# def plot_reward(path, sequence, save_path='./rewards.svg'):
#     import matplotlib.pyplot as plt
#     import seaborn as sns
#     import pandas as pd
#     from scipy.signal import savgol_filter
    
#     sequence = extract_task_names(sequence)
#     # Load the models associated 'episodes.csv' file
#     df = pd.read_csv(f'{path}/episodes.csv')
    
#     # Set up modern styling
#     plt.style.use('fivethirtyeight')
#     _, ax = plt.subplots(figsize=(12, 8))
    
#     # Modern color palette
#     colors = ['#1f77b4', '#ff7f0e', "#25a325", "#d61e1e", '#9467bd']
    
#     # Extract the timestep and boundaries
#     task_boundaries_steps = [list(df[df['task_index'] == i]['global_step'])[0] for i in range(len(sequence))]
    
#     # Extract and plot x & y coordinates with better styling
#     x, y = df['global_step'], df['episodic_return']
#     ax.plot(x, y, linewidth=2.5, alpha=0.8, color=colors[0], zorder=3)
    
#     # Add vertical lines with modern styling
#     for indx, task in enumerate(sequence):
#         if indx == 0: continue
#         ax.axvline(x=task_boundaries_steps[indx], color=colors[indx], linestyle='dashed', 
#                   linewidth=2, alpha=0.7, zorder=2, label=f'{sequence[indx-1]}→{task}')
    
#     # Improved styling
#     ax.set_xlabel('Total Timesteps', fontsize=14, fontweight='bold')
#     ax.set_ylabel('Episodic Return', fontsize=14, fontweight='bold')
#     ax.set_title('Training Reward - Best HPO Trial', fontsize=16, fontweight='bold', pad=20)
    
#     # Better grid and background
#     ax.grid(True, alpha=0.75, linestyle='dashed', linewidth=0.5)
#     ax.set_facecolor('#fafafa')
    
#     # Modern legend
#     legend = ax.legend(loc='lower right', frameon=True, fancybox=True, shadow=True, fontsize=12)
#     legend.get_frame().set_facecolor('white')
#     legend.get_frame().set_alpha(0.9)
    
#     # Clean spines
#     ax.spines['top'].set_visible(False)
#     ax.spines['right'].set_visible(False)
#     ax.spines['left'].set_linewidth(1.5)
#     ax.spines['bottom'].set_linewidth(1.5)
    
#     # Set proper margins
#     ax.margins(0)
#     ax.set_ylim(0, 1)
#     # Better tick labels
#     ax.tick_params(axis='both', which='major', labelsize=12)
    
#     plt.tight_layout()
    
#     plt.savefig(save_path, dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
#     plt.close()

# plot_reward(path, list(sequence.keys()), './rewards.svg')

In [183]:
# def plot_training_loss(path, sequence, save_path='./loss'):

#     sequence = extract_task_names(sequence)
#     # Load the models associated 'updates.csv' file
#     df = pd.read_csv(f'{path}/updates.csv')
    
#     # Extract the first global_step value at the start of every task -> Very messy, but good enough for now
#     task_boundaries_steps = [list(df[df['task_index'] == i]['global_step'])[0] for i in range(len(sequence))]

#     # Extract and plot x & y coordinates
#     x, y = df['global_step'], df['policy_loss']
#     plt.plot(x, y)
#     plt.xlabel('Total Timesteps')
#     plt.ylabel('Policy Loss')
    
#     # Add vertical lines across the plot to differentiate task training
#     for indx, task in enumerate(sequence):
#         if indx == 0: continue
#         plt.axvline(x=task_boundaries_steps[indx], color=colours[indx-1], linestyle='dashed', alpha=0.4, label=f'{sequence[indx-1]}→{task}')
    
#     plt.legend()
#     plt.tight_layout()
#     plt.plot()
#     plt.savefig(save_path)
#     plt.close()

# # plot_training_loss(path, list(sequence.keys()), './loss.svg')

In [184]:
# def plot_loss_curve(path, sequence, save_path='./rewards.svg'):
#     sequence = extract_task_names(sequence)
#     # Load the models associated 'updates.csv' file
#     df = pd.read_csv(f'{path}/updates.csv')
    
#     # Set up modern styling
#     plt.style.use('fivethirtyeight')
#     _, ax = plt.subplots(figsize=(12, 8))
    
#     # Modern color palette
#     colors = ['#1f77b4', '#ff7f0e', "#25a325", "#d61e1e", '#9467bd']
    
#     # Extract the timestep and boundaries
#     task_boundaries_steps = [list(df[df['task_index'] == i]['global_step'])[0] for i in range(len(sequence))]
    
#     # Extract and plot x & y coordinates with better styling
#     x, y = df['global_step'], df['policy_loss']
#     ax.plot(x, y, linewidth=2.5, alpha=0.8, color=colors[0], zorder=3)
    
#     # Add vertical lines with modern styling
#     for indx, task in enumerate(sequence):
#         if indx == 0: continue
#         ax.axvline(x=task_boundaries_steps[indx], color=colors[indx], linestyle='dashed', 
#                   linewidth=2, alpha=0.7, zorder=2, label=f'{sequence[indx-1]}→{task}')
    
#     # Improved styling
#     ax.set_xlabel('Total Timesteps', fontsize=14, fontweight='bold')
#     ax.set_ylabel('Policy Loss', fontsize=14, fontweight='bold')
#     ax.set_title('Training Loss - Best HPO Trial', fontsize=16, fontweight='bold', pad=20)
    
#     # Better grid and background
#     ax.grid(True, alpha=0.75, linestyle='dashed', linewidth=0.5)
#     ax.set_facecolor('#fafafa')
    
#     # Modern legend
#     legend = ax.legend(loc='lower right', frameon=True, fancybox=True, shadow=True, fontsize=12)
#     legend.get_frame().set_facecolor('white')
#     legend.get_frame().set_alpha(0.9)
    
#     # Clean spines
#     ax.spines['top'].set_visible(False)
#     ax.spines['right'].set_visible(False)
#     ax.spines['left'].set_linewidth(1.5)
#     ax.spines['bottom'].set_linewidth(1.5)
    
#     # Set proper margins
#     # ax.margins(0)
#     # ax.set_ylim(0, 1)
#     # Better tick labels
#     ax.tick_params(axis='both', which='major', labelsize=12)
    
#     plt.tight_layout()
    
#     plt.savefig(save_path, dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
#     plt.close()

# plot_loss_curve(path, list(sequence.keys()), './loss.svg')