In [17]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


In [18]:
# Directory of the training run to plot. The plotting helpers in this notebook
# read `episodes.csv` and `updates.csv` from this directory.
# NOTE(review): this is an absolute, machine-specific path; point it at your own
# run directory before executing the notebook on another machine.
path = r"C:\Users\Logan\Documents\School\Wales\MSc\continual-rl-lnn\src\continual\minigrid\HPO\lstm_hpo\models\hpo_trial_1"

In [19]:
# Ordered task curriculum, keyed by environment id. Only the keys (and their
# order) are used by the plotting calls in this notebook; the integer values are
# not read here -- presumably a per-task setting from the training config,
# TODO confirm.
sequence = {
    'MiniGrid-Empty-5x5-v0': 5,
    'MiniGrid-DoorKey-5x5-v0': 8,
    # Tasks currently excluded from the plots:
    # 'MiniGrid-Unlock-v0': 18,
    # 'MiniGrid-KeyCorridorS3R1-v0': 12,
    # 'MiniGrid-LavaGapS5-v0': 15
}

# Line colours for the task-boundary markers drawn by `plot_reward` and
# `plot_training_loss` (one colour per task transition, in order).
colours = [
    'red',
    'green',
    'blue',
    'orange',
    'purple',
]

In [20]:
def extract_task_names(sequence):
    """Return the short task name embedded in each environment id.

    Every id is split on '-' and its second field is kept, so
    'MiniGrid-DoorKey-5x5-v0' becomes 'DoorKey'.

    Args:
        sequence: Iterable of environment id strings (iterating a dict
            yields its keys).

    Returns:
        list[str]: One short name per id, in iteration order.

    Raises:
        IndexError: If an id contains no '-' separator.
    """
    short_names = []
    for env_id in sequence:
        fields = env_id.split('-')
        short_names.append(fields[1])
    return short_names
    

In [21]:
def plot_reward(path, sequence, save_path='./loss'):
    """Plot episodic return against global step and save the figure.

    Reads ``{path}/episodes.csv`` (columns used: ``global_step``,
    ``episodic_return``, ``task_index``), draws the return curve, and marks
    the first logged step of every task after the first with a dashed
    vertical line coloured from the module-level ``colours`` list.

    NOTE(review): a later cell defines another ``plot_reward``; when the
    notebook is run top to bottom that definition replaces this one.
    NOTE(review): the default ``save_path`` is './loss' even though this is a
    reward plot; it is kept unchanged so existing callers behave as before.

    Args:
        path: Directory containing ``episodes.csv``.
        sequence: Ordered environment ids such as 'MiniGrid-Empty-5x5-v0';
            position ``i`` is matched against ``task_index == i`` in the CSV.
        save_path: Destination handed to ``Figure.savefig``.

    Returns:
        None. The figure is written to ``save_path`` and then closed.
    """
    task_names = extract_task_names(sequence)
    df = pd.read_csv(f'{path}/episodes.csv')

    # First logged global_step of each task, in file order. A task that has no
    # rows (e.g. a run that stopped early) is simply absent from this Series
    # instead of raising IndexError.
    first_step_by_task = (
        df.drop_duplicates(subset='task_index')
          .set_index('task_index')['global_step']
    )

    fig, ax = plt.subplots()
    try:
        ax.plot(df['global_step'], df['episodic_return'])
        ax.set_xlabel('Total Timesteps')
        ax.set_ylabel('Reward')

        # One dashed marker per task transition; the first task has no
        # predecessor, so the loop starts at 1.
        has_boundary = False
        for indx in range(1, len(task_names)):
            if indx not in first_step_by_task.index:
                continue
            ax.axvline(
                x=first_step_by_task.loc[indx],
                # Wrap around so more transitions than colours cannot raise.
                color=colours[(indx - 1) % len(colours)],
                linestyle='dashed',
                alpha=0.4,
                label=f'{task_names[indx-1]}→{task_names[indx]}',
            )
            has_boundary = True

        # A legend with no labelled artists only produces a warning.
        if has_boundary:
            ax.legend()
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Always release the figure, even if saving fails.
        plt.close(fig)

# plot_reward(path, list(sequence.keys()), './rewards.svg')

In [25]:
def plot_reward(path, sequence, save_path='./rewards.svg'):
    """Plot episodic return against global step with presentation styling.

    Reads ``{path}/episodes.csv`` (columns used: ``global_step``,
    ``episodic_return``, ``task_index``), draws the return curve on a 12x8
    figure using the 'fivethirtyeight' style, and marks the first logged step
    of every task after the first with a dashed vertical line. The y-axis is
    fixed to the range 0..1 and axis margins are set to 0.

    The style is applied only while this figure is built and saved, so other
    plots in the same kernel keep their own styling.

    Args:
        path: Directory containing ``episodes.csv``.
        sequence: Ordered environment ids such as 'MiniGrid-Empty-5x5-v0';
            position ``i`` is matched against ``task_index == i`` in the CSV.
        save_path: Destination handed to ``Figure.savefig``.

    Returns:
        None. The figure is written to ``save_path`` and then closed.
    """
    task_names = extract_task_names(sequence)
    df = pd.read_csv(f'{path}/episodes.csv')

    # palette[0] is the curve; later entries colour the task-boundary markers.
    palette = ['#1f77b4', '#ff7f0e', "#25a325", "#d61e1e", '#9467bd']

    # First logged global_step of each task, in file order. A task that has no
    # rows is absent from this Series instead of raising IndexError.
    first_step_by_task = (
        df.drop_duplicates(subset='task_index')
          .set_index('task_index')['global_step']
    )

    # plt.style.use() would change rcParams for every later plot in the
    # kernel; the context manager restores them on exit.
    with plt.style.context('fivethirtyeight'):
        fig, ax = plt.subplots(figsize=(12, 8))
        try:
            ax.plot(df['global_step'], df['episodic_return'],
                    linewidth=2.5, alpha=0.8, color=palette[0], zorder=3)

            # One dashed marker per task transition; the first task has no
            # predecessor, so the loop starts at 1.
            has_boundary = False
            for indx in range(1, len(task_names)):
                if indx not in first_step_by_task.index:
                    continue
                ax.axvline(
                    x=first_step_by_task.loc[indx],
                    # Wrap around so more tasks than colours cannot raise.
                    color=palette[indx % len(palette)],
                    linestyle='dashed',
                    linewidth=2,
                    alpha=0.7,
                    zorder=2,
                    label=f'{task_names[indx-1]}→{task_names[indx]}',
                )
                has_boundary = True

            ax.set_xlabel('Total Timesteps', fontsize=14, fontweight='bold')
            ax.set_ylabel('Episodic Return', fontsize=14, fontweight='bold')
            ax.set_title('Training Reward - Best HPO Trial', fontsize=16, fontweight='bold', pad=20)

            ax.grid(True, alpha=0.75, linestyle='dashed', linewidth=0.5)
            ax.set_facecolor('#fafafa')

            # A legend with no labelled artists only produces a warning.
            if has_boundary:
                legend = ax.legend(loc='lower right', frameon=True, fancybox=True, shadow=True, fontsize=12)
                legend.get_frame().set_facecolor('white')
                legend.get_frame().set_alpha(0.9)

            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            ax.spines['left'].set_linewidth(1.5)
            ax.spines['bottom'].set_linewidth(1.5)

            # No padding around the data; y-axis pinned to 0..1.
            ax.margins(0)
            ax.set_ylim(0, 1)
            ax.tick_params(axis='both', which='major', labelsize=12)

            fig.tight_layout()
            fig.savefig(save_path, dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
        finally:
            # Always release the figure, even if saving fails.
            plt.close(fig)

# Render the reward curve for the run in `path`; iterating the dict yields the
# environment ids in curriculum order.
plot_reward(path, list(sequence), save_path='./rewards.svg')

In [23]:
def plot_training_loss(path, sequence, save_path='./loss'):
    """Plot policy loss against global step and save the figure.

    Reads ``{path}/updates.csv`` (columns used: ``global_step``,
    ``policy_loss``, ``task_index``), draws the loss curve, and marks the
    first logged step of every task after the first with a dashed vertical
    line coloured from the module-level ``colours`` list.

    Args:
        path: Directory containing ``updates.csv``.
        sequence: Ordered environment ids such as 'MiniGrid-Empty-5x5-v0';
            position ``i`` is matched against ``task_index == i`` in the CSV.
        save_path: Destination handed to ``Figure.savefig``.

    Returns:
        None. The figure is written to ``save_path`` and then closed.
    """
    task_names = extract_task_names(sequence)
    df = pd.read_csv(f'{path}/updates.csv')

    # First logged global_step of each task, in file order. A task that has no
    # rows (e.g. a run that stopped early) is simply absent from this Series
    # instead of raising IndexError.
    first_step_by_task = (
        df.drop_duplicates(subset='task_index')
          .set_index('task_index')['global_step']
    )

    fig, ax = plt.subplots()
    try:
        ax.plot(df['global_step'], df['policy_loss'])
        ax.set_xlabel('Total Timesteps')
        ax.set_ylabel('Policy Loss')

        # One dashed marker per task transition; the first task has no
        # predecessor, so the loop starts at 1.
        has_boundary = False
        for indx in range(1, len(task_names)):
            if indx not in first_step_by_task.index:
                continue
            ax.axvline(
                x=first_step_by_task.loc[indx],
                # Wrap around so more transitions than colours cannot raise.
                color=colours[(indx - 1) % len(colours)],
                linestyle='dashed',
                alpha=0.4,
                label=f'{task_names[indx-1]}→{task_names[indx]}',
            )
            has_boundary = True

        # A legend with no labelled artists only produces a warning.
        if has_boundary:
            ax.legend()
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Always release the figure, even if saving fails.
        plt.close(fig)

# plot_training_loss(path, list(sequence.keys()), './loss.svg')

In [24]:
def plot_loss_curve(path, sequence, save_path='./rewards.svg'):
    """Plot policy loss against global step with presentation styling.

    Reads ``{path}/updates.csv`` (columns used: ``global_step``,
    ``policy_loss``, ``task_index``), draws the loss curve on a 12x8 figure
    using the 'fivethirtyeight' style, and marks the first logged step of
    every task after the first with a dashed vertical line. Axis limits are
    left to matplotlib's autoscaling.

    The style is applied only while this figure is built and saved, so other
    plots in the same kernel keep their own styling.

    NOTE(review): the default ``save_path`` is './rewards.svg' even though
    this is a loss plot; it is kept unchanged so existing callers behave as
    before. Pass ``save_path`` explicitly to avoid overwriting a reward plot.

    Args:
        path: Directory containing ``updates.csv``.
        sequence: Ordered environment ids such as 'MiniGrid-Empty-5x5-v0';
            position ``i`` is matched against ``task_index == i`` in the CSV.
        save_path: Destination handed to ``Figure.savefig``.

    Returns:
        None. The figure is written to ``save_path`` and then closed.
    """
    task_names = extract_task_names(sequence)
    df = pd.read_csv(f'{path}/updates.csv')

    # palette[0] is the curve; later entries colour the task-boundary markers.
    palette = ['#1f77b4', '#ff7f0e', "#25a325", "#d61e1e", '#9467bd']

    # First logged global_step of each task, in file order. A task that has no
    # rows is absent from this Series instead of raising IndexError.
    first_step_by_task = (
        df.drop_duplicates(subset='task_index')
          .set_index('task_index')['global_step']
    )

    # plt.style.use() would change rcParams for every later plot in the
    # kernel; the context manager restores them on exit.
    with plt.style.context('fivethirtyeight'):
        fig, ax = plt.subplots(figsize=(12, 8))
        try:
            ax.plot(df['global_step'], df['policy_loss'],
                    linewidth=2.5, alpha=0.8, color=palette[0], zorder=3)

            # One dashed marker per task transition; the first task has no
            # predecessor, so the loop starts at 1.
            has_boundary = False
            for indx in range(1, len(task_names)):
                if indx not in first_step_by_task.index:
                    continue
                ax.axvline(
                    x=first_step_by_task.loc[indx],
                    # Wrap around so more tasks than colours cannot raise.
                    color=palette[indx % len(palette)],
                    linestyle='dashed',
                    linewidth=2,
                    alpha=0.7,
                    zorder=2,
                    label=f'{task_names[indx-1]}→{task_names[indx]}',
                )
                has_boundary = True

            ax.set_xlabel('Total Timesteps', fontsize=14, fontweight='bold')
            ax.set_ylabel('Policy Loss', fontsize=14, fontweight='bold')
            ax.set_title('Training Loss - Best HPO Trial', fontsize=16, fontweight='bold', pad=20)

            ax.grid(True, alpha=0.75, linestyle='dashed', linewidth=0.5)
            ax.set_facecolor('#fafafa')

            # A legend with no labelled artists only produces a warning.
            if has_boundary:
                legend = ax.legend(loc='lower right', frameon=True, fancybox=True, shadow=True, fontsize=12)
                legend.get_frame().set_facecolor('white')
                legend.get_frame().set_alpha(0.9)

            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            ax.spines['left'].set_linewidth(1.5)
            ax.spines['bottom'].set_linewidth(1.5)

            ax.tick_params(axis='both', which='major', labelsize=12)

            fig.tight_layout()
            fig.savefig(save_path, dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
        finally:
            # Always release the figure, even if saving fails.
            plt.close(fig)

# Render the policy-loss curve for the run in `path`; iterating the dict yields
# the environment ids in curriculum order.
plot_loss_curve(path, list(sequence), save_path='./loss.svg')