In [3]:
import os
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

In [4]:
def load_history_from_folder(folder_path):
    """Loads all .pkl metric files from a given folder into a dictionary."""
    history = {}
    metrics_dir = os.path.join(folder_path, 'metrics')

    for file_name in os.listdir(metrics_dir):
        if file_name.endswith('.pkl'):
            metric_name = file_name.replace('.pkl', '')
            file_path = os.path.join(metrics_dir, file_name)
            with open(file_path, 'rb') as f:
                history[metric_name] = pickle.load(f)
    return history

In [5]:
def get_experiment_order(folder_name):
    """Helper function to sort experiment folders chronologically."""
    if folder_name == 'initial':
        return 0
    try:
        # Extracts the number from folders like 'adam_from_epoch51'
        return int(folder_name.split('epoch')[-1])
    except (ValueError, IndexError):
        return 999 # Put unrecognized folders at the end

In [6]:
def combine_histories(base_dir='runs'):
    """Finds all experiment folders, loads their histories, and combines them."""
    experiment_folders = [d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))]
    
    # Sort folders chronologically based on their name
    experiment_folders.sort(key=get_experiment_order)
    
    print(f"Found and ordered experiments: {experiment_folders}")
    
    full_history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}
    phase_boundaries = []
    
    for folder in experiment_folders:
        folder_path = os.path.join(base_dir, folder)
        history = load_history_from_folder(folder_path)
        
        if history:
            # Mark the end of the previous phase / start of the new one
            if full_history['train_loss']: # Don't add a boundary before the first phase
                phase_boundaries.append(len(full_history['train_loss']))
            
            # Append new data
            for key in full_history.keys():
                if key in history:
                    full_history[key].extend(history[key])

    return full_history, phase_boundaries, experiment_folders

In [7]:
def plot_learning_curves(history, boundaries, save_path='learning_curves.png'):
    """Plots and saves the training and validation learning curves."""
    sns.set_style("whitegrid")
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)
    
    epochs = range(1, len(history['train_loss']) + 1)
    
    # Plotting Loss
    ax1.plot(epochs, history['train_loss'], 'b-', label='Training Loss')
    ax1.plot(epochs, history['val_loss'], 'r-', label='Validation Loss')
    ax1.set_ylabel('Loss', fontsize=14)
    ax1.set_title('Training and Validation Loss Over Time', fontsize=16)
    ax1.legend(fontsize=12)
    
    # Plotting Accuracy
    ax2.plot(epochs, history['train_acc'], 'b-', label='Training Accuracy')
    ax2.plot(epochs, history['val_acc'], 'r-', label='Validation Accuracy')
    ax2.set_xlabel('Epochs', fontsize=14)
    ax2.set_ylabel('Accuracy', fontsize=14)
    ax2.set_title('Training and Validation Accuracy Over Time', fontsize=16)
    ax2.legend(fontsize=12)
    
    # Add vertical lines and text for phase transitions
    for boundary in boundaries:
        ax1.axvline(x=boundary, color='gray', linestyle='--', linewidth=2)
        ax2.axvline(x=boundary, color='gray', linestyle='--', linewidth=2)
        ax2.text(boundary + 1, 0.5, f'Phase Change', rotation=90, verticalalignment='center', color='gray')

    plt.tight_layout()
    plt.savefig(save_path, dpi=300)
    print(f"Learning curves saved to {save_path}")
    plt.show()

In [12]:
def plot_final_accuracies(base_dir='runs', save_path='final_accuracies.png'):
    """Creates a bar plot comparing the best validation accuracy from each run."""
    experiment_folders = [d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d)) and not d.startswith('.')]
    experiment_folders.sort(key=get_experiment_order)
    
    results = []
    for folder in experiment_folders:
        checkpoint_path = os.path.join(base_dir, folder, 'checkpoint.pth')
        # Handle the initial run's different checkpoint name
        if folder == 'initial' and not os.path.exists(checkpoint_path):
             checkpoint_path = os.path.join(base_dir, folder, 'checkpoint_epoch_50.pth')

        if os.path.exists(checkpoint_path):
            checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
            best_acc = checkpoint.get('best_val_accuracy', 0)
            results.append({'Experiment': folder, 'Best Validation Accuracy': best_acc * 100})
            
    if not results:
        print("Could not find any checkpoint files to generate final accuracy plot.")
        return

    df = pd.DataFrame(results)
    
    plt.figure(figsize=(12, 7))
    sns.set_style("whitegrid")
    bar_plot = sns.barplot(x='Best Validation Accuracy', y='Experiment', data=df, palette='viridis', orient='h')
    
    plt.title('Best Validation Accuracy per Experiment Phase', fontsize=16)
    plt.xlabel('Accuracy (%)', fontsize=14)
    plt.ylabel('Experiment Phase', fontsize=14)
    plt.xlim(80, 95) # Focus on the relevant accuracy range
    
    # Add labels to the bars
    for index, value in enumerate(df['Best Validation Accuracy']):
        bar_plot.text(value, index, f' {value:.2f}%', color='black', ha="left", va='center')

    plt.tight_layout()
    plt.savefig(save_path, dpi=300)
    print(f"✅ Final accuracies bar chart saved to {save_path}")
    plt.show()


In [13]:

BASE_RUN_DIR = 'runs'
full_history, boundaries, folder_names = combine_histories(BASE_RUN_DIR)

if full_history['train_loss']:
    # 2. Plot the continuous learning curves
    plot_learning_curves(full_history, boundaries)
        
    # 3. Plot a bar chart comparing the best performance of each phase
    plot_final_accuracies(BASE_RUN_DIR)
else:
    print("No data found to plot.")

Found and ordered experiments: ['initial', 'adam_from_epoch51', 'adam_from_epoch71', 'adam_from_epoch101', 'adam_from_epoch121', 'adam_from_epoch141', '.ipynb_checkpoints']


FileNotFoundError: [Errno 2] No such file or directory: 'runs/.ipynb_checkpoints/metrics'