In [None]:
import pandas as pd

# --- Configuration ---
errors_csv_path = 'model/all_errors.csv'

# Reset the filtered frame on every run. Without this, re-running the cell after a
# failed load would leave a stale `df_exhaustive` from an earlier execution in the
# kernel, and the plotting cell's availability check would silently plot old data.
df_exhaustive = pd.DataFrame()

# --- Load Data ---
# Load the full dataset of all model errors; fall back to an empty frame so the
# rest of the cell can be skipped cleanly when the file is missing.
try:
    df_all = pd.read_csv(errors_csv_path)
    print(f"Successfully loaded {errors_csv_path}. Shape: {df_all.shape}")
except FileNotFoundError:
    print(f"Error: File not found at {errors_csv_path}")
    print("Please ensure the path is correct relative to your notebook's location.")
    df_all = pd.DataFrame()

# --- Filter for the Exhaustive Search Dataset ---
if not df_all.empty:
    # Selection criteria identifying the exhaustive hyperparameter search runs.
    wf_id_filter = 151
    method_filter = 'autoregressive'
    horizon_filter = 8

    df_exhaustive = df_all[
        (df_all['wf_id'] == wf_id_filter) &
        (df_all['prediction_method'] == method_filter) &
        (df_all['forecast_horizon'] == horizon_filter)
    ].copy() # Use .copy() to avoid SettingWithCopyWarning

    # Drop columns that carry no values at all for this subset.
    df_exhaustive = df_exhaustive.dropna(axis=1, how='all')

    print(f"\nFiltered for exhaustive search (wf_id={wf_id_filter}, method='{method_filter}', horizon={horizon_filter}).")
    print(f"Shape of the filtered data: {df_exhaustive.shape}")

    if df_exhaustive.empty:
        # An empty result is almost certainly a wrong filter value; say so explicitly
        # instead of displaying an empty table.
        print("Warning: no rows matched the filter; check the filter values above.")
    else:
        # Display the first few rows to verify
        print("\nFirst 5 rows of the filtered data:")
        display(df_exhaustive.head())

In [None]:
import ast

def format_architecture_label(cell_str):
    """
    Converts a string representation of a list of cells (e.g., '[128, 64, 32]')
    into a simple neuron configuration string (e.g., '128-64-32').

    A bare number (e.g., '128') is treated as a single-layer configuration and
    returned as its plain string form. Anything that cannot be parsed as a
    Python literal, or that parses to something other than a list, tuple or
    number, is returned unchanged.

    Args:
        cell_str (str): The string from the 'number_of_cells' column.

    Returns:
        str: A formatted string showing only the neuron counts, or the
        original value when it is not a recognised configuration.
    """
    try:
        # Safely evaluate the string to a Python literal (no code execution).
        cells = ast.literal_eval(cell_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a valid literal (this includes NaN / non-string cells): keep it as is.
        return cell_str

    if isinstance(cells, (list, tuple)):
        # Join the cell numbers with hyphens
        return "-".join(map(str, cells))

    # bool is a subclass of int; exclude it so 'True' is not rendered as a layer size.
    if isinstance(cells, (int, float)) and not isinstance(cells, bool):
        # A scalar used to reach "-".join(map(str, <int>)) and raise TypeError.
        return str(cells)

    # Any other literal (str, dict, set, None, ...) is not a layer configuration.
    return cell_str

# Keep any further plotting helpers in this cell so they are defined before the plotting cell runs.
print("Helper functions defined.")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import ast
import os
from matplotlib.lines import Line2D
import matplotlib.patches as mpatches

# This cell assumes 'df_exhaustive' and 'format_architecture_label' are available.


def _mean_based_palette(data, group_col, metric_col):
    """Map every value of `group_col` to a colour that encodes the group's mean metric.

    Colours run from green (better) to red (worse). R^2 columns are treated as
    higher-is-better; every other column is treated as an error metric where
    lower is better. The check is an explicit prefix test because a substring
    test such as `'n' in metric_col` misfires on any column name that merely
    contains the letter "n".
    """
    grouped_means = data.groupby(group_col)[metric_col].mean()
    norm = plt.Normalize(vmin=grouped_means.min(), vmax=grouped_means.max())
    cmap_name = 'RdYlGn' if metric_col.startswith('R^2') else 'RdYlGn_r'
    cmap = plt.get_cmap(cmap_name)
    return {val: cmap(norm(mean_val)) for val, mean_val in grouped_means.items()}


def _add_enhanced_legend(fig):
    """Add the figure-level legend explaining the mean marker and the colour scale."""
    mean_marker = Line2D([0], [0], marker='o', color='w', label='Mean Value',
                         markerfacecolor='white', markeredgecolor='black', markersize=10)
    color_info = mpatches.Patch(color='none', label='Color: Green (Better) to Red (Worse)')
    fig.legend(handles=[mean_marker, color_info], loc='lower center', bbox_to_anchor=(0.5, 0.88),
               ncol=2, frameon=False, fontsize=12)


def _plot_metric_boxplots(data, group_col, metrics, mean_props, output_path, *,
                          title, xlabel, title_kwargs=None, label_fontsize=14,
                          order=None, rotate_xticks=False, layout_top=0.94):
    """Draw one box plot row per metric, grouped by `group_col`, then save and show it.

    Args:
        data: DataFrame holding `group_col` and every metric column.
        group_col: Column whose distinct values form the x-axis categories.
        metrics: Mapping of metric column name -> y-axis label; one subplot row each.
        mean_props: Marker properties forwarded to seaborn for the mean marker.
        output_path: File path the figure is written to.
        title: Figure title.
        xlabel: Label of the shared x-axis (set on the bottom row only).
        title_kwargs: Extra keyword arguments for `fig.suptitle`.
        label_fontsize: Font size of the axis labels.
        order: Optional explicit category order for the x-axis.
        rotate_xticks: Rotate the bottom tick labels by 45 degrees when True.
        layout_top: Upper bound of the `tight_layout` rectangle, leaving room
            for the title and legend.
    """
    n_rows = len(metrics)
    # The row count follows the metric map; squeeze=False keeps `axes` indexable
    # even when only a single metric is plotted.
    fig, axes = plt.subplots(n_rows, 1, figsize=(12, 4 * n_rows), sharex=True, dpi=200,
                             squeeze=False)
    axes = axes[:, 0]
    fig.patch.set_facecolor('white')
    fig.suptitle(title, **(title_kwargs or {}))

    for ax, (metric_col, metric_name) in zip(axes, metrics.items()):
        palette = _mean_based_palette(data, group_col, metric_col)
        sns.boxplot(data=data, x=group_col, y=metric_col, order=order, ax=ax, showmeans=True,
                    meanprops=mean_props, showfliers=False, palette=palette,
                    hue=group_col, legend=False)
        ax.set_ylabel(metric_name, fontsize=label_fontsize)
        ax.set_xlabel('')

    _add_enhanced_legend(fig)

    axes[-1].set_xlabel(xlabel, fontsize=label_fontsize)
    if rotate_xticks:
        plt.setp(axes[-1].get_xticklabels(), rotation=45, ha='right')

    fig.tight_layout(rect=[0, 0, 1, layout_top])
    fig.savefig(output_path, dpi=200)
    plt.show()
    # Close explicitly so figures created in the loop do not accumulate in memory.
    plt.close(fig)


def get_sort_key_raw(cell_str):
    """Sort key for raw 'number_of_cells' strings: by depth, then by total neurons.

    Values that cannot be parsed sort after all valid configurations instead of
    raising, matching the tolerant behaviour of the label formatter.
    """
    try:
        cells = ast.literal_eval(cell_str)
        # bool is an int subclass; exclude it so 'True' is not read as one layer.
        if isinstance(cells, (int, float)) and not isinstance(cells, bool):
            cells = [cells]  # a bare number is a single-layer configuration
        return (0, len(cells), sum(cells))
    except (ValueError, SyntaxError, TypeError):
        return (1, 0, 0)


if 'df_exhaustive' in locals() and not df_exhaustive.empty:

    # --- Data and Style Configuration ---
    # Alternative metric set (the *_t+1 columns), kept for reference. The number of
    # subplot rows follows the size of `metrics_map`, so it can be swapped in directly.
    # metrics_map = {
    #     'R^2_t+1': '$R^2$',
    #     'nRMSE_capacity_t+1': 'nRMSE',
    #     'nMAE_capacity_t+1': 'nMAE'
    # }
    metrics_map = {
        'R^2_overall': '$R^2$',
        'nMAE_overall': 'nMAE'
    }
    mean_props = {"marker": "o", "markerfacecolor": "white", "markeredgecolor": "black", "markersize": 8}

    # Create output directory
    output_dir = os.path.join('plots_for_thesis', 'hyperparameter_plots')
    os.makedirs(output_dir, exist_ok=True)

    print("Generating final hyperparameter sensitivity plots for wf_id=151...\n")

    # --- 1. Plots for Simple Hyperparameters ---
    simple_params = ['learning_rate', 'batch_size', 'dropout_rate', 'epochs', 'time_steps']
    param_display_names = ['Learning Rate', 'Batch Size', 'Dropout Rate', 'Number of Epochs', 'Time Steps (Lookback Period)']

    for param_col, param_name in zip(simple_params, param_display_names):
        # Columns that are entirely NaN may have been dropped when the data was
        # filtered; skip them rather than failing with a KeyError.
        if param_col not in df_exhaustive.columns:
            print(f"Skipping '{param_col}': column not found in df_exhaustive.")
            continue

        _plot_metric_boxplots(
            df_exhaustive, param_col, metrics_map, mean_props,
            os.path.join(output_dir, f'metrics_vs_wf151_{param_col}.png'),
            title=f'Model Performance vs. {param_name}',
            xlabel=param_name,
            title_kwargs={'fontsize': 20, 'y': 0.98},
            label_fontsize=14,
            layout_top=0.94,
        )

    # --- 2. Plot for Network Architecture ---
    if 'number_of_cells' in df_exhaustive.columns:
        print("\nGenerating combined plot for Network Architecture...\n")
        df_exhaustive['architecture_label'] = df_exhaustive['number_of_cells'].apply(format_architecture_label)

        # Order architectures by depth, then size. Missing values are left out, and
        # labels are de-duplicated in case two raw strings format to the same label.
        sorted_cell_strings = sorted(df_exhaustive['number_of_cells'].dropna().unique(), key=get_sort_key_raw)
        sorted_labels = list(dict.fromkeys(format_architecture_label(s) for s in sorted_cell_strings))

        _plot_metric_boxplots(
            df_exhaustive, 'architecture_label', metrics_map, mean_props,
            os.path.join(output_dir, 'metrics_vs_wf151_architecture.png'),
            title='Model Performance vs. Layer Configuration',
            xlabel='LSTM Neuron Configuration',
            title_kwargs={'fontsize': 22},
            label_fontsize=16,
            order=sorted_labels,
            rotate_xticks=True,
            layout_top=0.95,
        )
    else:
        print("Skipping architecture plot: column 'number_of_cells' not found in df_exhaustive.")

else:
    print("The 'df_exhaustive' DataFrame is not available. Please run Cell 1 first.")