In [None]:
import pandas as pd
from src.GLOBAL_VARS import NotTrustedPlants

# --- Configuration ---
# Input CSV, resolved relative to the notebook's working directory.
errors_csv_path = 'model/all_errors.csv'

# Define the specific hyperparameter combinations used in the targeted search.
# A row is kept only if, for every key below, its value in the column of the same
# name is one of the listed values.
# NOTE: 'number_of_cells' is compared as a string, so the spelling here (brackets and
# ", " separators) has to equal the text stored in the CSV exactly.
TARGETED_SEARCH_PARAMS = {
    'number_of_cells': ['[64, 32, 16]', '[128, 64, 32]', '[256, 128, 64]', '[512, 256, 128]'],
    'batch_size': [32, 64, 128],
    'epochs': [100],
    'learning_rate': [0.001, 0.01],
    'dropout_rate': [0.2],
    'forecast_horizon': [24],
    'time_steps': [1, 2, 4, 8]
}

# Wind farm ids left out of the analysis: a hand-picked list plus the imported
# NotTrustedPlants collection. list(...) keeps "+" a plain concatenation even if
# NotTrustedPlants is a tuple, set or array rather than a list.
MANUAL_EXCLUDE_WFS = [47, 49, 89, 154, 196] + list(NotTrustedPlants)

# --- Load Data ---
# Bind both frames up front so that, when loading fails, later cells see an empty
# frame instead of an undefined name or a stale result from a previous run.
df_all = pd.DataFrame()
df_targeted = pd.DataFrame()
try:
    df_all = pd.read_csv(errors_csv_path)
    print(f"Successfully loaded {errors_csv_path}. Shape: {df_all.shape}")
except FileNotFoundError:
    print(f"Error: File not found at {errors_csv_path}")
except pd.errors.EmptyDataError:
    # A zero-byte file has no header to parse; treat it like a missing file.
    print(f"Error: No data found in {errors_csv_path}")

# --- Filter for the Targeted Search Dataset ---
if not df_all.empty:
    df_targeted = df_all.copy()

    # Apply filters for the specific hyperparameter combinations
    for param, values in TARGETED_SEARCH_PARAMS.items():
        df_targeted = df_targeted[df_targeted[param].isin(values)]

    print(f"\nFiltered for targeted search parameters. Shape after filtering: {df_targeted.shape}")

    # Exclude manually specified wind farms
    initial_rows = len(df_targeted)
    df_targeted = df_targeted[~df_targeted['wf_id'].isin(MANUAL_EXCLUDE_WFS)]
    print(f"Removed {initial_rows - len(df_targeted)} rows for manually excluded wf_ids: {MANUAL_EXCLUDE_WFS}")

    # Drop columns that hold no value at all in the remaining rows.
    df_targeted = df_targeted.dropna(axis=1, how='all')

    print(f"\nFinal shape of the targeted analysis data: {df_targeted.shape}")

    # Display the first few rows and unique wf_ids to verify
    print("\nFirst 5 rows of the final filtered data:")
    display(df_targeted.head())

    print("\nUnique Wind Farms in this dataset:")
    print(sorted(df_targeted['wf_id'].unique()))

In [None]:
# Show the filtered targeted-search frame as this cell's output.
df_targeted

In [None]:
import ast

def format_architecture_label(cell_str):
    """
    Convert a layer specification such as '[128, 64, 32]' into a hyphen-joined
    neuron configuration label such as '128-64-32'.

    Args:
        cell_str: The value from the 'number_of_cells' column. Normally the string
            form of a list or tuple of neuron counts. An actual list/tuple, or the
            string form of a single integer (e.g. '128'), is accepted as well.

    Returns:
        str: The neuron counts joined with '-'. When the input cannot be read as
        a list/tuple of values (or a single integer), the input is returned
        unchanged.
    """
    if isinstance(cell_str, (list, tuple)):
        # Already a sequence: nothing to parse.
        cells = cell_str
    else:
        try:
            # Safely evaluate the string to a Python literal (no code execution).
            cells = ast.literal_eval(cell_str)
        except (ValueError, SyntaxError, TypeError):
            # Not a literal at all (free text, NaN, None, ...).
            return cell_str

    # A bare integer describes a single layer. bool is excluded because it is a
    # subclass of int but is not a neuron count.
    if isinstance(cells, int) and not isinstance(cells, bool):
        cells = [cells]

    # Any other literal (string, dict, set, float, ...) is not a layer list; joining
    # it would either raise or produce a misleading label.
    if not isinstance(cells, (list, tuple)):
        return cell_str

    # Join the cell numbers with hyphens
    return "-".join(map(str, cells))

# You can add other helper functions to this cell as we build more plots.
print("Helper functions defined.")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import ast
import os
from matplotlib.lines import Line2D
import matplotlib.patches as mpatches

# This cell assumes 'df_targeted' DataFrame from Cell 1 and the helper
# 'format_architecture_label' from the helper cell.

# Metric columns for which a smaller value means a better model. Every other metric
# is treated as higher-is-better when the box colours are chosen.
_LOWER_IS_BETTER_METRICS = {'nMAE_overall'}


def _palette_by_group_mean(data, group_col, metric_col):
    """Map each value of `group_col` to a colour ranked by that group's mean `metric_col`.

    The group means are scaled to [0, 1] between the smallest and largest mean and
    looked up in a red-yellow-green colormap. The colormap is reversed for metrics in
    `_LOWER_IS_BETTER_METRICS` so that green always marks the better end.
    """
    grouped_means = data.groupby(group_col)[metric_col].mean()
    norm = plt.Normalize(vmin=grouped_means.min(), vmax=grouped_means.max())
    cmap_name = 'RdYlGn_r' if metric_col in _LOWER_IS_BETTER_METRICS else 'RdYlGn'
    cmap = plt.get_cmap(cmap_name)
    return {val: cmap(norm(mean_val)) for val, mean_val in grouped_means.items()}


def _new_metric_figure(title, title_fontsize, n_rows):
    """Create a white figure with `n_rows` stacked axes sharing the x axis.

    Returns the figure and a 1-D array of its axes (top to bottom).
    """
    # squeeze=False always yields a 2-D array, so the result can be indexed the same
    # way for any number of rows.
    fig, axes_grid = plt.subplots(n_rows, 1, figsize=(12, 8), sharex=True, dpi=200, squeeze=False)
    fig.patch.set_facecolor('white')
    fig.suptitle(title, fontsize=title_fontsize, y=0.98)
    return fig, axes_grid[:, 0]


def _draw_metric_boxplots(data, axes, x_col, metrics, meanprops, ylabel_fontsize, order=None):
    """Draw one box plot per entry of `metrics` (column -> display name) against `x_col`.

    Boxes are coloured by `_palette_by_group_mean`, outliers are hidden and the mean
    is drawn with `meanprops`. `order` optionally fixes the order of the x categories.
    """
    for ax, (metric_col, metric_name) in zip(axes, metrics.items()):
        sns.boxplot(data=data, x=x_col, y=metric_col, order=order, ax=ax, showmeans=True,
                    meanprops=meanprops, showfliers=False,
                    palette=_palette_by_group_mean(data, x_col, metric_col),
                    hue=x_col, legend=False)
        ax.set_ylabel(metric_name, fontsize=ylabel_fontsize)
        ax.set_xlabel('')


def _add_mean_and_colour_legend(fig):
    """Add the frameless two-entry legend (mean marker, colour meaning) below the title."""
    mean_marker = Line2D([0], [0], marker='o', color='w', label='Mean Value',
                         markerfacecolor='white', markeredgecolor='black', markersize=10)
    # Invisible patch: used only to carry the explanatory text into the legend.
    color_info = mpatches.Patch(color='none', label='Color: Green (Better) to Red (Worse)')
    fig.legend(handles=[mean_marker, color_info], loc='lower center', bbox_to_anchor=(0.5, 0.88),
               ncol=2, frameon=False, fontsize=12)  # frameon=False removes background


def _save_show_close(fig, output_path):
    """Write `fig` to `output_path`, display it, then close it to release memory."""
    fig.savefig(output_path, dpi=200)
    plt.show()
    plt.close(fig)


if 'df_targeted' in locals() and not df_targeted.empty:

    # --- Data and Style Configuration ---
    # Rebind to a new frame rather than writing a column into the existing one:
    # df_targeted was produced by boolean filtering, and re-running this cell stays
    # side-effect free on that earlier object.
    df_targeted = df_targeted.assign(
        architecture_label=df_targeted['number_of_cells'].apply(format_architecture_label)
    )
    metrics_map = {
        'R^2_overall': '$R^2$',
        'nMAE_overall': 'nMAE'
    }
    mean_props = {"marker": "o", "markerfacecolor": "white", "markeredgecolor": "black", "markersize": 8}
    output_dir = os.path.join('plots_for_thesis', 'hyperparameter_plots')
    os.makedirs(output_dir, exist_ok=True)

    print("Generating final hyperparameter sensitivity plots...")

    # --- 1. Plots for Simple Hyperparameters ---
    simple_params = ['learning_rate', 'batch_size', 'time_steps', 'dropout_rate', 'epochs']
    param_display_names = ['Learning Rate', 'Batch Size', 'Time Steps (Lookback Period)', 'Dropout Rate', 'Number of Epochs']

    for param_col, param_name in zip(simple_params, param_display_names):
        # A parameter with a single value has nothing to compare.
        if df_targeted[param_col].nunique() <= 1:
            print(f"--> Skipping plot for '{param_name}' as it has only one value in this dataset.")
            continue

        fig, axes = _new_metric_figure(f'Model Performance vs. {param_name}', 20, len(metrics_map))
        _draw_metric_boxplots(df_targeted, axes, param_col, metrics_map, mean_props, ylabel_fontsize=14)

        # --- Create a clean, custom legend ---
        _add_mean_and_colour_legend(fig)

        axes[-1].set_xlabel(param_name, fontsize=14)
        # Leave the top strip of the figure free for the title and legend.
        fig.tight_layout(rect=[0, 0, 1, 0.94])

        output_path = os.path.join(output_dir, f'metrics_vs_{param_name.replace(" ", "_")}_multi_farm.png')
        _save_show_close(fig, output_path)

    # --- 2. Plot for Network Architecture ---
    print("\nGenerating plot for Network Architecture...\n")

    def get_sort_key_from_label(label):
        """Sort key for a label like '128-64-32': (number of layers, total neuron count)."""
        parts = label.split('-')
        return (len(parts), sum(map(int, parts)))
    sorted_labels = sorted(df_targeted['architecture_label'].unique(), key=get_sort_key_from_label)

    fig, axes = _new_metric_figure('Model Performance vs. Layer Configuration', 22, len(metrics_map))
    _draw_metric_boxplots(df_targeted, axes, 'architecture_label', metrics_map, mean_props,
                          ylabel_fontsize=16, order=sorted_labels)

    # --- Add the same clean legend here ---
    _add_mean_and_colour_legend(fig)

    axes[-1].set_xlabel('LSTM Neuron Configuration', fontsize=16)
    plt.setp(axes[-1].get_xticklabels(), rotation=45, ha='right')

    fig.tight_layout(rect=[0, 0, 1, 0.95])
    output_path = os.path.join(output_dir, 'metrics_vs_architecture_multi_farm.png')
    _save_show_close(fig, output_path)

else:
    print("The 'df_targeted' DataFrame is not available. Please run Cell 1 first.")