In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize, LinearSegmentedColormap
import matplotlib.cm as cm
import logging

In [None]:
def find_center_of_excellence(df, metric, params, tolerance=0.01, alpha=0.5):
    """
    Pick a representative "best" row: a high metric value in a dense
    region of parameter space.

    Rows whose metric lies within a relative ``tolerance`` of the maximum form
    the "elite" set. Each elite row is scored as
    ``metric_norm ** alpha * density_norm ** (1 - alpha)``, where
    ``metric_norm`` is the min-max normalized metric and ``density_norm`` is a
    Gaussian KDE density (over ``params``) divided by its maximum.

    Args:
        df: DataFrame containing the ``metric`` column and the ``params`` columns.
        metric: Name of the metric column to maximize.
        params: List of parameter column names used for the density estimate.
        tolerance: Relative distance from the top metric value that still
            counts as elite (0.01 keeps rows within 1% of the maximum).
        alpha: Balance in [0, 1]; 0.5 weighs metric and density equally,
            values above 0.5 favor the metric.

    Returns:
        The selected row of ``df`` (as returned by ``.loc``). If the elite set
        is empty the row with the maximum metric in ``df`` is returned; if the
        KDE cannot be built (e.g. a single elite row) the elite row with the
        maximum metric is returned.
    """
    from scipy.stats import gaussian_kde
    import numpy as np

    top_val = df[metric].max()
    elite_df = df[df[metric] >= top_val - tolerance * top_val]

    if len(elite_df) == 0:
        return df.loc[df[metric].idxmax()]

    try:
        param_values = elite_df[params].astype(float).values.T
        kde = gaussian_kde(param_values)
        densities = kde(param_values)
    except Exception:
        # Deliberate best-effort: any KDE failure (too few points, singular
        # covariance, non-numeric data) falls back to the plain best row.
        return elite_df.loc[elite_df[metric].idxmax()]

    # Normalize both contributions
    elite_metric = elite_df[metric]
    metric_span = elite_metric.max() - elite_metric.min()
    if metric_span > 0:
        metric_norm = (elite_metric - elite_metric.min()) / (metric_span + 1e-9)
    else:
        # Every elite row is tied on the metric. Min-max scaling would give
        # all zeros, which zeroes every score and makes idxmax return the
        # first row regardless of density. Use a neutral weight of 1 so the
        # density term decides.
        metric_norm = elite_metric * 0.0 + 1.0
    density_norm = densities / np.max(densities)

    # Weighted geometric combination of metric and density
    scores = (metric_norm ** alpha) * (density_norm ** (1 - alpha))

    best_idx = scores.idxmax()
    return elite_df.loc[best_idx]


In [None]:
# --- SUPPRESS DEBUG FONT LOGGING ---
# Raise the matplotlib logger to WARNING so debug-level messages are hidden.
mpl_logger = logging.getLogger('matplotlib')
mpl_logger.setLevel(logging.WARNING)
# ------------------------------------

# Name of the colormap shared by every plot in this notebook.
cmap_name = 'turbo'
# pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in
# Matplotlib 3.9; pyplot.get_cmap works on both old and new releases.
cmap = plt.get_cmap(cmap_name)

# Colors for the KDE curves, taken from the two extreme ends of the colormap
kde_colors = {
    'good': cmap(1.0),  # high end of the colormap
    'bad': cmap(0.0)    # low end of the colormap
}
# ----------------------------------------------------------------------

In [None]:

# ---------------------------------------------
# 1. Refactored Function: Optimized for Continuous Metrics
# ---------------------------------------------
def plot_corner_metric_continuous(df, file_name=None, metrics=['f1_score', 'recall', 'precision'], f1_threshold=0.9, dpi=300, save_fig=False):
    """
    Draw one corner plot (lower-triangle pair grid) per metric for the
    six-parameter sweep (including ``pair_gap``).

    Diagonal panels show KDEs of one parameter for the "good" and "bad" rows.
    Lower-triangle panels show a scatter of a parameter pair colored by the
    metric, annotated with the correlation coefficient computed on the "good"
    rows only. "Good" means ``f1_score >= f1_threshold`` for the F1 metric and
    ``metric >= median`` for every other metric.

    Relies on the notebook-level names ``cmap_name``, ``kde_colors`` and
    ``find_center_of_excellence``.

    Args:
        df: DataFrame with the parameter and metric columns. It is copied
            internally, so the caller's frame is not modified.
        file_name: Label embedded in the PNG file name when saving.
        metrics: Metric column names; one figure is produced per entry.
            (The default list is only iterated, never mutated.)
        f1_threshold: Good/bad split used when the metric is ``'f1_score'``.
        dpi: Resolution passed to ``savefig``.
        save_fig: When truthy, write ``CP_{file_name}_dpi{dpi}_{metric}.png``.
    """
    print("Generating Corner Plots with Continuous Metric and Consistent KDE Coloring...")

    cmap_dict = {
        'f1_score': cmap_name,
        'recall': cmap_name,
        'precision': cmap_name
    }

    # Raw string for the LaTeX label: '\s' is an invalid escape sequence in a
    # normal string literal (the runtime value is unchanged).
    param_names = {
        'sigma': r'Sigma ($\sigma$)',
        'gauss_threshold': 'Gauss Threshold',
        'overlap_threshold': 'Overlap Threshold',
        'center_distance': 'Center Distance (D)',
        'pair_gap': 'Pair Gap (g)',
        'number_samples': 'Number of Samples'
    }
    params = list(param_names.keys())

    # Work on a copy so the float conversion below does not retype the
    # caller's DataFrame as a side effect.
    df = df.copy()

    # Ensure all hyperparameter columns are float (needed for the correlation)
    for p in params:
        if p in df.columns:
            try:
                df[p] = df[p].astype(float)
            except ValueError:
                print(f"Warning: Could not convert parameter '{p}' to float. Check data types.")

    # --- Diagonal Plot Function (KDE with F1 Threshold) ---
    def plot_kde_for_param(ax, df, param, metric, best_value, threshold):
        """Draw the good/bad KDE curves of ``param`` on ``ax``.

        ``best_value`` is currently unused: the line that marked it is
        disabled below.
        """
        # Split into good/bad: fixed threshold for F1, median otherwise
        if metric == 'f1_score':
            split_at = threshold
        else:
            split_at = df[metric].median()
        df_good = df[df[metric] >= split_at]
        df_bad = df[df[metric] < split_at]

        # 'Bad' first, then 'Good', so the good curve is drawn on top
        if len(df_bad) > 0:
            sns.kdeplot(x=df_bad[param], ax=ax,
                        color=kde_colors['bad'], fill=True, alpha=0.5, linewidth=1.5,
                        label=f'Low {metric.upper()} (n={len(df_bad)})', bw_method=0.1)

        if len(df_good) > 0:
            sns.kdeplot(x=df_good[param], ax=ax,
                        color=kde_colors['good'], fill=True, alpha=0.5, linewidth=1.5,
                        label=f'High {metric.upper()} (n={len(df_good)})', bw_method=0.1)

        # Mark the best parameter value (disabled)
        #ax.axvline(best_value, color='black', linestyle='--', linewidth=2.5,
                      #label=f'Best param: {best_value:.3f}')

        ax.set_ylabel('Density')
        ax.legend(fontsize=7, loc='lower right')
        ax.set_title(param_names.get(param, param), fontsize=10, fontweight='bold', y=1.0)

    # --- Off-Diagonal Plot Function (Scatter with Continuous Color) ---
    def plot_scatter_pair(ax, df, x_param, y_param, metric, best_params, cmap, norm, f1_threshold):
        """Scatter ``x_param`` vs ``y_param`` colored by ``metric``; return the artist.

        ``best_params`` is currently unused: the marker for it is disabled below.
        """
        # 1. Determine the "Good" subset based on the metric logic
        if metric == 'f1_score':
            df_good = df[df[metric] >= f1_threshold]
        else:
            df_good = df[df[metric] >= df[metric].median()]

        # 2. Correlation ONLY on the "Good" subset (needs at least two points)
        if len(df_good) > 1:
            corr = np.corrcoef(df_good[x_param], df_good[y_param])[0, 1]
        else:
            corr = np.nan

        # Scatter plot uses the full df for visualization
        scatter = ax.scatter(df[x_param], df[y_param],
                             c=df[metric], cmap=cmap, norm=norm,
                             alpha=0.7, s=10, edgecolors='black', linewidth=0.2)

        # Mark the best point (disabled)
        #ax.scatter(best_params[x_param], best_params[y_param],
                   #color='white', s=200, marker='*', edgecolors='black', linewidth=1.5, zorder=10)

        # 3. Annotate the correlation (rho was mis-encoded as mojibake before)
        corr_str = f'ρ(Good) = {corr:.2f}' if not np.isnan(corr) else 'ρ(Good) = N/A'
        ax.annotate(corr_str, xy=(0.05, 0.95), xycoords='axes fraction',
                    bbox=dict(boxstyle="round,pad=0.3", facecolor='white', alpha=0.9),
                    fontsize=8)

        return scatter

    # --- Axis Setup Function ---
    def setup_axes(ax, i, j, n_params, params, param_names):
        """Label only the outer axes of the grid and apply common tick/grid style."""
        # X label only on the bottom row
        if i == n_params - 1:
            ax.set_xlabel(param_names.get(params[j], params[j]), fontsize=10)
        else:
            ax.set_xlabel('')
            ax.tick_params(labelbottom=False)

        # Y label only on the left column, excluding the top-left diagonal panel
        if j == 0 and i != 0:
            ax.set_ylabel(param_names.get(params[i], params[i]), fontsize=10)
        else:
            ax.set_ylabel('')
            ax.tick_params(labelleft=False)

        ax.tick_params(labelsize=8)
        ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)

    # --- Main Plotting Loop ---
    n_params = len(params)
    for metric in metrics:
        print(f"  Plotting {metric}...")

        best_params = find_center_of_excellence(df, metric, params, tolerance=0.05, alpha=0.9)

        fig, axes = plt.subplots(n_params, n_params, figsize=(18, 16))

        fig.suptitle(f'Visualizing Hyperparameter Interaction in Optimal Performance Regions\n' f'Two close magic numbers\n'
                     f'Metric: {metric.upper()}', fontsize=18, fontweight='bold', y=0.98)

        # pyplot.get_cmap instead of cm.get_cmap (removed in Matplotlib 3.9)
        cmap_obj = plt.get_cmap(cmap_dict.get(metric, cmap_name))
        norm = Normalize(vmin=df[metric].min(), vmax=df[metric].max())

        last_scatter = None

        for i in range(n_params):
            for j in range(n_params):
                ax = axes[i, j]

                if i == j:  # Diagonal: KDE plot
                    ax.tick_params(labelleft=False, labelbottom=False)
                    ax.set_xlabel('')
                    ax.set_ylabel('')
                    plot_kde_for_param(ax, df, params[i], metric, best_params[params[i]], f1_threshold)
                elif i > j:  # Lower Triangle: Scatter plot
                    last_scatter = plot_scatter_pair(ax, df, params[j], params[i], metric, best_params, cmap_obj, norm, f1_threshold)
                else:  # Upper Triangle: Turn off axis
                    ax.axis('off')

                setup_axes(ax, i, j, n_params, params, param_names)

        # Add Color Bar (any scatter works: they all share cmap and norm)
        if last_scatter is not None:
            cbar_ax = fig.add_axes([0.94, 0.1, 0.02, 0.8])
            cbar = fig.colorbar(last_scatter, cax=cbar_ax)
            cbar.set_label(f'{metric.upper()} Value', rotation=270, labelpad=20, fontsize=12, fontweight='bold')

        fig.subplots_adjust(left=0.05, right=0.93, bottom=0.05, top=0.9, wspace=0.1, hspace=0.1)

        if save_fig:
            fig.savefig(f"CP_{file_name}_dpi{dpi}_{metric}.png", dpi=dpi, bbox_inches='tight')

        plt.show()
        # Release the figure so looping over many metrics does not pile up
        # open figures in memory.
        plt.close(fig)

In [None]:
# Two-magic-number sweep: read the results CSV, print a quick summary of what
# was loaded, then draw one corner plot per metric.
filename = "2m_s0-50_G0-1_o0-100_d0-100_g0.0-50.0_n1000_rd10000.csv"
df_results = pd.read_csv(filename)

# Quick sanity check of the loaded table
print("Data shape:", df_results.shape)
print("Columns:", df_results.columns.tolist())
print("\nFirst few rows:", df_results.head(), sep="\n")

plot_corner_metric_continuous(
    df_results,
    file_name=filename,
    metrics=['f1_score', 'recall', 'precision'],
    f1_threshold=0.9,
    dpi=500,
    save_fig=False,
)

In [None]:
# ---------------------------------------------
# 1. Refactored Function: Optimized for Consistent Coloring (excluding 'pair_gap')
# ---------------------------------------------
def plot_corner_metric_continuous_nogap(df, file_name=None, metrics=['f1_score', 'recall', 'precision'], f1_threshold=0.9, dpi=300, save_fig=False):
    """
    Draw one corner plot (lower-triangle pair grid) per metric for the
    five-parameter sweep (no ``pair_gap`` column).

    Diagonal panels show KDEs of one parameter for the "good" and "bad" rows.
    Lower-triangle panels show a scatter of a parameter pair colored by the
    metric, annotated with the correlation coefficient computed on the "good"
    rows only. "Good" means ``f1_score >= f1_threshold`` for the F1 metric and
    ``metric >= median`` for every other metric.

    Relies on the notebook-level names ``cmap_name``, ``kde_colors`` and
    ``find_center_of_excellence``.

    Args:
        df: DataFrame with the parameter and metric columns. It is copied
            internally, so the caller's frame is not modified.
        file_name: Label embedded in the PNG file name when saving.
        metrics: Metric column names; one figure is produced per entry.
            (The default list is only iterated, never mutated.)
        f1_threshold: Good/bad split used when the metric is ``'f1_score'``.
        dpi: Resolution passed to ``savefig``.
        save_fig: When truthy, write ``CP_{file_name}_dpi{dpi}_{metric}.png``.
    """
    print("Generating Corner Plots with Consistent Coloring (excluding 'pair_gap')...")

    # Every metric uses the notebook-wide colormap
    cmap_dict = {
        'f1_score': cmap_name,
        'recall': cmap_name,
        'precision': cmap_name
    }

    # Parameter definitions (excluding 'pair_gap'). Raw string for the LaTeX
    # label: '\s' is an invalid escape sequence in a normal string literal.
    param_names = {
        'sigma': r'Sigma ($\sigma$)',
        'gauss_threshold': 'Gauss Threshold',
        'overlap_threshold': 'Overlap Threshold',
        'center_distance': 'Center Distance (D)',
        'number_samples': 'Number of Samples'
    }
    params = list(param_names.keys())

    # Work on a copy so the float conversion below does not retype the
    # caller's DataFrame as a side effect.
    df = df.copy()

    # Ensure all hyperparameter columns are float for the correlation
    for p in params:
        if p in df.columns:
            try:
                df[p] = df[p].astype(float)
            except ValueError:
                print(f"Warning: Could not convert parameter '{p}' to float. Skipping conversion.")

    # --- Diagonal Plot Function (KDE with F1 Threshold) ---
    def plot_kde_for_param(ax, df, param, metric, best_value, threshold):
        """Draw the good/bad KDE curves of ``param`` on ``ax``.

        ``best_value`` is currently unused: the line that marked it is
        disabled below.
        """
        # Split into good/bad: fixed threshold for F1, median otherwise
        if metric == 'f1_score':
            split_at = threshold
        else:
            split_at = df[metric].median()
        df_good = df[df[metric] >= split_at]
        df_bad = df[df[metric] < split_at]

        # 'Bad' first, then 'Good', so the good curve is drawn on top
        if len(df_bad) > 0:
            sns.kdeplot(x=df_bad[param], ax=ax,
                        color=kde_colors['bad'], fill=True, alpha=0.5, linewidth=1.5,
                        label=f'Low {metric.upper()} (n={len(df_bad)})', bw_method=0.1)

        if len(df_good) > 0:
            sns.kdeplot(x=df_good[param], ax=ax,
                        color=kde_colors['good'], fill=True, alpha=0.5, linewidth=1.5,
                        label=f'High {metric.upper()} (n={len(df_good)})', bw_method=0.1)

        # Mark the best parameter value (disabled)
        #ax.axvline(best_value, color='black', linestyle='--', linewidth=2.5,
                      #label=f'Best param: {best_value:.3f}')

        ax.set_ylabel('Density')
        ax.legend(fontsize=7, loc='lower right')
        ax.set_title(param_names.get(param, param), fontsize=10, fontweight='bold', y=1.0)

    # --- Off-Diagonal Plot Function (Scatter with Continuous Color and Selective Correlation) ---
    def plot_scatter_pair(ax, df, x_param, y_param, metric, best_params, cmap, norm, f1_threshold):
        """Scatter ``x_param`` vs ``y_param`` colored by ``metric``; return the artist.

        ``best_params`` is currently unused: the marker for it is disabled below.
        """
        # 1. Determine the "Good" subset based on the metric logic (for correlation)
        if metric == 'f1_score':
            df_good = df[df[metric] >= f1_threshold]
        else:
            df_good = df[df[metric] >= df[metric].median()]

        # 2. Correlation ONLY on the "Good" subset (needs at least two points)
        if len(df_good) > 1:
            corr = np.corrcoef(df_good[x_param], df_good[y_param])[0, 1]
        else:
            corr = np.nan

        # Scatter plot uses the full df, colored by the continuous metric
        scatter = ax.scatter(df[x_param], df[y_param],
                             c=df[metric], cmap=cmap, norm=norm,
                             alpha=0.7, s=10, edgecolors='black', linewidth=0.2)

        # Mark the best point (disabled)
        #ax.scatter(best_params[x_param], best_params[y_param],
                   #color='white', s=200, marker='*', edgecolors='black', linewidth=1.5, zorder=10)

        # 3. Annotate the correlation (rho was mis-encoded as mojibake before)
        corr_str = f'ρ(Good) = {corr:.2f}' if not np.isnan(corr) else 'ρ(Good) = N/A'
        ax.annotate(corr_str, xy=(0.05, 0.95), xycoords='axes fraction',
                    bbox=dict(boxstyle="round,pad=0.3", facecolor='white', alpha=0.9),
                    fontsize=8)

        return scatter

    # --- Axis Setup Function ---
    def setup_axes(ax, i, j, n_params, params, param_names):
        """Label only the outer axes of the grid and apply common tick/grid style."""
        # Set X-axis label only for the bottom row
        if i == n_params - 1:
            ax.set_xlabel(param_names.get(params[j], params[j]), fontsize=10)
        else:
            ax.set_xlabel('')
            ax.tick_params(labelbottom=False)

        # Set Y-axis label only for the left column (excluding diagonal)
        if j == 0 and i != 0:
            ax.set_ylabel(param_names.get(params[i], params[i]), fontsize=10)
        else:
            ax.set_ylabel('')
            ax.tick_params(labelleft=False)

        ax.tick_params(labelsize=8)
        ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)

    # --- Main Plotting Loop ---
    n_params = len(params)
    for metric in metrics:
        print(f"  Plotting {metric}...")

        best_params = find_center_of_excellence(df, metric, params, tolerance=0.05, alpha=0.9)

        # Figure size chosen for the 5x5 grid
        fig, axes = plt.subplots(n_params, n_params, figsize=(15, 13))

        fig.suptitle(f'Visualizing Hyperparameter Interaction in Optimal Performance Regions\n' f'One magic number\n'
                     f'Metric: {metric.upper()}', fontsize=18, fontweight='bold', y=0.98)

        # pyplot.get_cmap instead of cm.get_cmap (removed in Matplotlib 3.9)
        cmap_obj = plt.get_cmap(cmap_dict.get(metric, cmap_name))
        norm = Normalize(vmin=df[metric].min(), vmax=df[metric].max())

        last_scatter = None

        for i in range(n_params):
            for j in range(n_params):
                ax = axes[i, j]

                if i == j:  # Diagonal: KDE plot
                    ax.tick_params(labelleft=False, labelbottom=False)
                    ax.set_xlabel('')
                    ax.set_ylabel('')
                    plot_kde_for_param(ax, df, params[i], metric, best_params[params[i]], f1_threshold)
                elif i > j:  # Lower Triangle: Scatter plot
                    last_scatter = plot_scatter_pair(ax, df, params[j], params[i], metric, best_params, cmap_obj, norm, f1_threshold)
                else:  # Upper Triangle: Turn off axis
                    ax.axis('off')

                setup_axes(ax, i, j, n_params, params, param_names)

        # Add Color Bar (any scatter works: they all share cmap and norm)
        if last_scatter is not None:
            cbar_ax = fig.add_axes([0.94, 0.1, 0.02, 0.8])
            cbar = fig.colorbar(last_scatter, cax=cbar_ax)
            cbar.set_label(f'{metric.upper()} Value', rotation=270, labelpad=20, fontsize=12, fontweight='bold')

        fig.subplots_adjust(left=0.07, right=0.93, bottom=0.07, top=0.9, wspace=0.1, hspace=0.1)

        if save_fig:
            # Use the file_name argument; the previous code read the notebook
            # global `filename`, which ignored the argument and failed on a
            # kernel where that global was not defined.
            fig.savefig(f"CP_{file_name}_dpi{dpi}_{metric}.png", dpi=dpi, bbox_inches='tight')

        plt.show()
        # Release the figure so looping over many metrics does not pile up
        # open figures in memory.
        plt.close(fig)

In [None]:
# One-magic-number sweep: read the results CSV, print a quick summary of what
# was loaded, then draw one corner plot per metric (no 'pair_gap' column).
filename = "1m_s0-10_g0-1_o0-100_D0-30_n1000_rd10000.csv"
df_results = pd.read_csv(filename)

# Quick sanity check of the loaded table
print("Data shape:", df_results.shape)
print("Columns:", df_results.columns.tolist())
print("\nFirst few rows:", df_results.head(), sep="\n")

plot_corner_metric_continuous_nogap(
    df_results,
    file_name=filename,
    metrics=['f1_score', 'recall', 'precision'],
    f1_threshold=0.9,
    dpi=600,
    save_fig=False,
)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import matplotlib.cm as cm
import seaborn as sns # Assuming this is available, based on the original function



# Display labels for the hyperparameter columns, keyed by column name.
# The sigma label is a raw string: '\s' is an invalid escape sequence in a
# normal string literal (SyntaxWarning on Python 3.12+); the runtime value is
# unchanged.
param_names = {
    'sigma': r'Sigma ($\sigma$)',
    'gauss_threshold': 'Gauss Threshold',
    'overlap_threshold': 'Overlap Threshold',
    'center_distance': 'Center Distance (D)',
    'pair_gap': 'Pair Gap (g)',
    'number_samples': 'Number of Samples'
}
# --------------------------------------------------------------------------------

def plot_zoomed_pairwise_correlation(df: pd.DataFrame,
                                     param_x: str,
                                     param_y: str,
                                     metric: str,
                                     range_x: tuple,
                                     range_y: tuple,
                                     f1_threshold: float = 0.9,
                                     param_names: dict = param_names):
    """
    Plots a zoomed pairwise scatter of two hyperparameters, colored by a
    performance metric. The correlation coefficient (rho) is calculated only
    on the 'Good' subset of the rows inside the zoom window.

    Relies on the notebook-level name ``cmap_name`` for the colormap.

    Args:
        df (pd.DataFrame): DataFrame containing hyperparameter and metric
            results. It is copied internally and not modified.
        param_x (str): Name of the hyperparameter for the X-axis.
        param_y (str): Name of the hyperparameter for the Y-axis.
        metric (str): Name of the performance metric ('f1_score', 'recall', 'precision').
        range_x (tuple): (min, max) for the X-axis zoom range (inclusive).
        range_y (tuple): (min, max) for the Y-axis zoom range (inclusive).
        f1_threshold (float, optional): The F1-score threshold for defining
                                        the 'Good' subset if metric is 'f1_score'.
                                        For other metrics the median inside the
                                        zoom window is used. Defaults to 0.9.
        param_names (dict, optional): Dictionary of parameter names for plotting labels.

    Raises:
        ValueError: If ``metric`` is not one of the supported names, or if
            ``param_x``, ``param_y`` or ``metric`` is not a column of ``df``.
    """
    if metric not in ['f1_score', 'recall', 'precision']:
        raise ValueError("Metric must be 'f1_score', 'recall', or 'precision'.")
    if param_x not in df.columns or param_y not in df.columns:
        raise ValueError("One or both specified parameters are not in the DataFrame.")
    if metric not in df.columns:
        raise ValueError(f"Metric '{metric}' is not in the DataFrame.")

    # Work on a copy so the float conversion below does not retype the
    # caller's DataFrame as a side effect.
    df = df.copy()

    # 1. Ensure hyperparameters are float (column presence was validated above)
    for p in [param_x, param_y]:
        try:
            df[p] = df[p].astype(float)
        except ValueError:
            print(f"Warning: Could not convert parameter '{p}' to float.")

    # 2. Filter the DataFrame to the specified ranges (Zoom)
    in_x = (df[param_x] >= range_x[0]) & (df[param_x] <= range_x[1])
    in_y = (df[param_y] >= range_y[0]) & (df[param_y] <= range_y[1])
    df_zoomed = df[in_x & in_y].copy()

    if df_zoomed.empty:
        print("No data points found in the specified zoom range. Plotting empty figure.")
        fig, ax = plt.subplots(figsize=(8, 8))
        ax.set_title("No Data Points in Range", fontsize=14)
        ax.set_xlabel(param_names.get(param_x, param_x))
        ax.set_ylabel(param_names.get(param_y, param_y))
        plt.show()
        return

    # 3. Determine the "Good" subset (for correlation calculation)
    if metric == 'f1_score':
        df_good = df_zoomed[df_zoomed[metric] >= f1_threshold]
    else:
        median_val = df_zoomed[metric].median()
        df_good = df_zoomed[df_zoomed[metric] >= median_val]

    # 4. Calculate correlation ONLY on the "Good" subset (needs >= 2 points)
    corr = np.nan
    if len(df_good) > 1:
        corr = np.corrcoef(df_good[param_x], df_good[param_y])[0, 1]

    # 5. Setup Plotting components
    # pyplot.get_cmap instead of cm.get_cmap (removed in Matplotlib 3.9)
    cmap_obj = plt.get_cmap(cmap_name)
    # Norm uses the min/max of the METRIC *within the zoomed range*
    norm = Normalize(vmin=df_zoomed[metric].min(), vmax=df_zoomed[metric].max())

    # 6. Generate Plot
    fig, ax = plt.subplots(figsize=(10, 8))

    scatter = ax.scatter(df_zoomed[param_x], df_zoomed[param_y],
                         c=df_zoomed[metric], cmap=cmap_obj, norm=norm,
                         alpha=0.8, s=40, edgecolors='black', linewidth=0.3)

    # Set labels and title
    ax.set_xlabel(param_names.get(param_x, param_x), fontsize=14)
    ax.set_ylabel(param_names.get(param_y, param_y), fontsize=14)
    ax.set_title(f'Pairwise Correlation: {param_names.get(param_x, param_x)} vs {param_names.get(param_y, param_y)}\n'
                 f' {metric.upper()} Metric', fontsize=16, fontweight='bold')

    # Set the explicit axis limits based on the input range (ensuring the 'zoom')
    ax.set_xlim(range_x)
    ax.set_ylim(range_y)

    # Annotate the correlation (rho was mis-encoded as mojibake before)
    corr_str = f'ρ(Good) = {corr:.2f}' if not np.isnan(corr) else 'ρ(Good) = N/A (Insufficient Good Data)'
    ax.annotate(corr_str, xy=(0.05, 0.95), xycoords='axes fraction',
                bbox=dict(boxstyle="round,pad=0.5", facecolor='lightgray', alpha=0.8),
                fontsize=12, fontweight='bold')

    # Add Color Bar
    cbar = fig.colorbar(scatter, ax=ax)
    cbar.set_label(f'{metric.upper()} Value (Range: {norm.vmin:.2f} to {norm.vmax:.2f})',
                   rotation=270, labelpad=25, fontsize=12, fontweight='bold')

    ax.grid(True, alpha=0.5, linestyle='--')
    plt.tight_layout()
    # Save option (uncomment if needed). Kept BEFORE plt.show(): saving after
    # show() can write an empty image once the figure has been closed.
    # fig.savefig(f"zoomed_correlation_{param_x}_{param_y}_{metric}.png", dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
import time

# winsound exists only on Windows. Guard the import so this cell does not
# abort "Run All" on Linux/macOS kernels.
try:
    import winsound
except ImportError:
    winsound = None


def mario_one_up():
    """Play a short seven-note chime to signal that the notebook finished.

    Uses ``winsound.Beep`` (frequency in Hz, duration in ms). On platforms
    where ``winsound`` is unavailable the function returns without playing
    anything.
    """
    if winsound is None:
        return
    notes = [1319, 1568, 2637, 2093, 2349, 3136, 2637]
    durations = [100, 100, 100, 100, 100, 100, 300]
    for n, d in zip(notes, durations):
        winsound.Beep(n, d)
        time.sleep(0.02)  # short gap so consecutive notes stay distinct

mario_one_up()
