In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
import re
import json

# --- 1. Configuration: PLEASE UPDATE THIS PATH ---
# Root directory that the loader functions in this cell search recursively.
# This should be the 'original_base_save_path' from your experiment script.
BASE_RESULTS_PATH = Path("./results")
# ----------------------------------------------------

# --- Constants (usually no need to change these) ---
# File names that are looked up below BASE_RESULTS_PATH / inside a run directory.
# The main log file with metrics per round.
CSV_FILENAME = "training_log.csv"
# The configuration snapshot saved for each run.
CONFIG_FILENAME = "config_snapshot.json"
# The file containing just the final evaluation metrics.
FINAL_METRICS_FILENAME = "final_metrics.json"


def load_all_run_logs(base_path: Path) -> pd.DataFrame:
    """
    Recursively finds all training log CSVs and combines them into a single DataFrame.

    For each log file found, the CSV is read and then enriched with:

    * ``run_id`` / ``seed`` -- parsed from a ``run_<n>_seed_<m>`` fragment in the
      directory path (the columns are simply not added when the path does not
      contain such a fragment);
    * ``param_<key>`` -- one column per entry of the ``experiment`` section of the
      run's config snapshot, stored as ``str(value)``;
    * ``param_model_<key>`` -- the same for the ``model`` section.

    Files that cannot be read or processed are reported with a printed warning
    and skipped; they never abort the whole load.

    Args:
        base_path: The root directory containing all experiment run folders.

    Returns:
        A single pandas DataFrame with the rows of every successfully loaded
        log (index reset), or an empty DataFrame when nothing could be loaded.
    """
    print(f"üîç Searching for '{CSV_FILENAME}' files in '{base_path}'...")
    # rglob() is already recursive, so the file name alone is the pattern.
    # Sorting makes the row order of the combined frame independent of the
    # order in which the filesystem happens to list directories.
    result_files = sorted(base_path.rglob(CSV_FILENAME))

    if not result_files:
        print(f"‚ùå No result files named '{CSV_FILENAME}' found in '{base_path}'.")
        print("   Please check the 'BASE_RESULTS_PATH' variable.")
        return pd.DataFrame()

    print(f"‚úÖ Found {len(result_files)} result files. Loading full logs...")

    # Compiled once; the same pattern is applied to every run directory.
    run_dir_pattern = re.compile(r"run_(\d+)_seed_(\d+)")
    # Config section -> prefix of the DataFrame columns created from it.
    config_sections = (("experiment", "param_"), ("model", "param_model_"))

    all_dfs = []
    for result_file in result_files:
        try:
            df = pd.read_csv(result_file)
            run_path = result_file.parent

            match = run_dir_pattern.search(str(run_path))
            if match:
                df["run_id"] = int(match.group(1))
                df["seed"] = int(match.group(2))

            # --- Load Configuration from JSON ---
            config_file = run_path / CONFIG_FILENAME
            if config_file.exists():
                # JSON is UTF-8 by specification; do not depend on the locale.
                with open(config_file, 'r', encoding='utf-8') as f:
                    config = json.load(f)
                for section, prefix in config_sections:
                    if section in config:
                        for key, value in config[section].items():
                            # Values may be nested dicts/lists; str() keeps
                            # every cell a plain scalar.
                            df[f"{prefix}{key}"] = str(value)

            all_dfs.append(df)
        except Exception as e:
            # Best effort: one broken run must not hide the others.
            print(f"‚ö†Ô∏è Could not load or process file {result_file}: {e}")

    if not all_dfs:
        return pd.DataFrame()

    return pd.concat(all_dfs, ignore_index=True)

def _read_run_config_params(run_path: Path) -> dict:
    """Return the run's config snapshot flattened to ``param_*`` -> ``str(value)``.

    Uses the same column naming as the training-log loader (``param_<key>`` for
    the ``experiment`` section, ``param_model_<key>`` for the ``model``
    section). Returns an empty dict when the snapshot is missing or unreadable.
    """
    config_file = run_path / CONFIG_FILENAME
    if not config_file.exists():
        return {}

    try:
        with open(config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)
        params = {}
        for section, prefix in (("experiment", "param_"), ("model", "param_model_")):
            if section in config:
                for key, value in config[section].items():
                    params[f"{prefix}{key}"] = str(value)
        return params
    except Exception as e:
        # A broken snapshot should not cost us the run's metrics.
        print(f"‚ö†Ô∏è Could not load or process file {config_file}: {e}")
        return {}


def load_final_metrics(base_path: Path) -> pd.DataFrame:
    """
    Loads the 'final_metrics.json' from all runs into one row per run.

    Each row holds the keys of the metrics file plus:

    * ``run_id`` / ``seed`` -- parsed from a ``run_<n>_seed_<m>`` fragment in the
      directory path, when present;
    * ``param_*`` / ``param_model_*`` -- the run's config snapshot, stringified.
      The plotting cells derive ATTACK_METHOD / ADV_RATE / IS_SYBIL from these
      columns, so without them every plot falls back to dummy values.
      A key that already exists in the metrics file is never overwritten.

    Files that cannot be read or processed are reported with a printed warning
    and skipped.

    Args:
        base_path: The root directory containing all experiment run folders.

    Returns:
        A DataFrame with one row per successfully loaded metrics file, or an
        empty DataFrame when nothing could be loaded.
    """
    print(f"\nüîç Searching for '{FINAL_METRICS_FILENAME}' files in '{base_path}'...")
    # rglob() is already recursive; sorted() gives a reproducible row order.
    metric_files = sorted(base_path.rglob(FINAL_METRICS_FILENAME))

    if not metric_files:
        print(f"‚ùå No final metrics files named '{FINAL_METRICS_FILENAME}' found.")
        return pd.DataFrame()

    print(f"‚úÖ Found {len(metric_files)} final metrics files. Loading...")

    all_metrics = []
    for metric_file in metric_files:
        try:
            with open(metric_file, 'r', encoding='utf-8') as f:
                metrics = json.load(f)

            # Extract metadata from path
            run_path = metric_file.parent
            match = re.search(r"run_(\d+)_seed_(\d+)", str(run_path))
            if match:
                metrics["run_id"] = int(match.group(1))
                metrics["seed"] = int(match.group(2))

            # Attach the run configuration without clobbering real metrics.
            for name, value in _read_run_config_params(run_path).items():
                metrics.setdefault(name, value)

            all_metrics.append(metrics)
        except Exception as e:
            # Best effort: one broken run must not hide the others.
            print(f"‚ö†Ô∏è Could not load or process file {metric_file}: {e}")

    if not all_metrics:
        return pd.DataFrame()

    return pd.DataFrame(all_metrics)

# --- 2. Load the Data ---
# Both frames are left in the notebook namespace for the plotting cells.
print("--- Starting Data Loading Process ---")
training_logs_df = load_all_run_logs(BASE_RESULTS_PATH)
final_metrics_df = load_final_metrics(BASE_RESULTS_PATH)
print("\n--- Data Loading Complete ---")


# --- 3. Preprocessing and Initial Analysis ---
if not training_logs_df.empty:
    print("\n--- Training Logs DataFrame (`training_logs_df`) ---")
    training_logs_df.info()
    # Add any preprocessing steps for the training logs here
    # Example: training_logs_df['accuracy'] = pd.to_numeric(training_logs_df['accuracy'], errors='coerce')
    print("\nHead of the training logs:")
    display(training_logs_df.head())


if not final_metrics_df.empty:
    print("\n\n--- Final Metrics DataFrame (`final_metrics_df`) ---")
    final_metrics_df.info()
    # Add any preprocessing steps for the final metrics here
    print("\nFinal metrics summary:")
    # Describe all numeric columns except for identifiers
    numeric_cols = final_metrics_df.select_dtypes(include='number').columns.drop(
        ['run_id', 'seed', 'timestamp'], errors='ignore'
    )
    # describe() raises ValueError on a frame without columns, which happens
    # when the identifiers were the only numeric columns.
    if len(numeric_cols) > 0:
        display(final_metrics_df[numeric_cols].describe())
    else:
        print("No numeric metric columns to summarize.")
    print("\nHead of the final metrics:")
    display(final_metrics_df.head())

if training_logs_df.empty and final_metrics_df.empty:
    print("\nüõë No data was loaded. Please check your BASE_RESULTS_PATH.")
else:
    print("\n‚úÖ Data loaded successfully into `training_logs_df` and `final_metrics_df`.")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.lines import Line2D
from pathlib import Path

# This script assumes that the first cell ('notebook_cell_1.py') has been run,
# and the `final_metrics_df` DataFrame is available in the notebook's memory.

# --- 1. Configuration for Visualization ---
# TODO: Adjust these parameters to match your experiment's specifics.

# Define which adversary rates you want to see on the x-axis of the plot.
# Runs whose ADV_RATE is not in this list are left out of the plot.
ADV_RATES_TO_PLOT = [0.2, 0.3, 0.4]

# The directory where you want to save the final plot.
FIGURE_SAVE_DIR = Path("./analysis_plots")
# parents=True keeps this working when the path is changed to a nested
# location whose parent directories do not exist yet.
FIGURE_SAVE_DIR.mkdir(parents=True, exist_ok=True)

# The aggregation method you are analyzing (used in the plot title and file name).
AGG_METHOD_TO_ANALYZE = "FedAvg"

# --- 2. Helper Function to Save Figures ---
def save_figure_as_pdf(fig, output_directory: Path, base_filename: str, **details):
    """
    Saves a matplotlib figure to a PDF with a descriptive filename.

    The file name is ``base_filename`` followed by one ``<key>_<value>`` part
    per keyword in ``details`` (in call order), joined with '-', plus '.pdf'.

    Args:
        fig: Object exposing ``savefig(path, **kwargs)`` (a matplotlib Figure).
        output_directory: Directory to write into; created if it is missing.
        base_filename: Leading part of the file name, without extension.
        **details: Extra name parts. Each value is stringified and characters
            that are unsafe in file names are replaced by '_'.

    Returns:
        None. A failure is reported with a printed warning, not raised, so a
        failed export never interrupts the notebook.
    """
    import re  # local import: this cell does not import `re` itself

    filename_parts = [base_filename]
    for key, value in details.items():
        # '.' would read as an extension; path separators and the characters
        # Windows forbids in file names would redirect or break the save path.
        clean_value = re.sub(r'[.\\/:*?"<>|]', '_', str(value))
        filename_parts.append(f"{key}_{clean_value}")

    final_filename = f"{'-'.join(filename_parts)}.pdf"
    save_path = output_directory / final_filename

    try:
        # Create the target on demand so the helper does not depend on the
        # caller having prepared the directory.
        output_directory.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_path, bbox_inches='tight', format='pdf', dpi=300)
        print(f"‚úÖ Figure saved successfully to: {save_path}")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not save figure: {e}")


# --- 3. Data Preprocessing for Plotting ---
# This section prepares the `final_metrics_df` for the specific plot you need.
# It derives two helper columns (ATTACK_METHOD, ADV_RATE) and renames the
# metric columns to the names the plotting code below looks for.

# Create a copy to avoid modifying the original DataFrame
summary_df = final_metrics_df.copy()

# TODO: **IMPORTANT** - You need to create the 'ATTACK_METHOD' and 'ADV_RATE' columns
# based on the parameters loaded from your 'config_snapshot.json'.
# The example below assumes you have 'param_adversary_seller_config' in your config.
# Please adapt this logic to match your actual config structure.
if 'param_adversary_seller_config' in summary_df.columns:
    # Example: Create ATTACK_METHOD column
    # The config value is matched as text, so it has to contain the exact
    # substring "'is_adversary': True" to be labelled 'Backdoor'.
    summary_df['ATTACK_METHOD'] = np.where(
        summary_df['param_adversary_seller_config'].str.contains("'is_adversary': True"), 
        'Backdoor', 
        'No Attack'
    )
    # Example: Create ADV_RATE column (assuming it's a parameter)
    # This is a placeholder, you might need to extract it from a nested JSON string or another param
    if 'param_num_adversaries' in summary_df.columns and 'param_num_sellers' in summary_df.columns:
         # ADV_RATE = adversaries / sellers, both parsed to numbers first.
         summary_df['ADV_RATE'] = pd.to_numeric(summary_df['param_num_adversaries']) / pd.to_numeric(summary_df['param_num_sellers'])
    else:
        # Fallback if specific params aren't present
        summary_df['ADV_RATE'] = 0.0 
else:
    print("‚ö†Ô∏è  Warning: Could not determine ATTACK_METHOD or ADV_RATE from config params. Using dummy values.")
    # NOTE(review): with these dummies ADV_RATE (0.0) is not in the default
    # ADV_RATES_TO_PLOT, so no 'Backdoor' rows pass the filter below.
    summary_df['ATTACK_METHOD'] = 'Backdoor' # Fallback
    summary_df['ADV_RATE'] = 0.0 # Fallback

# TODO: Rename your metric columns to match the plotting script's expectations.
# For example, if your final accuracy is 'test_accuracy', rename it to 'FINAL_MAIN_ACC'.
# rename() ignores keys that are not present, so this is safe for any column set.
summary_df = summary_df.rename(columns={
    'test_accuracy': 'FINAL_MAIN_ACC',
    'attack_success_rate': 'FINAL_ASR' # Assuming this is your ASR column name
})

# --- 4. Main Plotting Logic ---
# The plot is only drawn when both metric columns exist after the rename.
if 'FINAL_MAIN_ACC' in summary_df.columns and 'FINAL_ASR' in summary_df.columns:
    # --- Data Preparation ---
    no_attack_perf = summary_df[summary_df['ATTACK_METHOD'] == 'No Attack'].copy()

    if not no_attack_perf.empty:
        # Baseline: mean accuracy over all 'No Attack' runs.
        no_attack_avg_acc = no_attack_perf['FINAL_MAIN_ACC'].mean()
        # NOTE(review): no_attack_avg_asr is assigned here but never read in this cell.
        no_attack_avg_asr = 0.1  # ASR for No Attack is conceptually 0, plotted at 0.1 for visibility
    else:
        no_attack_avg_acc = np.nan
        no_attack_avg_asr = np.nan

    # Keep only 'Backdoor' runs whose rate is exactly one of ADV_RATES_TO_PLOT.
    backdoor_attack_filtered = summary_df[
        (summary_df['ATTACK_METHOD'] == 'Backdoor') &
        (summary_df['ADV_RATE'].isin(ADV_RATES_TO_PLOT))
    ].copy()

    if not backdoor_attack_filtered.empty:
        # Average results for each adversary rate across different seeds
        backdoor_attack_avg_perf = backdoor_attack_filtered.groupby('ADV_RATE', as_index=False)[
            ['FINAL_MAIN_ACC', 'FINAL_ASR']
        ].mean().sort_values(by='ADV_RATE')
        adv_rates_with_data = sorted(backdoor_attack_avg_perf['ADV_RATE'].unique())
    else:
        # Empty frame with the expected columns so the `.empty` checks below work.
        backdoor_attack_avg_perf = pd.DataFrame(columns=['ADV_RATE', 'FINAL_MAIN_ACC', 'FINAL_ASR'])
        adv_rates_with_data = []
        print(f"Warning: No 'Backdoor' data found for ADV_RATEs {ADV_RATES_TO_PLOT}.")

    # --- Plotting ---
    fig, ax1 = plt.subplots(figsize=(7, 5)) # Standard figure size

    main_acc_color = 'mediumblue'
    asr_color = 'crimson'
    no_attack_color = 'dimgray'

    # Legend entries are collected by hand because the lines live on two axes.
    legend_elements = []
    # Horizontal extent of the baseline: the rates that have data, otherwise
    # the configured rates.
    x_min, x_max = (min(adv_rates_with_data), max(adv_rates_with_data)) if adv_rates_with_data else (min(ADV_RATES_TO_PLOT), max(ADV_RATES_TO_PLOT))

    # Plot "No Attack" Main Accuracy
    if pd.notna(no_attack_avg_acc):
        ax1.hlines(y=no_attack_avg_acc, xmin=x_min, xmax=x_max, color=no_attack_color, linestyle='--', linewidth=2)
        legend_elements.append(Line2D([0], [0], color=no_attack_color, linestyle='--', lw=2, label='Main Acc. (No Attack)'))

    # Plot "Backdoor" Main Accuracy
    if not backdoor_attack_avg_perf.empty:
        ax1.plot(backdoor_attack_avg_perf['ADV_RATE'], backdoor_attack_avg_perf['FINAL_MAIN_ACC'],
                 color=main_acc_color, linestyle='-', marker='o', linewidth=2, markersize=7)
        legend_elements.append(Line2D([0], [0], color=main_acc_color, linestyle='-', marker='o', lw=2, label='Main Acc. (Backdoor)'))

    ax1.set_xlabel('Adversary Rate', fontsize=12)
    ax1.set_ylabel('Main Task Accuracy', color=main_acc_color, fontsize=12)
    ax1.tick_params(axis='y', labelcolor=main_acc_color, labelsize=10)
    # NOTE(review): the fixed 0-1.05 range assumes metrics are fractions, not percentages.
    ax1.set_ylim(0, 1.05)
    ax1.grid(True, which='major', linestyle=':', linewidth=0.7)

    # Second y-axis for ASR
    ax2 = ax1.twinx()

    # Plot "Backdoor" ASR
    if not backdoor_attack_avg_perf.empty:
        ax2.plot(backdoor_attack_avg_perf['ADV_RATE'], backdoor_attack_avg_perf['FINAL_ASR'],
                 color=asr_color, linestyle=':', marker='s', linewidth=2, markersize=7)
        legend_elements.append(
            Line2D([0], [0], color=asr_color, linestyle=':', marker='s', lw=2, label='ASR (Backdoor)'))

    ax2.set_ylabel('Attack Success Rate (ASR)', color=asr_color, fontsize=12)
    ax2.tick_params(axis='y', labelcolor=asr_color, labelsize=10)
    ax2.set_ylim(0, 1.05)

    # Configure x-axis ticks
    # Ticks always show every configured rate, even those without data.
    ax1.set_xticks(ticks=ADV_RATES_TO_PLOT)
    ax1.set_xticklabels(labels=[f'{r:.1f}' for r in ADV_RATES_TO_PLOT], fontsize=10)

    # Combined Legend
    # Placed below the axes at figure level so it covers both y-axes.
    if legend_elements:
        fig.legend(handles=legend_elements, loc='lower center', bbox_to_anchor=(0.5, -0.05), ncol=3, frameon=False, fontsize=10)

    # plt.title targets the current axes of the figure.
    plt.title(f'{AGG_METHOD_TO_ANALYZE}: Accuracy vs. ASR', fontsize=14, pad=20)
    # rect leaves a margin at the bottom and top of the figure for legend and title.
    plt.tight_layout(rect=[0, 0.05, 1, 0.95])

    # Save and show the figure
    save_figure_as_pdf(fig=fig, output_directory=FIGURE_SAVE_DIR, base_filename="accuracy_vs_asr", aggregation=AGG_METHOD_TO_ANALYZE)
    plt.show()

else:
    print("‚ùå Plotting skipped. Could not find 'FINAL_MAIN_ACC' and 'FINAL_ASR' columns in the DataFrame.")
    print("   Please check the column renaming step in the preprocessing section.")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.lines import Line2D
from pathlib import Path
import matplotlib as mpl

# This script assumes that 'final_metrics_df' is available from the first cell.

# --- 1. Configuration & Style ---
# Directory that receives the PDF written by this cell.
FIGURE_SAVE_DIR = Path("./analysis_plots")
# parents=True keeps this working when the path is changed to a nested
# location whose parent directories do not exist yet.
FIGURE_SAVE_DIR.mkdir(parents=True, exist_ok=True)

# Global font and style adjustments for the plot
# NOTE: rcParams is global matplotlib state, so these settings stay in effect
# for every figure created later in the same kernel.
mpl.rcParams.update({
    'font.size': 16, 'axes.labelsize': 16, 'axes.titlesize': 18,
    'xtick.labelsize': 14, 'ytick.labelsize': 14, 'legend.fontsize': 12,
    'legend.title_fontsize': 13, 'pdf.fonttype': 42, 'ps.fonttype': 42,
})

# --- 2. Helper Function to Save Figures (for consistency) ---
def save_figure_as_pdf(fig, output_directory: Path, base_filename: str, **details):
    """
    Saves a matplotlib figure to a PDF with a descriptive filename.

    The file name is ``base_filename`` followed by one ``<key>_<value>`` part
    per keyword in ``details`` (in call order), joined with '-', plus '.pdf'.

    Args:
        fig: Object exposing ``savefig(path, **kwargs)`` (a matplotlib Figure).
        output_directory: Directory to write into; created if it is missing.
        base_filename: Leading part of the file name, without extension.
        **details: Extra name parts. Each value is stringified and characters
            that are unsafe in file names are replaced by '_'.

    Returns:
        None. A failure is reported with a printed warning, not raised, so a
        failed export never interrupts the notebook.
    """
    import re  # local import: this cell does not import `re` itself

    filename_parts = [base_filename]
    for key, value in details.items():
        # '.' would read as an extension; path separators and the characters
        # Windows forbids in file names would redirect or break the save path.
        clean_value = re.sub(r'[.\\/:*?"<>|]', '_', str(value))
        filename_parts.append(f"{key}_{clean_value}")
    final_filename = f"{'-'.join(filename_parts)}.pdf"
    save_path = output_directory / final_filename
    try:
        # Create the target on demand so the helper does not depend on the
        # caller having prepared the directory.
        output_directory.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_path, bbox_inches='tight', format='pdf', dpi=300)
        print(f"‚úÖ Figure saved successfully to: {save_path}")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not save figure: {e}")

# --- 3. Data Preprocessing for Selection Rate Plot ---
# This section prepares `final_metrics_df` for this specific plot by deriving
# the helper columns ATTACK_METHOD, IS_SYBIL and ADV_RATE on a private copy.
summary_df = final_metrics_df.copy()

# TODO: **IMPORTANT** - Create 'ATTACK_METHOD', 'ADV_RATE', and 'IS_SYBIL' columns
# from your loaded configuration parameters. Adapt this logic to your config structure.
if 'param_adversary_seller_config' in summary_df.columns:
    # The config value is matched as text, so the exact substrings below must
    # appear in it for a run to be labelled 'Backdoor' / 'mimic'.
    summary_df['ATTACK_METHOD'] = np.where(
        summary_df['param_adversary_seller_config'].str.contains("'is_adversary': True"), 'Backdoor', 'No Attack')
    # Example for IS_SYBIL, assuming it's in the sybil config dict
    summary_df['IS_SYBIL'] = np.where(
        summary_df['param_adversary_seller_config'].str.contains("'sybil_strategy': 'mimic'"), 'mimic', 'False')
    if 'param_num_adversaries' in summary_df.columns and 'param_num_sellers' in summary_df.columns:
        # ADV_RATE = adversaries / sellers, both parsed to numbers first.
        summary_df['ADV_RATE'] = pd.to_numeric(summary_df['param_num_adversaries']) / pd.to_numeric(summary_df['param_num_sellers'])
    else:
        summary_df['ADV_RATE'] = 0.0
else:
    print("‚ö†Ô∏è Warning: Could not determine ATTACK_METHOD/ADV_RATE from config. Using dummy values.")
    summary_df['ATTACK_METHOD'] = 'Backdoor'
    summary_df['ADV_RATE'] = 0.0
    summary_df['IS_SYBIL'] = 'False'

# --- 4. Main Plotting Logic for Selection Rate ---
# Define required columns from your final_metrics.json
# Adversary rate -> column holding the baseline selection rate for that rate.
baseline_cols_map = {
    0.1: 'no_attack_designated_malicious_selection_rate_0.1', 0.2: 'no_attack_designated_malicious_selection_rate_0.2',
    0.3: 'no_attack_designated_malicious_selection_rate_0.3', 0.4: 'no_attack_designated_malicious_selection_rate_0.4'
}
actual_mal_select_col = 'avg_adversary_selection_rate'

# Check if necessary columns exist (a single one is enough to attempt a plot)
required_cols = list(baseline_cols_map.values()) + [actual_mal_select_col]
if not any(col in summary_df.columns for col in required_cols):
    print("‚ùå Plotting skipped. None of the required selection rate columns found in the DataFrame.")
else:
    # Prepare "No Attack" Baseline Data: one mean per adversary rate, skipping
    # rates whose column is missing or entirely NaN.
    no_attack_runs = summary_df[summary_df['ATTACK_METHOD'] == 'No Attack'].copy()
    baseline_points = {}
    if not no_attack_runs.empty:
        for adv_rate, col_name in baseline_cols_map.items():
            if col_name in no_attack_runs.columns:
                mean_rate = no_attack_runs[col_name].mean()
                if pd.notna(mean_rate):
                    baseline_points[adv_rate] = mean_rate

    # Explicit dtype keeps the empty case a float Series.
    baseline_series = pd.Series(baseline_points, dtype=float).sort_index()

    # Prepare "Backdoor Attack" Data, restricted to the rates that have a
    # baseline (or a default set of rates when there is no baseline at all).
    adv_rates_to_plot = baseline_series.index.tolist() if not baseline_series.empty else [0.2, 0.3, 0.4]
    backdoor_data = summary_df[(summary_df['ATTACK_METHOD'] == 'Backdoor') & (summary_df['ADV_RATE'].isin(adv_rates_to_plot))].copy()

    # Start from an empty frame that already has the columns used below.
    # A column-less pd.DataFrame() raises KeyError on the 'IS_SYBIL' lookup
    # when only baseline data is available.
    actual_mal_select_avg = pd.DataFrame(columns=['ADV_RATE', 'IS_SYBIL', actual_mal_select_col])
    if not backdoor_data.empty and actual_mal_select_col in backdoor_data.columns:
        actual_mal_select_avg = backdoor_data.groupby(['ADV_RATE', 'IS_SYBIL'], as_index=False)[actual_mal_select_col].mean()

    # --- Plotting ---
    if not actual_mal_select_avg.empty or not baseline_series.empty:
        fig, ax = plt.subplots(figsize=(8, 5.5))

        # Plot Control Group (Designated Adv Group with No Attack)
        if not baseline_series.empty:
            ax.plot(baseline_series.index, baseline_series.values, color='dimgray', linestyle=(0, (5, 2)), marker='D', label='Control Group (No Attack)')

        # Plot Attacker Group (Standard Backdoor)
        std_backdoor_data = actual_mal_select_avg[actual_mal_select_avg['IS_SYBIL'] == 'False']
        if not std_backdoor_data.empty:
            ax.plot(std_backdoor_data['ADV_RATE'], std_backdoor_data[actual_mal_select_col], color='orangered', linestyle='-', marker='o', label='Attacker Group (Standard)')

        # Plot Attacker Group (Sybil/Mimicry Backdoor)
        mimic_backdoor_data = actual_mal_select_avg[actual_mal_select_avg['IS_SYBIL'] == 'mimic']
        if not mimic_backdoor_data.empty:
            ax.plot(mimic_backdoor_data['ADV_RATE'], mimic_backdoor_data[actual_mal_select_col], color='darkviolet', linestyle='--', marker='s', label='Attacker Group (Sybil/Mimic)')

        ax.set_xlabel('Adversary Rate')
        ax.set_ylabel('Selection Rate')
        ax.set_xticks(ticks=adv_rates_to_plot)
        ax.set_xticklabels(labels=[f'{r:.1f}' for r in adv_rates_to_plot])
        ax.grid(True, which='major', linestyle=':', linewidth=0.6)
        ax.legend(title='Group & Behavior', frameon=True, edgecolor='lightgray')
        ax.set_ylim(bottom=0)

        plt.tight_layout()

        # Extract aggregation method for filename, if available (taken from the first row)
        agg_method = summary_df['param_aggregation_name'].iloc[0] if 'param_aggregation_name' in summary_df.columns else "Unknown"
        save_figure_as_pdf(fig=fig, output_directory=FIGURE_SAVE_DIR, base_filename="selection_rate", aggregation=agg_method)
        plt.show()
    else:
        print("‚ÑπÔ∏è Not enough data to plot Malicious Selection Rates after filtering.")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import matplotlib as mpl

# This script assumes 'final_metrics_df' is available from the first cell.

# --- 1. Configuration & Style ---
# Directory that receives the PDF written by this cell.
FIGURE_SAVE_DIR = Path("./analysis_plots")
# parents=True keeps this working when the path is changed to a nested
# location whose parent directories do not exist yet.
FIGURE_SAVE_DIR.mkdir(parents=True, exist_ok=True)

# Global font and style adjustments
# NOTE: rcParams is global matplotlib state, so these settings stay in effect
# for every figure created later in the same kernel.
mpl.rcParams.update({
    'font.size': 16, 'axes.labelsize': 16, 'axes.titlesize': 18,
    'xtick.labelsize': 14, 'ytick.labelsize': 14, 'legend.fontsize': 12,
    'legend.title_fontsize': 13, 'pdf.fonttype': 42, 'ps.fonttype': 42,
})

# --- 2. Helper Function to Save Figures ---
def save_figure_as_pdf(fig, output_directory: Path, base_filename: str, **details):
    """
    Saves a matplotlib figure to a PDF with a descriptive filename.

    The file name is ``base_filename`` followed by one ``<key>_<value>`` part
    per keyword in ``details`` (in call order), joined with '-', plus '.pdf'.

    Args:
        fig: Object exposing ``savefig(path, **kwargs)`` (a matplotlib Figure).
        output_directory: Directory to write into; created if it is missing.
        base_filename: Leading part of the file name, without extension.
        **details: Extra name parts. Each value is stringified and characters
            that are unsafe in file names are replaced by '_'.

    Returns:
        None. A failure is reported with a printed warning, not raised, so a
        failed export never interrupts the notebook.
    """
    import re  # local import: this cell does not import `re` itself

    filename_parts = [base_filename]
    for key, value in details.items():
        # '.' would read as an extension; path separators and the characters
        # Windows forbids in file names would redirect or break the save path.
        clean_value = re.sub(r'[.\\/:*?"<>|]', '_', str(value))
        filename_parts.append(f"{key}_{clean_value}")
    final_filename = f"{'-'.join(filename_parts)}.pdf"
    save_path = output_directory / final_filename
    try:
        # Create the target on demand so the helper does not depend on the
        # caller having prepared the directory.
        output_directory.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_path, bbox_inches='tight', format='pdf', dpi=300)
        print(f"‚úÖ Figure saved successfully to: {save_path}")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not save figure: {e}")

# --- 3. Data Preprocessing for Milestone Plot ---
# Work on a private copy and derive the helper columns used for grouping.
summary_df = final_metrics_df.copy()

# Create 'ATTACK_METHOD', 'ADV_RATE', and 'IS_SYBIL' columns from config params
if 'param_adversary_seller_config' in summary_df.columns:
    # The config value is matched as text, so the exact substrings below must
    # appear in it for a run to be labelled 'Backdoor' / 'mimic'.
    summary_df['ATTACK_METHOD'] = np.where(
        summary_df['param_adversary_seller_config'].str.contains("'is_adversary': True"), 'Backdoor', 'No Attack')
    summary_df['IS_SYBIL'] = np.where(
        summary_df['param_adversary_seller_config'].str.contains("'sybil_strategy': 'mimic'"), 'mimic', 'False')
    if 'param_num_adversaries' in summary_df.columns and 'param_num_sellers' in summary_df.columns:
        # ADV_RATE = adversaries / sellers, both parsed to numbers first.
        summary_df['ADV_RATE'] = pd.to_numeric(summary_df['param_num_adversaries']) / pd.to_numeric(summary_df['param_num_sellers'])
    else:
        summary_df['ADV_RATE'] = 0.0
else:
    print("‚ö†Ô∏è Warning: Could not determine ATTACK_METHOD/ADV_RATE from config. Using dummy values.")
    summary_df['ATTACK_METHOD'] = 'Backdoor'
    summary_df['ADV_RATE'] = 0.0
    summary_df['IS_SYBIL'] = 'False'

# --- 4. Main Plotting Logic for Milestones ---

# --- Configuration for this plot ---
# 1. CHOOSE THE FIXED ADVERSARY RATE for comparing attack scenarios
FIXED_ADV_RATE_FOR_COMPARISON = 0.3

# 2. CHOOSE THE ACCURACY MILESTONES to display on the X-axis
# Each label is substituted into 'cost_to_<label>_percent_accuracy' to form
# the column name that is read from the metrics (e.g. "70", "80").
milestone_acc_labels = ["70", "80", "85"]
milestone_cols = [f'cost_to_{label}_percent_accuracy' for label in milestone_acc_labels]

# Verify all chosen milestone columns exist
missing_cols = [col for col in milestone_cols if col not in summary_df.columns]
if missing_cols:
    print(f"‚ùå Plotting skipped. Milestone columns missing: {', '.join(missing_cols)}.")
else:
    # --- Select the runs of each scenario ---
    # Attack scenarios must be actual attack runs; without this condition a
    # 'No Attack' run whose ADV_RATE equals the fixed rate would be counted
    # as a standard backdoor run.
    is_backdoor = summary_df['ATTACK_METHOD'] == 'Backdoor'
    # np.isclose tolerates floating-point noise in the computed ratio.
    at_fixed_rate = np.isclose(summary_df['ADV_RATE'], FIXED_ADV_RATE_FOR_COMPARISON)

    # 1. No Attack Data
    no_attack_data = summary_df[summary_df['ATTACK_METHOD'] == 'No Attack'].copy()
    # 2. Backdoor Attack (Standard)
    std_backdoor_data = summary_df[is_backdoor & (summary_df['IS_SYBIL'] == 'False') & at_fixed_rate].copy()
    # 3. Backdoor Attack (Sybil/Mimicry)
    mimic_backdoor_data = summary_df[is_backdoor & (summary_df['IS_SYBIL'] == 'mimic') & at_fixed_rate].copy()

    # --- Prepare Data for Bar Chart ---
    # One row per (scenario, milestone) holding the mean cost over the runs;
    # combinations without any value are left out.
    scenario_frames = [
        ('No Attack', no_attack_data),
        ('Backdoor (Standard)', std_backdoor_data),
        ('Backdoor (Sybil)', mimic_backdoor_data),
    ]
    data_for_bars = []
    for condition, scenario_df in scenario_frames:
        if scenario_df.empty:
            continue
        for col, label in zip(milestone_cols, milestone_acc_labels):
            avg_cost = scenario_df[col].mean()
            if pd.notna(avg_cost):
                data_for_bars.append({'Condition': condition, 'Milestone': f'{label}% Acc', 'Cost': avg_cost})

    df_for_bars = pd.DataFrame(data_for_bars)

    # --- Plotting Grouped Bar Chart ---
    if not df_for_bars.empty:
        plt.figure(figsize=(10, 6))

        palette = {'No Attack': 'dimgray', 'Backdoor (Standard)': 'orangered', 'Backdoor (Sybil)': 'darkviolet'}
        # Fixed orders keep bar positions and colours stable between runs.
        milestone_order = [f"{label}% Acc" for label in milestone_acc_labels]
        hue_order = ['No Attack', 'Backdoor (Standard)', 'Backdoor (Sybil)']

        sns.barplot(data=df_for_bars, x='Milestone', y='Cost', hue='Condition',
                    order=milestone_order, hue_order=hue_order, palette=palette)

        plt.xlabel('Target Accuracy Milestone')
        plt.ylabel('Cost (e.g., Rounds) to Converge')
        plt.grid(True, which='major', linestyle=':', linewidth=0.7, axis='y')
        # Draw the grid behind the bars.
        plt.gca().set_axisbelow(True)
        plt.legend(title='Attack Scenario', loc='upper left')

        # Aggregation name for title and file name, taken from the first row.
        agg_method = summary_df['param_aggregation_name'].iloc[0] if 'param_aggregation_name' in summary_df.columns else "Unknown"
        plt.title(f'{agg_method}: Cost to Reach Accuracy Milestones\n(Attacks at {FIXED_ADV_RATE_FOR_COMPARISON*100:.0f}% Adversary Rate)')

        plt.tight_layout()

        fig = plt.gcf()
        save_figure_as_pdf(fig=fig, output_directory=FIGURE_SAVE_DIR, base_filename="milestone_cost", aggregation=agg_method)
        plt.show()
    else:
        print("‚ÑπÔ∏è Not enough data to create the milestone cost bar chart after filtering.")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import matplotlib as mpl

# This script assumes 'final_metrics_df' is available from the first cell.

# --- 1. Configuration & Style ---
# Directory that receives the PDF written by this cell.
FIGURE_SAVE_DIR = Path("./analysis_plots")
# parents=True keeps this working when the path is changed to a nested
# location whose parent directories do not exist yet.
FIGURE_SAVE_DIR.mkdir(parents=True, exist_ok=True)

# Global font and style adjustments
# NOTE: rcParams is global matplotlib state, so these settings stay in effect
# for every figure created later in the same kernel.
mpl.rcParams.update({
    'font.size': 16, 'axes.labelsize': 16, 'axes.titlesize': 18,
    'xtick.labelsize': 14, 'ytick.labelsize': 14, 'legend.fontsize': 12,
    'legend.title_fontsize': 13, 'pdf.fonttype': 42, 'ps.fonttype': 42,
})

# --- 2. Helper Function to Save Figures ---
def save_figure_as_pdf(fig, output_directory: Path, base_filename: str, **details):
    """
    Saves a matplotlib figure to a PDF with a descriptive filename.

    The file name is ``base_filename`` followed by one ``<key>_<value>`` part
    per keyword in ``details`` (in call order), joined with '-', plus '.pdf'.

    Args:
        fig: Object exposing ``savefig(path, **kwargs)`` (a matplotlib Figure).
        output_directory: Directory to write into; created if it is missing.
        base_filename: Leading part of the file name, without extension.
        **details: Extra name parts. Each value is stringified and characters
            that are unsafe in file names are replaced by '_'.

    Returns:
        None. A failure is reported with a printed warning, not raised, so a
        failed export never interrupts the notebook.
    """
    import re  # local import: this cell does not import `re` itself

    filename_parts = [base_filename]
    for key, value in details.items():
        # '.' would read as an extension; path separators and the characters
        # Windows forbids in file names would redirect or break the save path.
        clean_value = re.sub(r'[.\\/:*?"<>|]', '_', str(value))
        filename_parts.append(f"{key}_{clean_value}")
    final_filename = f"{'-'.join(filename_parts)}.pdf"
    save_path = output_directory / final_filename
    try:
        # Create the target on demand so the helper does not depend on the
        # caller having prepared the directory.
        output_directory.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_path, bbox_inches='tight', format='pdf', dpi=300)
        print(f"‚úÖ Figure saved successfully to: {save_path}")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not save figure: {e}")

# --- 3. Data Preprocessing for Milestone Plot ---
# Work on a private copy of the loaded metrics.
summary_df = final_metrics_df.copy()

# Derive 'ATTACK_METHOD', 'IS_SYBIL' and 'ADV_RATE' from the config params,
# or fall back to dummy values when the seller config column is absent.
if 'param_adversary_seller_config' not in summary_df.columns:
    print("‚ö†Ô∏è Warning: Could not determine ATTACK_METHOD/ADV_RATE from config. Using dummy values.")
    summary_df['ATTACK_METHOD'] = 'Backdoor'
    summary_df['ADV_RATE'] = 0.0
    summary_df['IS_SYBIL'] = 'False'
else:
    seller_config_text = summary_df['param_adversary_seller_config']
    flags_adversary = seller_config_text.str.contains("'is_adversary': True")
    summary_df['ATTACK_METHOD'] = np.where(flags_adversary, 'Backdoor', 'No Attack')
    flags_mimic = seller_config_text.str.contains("'sybil_strategy': 'mimic'")
    summary_df['IS_SYBIL'] = np.where(flags_mimic, 'mimic', 'False')
    if {'param_num_adversaries', 'param_num_sellers'}.issubset(summary_df.columns):
        adversary_count = pd.to_numeric(summary_df['param_num_adversaries'])
        seller_count = pd.to_numeric(summary_df['param_num_sellers'])
        summary_df['ADV_RATE'] = adversary_count / seller_count
    else:
        summary_df['ADV_RATE'] = 0.0

# --- 4. Main Plotting Logic for Milestones ---

# --- Configuration for this plot ---
FIXED_ADV_RATE_FOR_COMPARISON = 0.3
milestone_acc_labels = ["70", "80", "85"]

# --- Auto-detect column names for convergence cost/rounds ---
# Conventions are tried in order; the first one whose column for the first
# milestone exists wins. Each entry is
# (column template, y-axis label, value column name, message to print).
milestone_cols = []
naming_conventions = (
    ('cost_to_{}_percent_accuracy', 'Cost (e.g., Rounds) to Converge', 'Cost',
     "‚ÑπÔ∏è Found milestone columns with 'cost_to_...' naming convention."),
    ('ROUNDS_TO_{}ACC', 'Rounds to Converge', 'Rounds to Converge',
     "‚ÑπÔ∏è Found milestone columns with 'ROUNDS_TO_...' naming convention."),
)
for column_template, axis_text, value_column, found_message in naming_conventions:
    if column_template.format(milestone_acc_labels[0]) in summary_df.columns:
        milestone_cols = [column_template.format(label) for label in milestone_acc_labels]
        y_axis_label = axis_text
        y_axis_col = value_column
        print(found_message)
        break

# Verify all chosen milestone columns exist
missing_cols = [col for col in milestone_cols if col not in summary_df.columns]
if missing_cols or not milestone_cols:
    print(f"‚ùå Plotting skipped. Could not find a consistent set of milestone columns (e.g., 'cost_to_70_percent_accuracy' or 'ROUNDS_TO_70ACC').")
    if missing_cols:
        print(f"   Missing: {', '.join(missing_cols)}")
else:
    # --- Select the runs belonging to each scenario ---
    backdoor_rows = summary_df['ATTACK_METHOD'] == 'Backdoor'
    fixed_rate_rows = np.isclose(summary_df['ADV_RATE'], FIXED_ADV_RATE_FOR_COMPARISON)

    # 1. No Attack
    no_attack_data = summary_df[summary_df['ATTACK_METHOD'] == 'No Attack'].copy()
    # 2. Backdoor Attack (Standard)
    std_backdoor_data = summary_df[
        backdoor_rows & (summary_df['IS_SYBIL'] == 'False') & fixed_rate_rows
    ].copy()
    # 3. Backdoor Attack (Sybil/Mimicry)
    mimic_backdoor_data = summary_df[
        backdoor_rows & (summary_df['IS_SYBIL'] == 'mimic') & fixed_rate_rows
    ].copy()

    # --- Prepare Data for Bar Chart ---
    # One row per (scenario, milestone) with the mean over the scenario's runs;
    # empty scenarios and all-NaN milestones contribute nothing.
    data_for_bars = []
    for scenario_name, scenario_runs in (
        ('No Attack', no_attack_data),
        ('Backdoor (Standard)', std_backdoor_data),
        ('Backdoor (Sybil)', mimic_backdoor_data),
    ):
        if scenario_runs.empty:
            continue
        for col, label in zip(milestone_cols, milestone_acc_labels):
            avg_cost = scenario_runs[col].mean()
            if pd.isna(avg_cost):
                continue
            data_for_bars.append({'Condition': scenario_name, 'Milestone': f'{label}% Acc', y_axis_col: avg_cost})

    df_for_bars = pd.DataFrame(data_for_bars)

    # --- Plotting Grouped Bar Chart ---
    if df_for_bars.empty:
        print("‚ÑπÔ∏è Not enough data to create the milestone cost bar chart after filtering.")
    else:
        fig = plt.figure(figsize=(10, 6))

        hue_order = ['No Attack', 'Backdoor (Standard)', 'Backdoor (Sybil)']
        palette = dict(zip(hue_order, ['dimgray', 'orangered', 'darkviolet']))
        milestone_order = [f"{label}% Acc" for label in milestone_acc_labels]

        bar_axes = sns.barplot(data=df_for_bars, x='Milestone', y=y_axis_col, hue='Condition',
                               order=milestone_order, hue_order=hue_order, palette=palette)

        bar_axes.set_xlabel('Target Accuracy Milestone')
        bar_axes.set_ylabel(y_axis_label)
        bar_axes.grid(True, which='major', linestyle=':', linewidth=0.7, axis='y')
        # Keep the grid lines behind the bars.
        bar_axes.set_axisbelow(True)
        bar_axes.legend(title='Attack Scenario', loc='upper left')

        # Aggregation name for title and file name, taken from the first row.
        if 'param_aggregation_name' in summary_df.columns:
            agg_method = summary_df['param_aggregation_name'].iloc[0]
        else:
            agg_method = "Unknown"
        bar_axes.set_title(f'{agg_method}: Convergence Cost to Accuracy Milestones\n(Attacks at {FIXED_ADV_RATE_FOR_COMPARISON*100:.0f}% Adversary Rate)')

        plt.tight_layout()

        save_figure_as_pdf(fig=fig, output_directory=FIGURE_SAVE_DIR, base_filename="milestone_cost", aggregation=agg_method)
        plt.show()



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
from pathlib import Path

# This script assumes 'final_metrics_df' is available from the first cell.

# --- 1. Configuration & Style ---
# Every figure produced by this cell is written below this directory.
FIGURE_SAVE_DIR = Path("./analysis_plots")
FIGURE_SAVE_DIR.mkdir(exist_ok=True)

# Global font and style adjustments, set one rc group at a time.
mpl.rc('font', size=16)
mpl.rc('axes', labelsize=16, titlesize=18)
mpl.rc('xtick', labelsize=14)
mpl.rc('ytick', labelsize=14)
mpl.rc('legend', fontsize=12, title_fontsize=13)
# Font type 42 for both vector backends (PDF and PostScript).
mpl.rc('pdf', fonttype=42)
mpl.rc('ps', fonttype=42)

# --- 2. Helper Function to Save Figures ---
def save_figure_as_pdf(fig, output_directory: Path, base_filename: str, **details):
    """Save a figure as a PDF with a descriptive filename.

    The filename is ``base_filename`` followed by one ``key_value`` part per
    keyword argument, joined with '-', plus the ``.pdf`` suffix. Dots in a
    value become '_' (0.3 -> '0_3'). Path separators are replaced too, so a
    value such as 'Fed/Avg' cannot redirect the file into a sub-directory.

    Args:
        fig: Object exposing ``savefig`` (normally a matplotlib Figure).
        output_directory: Target directory; created (with parents) if missing.
        base_filename: Leading part of the filename, without extension.
        **details: Extra key/value pairs encoded into the filename.

    Saving is best-effort: any failure is printed, never raised, so a broken
    export does not abort the rest of the notebook cell.
    """
    filename_parts = [base_filename]
    for key, value in details.items():
        clean_value = str(value).replace('.', '_').replace('/', '_').replace('\\', '_')
        filename_parts.append(f"{key}_{clean_value}")
    final_filename = f"{'-'.join(filename_parts)}.pdf"
    save_path = Path(output_directory) / final_filename
    try:
        # Create the directory here so the helper also works when the caller
        # passes a directory that has not been set up yet.
        save_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_path, bbox_inches='tight', format='pdf', dpi=300)
        print(f"‚úÖ Figure saved successfully to: {save_path}")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not save figure: {e}")

# --- 3. Data Preprocessing & Filtering ---
# Builds a stacked bar chart of the average per-round cost paid to benign vs.
# malicious sellers for three scenarios: no attack, standard backdoor, and
# sybil (mimic) backdoor. Requires 'final_metrics_df' from the first cell.
summary_df = final_metrics_df.copy()

# --- FILTERING CONFIGURATION ---
TARGET_DATASET = 'CIFAR-10'  # or None
TARGET_MODEL = None      # or None
TARGET_AGG_METHOD = None # or None
REPRESENTATIVE_ADV_RATE_FOR_ATTACK = 0.3 # Adv rate for attack scenarios
# ---

# Apply filters (a filter is skipped when its target is None or its column is absent)
print("--- Applying Filters for Cost Composition Plot ---")
if TARGET_DATASET and 'param_dataset_name' in summary_df.columns:
    summary_df = summary_df[summary_df['param_dataset_name'] == TARGET_DATASET]
    print(f"‚úÖ Filtered for Dataset: {TARGET_DATASET}")
if TARGET_MODEL and 'param_model_structure' in summary_df.columns:
    summary_df = summary_df[summary_df['param_model_structure'] == TARGET_MODEL]
    print(f"‚úÖ Filtered for Model: {TARGET_MODEL}")
if TARGET_AGG_METHOD and 'param_aggregation_name' in summary_df.columns:
    summary_df = summary_df[summary_df['param_aggregation_name'] == TARGET_AGG_METHOD]
    print(f"‚úÖ Filtered for Aggregation: {TARGET_AGG_METHOD}")

# Detach from the filtered selection so the column assignments below write to
# an independent frame instead of raising SettingWithCopyWarning.
summary_df = summary_df.copy()

# Create helper columns
if 'param_adversary_seller_config' in summary_df.columns:
    # Cast to str first: on missing or non-string cells str.contains returns
    # NaN, and np.where treats NaN as truthy, which would tag those runs as
    # 'Backdoor' / 'mimic'. regex=False because the patterns are literal text.
    adversary_config_text = summary_df['param_adversary_seller_config'].astype(str)
    summary_df['ATTACK_METHOD'] = np.where(
        adversary_config_text.str.contains("'is_adversary': True", regex=False), 'Backdoor', 'No Attack')
    summary_df['IS_SYBIL'] = np.where(
        adversary_config_text.str.contains("'sybil_strategy': 'mimic'", regex=False), 'mimic', 'False')
    if 'param_num_adversaries' in summary_df.columns and 'param_num_sellers' in summary_df.columns:
        summary_df['ADV_RATE'] = pd.to_numeric(summary_df['param_num_adversaries']) / pd.to_numeric(summary_df['param_num_sellers'])
    else:
        summary_df['ADV_RATE'] = 0.0
else:
    print("‚ö†Ô∏è Warning: Could not determine ATTACK_METHOD/ADV_RATE. Using dummy values.")
    summary_df['ATTACK_METHOD'] = 'Backdoor'
    summary_df['ADV_RATE'] = 0.0
    summary_df['IS_SYBIL'] = 'False'


# --- 4. Main Plotting Logic for Cost Composition ---
cost_benign_col = 'avg_cost_per_round_benign'
cost_mal_col = 'avg_cost_per_round_malicious'


def _mean_cost_or_zero(frame, column):
    """Return the mean of frame[column], or 0.0 when nothing can be averaged.

    Covers both an empty selection and a selection whose values are all NaN;
    a NaN here would otherwise be used as a bar height / bar bottom.
    """
    mean_value = frame[column].mean()
    return float(mean_value) if pd.notna(mean_value) else 0.0


if not (cost_benign_col in summary_df.columns and cost_mal_col in summary_df.columns):
    print(f"‚ùå Plotting skipped. Cost columns ('{cost_benign_col}', '{cost_mal_col}') not found.")
elif summary_df[[cost_benign_col, cost_mal_col]].isna().all().all():
    print(f"‚ùå Plotting skipped. Cost columns are entirely NaN.")
else:
    # --- Prepare Data for the 3 Scenarios ---
    is_backdoor = summary_df['ATTACK_METHOD'] == 'Backdoor'
    # Tolerant comparison: ADV_RATE is a computed float ratio.
    at_representative_rate = np.isclose(summary_df['ADV_RATE'], REPRESENTATIVE_ADV_RATE_FOR_ATTACK)

    # 1. No Attack Scenario (all adversary rates)
    no_attack_data = summary_df[summary_df['ATTACK_METHOD'] == 'No Attack']
    avg_no_attack_benign = _mean_cost_or_zero(no_attack_data, cost_benign_col)
    avg_no_attack_mal = _mean_cost_or_zero(no_attack_data, cost_mal_col)

    # 2. Backdoor Attack (Standard)
    std_backdoor_data = summary_df[is_backdoor & (summary_df['IS_SYBIL'] == 'False') & at_representative_rate]
    avg_std_backdoor_benign = _mean_cost_or_zero(std_backdoor_data, cost_benign_col)
    avg_std_backdoor_mal = _mean_cost_or_zero(std_backdoor_data, cost_mal_col)

    # 3. Backdoor Attack (Sybil/Mimicry)
    mimic_backdoor_data = summary_df[is_backdoor & (summary_df['IS_SYBIL'] == 'mimic') & at_representative_rate]
    avg_mimic_backdoor_benign = _mean_cost_or_zero(mimic_backdoor_data, cost_benign_col)
    avg_mimic_backdoor_mal = _mean_cost_or_zero(mimic_backdoor_data, cost_mal_col)

    # --- Data for Stacked Bar Chart ---
    scenarios = ['No Attack', 'Backdoor (Std)', 'Backdoor (Sybil)']
    benign_costs = [avg_no_attack_benign, avg_std_backdoor_benign, avg_mimic_backdoor_benign]
    malicious_costs = [avg_no_attack_mal, avg_std_backdoor_mal, avg_mimic_backdoor_mal]

    # --- Plotting ---
    if any(c > 0 for c in benign_costs + malicious_costs):
        fig, ax = plt.subplots(figsize=(8, 6))
        x_pos = np.arange(len(scenarios))

        # Malicious cost is stacked on top of the benign cost of the same scenario.
        ax.bar(x_pos, benign_costs, label='Cost from Benign Sellers', color='cornflowerblue')
        ax.bar(x_pos, malicious_costs, bottom=benign_costs, label='Cost from Malicious Sellers', color='salmon')

        ax.set_xlabel('Attack Scenario')
        ax.set_ylabel('Average Cost per Round')
        ax.set_xticks(x_pos)
        ax.set_xticklabels(scenarios)
        ax.grid(True, which='major', linestyle=':', linewidth=0.7, axis='y')
        ax.set_axisbelow(True)
        ax.legend(title='Cost Component')

        # NOTE: with TARGET_AGG_METHOD = None the title shows the first row's
        # aggregation name even though all aggregation methods are averaged.
        agg_method = summary_df['param_aggregation_name'].iloc[0] if 'param_aggregation_name' in summary_df.columns else "Unknown"
        plt.title(f'{agg_method}: Composition of Cost per Round\n(Attacks at {REPRESENTATIVE_ADV_RATE_FOR_ATTACK*100:.0f}% Adversary Rate)')

        plt.tight_layout()
        save_figure_as_pdf(fig=fig, output_directory=FIGURE_SAVE_DIR, base_filename="cost_composition", aggregation=agg_method)
        plt.show()
    else:
        print("‚ÑπÔ∏è Not enough data to plot Cost Composition bar chart after filtering.")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import matplotlib as mpl
from pathlib import Path

# This script assumes 'final_metrics_df' is available from the first cell.

# --- 1. Configuration & Style ---
# Every figure produced by this cell is written below this directory.
FIGURE_SAVE_DIR = Path("./analysis_plots")
FIGURE_SAVE_DIR.mkdir(exist_ok=True)

# Global font and style adjustments, set one rc group at a time.
mpl.rc('font', size=16)
mpl.rc('axes', labelsize=16, titlesize=18)
mpl.rc('xtick', labelsize=14)
mpl.rc('ytick', labelsize=14)
mpl.rc('legend', fontsize=12, title_fontsize=13)
# Font type 42 for both vector backends (PDF and PostScript).
mpl.rc('pdf', fonttype=42)
mpl.rc('ps', fonttype=42)

# --- 2. Helper Function to Save Figures ---
def save_figure_as_pdf(fig, output_directory: Path, base_filename: str, **details):
    """Save a figure as a PDF with a descriptive filename.

    The filename is ``base_filename`` followed by one ``key_value`` part per
    keyword argument, joined with '-', plus the ``.pdf`` suffix. Dots in a
    value become '_' (0.3 -> '0_3'). Path separators are replaced too, so a
    value such as 'Fed/Avg' cannot redirect the file into a sub-directory.

    Args:
        fig: Object exposing ``savefig`` (normally a matplotlib Figure).
        output_directory: Target directory; created (with parents) if missing.
        base_filename: Leading part of the filename, without extension.
        **details: Extra key/value pairs encoded into the filename.

    Saving is best-effort: any failure is printed, never raised, so a broken
    export does not abort the rest of the notebook cell.
    """
    filename_parts = [base_filename]
    for key, value in details.items():
        clean_value = str(value).replace('.', '_').replace('/', '_').replace('\\', '_')
        filename_parts.append(f"{key}_{clean_value}")
    final_filename = f"{'-'.join(filename_parts)}.pdf"
    save_path = Path(output_directory) / final_filename
    try:
        # Create the directory here so the helper also works when the caller
        # passes a directory that has not been set up yet.
        save_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_path, bbox_inches='tight', format='pdf', dpi=300)
        print(f"‚úÖ Figure saved successfully to: {save_path}")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not save figure: {e}")

# --- 3. Data Preprocessing & Filtering ---
# Plots final accuracy against discovery quality (log x axis) for one attack
# scenario, with one line per buyer data mode (hue) and sybil setting (style).
# Requires 'final_metrics_df' from the first cell.
summary_df = final_metrics_df.copy()

# --- FILTERING CONFIGURATION for this specific plot ---
# Select a single, specific scenario to analyze in detail.
TARGET_AGG_METHOD = 'MartFL'    # The aggregation method to focus on
ATTACK_TO_ANALYZE = 'Backdoor'  # The attack method to focus on
ADV_RATE_TO_ANALYZE = 0.3       # The adversary rate to focus on
# ---

# Apply the main filters
print("--- Applying Filters for Discovery Impact Plot ---")
if TARGET_AGG_METHOD and 'param_aggregation_name' in summary_df.columns:
    summary_df = summary_df[summary_df['param_aggregation_name'] == TARGET_AGG_METHOD]
    print(f"‚úÖ Filtered for Aggregation: {TARGET_AGG_METHOD}")

# Detach from the filtered selection so the column assignments below write to
# an independent frame instead of raising SettingWithCopyWarning.
summary_df = summary_df.copy()

# Create helper columns
if 'param_adversary_seller_config' in summary_df.columns:
    # Cast to str first: on missing or non-string cells str.contains returns
    # NaN, and np.where treats NaN as truthy, which would tag those runs as
    # 'Backdoor' / 'mimic'. regex=False because the patterns are literal text.
    adversary_config_text = summary_df['param_adversary_seller_config'].astype(str)
    summary_df['ATTACK_METHOD'] = np.where(
        adversary_config_text.str.contains("'is_adversary': True", regex=False), 'Backdoor', 'No Attack')
    summary_df['IS_SYBIL'] = np.where(
        adversary_config_text.str.contains("'sybil_strategy': 'mimic'", regex=False), 'mimic', 'False')
    if 'param_num_adversaries' in summary_df.columns and 'param_num_sellers' in summary_df.columns:
        summary_df['ADV_RATE'] = pd.to_numeric(summary_df['param_num_adversaries']) / pd.to_numeric(summary_df['param_num_sellers'])
    else:
        summary_df['ADV_RATE'] = 0.0
else:
    print("‚ö†Ô∏è Warning: Could not determine ATTACK_METHOD/ADV_RATE. Using dummy values.")
    summary_df['ATTACK_METHOD'] = 'Backdoor'
    summary_df['ADV_RATE'] = 0.0
    summary_df['IS_SYBIL'] = 'False'

# TODO: **IMPORTANT** - Create columns for the parameters specific to this plot.
# Adapt this logic based on how these are stored in your config files.
if 'param_discovery_quality' in summary_df.columns:
    # Unparseable values become NaN here and are dropped before plotting.
    summary_df['discovery_quality'] = pd.to_numeric(summary_df['param_discovery_quality'], errors='coerce')
else:
    print("‚ö†Ô∏è Warning: 'param_discovery_quality' column not found.")
    summary_df['discovery_quality'] = 1.0 # Fallback

if 'param_buyer_data_mode' in summary_df.columns:
    summary_df['buyer_data_mode'] = summary_df['param_buyer_data_mode']
else:
    print("‚ö†Ô∏è Warning: 'param_buyer_data_mode' column not found.")
    summary_df['buyer_data_mode'] = 'default' # Fallback

# Rename final accuracy column for consistency with plotting logic.
# Skipped when FINAL_MAIN_ACC already exists, to avoid a duplicate column name.
if 'test_accuracy' in summary_df.columns and 'FINAL_MAIN_ACC' not in summary_df.columns:
    summary_df = summary_df.rename(columns={'test_accuracy': 'FINAL_MAIN_ACC'})


# --- 4. Main Plotting Logic for Discovery Impact ---
required_cols = ['FINAL_MAIN_ACC', 'discovery_quality', 'buyer_data_mode', 'IS_SYBIL']
if not all(col in summary_df.columns for col in required_cols):
    print(f"‚ùå Plotting skipped. One or more required columns are missing: {', '.join(required_cols)}")
else:
    # Apply the specific scenario filters for this plot
    focus_df = summary_df[
        (summary_df['ATTACK_METHOD'] == ATTACK_TO_ANALYZE) &
        (np.isclose(summary_df['ADV_RATE'], ADV_RATE_TO_ANALYZE))
    ].copy()

    # The x axis is log-scaled and its ticks come from the observed qualities,
    # so rows with a missing or non-positive quality (or no accuracy) cannot
    # be drawn and would inject NaN into the tick list.
    focus_df = focus_df.dropna(subset=['discovery_quality', 'FINAL_MAIN_ACC'])
    focus_df = focus_df[focus_df['discovery_quality'] > 0]

    if not focus_df.empty:
        plt.figure(figsize=(12, 7))
        sns.lineplot(
            data=focus_df,
            x='discovery_quality',
            y='FINAL_MAIN_ACC',
            hue='buyer_data_mode',
            style='IS_SYBIL',
            markers=True,
            markersize=8,
            linewidth=2.5,
            err_style="band",
            errorbar=('ci', 95)
        )
        plt.xscale('log')
        plt.title(f'Impact of Buyer & Discovery Quality on Accuracy\n(Agg: {TARGET_AGG_METHOD}, Attack: {ATTACK_TO_ANALYZE}, AdvRate: {ADV_RATE_TO_ANALYZE})')
        plt.xlabel('Discovery Quality (Log Scale)')
        plt.ylabel('Final Main Accuracy')

        # Ensure x-ticks are clear and readable
        unique_qualities = sorted(focus_df['discovery_quality'].unique())
        plt.xticks(ticks=unique_qualities, labels=[str(q) for q in unique_qualities])

        plt.grid(True, which='both', linestyle='--', linewidth=0.5)
        plt.legend(title='Buyer Mode (Hue) & Sybil (Style)', bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout(rect=[0, 0, 0.85, 1]) # Adjust rect to make space for legend

        save_figure_as_pdf(
            fig=plt.gcf(),
            output_directory=FIGURE_SAVE_DIR,
            base_filename="discovery_impact",
            agg=TARGET_AGG_METHOD,
            attack=ATTACK_TO_ANALYZE,
            adv_rate=ADV_RATE_TO_ANALYZE
        )
        plt.show()
    else:
        print(f"‚ÑπÔ∏è No data found for the specific scenario to plot discovery impact. Adjust filters.")


# Gini coefficient plot (next cell)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
from pathlib import Path
from matplotlib.lines import Line2D

# This script assumes 'final_metrics_df' is available from the first cell.

# --- 1. Configuration & Style ---
# Every figure produced by this cell is written below this directory.
FIGURE_SAVE_DIR = Path("./analysis_plots")
FIGURE_SAVE_DIR.mkdir(exist_ok=True)

# Global font and style adjustments, set one rc group at a time.
mpl.rc('font', size=16)
mpl.rc('axes', labelsize=16, titlesize=18)
mpl.rc('xtick', labelsize=14)
mpl.rc('ytick', labelsize=14)
mpl.rc('legend', fontsize=12, title_fontsize=13)
# Font type 42 for both vector backends (PDF and PostScript).
mpl.rc('pdf', fonttype=42)
mpl.rc('ps', fonttype=42)

# --- 2. Helper Function to Save Figures ---
def save_figure_as_pdf(fig, output_directory: Path, base_filename: str, **details):
    """Save a figure as a PDF with a descriptive filename.

    The filename is ``base_filename`` followed by one ``key_value`` part per
    keyword argument, joined with '-', plus the ``.pdf`` suffix. Dots in a
    value become '_' (0.3 -> '0_3'). Path separators are replaced too, so a
    value such as 'Fed/Avg' cannot redirect the file into a sub-directory.

    Args:
        fig: Object exposing ``savefig`` (normally a matplotlib Figure).
        output_directory: Target directory; created (with parents) if missing.
        base_filename: Leading part of the filename, without extension.
        **details: Extra key/value pairs encoded into the filename.

    Saving is best-effort: any failure is printed, never raised, so a broken
    export does not abort the rest of the notebook cell.
    """
    filename_parts = [base_filename]
    for key, value in details.items():
        clean_value = str(value).replace('.', '_').replace('/', '_').replace('\\', '_')
        filename_parts.append(f"{key}_{clean_value}")
    final_filename = f"{'-'.join(filename_parts)}.pdf"
    save_path = Path(output_directory) / final_filename
    try:
        # Create the directory here so the helper also works when the caller
        # passes a directory that has not been set up yet.
        save_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_path, bbox_inches='tight', format='pdf', dpi=300)
        print(f"‚úÖ Figure saved successfully to: {save_path}")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not save figure: {e}")

# --- 3. Data Preprocessing & Filtering ---
# Plots the average benign-seller payment Gini coefficient against adversary
# rate for standard and sybil (mimic) backdoor runs, with the no-attack average
# drawn as a horizontal baseline. Requires 'final_metrics_df' from the first cell.
summary_df = final_metrics_df.copy()

# --- FILTERING CONFIGURATION ---
TARGET_DATASET = 'CIFAR-10'  # or None
TARGET_MODEL = None      # or None
TARGET_AGG_METHOD = 'MartFL' # or None
# ---

# Apply filters (a filter is skipped when its target is None or its column is absent)
print("--- Applying Filters for Gini Coefficient Plot ---")
if TARGET_DATASET and 'param_dataset_name' in summary_df.columns:
    summary_df = summary_df[summary_df['param_dataset_name'] == TARGET_DATASET]
    print(f"‚úÖ Filtered for Dataset: {TARGET_DATASET}")
if TARGET_MODEL and 'param_model_structure' in summary_df.columns:
    summary_df = summary_df[summary_df['param_model_structure'] == TARGET_MODEL]
    print(f"‚úÖ Filtered for Model: {TARGET_MODEL}")
if TARGET_AGG_METHOD and 'param_aggregation_name' in summary_df.columns:
    summary_df = summary_df[summary_df['param_aggregation_name'] == TARGET_AGG_METHOD]
    print(f"‚úÖ Filtered for Aggregation: {TARGET_AGG_METHOD}")

# Detach from the filtered selection so the column assignments below write to
# an independent frame instead of raising SettingWithCopyWarning.
summary_df = summary_df.copy()

# Create helper columns
if 'param_adversary_seller_config' in summary_df.columns:
    # Cast to str first: on missing or non-string cells str.contains returns
    # NaN, and np.where treats NaN as truthy, which would tag those runs as
    # 'Backdoor' / 'mimic'. regex=False because the patterns are literal text.
    adversary_config_text = summary_df['param_adversary_seller_config'].astype(str)
    summary_df['ATTACK_METHOD'] = np.where(
        adversary_config_text.str.contains("'is_adversary': True", regex=False), 'Backdoor', 'No Attack')
    summary_df['IS_SYBIL'] = np.where(
        adversary_config_text.str.contains("'sybil_strategy': 'mimic'", regex=False), 'mimic', 'False')
    if 'param_num_adversaries' in summary_df.columns and 'param_num_sellers' in summary_df.columns:
        summary_df['ADV_RATE'] = pd.to_numeric(summary_df['param_num_adversaries']) / pd.to_numeric(summary_df['param_num_sellers'])
    else:
        summary_df['ADV_RATE'] = 0.0
else:
    print("‚ö†Ô∏è Warning: Could not determine ATTACK_METHOD/ADV_RATE. Using dummy values.")
    summary_df['ATTACK_METHOD'] = 'Backdoor'
    summary_df['ADV_RATE'] = 0.0
    summary_df['IS_SYBIL'] = 'False'

# --- 4. Main Plotting Logic for Benign Gini Coefficient ---
ACTUAL_BENIGN_GINI_COLUMN = 'avg_benign_payment_gini'

if ACTUAL_BENIGN_GINI_COLUMN not in summary_df.columns:
    print(f"‚ùå Plotting skipped. Gini column '{ACTUAL_BENIGN_GINI_COLUMN}' not found.")
elif summary_df[ACTUAL_BENIGN_GINI_COLUMN].isna().all():
    print(f"‚ùå Plotting skipped. Gini column '{ACTUAL_BENIGN_GINI_COLUMN}' is entirely NaN.")
else:
    # Only runs that actually report a Gini value take part in the averages.
    plot_data = summary_df.dropna(subset=[ACTUAL_BENIGN_GINI_COLUMN]).copy()

    # 1. "No Attack" Baseline
    no_attack_data = plot_data[plot_data['ATTACK_METHOD'] == 'No Attack']
    no_attack_avg_gini = no_attack_data[ACTUAL_BENIGN_GINI_COLUMN].mean() if not no_attack_data.empty else np.nan

    # 2. "Backdoor Attack" Data
    adv_rates_to_plot = [0.2, 0.3, 0.4]
    # ADV_RATE is a computed float ratio, so match it to the target rates with
    # a tolerance (as the other plots do) and snap matches onto the exact
    # target so near-equal rates fall into the same group.
    observed_rates = pd.to_numeric(plot_data['ADV_RATE'], errors='coerce').to_numpy(dtype=float)
    snapped_rates = np.full(observed_rates.shape, np.nan)
    for target_rate in adv_rates_to_plot:
        snapped_rates[np.isclose(observed_rates, target_rate)] = target_rate
    is_plotted_backdoor = (plot_data['ATTACK_METHOD'] == 'Backdoor').to_numpy() & ~np.isnan(snapped_rates)
    backdoor_data = plot_data[is_plotted_backdoor].copy()
    backdoor_data['ADV_RATE'] = snapped_rates[is_plotted_backdoor]

    # Start with the expected columns so the IS_SYBIL lookups below still work
    # when only the no-attack baseline is available.
    backdoor_gini_avg = pd.DataFrame(columns=['ADV_RATE', 'IS_SYBIL', ACTUAL_BENIGN_GINI_COLUMN])
    if not backdoor_data.empty:
        backdoor_gini_avg = backdoor_data.groupby(['ADV_RATE', 'IS_SYBIL'], as_index=False)[ACTUAL_BENIGN_GINI_COLUMN].mean()

    # --- Plotting ---
    if not backdoor_gini_avg.empty or pd.notna(no_attack_avg_gini):
        fig, ax = plt.subplots(figsize=(8, 5.5))
        # Legend entries are built by hand so each condition gets exactly one
        # entry, and only when that condition was drawn.
        legend_elements = []

        # Plot "No Attack" baseline
        if pd.notna(no_attack_avg_gini):
            ax.hlines(y=no_attack_avg_gini, xmin=min(adv_rates_to_plot), xmax=max(adv_rates_to_plot),
                      color='dimgray', linestyle=(0, (5, 2)), linewidth=2)
            legend_elements.append(Line2D([0], [0], color='dimgray', linestyle=(0, (5, 2)), lw=2, label='No Attack'))

        # Plot "Backdoor (Standard)"
        std_data = backdoor_gini_avg[backdoor_gini_avg['IS_SYBIL'] == 'False']
        if not std_data.empty:
            ax.plot(std_data['ADV_RATE'], std_data[ACTUAL_BENIGN_GINI_COLUMN], color='orangered', linestyle='-', marker='o')
            legend_elements.append(Line2D([0], [0], color='orangered', linestyle='-', marker='o', label='Backdoor (Standard)'))

        # Plot "Backdoor (Sybil)"
        mimic_data = backdoor_gini_avg[backdoor_gini_avg['IS_SYBIL'] == 'mimic']
        if not mimic_data.empty:
            ax.plot(mimic_data['ADV_RATE'], mimic_data[ACTUAL_BENIGN_GINI_COLUMN], color='darkviolet', linestyle='--', marker='s')
            legend_elements.append(Line2D([0], [0], color='darkviolet', linestyle='--', marker='s', label='Backdoor (Sybil)'))

        ax.set_xlabel('Adversary Rate')
        ax.set_ylabel('Benign Seller Payment Gini\n(0=Equal, 1=Unequal)')
        ax.set_xticks(ticks=adv_rates_to_plot)
        ax.set_xticklabels(labels=[f'{r:.1f}' for r in adv_rates_to_plot])
        ax.grid(True, which='major', linestyle=':', linewidth=0.6)
        ax.legend(handles=legend_elements, title='Attack Condition', loc='best')
        ax.set_ylim(bottom=0, top=1) # Gini is bounded by 0 and 1

        plt.title(f'{TARGET_AGG_METHOD}: Fairness of Benign Seller Payments')
        plt.tight_layout()
        save_figure_as_pdf(fig=fig, output_directory=FIGURE_SAVE_DIR, base_filename="gini_benign_fairness", agg=TARGET_AGG_METHOD)
        plt.show()

    else:
        print("‚ÑπÔ∏è Not enough data to create the Gini Coefficient plot after filtering.")
