# Consolidate Mitigated Bias Evaluation Results

This notebook reads the individual metric CSV files generated by `mitigated_bias_evaluation.ipynb` from the mitigated-results directory (set by `mitigated_results_dir` in Section B; currently `mitigated_results03/`) and consolidates them into a single CSV file (`consolidated_mitigated_results.csv`).

The output format is designed to exactly match the structure of the original `consolidated_results.csv` for easy comparison between the baseline and mitigated scenarios.

**Update:** This version ensures that the domain 'movie' parsed from filenames is saved as 'movies' in the final consolidated CSV.

In [4]:
# =============================================================================
# SECTION A: IMPORTS & SETUP
# =============================================================================
import pandas as pd
import numpy as np
import os
import glob
import re # For parsing filenames

In [5]:
# =============================================================================
# SECTION B: CONFIGURATION
# =============================================================================

# --- Input/Output ---
# Directory that holds the individual per-metric CSV files
mitigated_results_dir = 'mitigated_results03/'
# Name of the single consolidated CSV produced by this notebook
output_file = 'consolidated_mitigated_results.csv'

# --- Expected Components ---
# Whitelists used to validate the tokens parsed out of each filename.
# Domains are listed as they are *parsed* ('movie'); the consolidated output
# writes that domain as 'movies'.
expected_domains_parsed = [
    'music',
    'movie',
]
expected_strategies = [
    'eva_two',
    'eva_three',
]
expected_notions = [
    'pop_one',
    'pop_two',
    'pop_four',
]

# --- Output Format Definition ---
# Exact columns of the consolidated file, in the order they are written
output_columns = [
    'domain', 'evaluation_strategy', 'popularity_notion',  # scenario identifiers
    'algorithm', 'metric_type',                            # what was measured
    'user_group', 'comparison_group',                      # which group(s) it refers to
    'value', 'p_value',                                    # the measurement itself
]

# Echo the I/O settings so the run log records what was used
print(
    f"Input directory: {mitigated_results_dir}",
    f"Output file: {output_file}",
    sep="\n",
)

Input directory: mitigated_results03/
Output file: consolidated_mitigated_results.csv


In [6]:
# =============================================================================
# SECTION C: CONSOLIDATION LOGIC
# =============================================================================
# Reads every CSV in `mitigated_results_dir`, decodes the scenario (metric,
# domain, strategy, notion) from its filename, and flattens the table into
# long-format row dicts collected in `all_results_rows`.
all_results_rows = []  # List to store data dictionaries for the final DataFrame

# Find all relevant CSV files in the directory (sorted so the log order is stable)
csv_files = sorted(glob.glob(os.path.join(mitigated_results_dir, '*.csv')))
print(f"Found {len(csv_files)} CSV files to process.")

# Regular expression to parse filenames
# Example: NDCGs_music_eva_two_pop_one.csv
#          -> ('NDCGs', 'music', 'eva_two', 'pop_one')
# The strategy and notion keys contain an underscore themselves, and `\w` also
# matches '_'. Three plain greedy `(\w+)` groups therefore split the example as
# ('music_eva_two', 'pop', 'one'), which can never pass validation. Each token
# is restricted to underscore-free word characters ([^\W_]) and the literal
# 'eva_' / 'pop_' prefixes pin the group boundaries.
filename_pattern = re.compile(
    r"(NDCGs|NDCG_ttests|PercentDeltaGAP|GAP_ttests)"
    r"_([^\W_]+)_(eva_[^\W_]+)_(pop_[^\W_]+)\.csv"
)

# How each kind of metric file maps onto the output schema:
#   metric_base -> (metric_type label,
#                   CSV columns to read,
#                   row field that records which column was read,
#                   row field that receives the number)
metric_layouts = {
    'NDCGs':           ('NDCG',       ['low', 'med', 'high'],                'user_group',       'value'),
    'PercentDeltaGAP': ('GAP',        ['low', 'med', 'high'],                'user_group',       'value'),
    'NDCG_ttests':     ('NDCG_TTEST', ['low-med', 'low-high', 'med-high'],   'comparison_group', 'p_value'),
    'GAP_ttests':      ('GAP_TTEST',  ['low-med', 'low-high', 'med-high'],   'comparison_group', 'p_value'),
}

for filepath in csv_files:
    filename = os.path.basename(filepath)
    # fullmatch: the whole filename must fit the pattern, not just a prefix
    match = filename_pattern.fullmatch(filename)

    if not match:
        print(f"Skipping file with unexpected name: {filename}")
        continue

    metric_base, domain_parsed, eva_key, pop_key = match.groups()

    # Validate parsed components against expected values
    if (domain_parsed not in expected_domains_parsed
            or eva_key not in expected_strategies
            or pop_key not in expected_notions):
        print(f"Warning: Parsed components from {filename} seem invalid ({domain_parsed}, {eva_key}, {pop_key}). Skipping.")
        continue

    # Filenames say 'movie'; the consolidated output writes it as 'movies'
    domain_output = 'movies' if domain_parsed == 'movie' else domain_parsed

    metric_type, source_columns, label_field, number_field = metric_layouts[metric_base]

    try:
        # Read the CSV, assuming the first column is the algorithm name index
        df = pd.read_csv(filepath, index_col=0)

        # Rows are staged per file so a failure midway leaves no partial data behind
        file_rows = []
        # iterrows yields one record per physical row, so duplicate algorithm
        # labels are handled row by row instead of `.loc` returning a Series
        for algo, record in df.iterrows():
            for column in source_columns:
                if column not in df.columns:
                    print(f"Warning: Column '{column}' not found in {filename} for algo {algo}")
                    continue

                number = record[column]
                # Start from the "not applicable" defaults, then fill the two
                # fields that this metric kind actually uses
                row = {
                    'domain': domain_output,  # Use the mapped value
                    'evaluation_strategy': eva_key,
                    'popularity_notion': pop_key,
                    'algorithm': algo,
                    'metric_type': metric_type,
                    'user_group': 'N/A',
                    'comparison_group': 'N/A',
                    'value': np.nan,
                    'p_value': np.nan
                }
                row[label_field] = column
                row[number_field] = number if pd.notna(number) else np.nan
                file_rows.append(row)

    except Exception as e:
        # Best effort: report the broken file and carry on with the rest
        print(f"Error processing file {filename}: {e}")
    else:
        all_results_rows.extend(file_rows)

# =============================================================================
# SECTION D: DATAFRAME CREATION & SAVING
# =============================================================================
if all_results_rows:
    # Keys that give the consolidated file a consistent row order
    sort_keys = [
        'domain',
        'evaluation_strategy',
        'popularity_notion',
        'algorithm',
        'metric_type',
        'user_group',
        'comparison_group',
    ]

    # Build the frame, fix the column order, sort, and renumber the index
    consolidated_df = (
        pd.DataFrame(all_results_rows)[output_columns]
        .sort_values(by=sort_keys)
        .reset_index(drop=True)
    )

    # Write the consolidated file; a failure is reported but does not abort the cell
    try:
        consolidated_df.to_csv(output_file, index=False)
        print(f"\nSuccessfully created consolidated file: {output_file}")
        print(f"Total rows consolidated: {len(consolidated_df)}")
    except Exception as e:
        print(f"\nError saving consolidated file {output_file}: {e}")

    # Preview both ends of the consolidated data
    head_preview = consolidated_df.head()
    tail_preview = consolidated_df.tail()
    print("\nConsolidated Data Head:")
    print(head_preview)
    print("\nConsolidated Data Tail:")
    print(tail_preview)
else:
    # Nothing was collected in Section C, so there is nothing to write
    print("\nWarning: No data rows were generated. Cannot create consolidated file.")

Found 48 CSV files to process.

