# Analysis of Popularity Bias Reproduction Results

This notebook analyzes the consolidated results from the popularity bias reproduction study, generating plots and tables to support the findings discussed in the main report. It focuses on visualizing the impact of domain, evaluation strategy, algorithm, and user grouping method (including the novel 'NicheConsumptionRate') on %ΔGAP and NDCG@10.

## 1. Setup and Configuration

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import seaborn as sns
import numpy as np
import os
import warnings

# Ignore common warnings (optional)
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

# Display options for pandas
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 1000)

# --- Configuration ---
CSV_FILE_PATH = 'consolidated_results.csv'
OUTPUT_DIR = 'analysis_plots_tables_v2' # Changed output dir name
SIGNIFICANCE_THRESHOLD = 0.05

# Create output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"Output will be saved to: {os.path.abspath(OUTPUT_DIR)}")

# Seaborn style
sns.set_theme(style="whitegrid")

Output will be saved to: c:\Users\Shreyash\Desktop\Files\Work\cs516\516 Project\Plots\analysis_plots_tables_v2


## 2. Helper Functions

In [2]:
def map_conceptual_group(row):
    """Maps specific user groups from different notions to conceptual groups."""
    pop_notion = row['popularity_notion']
    user_group = row['user_group']

    if pop_notion in ['pop_one', 'pop_two']:
        if user_group == 'low':
            return 'Niche-Oriented'
        elif user_group == 'med':
            return 'Diverse'
        elif user_group == 'high':
            return 'Blockbuster-Oriented'
    elif pop_notion == 'pop_four':
        if user_group == 'high': # High niche consumption rate -> Niche-Oriented
            return 'Niche-Oriented'
        elif user_group == 'med':
            return 'Diverse'
        elif user_group == 'low': # Low niche consumption rate -> Blockbuster-Oriented
            return 'Blockbuster-Oriented'
    return 'Unknown' # Should not happen with valid data

def map_descriptive_names(df):
    """Maps codes to more descriptive names for plots."""
    strategy_map = {
        'eva_two': 'UserTest',
        'eva_three': 'TrainItems'
    }
    notion_map = {
        'pop_one': 'PopularPercentage',
        'pop_two': 'AveragePopularity',
        'pop_four': 'NicheConsumptionRate'
    }
    # Use .get with default to avoid errors if unexpected values appear
    df['eval_strategy_desc'] = df['evaluation_strategy'].apply(lambda x: strategy_map.get(x, x))
    df['pop_notion_desc'] = df['popularity_notion'].apply(lambda x: notion_map.get(x, x))
    return df

def highlight_significant(val):
    """Highlights p-values below the significance threshold for display."""
    if pd.isna(val):
        return ''
    # Use background color for better visibility in tables
    bgcolor = 'background-color: yellow' if val < SIGNIFICANCE_THRESHOLD else ''
    return bgcolor

def plot_faceted_grouped_bar(data, y_metric, title_prefix, filename_prefix):
    """Generates faceted grouped bar charts (Metric by User Group within each notion/strategy)."""
    if data.empty:
        print(f"Skipping faceted plot for {y_metric} - no data.")
        return
    try:
        # Ensure categorical types for consistent ordering
        data['algorithm'] = pd.Categorical(data['algorithm'], categories=sorted(data['algorithm'].unique()), ordered=True)
        data['user_group'] = pd.Categorical(data['user_group'], categories=['low', 'med', 'high'], ordered=True)

        g = sns.catplot(
            data=data,
            x='algorithm',
            y=y_metric,
            hue='user_group',
            col='eval_strategy_desc',
            row='pop_notion_desc',
            kind='bar',
            sharey=False, # Allow different y-axis scales
            aspect=1.5,
            height=4,
            palette='viridis',
            errorbar=None # Omit error bars for clarity, or use ('ci', 95)
            # order=sorted(data['algorithm'].unique()), # Redundant with categorical
            # hue_order=['low', 'med', 'high'] # Redundant with categorical
        )
        g.fig.suptitle(f'{title_prefix}: {y_metric} by User Group (Facets: Pop Notion / Eval Strategy)', y=1.03)
        g.set_axis_labels("Algorithm", y_metric)
        g.set_titles(row_template="{row_name}", col_template="{col_name}")
        g.tick_params(axis='x', rotation=45)
        plt.tight_layout(rect=[0, 0, 1, 1]) # Adjust layout
        filepath = os.path.join(OUTPUT_DIR, f'{filename_prefix}_{y_metric}_faceted.png')
        plt.savefig(filepath, bbox_inches='tight')
        print(f"Saved faceted bar chart to {filepath}")
        # plt.show()
        plt.close(g.fig) # Close the specific figure
    except Exception as e:
        print(f"Error generating faceted bar chart for {y_metric}: {e}")

def plot_comparative_grouped_bar(data, y_metric, title_prefix, filename_prefix):
    """Generates comparative grouped bar charts (Metric by Pop Notion for each conceptual group)."""
    conceptual_groups = ['Niche-Oriented', 'Diverse', 'Blockbuster-Oriented']
    for concept_group in conceptual_groups:
        df_filtered = data[data['conceptual_group'] == concept_group].copy()
        if df_filtered.empty:
            print(f"Skipping comparative plot for {concept_group} - no data.")
            continue
        try:
            df_filtered['algorithm'] = pd.Categorical(df_filtered['algorithm'], categories=sorted(df_filtered['algorithm'].unique()), ordered=True)
            df_filtered['pop_notion_desc'] = pd.Categorical(df_filtered['pop_notion_desc'], categories=sorted(df_filtered['pop_notion_desc'].unique()), ordered=True)

            g = sns.catplot(
                data=df_filtered,
                x='algorithm',
                y=y_metric,
                hue='pop_notion_desc', # Compare the grouping methods
                col='eval_strategy_desc',
                row='domain',
                kind='bar',
                sharey=False,
                aspect=1.5,
                height=4,
                palette='magma',
                errorbar=None
                # order=sorted(df_filtered['algorithm'].unique()), # Redundant
                # hue_order=sorted(df_filtered['pop_notion_desc'].unique()) # Redundant
            )
            g.fig.suptitle(f'{title_prefix}: {y_metric} Comparison for {concept_group} Users', y=1.03)
            g.set_axis_labels("Algorithm", y_metric)
            g.set_titles(row_template="{row_name}", col_template="{col_name}")
            g.tick_params(axis='x', rotation=45)
            plt.tight_layout(rect=[0, 0, 1, 1])
            filepath = os.path.join(OUTPUT_DIR, f'{filename_prefix}_{y_metric}_compare_{concept_group}.png')
            plt.savefig(filepath, bbox_inches='tight')
            print(f"Saved comparative bar chart to {filepath}")
            # plt.show()
            plt.close(g.fig) # Close the specific figure
        except Exception as e:
            print(f"Error generating comparative bar chart for {concept_group} / {y_metric}: {e}")


def create_summary_table(data, metric_value_col, filename):
    """Creates and saves a pivot table summarizing the metric."""
    if data.empty:
        print(f"Skipping summary table {filename} - no data.")
        return None
    try:
        pivot = pd.pivot_table(
            data,
            index='algorithm',
            columns=['domain', 'eval_strategy_desc', 'pop_notion_desc', 'user_group'],
            values=metric_value_col
        )
        print(f"\n--- Summary Table: {metric_value_col} --- created (saving to file).")
        # Use display(pivot.style...) in notebook if desired
        filepath = os.path.join(OUTPUT_DIR, filename)
        pivot.to_csv(filepath)
        print(f"Saved table to {filepath}")
        return pivot
    except Exception as e:
        print(f"Error creating pivot table for {metric_value_col}: {e}")
        return None

def create_ttest_table(data, filename):
    """Creates and saves a pivot table for T-test p-values with highlighting."""
    if data.empty:
        print(f"Skipping t-test table {filename} - no data.")
        return None
    try:
        pivot = pd.pivot_table(
            data,
            index='algorithm',
            columns=['domain', 'eval_strategy_desc', 'pop_notion_desc', 'comparison_group'],
            values='p_value'
        )
        metric_name = data['metric_type'].iloc[0].replace('_TTEST','') if not data.empty else 'Unknown Metric'
        print(f"\n--- T-Test p-value Table: {metric_name} --- created (saving to Excel).")
        # Save styled excel
        excel_filepath = os.path.join(OUTPUT_DIR, filename.replace('.csv','.xlsx'))
        # Use Styler.applymap which is deprecated but widely compatible, or Styler.map for newer pandas
        try: # Try newer Styler.map first
          pivot.style.format("{:.4f}").map(highlight_significant).to_excel(excel_filepath, engine='openpyxl')
        except AttributeError:
          # Fallback to older Styler.applymap
           pivot.style.format("{:.4f}").applymap(highlight_significant).to_excel(excel_filepath, engine='openpyxl')
        print(f"Saved styled Excel table to {excel_filepath}")
        return pivot
    except ImportError:
         print(f"Error: 'openpyxl' required for Excel export. Install it (`pip install openpyxl`) and try again.")
         # Save as CSV as fallback
         csv_filepath = os.path.join(OUTPUT_DIR, filename)
         try:
            pivot.to_csv(csv_filepath)
            print(f"Saved unstyled CSV table as fallback to {csv_filepath}")
         except Exception as e_csv:
             print(f"Error saving T-test pivot table as CSV: {e_csv}")
         return None
    except Exception as e:
        print(f"Error creating or saving T-test pivot table: {e}")
        return None

def plot_bias_accuracy_scatter(df_merged, filename_prefix):
    """Plots %ΔGAP vs NDCG@10."""
    if df_merged.empty:
        print("Skipping Bias vs Accuracy scatter plot - no merged data.")
        return
    try:
        df_merged['algorithm'] = pd.Categorical(df_merged['algorithm'], categories=sorted(df_merged['algorithm'].unique()), ordered=True)
        df_merged['conceptual_group'] = pd.Categorical(df_merged['conceptual_group'], categories=['Niche-Oriented', 'Diverse', 'Blockbuster-Oriented'], ordered=True)

        g = sns.relplot(
            data=df_merged,
            x='value_gap',
            y='value_ndcg',
            hue='algorithm',
            style='conceptual_group', # Style distinguishes conceptual groups
            col='eval_strategy_desc',
            row='pop_notion_desc',
            kind='scatter',
            facet_kws={'sharex': False, 'sharey': False}, # Axes can vary
            aspect=1.5,
            height=4,
            palette='tab10', # Use a distinct palette for algorithms
            col_order = sorted(df_merged['eval_strategy_desc'].unique()),
            row_order = sorted(df_merged['pop_notion_desc'].unique()),
            style_order = ['Niche-Oriented', 'Diverse', 'Blockbuster-Oriented'],
            s=50 # Adjust point size
        )
        g.fig.suptitle('%ΔGAP vs NDCG@10 (Hue: Algorithm, Style: Conceptual Group)', y=1.03)
        g.set_axis_labels("%ΔGAP", "NDCG@10")
        g.set_titles(row_template="{row_name}", col_template="{col_name}")
        plt.tight_layout(rect=[0, 0, 1, 1])
        filepath = os.path.join(OUTPUT_DIR, f'{filename_prefix}_bias_vs_accuracy_scatter.png')
        plt.savefig(filepath, bbox_inches='tight')
        print(f"Saved scatter plot to {filepath}")
        # plt.show()
        plt.close(g.fig)
    except Exception as e:
        print(f"Error generating scatter plot: {e}")


def plot_fairness_gap(df_diff, diff_metric_col, y_label, title_suffix, filename_prefix):
    """Plots the calculated fairness gap (difference between groups)."""
    if df_diff.empty or diff_metric_col not in df_diff.columns:
        print(f"Skipping Fairness Gap plot for {y_label} - missing data or column.")
        return
    try:
        df_diff['algorithm'] = pd.Categorical(df_diff['algorithm'], categories=sorted(df_diff['algorithm'].unique()), ordered=True)
        df_diff['pop_notion_desc'] = pd.Categorical(df_diff['pop_notion_desc'], categories=sorted(df_diff['pop_notion_desc'].unique()), ordered=True)

        g = sns.catplot(
            data=df_diff,
            x='algorithm',
            y=diff_metric_col,
            hue='pop_notion_desc', # Compare how notions measure the gap
            col='eval_strategy_desc',
            row='domain',
            kind='bar',
            sharey=True, # Keep y-axis same for direct comparison of gap magnitude
            aspect=1.5,
            height=4,
            palette='crest',
            errorbar=None
            # order=sorted(df_diff['algorithm'].unique()) # Redundant
        )
        g.fig.suptitle(f'Fairness Gap Comparison ({title_suffix})', y=1.03)
        g.set_axis_labels("Algorithm", y_label)
        g.set_titles(row_template="{row_name}", col_template="{col_name}")
        g.tick_params(axis='x', rotation=45)
        # Add zero line using map
        g.map(plt.axhline, y=0, color='grey', linestyle='--', linewidth=0.8) 
        plt.tight_layout(rect=[0, 0, 1, 1])
        filepath = os.path.join(OUTPUT_DIR, f'{filename_prefix}_fairness_gap.png')
        plt.savefig(filepath, bbox_inches='tight')
        print(f"Saved fairness gap plot to {filepath}")
        # plt.show()
        plt.close(g.fig)
    except Exception as e:
         print(f"Error generating fairness gap plot for {y_label}: {e}")

def plot_strategy_impact_delta(df_diff, diff_metric_col, y_label, title_suffix, filename_prefix):
    """Plots the delta change in metric due to evaluation strategy."""
    if df_diff.empty or diff_metric_col not in df_diff.columns:
        print(f"Skipping Strategy Impact plot for {y_label} - missing data or column.")
        return
    try:
        df_diff['algorithm'] = pd.Categorical(df_diff['algorithm'], categories=sorted(df_diff['algorithm'].unique()), ordered=True)
        df_diff['conceptual_group'] = pd.Categorical(df_diff['conceptual_group'], categories=['Niche-Oriented', 'Diverse', 'Blockbuster-Oriented'], ordered=True)

        g = sns.catplot(
            data=df_diff,
            x='algorithm',
            y=diff_metric_col,
            hue='conceptual_group', # Show impact per conceptual group
            col='pop_notion_desc',
            row='domain',
            kind='bar',
            sharey=True,
            aspect=1.5,
            height=4,
            palette='flare',
            errorbar=None
            # order=sorted(df_diff['algorithm'].unique()), # Redundant
            # hue_order=['Niche-Oriented', 'Diverse', 'Blockbuster-Oriented'] # Redundant
        )
        g.fig.suptitle(f'Evaluation Strategy Impact ({title_suffix}: TrainItems - UserTest)', y=1.03)
        g.set_axis_labels("Algorithm", y_label)
        g.set_titles(row_template="{row_name}", col_template="{col_name}")
        g.tick_params(axis='x', rotation=45)
        g.map(plt.axhline, y=0, color='grey', linestyle='--', linewidth=0.8)
        plt.tight_layout(rect=[0, 0, 1, 1])
        filepath = os.path.join(OUTPUT_DIR, f'{filename_prefix}_strategy_impact.png')
        plt.savefig(filepath, bbox_inches='tight')
        print(f"Saved strategy impact plot to {filepath}")
        # plt.show()
        plt.close(g.fig)
    except Exception as e:
         print(f"Error generating strategy impact plot for {y_label}: {e}")

def plot_combined_metric_bar(df_merged, filename_prefix):
    """Plots NDCG as bar height and colors by %ΔGAP."""
    if df_merged.empty:
        print("Skipping Combined Metric plot - no merged data.")
        return
    try:
        # Determine global min/max for %ΔGAP for consistent color mapping
        vmin = df_merged['value_gap'].min()
        vmax = df_merged['value_gap'].max()
        # Ensure vmin and vmax are different for norm, handle case where all values are same
        if vmin == vmax:
            vmin -= 1 # Add a small range
            vmax += 1
        if pd.isna(vmin) or pd.isna(vmax):
             print("Warning: Cannot determine color range for combined plot due to NaN GAP values.")
             vmin, vmax = -100, 100 # Default range

        norm = mcolors.TwoSlopeNorm(vmin=vmin, vcenter=0, vmax=vmax) # Center color map at 0
        cmap = cm.coolwarm

        # Define categorical types
        df_merged['algorithm'] = pd.Categorical(df_merged['algorithm'], categories=sorted(df_merged['algorithm'].unique()), ordered=True)
        df_merged['user_group'] = pd.Categorical(df_merged['user_group'], categories=['low', 'med', 'high'], ordered=True)

        g = sns.catplot(
            data=df_merged,
            x='algorithm',
            y='value_ndcg',
            hue='user_group',
            col='eval_strategy_desc',
            row='pop_notion_desc',
            kind='bar',
            sharey=False,
            aspect=1.5,
            height=4,
            palette='viridis', # Base palette (will be overridden)
            errorbar=None,
            # order=sorted(df_merged['algorithm'].unique()), # Redundant
            # hue_order=['low', 'med', 'high'], # Redundant
            legend=False # Turn off default legend
        )
        g.fig.suptitle('Combined Metric: Bar=NDCG@10, Color=%ΔGAP', y=1.05)
        g.set_axis_labels("Algorithm", "NDCG@10")
        g.set_titles(row_template="{row_name}", col_template="{col_name}")

        # Iterate through axes to set bar colors based on GAP
        for ax in g.axes.flat:
            patches = [p for p in ax.patches if isinstance(p, plt.Rectangle)] # Get bar patches
            algo_order = sorted(df_merged['algorithm'].cat.categories)
            hue_order = sorted(df_merged['user_group'].cat.categories)
            num_hues = len(hue_order)
            num_algos = len(algo_order)

            if len(patches) != num_algos * num_hues:
                print(f"Warning: Patch/Bar mismatch in combined plot facet '{ax.get_title()}'. Skipping color update.")
                continue

            try:
                title_parts = ax.get_title().split(' | ')
                current_notion = title_parts[0].split(' = ')[1]
                current_strategy = title_parts[1].split(' = ')[1]
                # Infer domain from data shown in this axes
                # This relies on catplot using the filtered data for each axes
                # A potentially fragile way is to get the domain from the first bar's data
                # A safer way requires passing domain info explicitly
                # Let's assume the domain is consistent within the df_merged passed to *this call*
                current_domain = df_merged['domain'].unique()[0] # Assumes single domain if not faceted by it

                for i, patch in enumerate(patches):
                    algo_index = i // num_hues
                    hue_index = i % num_hues
                    current_algo = algo_order[algo_index]
                    current_user_group = hue_order[hue_index]

                    gap_val_series = df_merged[
                        (df_merged['algorithm'] == current_algo) &
                        (df_merged['user_group'] == current_user_group) &
                        (df_merged['pop_notion_desc'] == current_notion) &
                        (df_merged['eval_strategy_desc'] == current_strategy) &
                        (df_merged['domain'] == current_domain)
                    ]['value_gap']

                    if not gap_val_series.empty:
                        gap_val = gap_val_series.iloc[0]
                        if pd.notna(gap_val):
                            patch.set_facecolor(cmap(norm(gap_val)))
                        else:
                            patch.set_facecolor('grey') # Color for missing GAP data
                    else:
                         patch.set_facecolor('lightgrey') # Color if data lookup fails

            except Exception as e:
                 print(f"Error setting colors in combined plot for facet '{ax.get_title()}': {e}")

        # Add a colorbar manually
        try:
            sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
            sm.set_array([]) # You need this for the colorbar to work
            cbar = g.fig.colorbar(sm, ax=g.axes.ravel().tolist(), shrink=0.75, aspect=30, pad=0.1)
            cbar.set_label('%ΔGAP')
        except Exception as e:
            print(f"Error adding colorbar to combined plot: {e}")

        g.tick_params(axis='x', rotation=45)
        plt.tight_layout(rect=[0, 0, 1, 0.98]) # Adjust layout for title and colorbar
        filepath = os.path.join(OUTPUT_DIR, f'{filename_prefix}_combined_metric.png')
        plt.savefig(filepath, bbox_inches='tight')
        print(f"Saved combined metric plot to {filepath}")
        # plt.show()
        plt.close(g.fig)
    except Exception as e:
         print(f"Error generating combined metric plot: {e}")


def generate_algorithm_rankings(df_gap_diff, df_ndcg, scenario_filter, rank_metric, ascending, filename_prefix):
    """Generates and saves algorithm rankings based on a specific metric and scenario."""
    ranking_data = pd.Series(dtype=float)
    ylabel = "Unknown Metric"

    # Apply scenario filter
    df_gap_scenario = df_gap_diff.copy()
    df_ndcg_scenario = df_ndcg.copy()
    if scenario_filter:
        for col, val in scenario_filter.items():
            if col in df_gap_scenario.columns:
                df_gap_scenario = df_gap_scenario[df_gap_scenario[col] == val]
            if col in df_ndcg_scenario.columns:
                df_ndcg_scenario = df_ndcg_scenario[df_ndcg_scenario[col] == val]

    if rank_metric == 'Abs_GAP_Fairness_Gap':
        if 'GAP_Fairness_Gap' not in df_gap_scenario.columns or df_gap_scenario.empty:
             print(f"Skipping ranking by {rank_metric} for {scenario_filter}: Missing data.")
             return
        df_gap_scenario['Abs_GAP_Fairness_Gap'] = df_gap_scenario['GAP_Fairness_Gap'].abs()
        # Average rank across remaining facets (e.g., notions if not filtered)
        ranking_data = df_gap_scenario.groupby('algorithm')['Abs_GAP_Fairness_Gap'].mean().sort_values(ascending=ascending)
        ylabel = "Avg. Absolute %ΔGAP Fairness Gap (Niche - Blockbuster)"
    elif rank_metric == 'NDCG_Niche':
        df_ndcg_niche = df_ndcg_scenario[df_ndcg_scenario['conceptual_group'] == 'Niche-Oriented']
        if 'value_ndcg' not in df_ndcg_niche.columns or df_ndcg_niche.empty:
             print(f"Skipping ranking by {rank_metric} for {scenario_filter}: Missing data.")
             return
        ranking_data = df_ndcg_niche.groupby('algorithm')['value_ndcg'].mean().sort_values(ascending=ascending)
        ylabel = "Avg. NDCG@10 for Niche-Oriented Users"
    else:
        print(f"Unknown ranking metric: {rank_metric}")
        return

    if ranking_data.empty:
        print(f"Skipping ranking plot/table for {rank_metric} - no ranking data generated.")
        return

    # --- Create Table ---
    ranking_table = pd.DataFrame(ranking_data)
    ranking_table.columns = [ylabel]
    ranking_table['Rank'] = range(1, len(ranking_table) + 1)
    ranking_table_filename = os.path.join(OUTPUT_DIR, f'{filename_prefix}_ranking_{rank_metric}.csv')
    ranking_table.to_csv(ranking_table_filename)
    scenario_str = ', '.join([f'{k}={v}' for k, v in scenario_filter.items()]) if scenario_filter else 'Overall'
    print(f"\n--- Algorithm Ranking ({rank_metric}) for Scenario: {scenario_str} ---")
    # display(ranking_table) # Use if in notebook
    print(ranking_table)
    print(f"Saved ranking table to {ranking_table_filename}")

    # --- Create Plot ---
    try:
      plt.figure(figsize=(10, 6))
      sns.barplot(x=ranking_data.index, y=ranking_data.values, palette='rocket', order=ranking_data.index)
      plt.ylabel(ylabel)
      plt.xlabel("Algorithm")
      plt.title(f"Algorithm Ranking by {rank_metric}\nScenario: {scenario_str}")
      plt.xticks(rotation=45, ha='right')
      plt.tight_layout()
      filepath = os.path.join(OUTPUT_DIR, f'{filename_prefix}_ranking_{rank_metric}_bar.png')
      plt.savefig(filepath)
      print(f"Saved ranking plot to {filepath}")
      # plt.show()
      plt.close()
    except Exception as e:
       print(f"Error generating ranking plot for {rank_metric}: {e}")

## 3. Load and Preprocess Data

In [3]:
# 1. Load Data
print(f"Loading data from {CSV_FILE_PATH}...")
try:
    df = pd.read_csv(CSV_FILE_PATH)
    print(f"Successfully loaded {len(df)} rows.")
    # Display basic info and head
    print("\n--- Data Info ---")
    df.info()
    print("\n--- Data Head ---")
    print(df.head())
except FileNotFoundError:
    print(f"Error: CSV file not found at {CSV_FILE_PATH}")
    # Exit or handle appropriately in a real script
    # For notebook, we can just stop execution of subsequent cells
    raise # Stop notebook execution here

# 2. Preprocess Data
print("\nPreprocessing data...")
df['value'] = pd.to_numeric(df['value'], errors='coerce')
df['p_value'] = pd.to_numeric(df['p_value'], errors='coerce')

# Apply descriptive names
df = map_descriptive_names(df)

# Add conceptual group mapping
df['conceptual_group'] = df.apply(map_conceptual_group, axis=1)

# Drop rows where the primary metric value is NaN, as they can't be plotted/analyzed
initial_rows = len(df)
df.dropna(subset=['value'], inplace=True)
if initial_rows > len(df):
    print(f"Dropped {initial_rows - len(df)} rows due to NaN in 'value' column.")

# Separate DataFrames for metrics and t-tests
df_gap = df[df['metric_type'] == 'GAP'].copy()
df_ndcg = df[df['metric_type'] == 'NDCG'].copy()
df_gap_ttest = df[df['metric_type'] == 'GAP_TTEST'].copy()
df_ndcg_ttest = df[df['metric_type'] == 'NDCG_TTEST'].copy()

print("Preprocessing complete.")
print("\n--- Processed Data Head (GAP) ---")
print(df_gap.head())
print("\n--- Processed Data Head (NDCG) ---")
print(df_ndcg.head())

Loading data from consolidated_results.csv...
Successfully loaded 864 rows.

--- Data Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 864 entries, 0 to 863
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   domain               864 non-null    object 
 1   evaluation_strategy  864 non-null    object 
 2   popularity_notion    864 non-null    object 
 3   algorithm            864 non-null    object 
 4   metric_type          864 non-null    object 
 5   user_group           432 non-null    object 
 6   comparison_group     432 non-null    object 
 7   value                432 non-null    float64
 8   p_value              432 non-null    float64
dtypes: float64(2), object(7)
memory usage: 60.9+ KB

--- Data Head ---
  domain evaluation_strategy popularity_notion algorithm metric_type user_group comparison_group     value  p_value
0  music             eva_two           pop_one   MostPop       

## 4. Base Analysis: %ΔGAP and NDCG@10

### 4.1 %ΔGAP Analysis

In [4]:
print("\n--- Generating %ΔGAP Analysis (Base Plots & Tables) ---")

# Faceted Bar Chart (%ΔGAP by User Group within notion/strategy)
for domain in df_gap['domain'].unique():
    print(f"\nPlotting Faceted %ΔGAP for domain: {domain}")
    plot_faceted_grouped_bar(
        df_gap[df_gap['domain'] == domain],
        'value',
        f'%ΔGAP ({domain})',
        f'{domain}_gap'
    )

# Comparative Bar Chart (%ΔGAP by Pop Notion for conceptual groups)
print("\nPlotting Comparative %ΔGAP by Popularity Notion...")
plot_comparative_grouped_bar(
    df_gap,
    'value',
    '%ΔGAP Comparison',
    'gap_compare'
)

# Summary Table (%ΔGAP)
gap_summary_table = create_summary_table(df_gap, 'value', 'summary_table_gap.csv')

# T-test Table (%ΔGAP)
gap_ttest_table = create_ttest_table(df_gap_ttest, 'ttest_table_gap.csv')


--- Generating %ΔGAP Analysis (Base Plots & Tables) ---

Plotting Faceted %ΔGAP for domain: music


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['algorithm'] = pd.Categorical(data['algorithm'], categories=sorted(data['algorithm'].unique()), ordered=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['user_group'] = pd.Categorical(data['user_group'], categories=['low', 'med', 'high'], ordered=True)


Saved faceted bar chart to analysis_plots_tables_v2\music_gap_value_faceted.png

Plotting Faceted %ΔGAP for domain: movies


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['algorithm'] = pd.Categorical(data['algorithm'], categories=sorted(data['algorithm'].unique()), ordered=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['user_group'] = pd.Categorical(data['user_group'], categories=['low', 'med', 'high'], ordered=True)


Saved faceted bar chart to analysis_plots_tables_v2\movies_gap_value_faceted.png

Plotting Comparative %ΔGAP by Popularity Notion...
Saved comparative bar chart to analysis_plots_tables_v2\gap_compare_value_compare_Niche-Oriented.png
Saved comparative bar chart to analysis_plots_tables_v2\gap_compare_value_compare_Diverse.png
Saved comparative bar chart to analysis_plots_tables_v2\gap_compare_value_compare_Blockbuster-Oriented.png

--- Summary Table: value --- created (saving to file).
Saved table to analysis_plots_tables_v2\summary_table_gap.csv
Skipping t-test table ttest_table_gap.csv - no data.


### 4.2 NDCG@10 Analysis

In [5]:
print("\n--- Generating NDCG@10 Analysis (Base Plots & Tables) ---")

# Faceted Bar Chart (NDCG by User Group within notion/strategy)
for domain in df_ndcg['domain'].unique():
    print(f"\nPlotting Faceted NDCG@10 for domain: {domain}")
    plot_faceted_grouped_bar(
        df_ndcg[df_ndcg['domain'] == domain],
        'value',
        f'NDCG@10 ({domain})',
        f'{domain}_ndcg'
    )

# Comparative Bar Chart (NDCG by Pop Notion for conceptual groups)
print("\nPlotting Comparative NDCG@10 by Popularity Notion...")
plot_comparative_grouped_bar(
    df_ndcg,
    'value',
    'NDCG@10 Comparison',
    'ndcg_compare'
)

# Summary Table (NDCG)
ndcg_summary_table = create_summary_table(df_ndcg, 'value', 'summary_table_ndcg.csv')

# T-test Table (NDCG)
ndcg_ttest_table = create_ttest_table(df_ndcg_ttest, 'ttest_table_ndcg.csv')


--- Generating NDCG@10 Analysis (Base Plots & Tables) ---

Plotting Faceted NDCG@10 for domain: music


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['algorithm'] = pd.Categorical(data['algorithm'], categories=sorted(data['algorithm'].unique()), ordered=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['user_group'] = pd.Categorical(data['user_group'], categories=['low', 'med', 'high'], ordered=True)


Saved faceted bar chart to analysis_plots_tables_v2\music_ndcg_value_faceted.png

Plotting Faceted NDCG@10 for domain: movies


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['algorithm'] = pd.Categorical(data['algorithm'], categories=sorted(data['algorithm'].unique()), ordered=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['user_group'] = pd.Categorical(data['user_group'], categories=['low', 'med', 'high'], ordered=True)


Saved faceted bar chart to analysis_plots_tables_v2\movies_ndcg_value_faceted.png

Plotting Comparative NDCG@10 by Popularity Notion...
Saved comparative bar chart to analysis_plots_tables_v2\ndcg_compare_value_compare_Niche-Oriented.png
Saved comparative bar chart to analysis_plots_tables_v2\ndcg_compare_value_compare_Diverse.png
Saved comparative bar chart to analysis_plots_tables_v2\ndcg_compare_value_compare_Blockbuster-Oriented.png

--- Summary Table: value --- created (saving to file).
Saved table to analysis_plots_tables_v2\summary_table_ndcg.csv
Skipping t-test table ttest_table_ndcg.csv - no data.


## 5. Advanced Analysis Plots

### 5.1 Scatter Plot: Bias vs. Accuracy (%ΔGAP vs. NDCG@10)

In [6]:
print("\nPlotting %ΔGAP vs NDCG@10 scatter plot...")

# Merge GAP and NDCG data
df_merged_metrics = pd.merge(
    df_gap[['domain', 'evaluation_strategy', 'popularity_notion', 'algorithm', 'user_group', 'value', 'conceptual_group', 'eval_strategy_desc', 'pop_notion_desc']],
    df_ndcg[['domain', 'evaluation_strategy', 'popularity_notion', 'algorithm', 'user_group', 'value']],
    on=['domain', 'evaluation_strategy', 'popularity_notion', 'algorithm', 'user_group'],
    suffixes=('_gap', '_ndcg')
)

if not df_merged_metrics.empty:
    plot_bias_accuracy_scatter(df_merged_metrics, 'adv_')
else:
    print("Warning: Merged dataframe for scatter plot is empty. Check input data.")


Plotting %ΔGAP vs NDCG@10 scatter plot...
Saved scatter plot to analysis_plots_tables_v2\adv__bias_vs_accuracy_scatter.png


### 5.2 Delta Plots: Fairness Gaps

In [7]:
print("\nCalculating and plotting Fairness Gaps...")

# Prepare data by pivoting
df_fairness_comp = df[df['conceptual_group'].isin(['Niche-Oriented', 'Blockbuster-Oriented'])].copy()

# Pivot for GAP
pivot_gap_fairness = df_fairness_comp[df_fairness_comp['metric_type']=='GAP'].pivot_table(
    index=['domain', 'eval_strategy_desc', 'pop_notion_desc', 'algorithm'],
    columns='conceptual_group',
    values='value'
).reset_index()

# Pivot for NDCG
pivot_ndcg_fairness = df_fairness_comp[df_fairness_comp['metric_type']=='NDCG'].pivot_table(
    index=['domain', 'eval_strategy_desc', 'pop_notion_desc', 'algorithm'],
    columns='conceptual_group',
    values='value'
).reset_index()

# Calculate GAP difference (Niche - Blockbuster)
if 'Niche-Oriented' in pivot_gap_fairness.columns and 'Blockbuster-Oriented' in pivot_gap_fairness.columns:
    pivot_gap_fairness['GAP_Fairness_Gap'] = pivot_gap_fairness['Niche-Oriented'] - pivot_gap_fairness['Blockbuster-Oriented']
    plot_fairness_gap(pivot_gap_fairness.dropna(subset=['GAP_Fairness_Gap']), 'GAP_Fairness_Gap', '%ΔGAP Diff (Niche - Blockbuster)', '%ΔGAP', 'adv_gap')
else:
    print("Warning: Cannot calculate GAP Fairness Gap - missing Niche or Blockbuster groups in pivot.")
    # Assign empty df to avoid error later if needed for ranking
    pivot_gap_fairness['GAP_Fairness_Gap'] = np.nan 

# Calculate NDCG difference (Blockbuster - Niche)
if 'Niche-Oriented' in pivot_ndcg_fairness.columns and 'Blockbuster-Oriented' in pivot_ndcg_fairness.columns:
    pivot_ndcg_fairness['NDCG_Fairness_Gap'] = pivot_ndcg_fairness['Blockbuster-Oriented'] - pivot_ndcg_fairness['Niche-Oriented'] # Higher is better for Blockbuster
    plot_fairness_gap(pivot_ndcg_fairness.dropna(subset=['NDCG_Fairness_Gap']), 'NDCG_Fairness_Gap', 'NDCG@10 Diff (Blockbuster - Niche)', 'NDCG@10', 'adv_ndcg')
else:
     print("Warning: Cannot calculate NDCG Fairness Gap - missing Niche or Blockbuster groups in pivot.")
     pivot_ndcg_fairness['NDCG_Fairness_Gap'] = np.nan


Calculating and plotting Fairness Gaps...
Saved fairness gap plot to analysis_plots_tables_v2\adv_gap_fairness_gap.png
Saved fairness gap plot to analysis_plots_tables_v2\adv_ndcg_fairness_gap.png


### 5.3 Delta Plots: Evaluation Strategy Impact

In [8]:
print("\nCalculating and plotting Evaluation Strategy Impact...")

# Pivot GAP data by strategy
pivot_gap_strategy = df_gap.pivot_table(
    index=['domain', 'pop_notion_desc', 'algorithm', 'conceptual_group'],
    columns='eval_strategy_desc',
    values='value'
).reset_index()

# Pivot NDCG data by strategy
pivot_ndcg_strategy = df_ndcg.pivot_table(
    index=['domain', 'pop_notion_desc', 'algorithm', 'conceptual_group'],
    columns='eval_strategy_desc',
    values='value'
).reset_index()

# Calculate GAP difference (TrainItems - UserTest)
if 'TrainItems' in pivot_gap_strategy.columns and 'UserTest' in pivot_gap_strategy.columns:
    pivot_gap_strategy['GAP_Strategy_Diff'] = pivot_gap_strategy['TrainItems'] - pivot_gap_strategy['UserTest']
    plot_strategy_impact_delta(pivot_gap_strategy.dropna(subset=['GAP_Strategy_Diff']), 'GAP_Strategy_Diff', '%ΔGAP Diff', '%ΔGAP', 'adv_gap')
else:
    print("Warning: Cannot calculate GAP Strategy Impact - missing TrainItems or UserTest columns.")
    pivot_gap_strategy['GAP_Strategy_Diff'] = np.nan

# Calculate NDCG difference (TrainItems - UserTest)
if 'TrainItems' in pivot_ndcg_strategy.columns and 'UserTest' in pivot_ndcg_strategy.columns:
    pivot_ndcg_strategy['NDCG_Strategy_Diff'] = pivot_ndcg_strategy['TrainItems'] - pivot_ndcg_strategy['UserTest']
    plot_strategy_impact_delta(pivot_ndcg_strategy.dropna(subset=['NDCG_Strategy_Diff']), 'NDCG_Strategy_Diff', 'NDCG@10 Diff', 'NDCG@10', 'adv_ndcg')
else:
    print("Warning: Cannot calculate NDCG Strategy Impact - missing TrainItems or UserTest columns.")
    pivot_ndcg_strategy['NDCG_Strategy_Diff'] = np.nan


Calculating and plotting Evaluation Strategy Impact...
Saved strategy impact plot to analysis_plots_tables_v2\adv_gap_strategy_impact.png
Saved strategy impact plot to analysis_plots_tables_v2\adv_ndcg_strategy_impact.png


### 5.4 Algorithm Rankings

In [9]:
print("\nGenerating Algorithm Rankings...")

# Scenario 1: Rank by absolute GAP fairness gap under TrainItems using NicheConsumptionRate (averaged over domains)
filter_scen1 = {
    'eval_strategy_desc': 'TrainItems',
    'pop_notion_desc': 'NicheConsumptionRate'
}
generate_algorithm_rankings(pivot_gap_fairness, None, filter_scen1, 'Abs_GAP_Fairness_Gap', ascending=True, filename_prefix='adv_rank_fairness_trainitems_pop4')

# Scenario 2: Rank by NDCG for Niche users under TrainItems using NicheConsumptionRate (averaged over domains)
filter_scen2 = {
    'eval_strategy_desc': 'TrainItems',
    'pop_notion_desc': 'NicheConsumptionRate',
    #'conceptual_group': 'Niche-Oriented' # Filter is applied inside the function
}
df_ndcg_niche = df_ndcg[df_ndcg['conceptual_group'] == 'Niche-Oriented'] # Pre-filter for niche users
generate_algorithm_rankings(None, df_ndcg_niche, filter_scen2, 'NDCG_Niche', ascending=False, filename_prefix='adv_rank_accuracy_trainitems_pop4')

# Add more scenarios if needed, e.g., filtering by domain:
# filter_scen3 = {
#     'domain': 'music',
#     'eval_strategy_desc': 'TrainItems',
#     'pop_notion_desc': 'NicheConsumptionRate'
# }
# generate_algorithm_rankings(pivot_gap_fairness, None, filter_scen3, 'Abs_GAP_Fairness_Gap', ascending=True, filename_prefix='adv_rank_fairness_trainitems_pop4_music')



Generating Algorithm Rankings...


AttributeError: 'NoneType' object has no attribute 'copy'

### 5.5 Combined Metric Plot (Experimental)

In [None]:
# print("\nGenerating Combined Metric Plot (Bar=NDCG, Color=%ΔGAP)...")
# # This plot can be complex and might require adjustments
# if not df_merged_metrics.empty:
#     # May need to filter df_merged_metrics further if generating per-domain plots
#     plot_combined_metric_bar(df_merged_metrics, 'adv_') 
# else:
#     print("Skipping Combined Metric plot - merged data empty.")

## 6. Analysis Complete

In [None]:
print(f"\n--- Analysis Script Finished. Plots and tables saved to '{OUTPUT_DIR}' directory. ---")