In [5]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler().set_output(transform='pandas')

# Load the CSV files
stroke_df = pd.read_csv('./f_results/stroke_results.csv')
wids_df = pd.read_csv('./f_results/wids_results.csv')
diabetes_df = pd.read_csv('./f_results/diabetes_results.csv')

In [7]:
min_max_scaler = MinMaxScaler().set_output(transform='pandas')

df_l = [stroke_df, wids_df, diabetes_df]
for df in df_l:
    print(scaler.fit_transform(df[df.columns[2:]]))
    df[df.columns[2:]] = scaler.fit_transform(df[df.columns[2:]]) 

    balanced_acc       acc  precision    recall        f1  eq_opp_diff  \
0       0.057195  0.760000   0.087287  0.081081  0.000000     0.743697   
1       0.057195  0.760000   0.087287  0.081081  0.000000     0.743697   
2       0.057195  0.760000   0.087287  0.081081  0.000000     0.743697   
3       0.057195  0.760000   0.087287  0.081081  0.000000     0.743697   
4       0.057195  0.760000   0.087287  0.081081  0.000000     0.743697   
..           ...       ...        ...       ...       ...          ...   
86      0.904336  0.850370   0.654927  0.513514  0.713980     0.071429   
87      0.768996  0.598519   0.399098  0.675676  0.448208     0.285714   
88      0.085634  0.583704   0.169648  0.270270  0.123696     0.915966   
89      0.952580  0.936296   0.968421  0.459459  0.974689     0.000000   
90      0.951311  0.880000   0.750537  0.513514  0.809114     0.071429   

    avg_odd_diff       spd  disparate_impact  theil_idx  fair_score  
0       0.788311  0.827965          0.517

In [17]:
import numpy as np
from scipy import stats
from statsmodels.stats.anova import anova_lm
from statsmodels.formula.api import ols

# Helper function to calculate relative changes
def calc_relative_changes(df):
    results = []
    
    # Get unique models
    models = df['model'].unique()
    
    for model in models:
        # Get baseline metrics for this model
        baseline = df[(df['model'] == model) & (df['BM'] == 'baseline')].iloc[0]
        
        # Get all configurations for this model
        configs = df[df['model'] == model]
        
        for _, config in configs.iterrows():
            if config['BM'] != 'baseline':
                # Calculate relative changes
                acc_change = (config['acc'] - baseline['acc']) / baseline['acc']
                fair_change = config['fair_score'] - baseline['fair_score']
                
                results.append({
                    'model': model,
                    'BM': config['BM'],
                    'acc_change': acc_change,
                    'fair_change': fair_change,
                    'balanced_acc_change': (config['balanced_acc'] - baseline['balanced_acc']) / baseline['balanced_acc'],
                    'dataset': 'diabetes',
                    'effectiveness': acc_change - fair_change  # Calculate effectiveness here for ANOVA
                })
    
    return pd.DataFrame(results)

# Calculate changes for both datasets
diabetes_changes = calc_relative_changes(diabetes_df)
stroke_changes = calc_relative_changes(stroke_df)
wids_changes = calc_relative_changes(wids_df)
diabetes_changes['dataset'] = 'diabetes'
stroke_changes['dataset'] = 'stroke'
wids_changes['dataset'] = 'wids'

# Combine results
all_changes = pd.concat([diabetes_changes, stroke_changes, wids_changes])
all_changes.isin(np.inf)

  acc_change = (config['acc'] - baseline['acc']) / baseline['acc']
  acc_change = (config['acc'] - baseline['acc']) / baseline['acc']


TypeError: only list-like or dict-like objects are allowed to be passed to DataFrame.isin(), you passed a 'float'

In [11]:
# Perform Two-way ANOVA analyses
# 1. For effectiveness (combined metric)
effectiveness_model = ols('effectiveness ~ C(model) + C(BM) + C(model):C(BM)', data=all_changes).fit()
effectiveness_anova = anova_lm(effectiveness_model, typ=2)

# 2. For accuracy changes
accuracy_model = ols('acc_change ~ C(model) + C(BM) + C(model):C(BM)', data=all_changes).fit()
accuracy_anova = anova_lm(accuracy_model, typ=2)

# 3. For fairness changes
fairness_model = ols('fair_change ~ C(model) + C(BM) + C(model):C(BM)', data=all_changes).fit()
fairness_anova = anova_lm(fairness_model, typ=2)

# Calculate effect sizes (partial eta-squared) for each ANOVA
def calculate_partial_eta_squared(aov):
    aov['pes'] = aov['sum_sq'] / (aov['sum_sq'] + aov['sum_sq'].sum())
    return aov

effectiveness_anova = calculate_partial_eta_squared(effectiveness_anova)
accuracy_anova = calculate_partial_eta_squared(accuracy_anova)
fairness_anova = calculate_partial_eta_squared(fairness_anova)

# Aggregate results by BM strategy
strategy_performance = all_changes.groupby('BM').agg({
    'acc_change': ['mean', 'std'],
    'fair_change': ['mean', 'std'],
    'effectiveness': ['mean', 'std']
}).round(4)

# Find best performing strategies
best_strategies = all_changes[
    (all_changes['acc_change'] > -0.1) & 
    (all_changes['fair_change'] < 0)
].groupby('BM')['effectiveness'].mean().sort_values(ascending=False)

# Add interaction analysis
# Calculate mean effectiveness for each model-BM combination
interaction_analysis = all_changes.pivot_table(
    values='effectiveness',
    index='model',
    columns='BM',
    aggfunc='mean'
).round(4)

d_val = {
    'strategy_performance': strategy_performance,
    'best_strategies': best_strategies,
    'effectiveness_anova': effectiveness_anova,
    'accuracy_anova': accuracy_anova,
    'fairness_anova': fairness_anova,
    'interaction_analysis': interaction_analysis,
    'detailed_changes': all_changes
}

# To interpret ANOVA results:
# - Low p-values (<0.05) in the ANOVA tables indicate significant effects
# - Partial eta-squared (pes) values indicate effect sizes:
#   - Small effect: ~0.01
#   - Medium effect: ~0.06
#   - Large effect: ~0.14

ValueError: array must not contain infs or NaNs

In [None]:
d_val['interaction_analysis']