In [1]:
import pandas as pd

In [2]:
# Load the predictions table. The path is relative, so it is resolved against
# the kernel's current working directory.
PREDICTIONS_CSV = './data/all_universe_predictions.csv'
df = pd.read_csv(PREDICTIONS_CSV)

In [None]:
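# 1. Mark each prediction as ambiguous
# (Superseded by the next cell, which first parses string-valued 'pred_set' entries
#  with literal_eval and then computes the same flag.)
#df['is_ambiguous'] = df['pred_set'].apply(lambda labels: 1 if len(labels) > 1 else 0)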

In [3]:
from ast import literal_eval

# Entries of 'pred_set' that are still strings are turned into Python literals;
# anything that is not a string is passed through untouched, so this cell can be
# re-run safely.
df['pred_set'] = df['pred_set'].map(
    lambda raw: raw if not isinstance(raw, str) else literal_eval(raw)
)

# 1 when the prediction set holds more than one element, 0 otherwise.
df['is_ambiguous'] = df['pred_set'].map(lambda labels: int(len(labels) > 1))

# Melt the subgroup flags into one column so you can group by them

In [4]:
# Binary subgroup indicator columns that get stacked into a single column.
subgroup_cols = ['frau1', 'nongerman', 'nongerman_male', 'nongerman_female']

# Identifier columns carried along unchanged for every stacked row.
id_cols = ['UniverseID', 'feature_set', 'model', 'is_ambiguous']  #'threshold_policy'

# Long format: one row per (original row, subgroup column). 'subgroup' holds the
# name of the indicator column and 'is_member' holds its value.
# The trailing .loc keeps only rows whose indicator equals 1, i.e. actual
# memberships of the row in that subgroup.
df_long = (
    df.melt(
        id_vars=id_cols,
        value_vars=subgroup_cols,
        var_name='subgroup',
        value_name='is_member',
    )
    .loc[lambda stacked: stacked['is_member'] == 1]
)

# TODO: the subgroup column contains the value "other 9%" -- what does it stand for?


# Compute the fraction of ambiguous predictions per (universe, subgroup)

In [5]:
# Keys identifying one (universe, subgroup) cell.
group_cols = ['UniverseID', 'feature_set', 'model', 'subgroup'] #'threshold_policy',

# The mean of the 0/1 'is_ambiguous' flag within each cell is the share of
# ambiguous predictions; reset_index(name=...) names that value column directly.
fractions = (
    df_long.groupby(group_cols)['is_ambiguous']
    .mean()
    .reset_index(name='ambiguity_fraction')
)

# For each universe, compute the maximum pairwise difference in ambiguity fraction between subgroups

In [7]:
# Compute disparity per universe = max - min ambiguity_fraction across subgroups.
#
# 'UniverseID' is part of the grouping keys so that the max/min range is taken
# over the subgroups of ONE universe. Grouping on ('feature_set', 'model') alone
# would pool the subgroup fractions of every universe that shares those two
# settings and report the range across all of them.
universe_keys = ['UniverseID', 'feature_set', 'model']
disparity_df = (
    fractions.groupby(universe_keys)['ambiguity_fraction']
    .agg(lambda frac: frac.max() - frac.min())
    .reset_index(name='ambiguity_disparity')
)

# 'UniverseID' is now carried through as a column of disparity_df.


# fANOVA

In [None]:
import numpy as np
from ConfigSpace import ConfigurationSpace, CategoricalHyperparameter
from fanova import fANOVA

In [None]:
# Define the ConfigSpace with categorical decision options.
#
# Only decision columns that actually exist in disparity_df are registered.
# disparity_df is produced by a groupby that does not include
# 'threshold_policy', so indexing that column unconditionally raises KeyError.
candidate_cols = ['feature_set', 'model', 'threshold_policy']
decision_cols = [col for col in candidate_cols if col in disparity_df.columns]

cs = ConfigurationSpace()
for col in decision_cols:
    # Sorted choices give every category a stable position in the choice list.
    choices = sorted(disparity_df[col].unique().tolist())
    cs.add(CategoricalHyperparameter(col, choices))


In [None]:
# Encode categorical values as integer codes
#encoders = {
#    col: {val: idx for idx, val in enumerate(sorted(disparity_df[col].unique()))}
#    for col in ['feature_set', 'model', 'threshold_policy']
#}
#
#X = np.vstack([
#    disparity_df['feature_set'].map(encoders['feature_set']),
#    disparity_df['model'].map(encoders['model']),
#    disparity_df['threshold_policy'].map(encoders['threshold_policy']),
#]).T
#
#Y = disparity_df['ambiguity_disparity'].to_numpy()

In [None]:
# Column order of the design matrix follows the iteration order of the
# hyperparameters stored in the configuration space.
hp_order = [param.name for param in cs.values()]

# Independent copy so later encoding does not write into disparity_df.
X = disparity_df.loc[:, hp_order].copy()

In [None]:
# Map categorical values to integer codes (position in the sorted list of the
# column's unique values).
for col in hp_order:
    levels = sorted(disparity_df[col].unique())
    encoder = {val: i for i, val in enumerate(levels)}
    # Encode from the untouched disparity_df column instead of X[col]: mapping
    # X[col] in place makes the cell non-idempotent, because a second run would
    # push the integer codes through a dict keyed by the original labels and
    # produce NaN everywhere.
    X[col] = disparity_df[col].map(encoder)

X_array = X.to_numpy(dtype=float)  # ensure float dtype


In [None]:
# Response vector for fANOVA: the 'ambiguity_disparity' column as a NumPy array,
# in the same row order as disparity_df.
Y = disparity_df.loc[:, 'ambiguity_disparity'].to_numpy()

In [None]:
# Run fANOVA on the encoded design matrix and the disparity response.
# A fixed seed is passed so the fitted forest, and therefore the importance
# scores derived from it, are reproducible across kernel restarts.
fanova = fANOVA(X_array, Y, config_space=cs, seed=42)


In [None]:
# Get individual importance scores, one per hyperparameter in the config space.
#
# quantify_importance returns a dict keyed by the dims tuple it was given, so
# the scores have to be looked up as result[dims][...]. Iterating over cs
# (instead of hard-coding indices 0..2) keeps this cell working when a factor
# such as 'threshold_policy' is not part of the space.
importance = {}
for idx, hp in enumerate(cs.values()):
    dims = (idx,)
    importance[hp.name] = fanova.quantify_importance(dims)[dims]['individual importance']

In [None]:
# Report each factor's individual importance as a percentage with two decimals.
print("fANOVA results (variance explained in ambiguity disparity):")
for factor, share in importance.items():
    percent = share * 100
    print(f"  {factor}: {percent:.2f}%")

# fANOVA v2

In [13]:
import numpy as np
# Make sure the `np.float` attribute exists: when looking it up raises
# AttributeError, alias it to the builtin float.
try:
    np.float
except AttributeError:
    np.float = float  # Patch for fanova compatibility


In [14]:
# Step 1: Build ConfigSpace and add hyperparameters
from ConfigSpace import ConfigurationSpace, CategoricalHyperparameter

cs = ConfigurationSpace()
cs.add(
    CategoricalHyperparameter("feature_set", sorted(disparity_df["feature_set"].unique())),
    CategoricalHyperparameter("model", sorted(disparity_df["model"].unique())),
    #CategoricalHyperparameter("threshold_policy", sorted(disparity_df["threshold_policy"].unique()))
)

# Step 2: Get correct hyperparameter order (the order the config space iterates in)
hp_order = [hp.name for hp in list(cs.values())]

# Step 3: Encode categorical variables using consistent mapping.
# Each label is replaced by its position in the sorted unique values, which is
# the same ordering used for the choice lists in Step 1.
X_df = disparity_df[hp_order].copy()
for col in hp_order:
    encoder = {val: i for i, val in enumerate(sorted(X_df[col].unique()))}
    X_df[col] = X_df[col].map(encoder)
X_df = X_df.astype(float)

# Step 4: Now pass this labeled DataFrame directly to fANOVA
from fanova import fANOVA

# Fixed seed for the forest fitted by fANOVA, so repeated runs of the notebook
# give the same importance scores.
FANOVA_SEED = 42

Y = disparity_df["ambiguity_disparity"].to_numpy()
fanova = fANOVA(X_df, Y, config_space=cs, seed=FANOVA_SEED)


In [15]:
# Step 1: Extract hyperparameters in correct order (no deprecated method)
hp_list = list(cs.values())

importance_scores = {}

# Step 2: Loop through in order and extract individual importance scores.
# quantify_importance returns a dict keyed by the dims tuple it was called with,
# so the per-dimension entry must be selected before reading the score.
# NOTE(review): a "'SwigPyObject' object has no attribute 'mean'" failure is not
# produced by this cell's own code (nothing here calls .mean); it comes from
# inside the fanova call -- presumably a fanova/pyrfr version mismatch; confirm
# the installed versions.
for i, hp in enumerate(hp_list):
    dims = (i,)
    try:
        result = fanova.quantify_importance(dims)
        importance_scores[hp.name] = result[dims]["individual importance"]
    except Exception as e:
        # Best effort: report the failure and continue with the remaining factors.
        print(f"Could not compute importance for {hp.name}: {e}")

# Step 3: Format and display (sorted copy instead of an in-place sort)
importance_df = pd.DataFrame.from_dict(
    importance_scores, orient="index", columns=["individual importance"]
).sort_values("individual importance", ascending=False)

print("fANOVA: Individual variance explained per decision factor (%):")
print((importance_df * 100).round(2))


Could not compute importance for feature_set: 'SwigPyObject' object has no attribute 'mean'
Could not compute importance for model: 'SwigPyObject' object has no attribute 'mean'
fANOVA: Individual variance explained per decision factor (%):
Empty DataFrame
Columns: [individual importance]
Index: []
swig/python detected a memory leak of type 'rfr::util::weighted_running_statistics< double > *', no destructor found.
swig/python detected a memory leak of type 'rfr::util::weighted_running_statistics< double > *', no destructor found.


In [11]:
import numpy as np
# Variance of the fANOVA response vector.
target_variance = np.var(Y)
print("Var(Y):", target_variance)
# Extremely small variance, so there is not much to explain

Var(Y): 5.778320607927732e-05


In [12]:
# Sanity check of the encoded design matrix: its dimensions and how many
# distinct codes each column contains.
design_shape = X_df.shape
distinct_per_column = X_df.nunique()

print("X shape:", design_shape)
print("Unique values per column:")
print(distinct_per_column)


X shape: (6, 2)
Unique values per column:
feature_set    2
model          3
dtype: int64
