In [1]:
%cd ~/cma/CMA_Fairness_v2

/dss/dsshome1/0C/ra93lal2/cma/CMA_Fairness_v2


  bkms = self.shell.db.get('bookmarks', {})
  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


The following cell holds the definition of our parameters. These values can be overridden by rendering the notebook with, e.g., the following command:

papermill -p alpha 0.2 -p ratio 0.3 universe_analysis.ipynb output/test_run.ipynb

In [2]:
import os
print("Current working directory:", os.getcwd())

Current working directory: /dss/dsshome1/0C/ra93lal2/cma/CMA_Fairness_v2


In [3]:
# Default parameter values for a single run of this notebook.
# `universe` holds one setting per analysis decision; the trailing comments list the
# values each key may take. "cutoff" and "eval_fairness_grouping" hold lists of options.
run_no = 0
universe_id = "test"
universe = {
    #"scale": "scale", # "scale", "do-not-scale",
    #"encode_categorical": "one-hot", # "ordinal", "one-hot"
    "model": "elasticnet", # "logreg", "penalized_logreg", "rf", "gbm", "elasticnet"
    "cutoff": ["quantile_0.15", "quantile_0.30"],
    "exclude_features": "nationality-sex", # "none", "nationality", "sex", "nationality-sex"
    "exclude_subgroups": "keep-all", # "keep-all", "drop-non-german"
    "eval_fairness_grouping": ["majority-minority", "nationality-all"]
}

# Directory handed to UniverseAnalysis, and the seed used for NumPy's global RNG
output_dir="./output"
seed=0

In [4]:
import json

# `universe` may arrive as a JSON string rather than a dict; decode it in that case
# and leave an already-parsed value untouched.
universe = json.loads(universe) if isinstance(universe, str) else universe

In [5]:
# Auto-reload the custom package
%load_ext autoreload
%autoreload 1
%aimport fairness_multiverse

In [6]:
from fairness_multiverse.universe import UniverseAnalysis

# Analysis helper from the project package, configured with the run parameters.
# Later cells call its compute_metrics, generate_sub_universes and
# generate_final_output methods.
universe_analysis = UniverseAnalysis(
    run_no = run_no,
    universe_id = universe_id,
    universe = universe,
    output_dir=output_dir,
)

In [7]:
import numpy as np
# Coerce the seed to int (it may not be an int if supplied externally) and seed
# NumPy's global random number generator with it.
parsed_seed = int(seed)
np.random.seed(parsed_seed)
print(f"Using Seed: {parsed_seed}")

Using Seed: 0


# Loading Data

Load siab_train, siab_test, siab_calib and/or 
load siab_train_features, siab_train_labels

In [None]:
# TODO: decide whether the full SIAB table is still needed or whether this cell can go.

from pathlib import Path
import pandas as pd

# Raw export and its gzip-compressed cache copy
raw_file = Path("data/raw/siab.csv")
cache_file = Path("data/siab_cached.csv.gz")

# The folder that holds the cache must exist before anything is written to it
cache_file.parent.mkdir(parents=True, exist_ok=True)

# Read-through cache: on a miss, parse the raw CSV and store a compressed copy;
# on a hit, read the compressed copy directly.
if not cache_file.exists():
    print(f"Cache not found. Reading raw SIAB data: {raw_file}")
    siab = pd.read_csv(raw_file)
    siab.to_csv(cache_file, index=False, compression='gzip')
    print(f"Cached SIAB data to: {cache_file}")
else:
    print(f"Loading SIAB data from cache: {cache_file}")
    siab = pd.read_csv(cache_file, compression='gzip')

# Quick sanity check of the loaded table's dimensions
print(siab.shape)


In [8]:
import pandas as pd

# Training features and labels, read from CSV files under ./data
X_train = pd.read_csv("./data/X_train.csv")
y_train = pd.read_csv("./data/y_train.csv")

In [9]:
# Test features and the true test labels (named y_true from here on)
X_test = pd.read_csv("./data/X_test.csv")
y_true = pd.read_csv("./data/y_test.csv")

In [10]:
# Calibration data for conformal prediction (features and labels)
X_calib = pd.read_csv("./data/X_calib.csv")
y_calib = pd.read_csv("./data/y_calib.csv")

In [11]:
# Auxiliary data needed downstream in the pipeline.
# These copies are taken before any protected columns are dropped from X_train / X_test /
# X_calib, so they still contain every column read from the CSV files. Later cells use
# them for the subgroup row mask and for subgroup / fairness evaluation.

org_train = X_train.copy()
org_test = X_test.copy()
org_calib = X_calib.copy()

In [None]:
siab

Pre-Processing for Selected Task -> skipped. I think I don't need this

# Preprocessing Data

In [12]:
# EXCLUDE PROTECTED FEATURES
# ----------------------
# "exclude_features": "none", # "nationality", "sex", "nationality-sex"

# Split the setting on "-" into individual feature names,
# e.g. "nationality-sex" -> ["nationality", "sex"]; "none" yields ["none"].
excluded_features = universe["exclude_features"].split("-") # split, e.g.: "nationality-sex" -> ["nationality", "sex"]
# Maps each readable feature name to the data columns that encode it
excluded_features_dictionary = {
    "nationality": ["maxdeutsch1", "maxdeutsch.Missing."],
    "sex": ["frau1"],
}


In [13]:
# Translate the readable feature names into their lists of column names,
# skipping empty fragments and the "none" placeholder.

excluded_features_columns = list(
    map(
        excluded_features_dictionary.__getitem__,
        filter(lambda name: name not in ("", "none"), excluded_features),
    )
)

In [14]:
from utils import flatten_once

# excluded_features_columns is a list of lists at this point (one list per feature).
# NOTE(review): flatten_once presumably removes one level of nesting, which matches the
# flat column list printed by the following cells — confirm against utils.
excluded_features_columns = flatten_once(excluded_features_columns)

In [15]:
# Remove the protected columns from the training features.
# Only columns that are still present are dropped and X_train is rebound to the result,
# so re-running this cell is a no-op. (The previous in-place drop raised KeyError on a
# second run because the columns were already gone.)
train_columns_to_drop = [c for c in excluded_features_columns if c in X_train.columns]
if len(train_columns_to_drop) > 0:
    print(f"Dropping features: {train_columns_to_drop}")
    X_train = X_train.drop(columns=train_columns_to_drop)

Dropping features: ['maxdeutsch1', 'maxdeutsch.Missing.', 'frau1']


In [16]:
# Remove the protected columns from the test features.
# Only columns that are still present are dropped and X_test is rebound to the result,
# so re-running this cell is a no-op. (The previous in-place drop raised KeyError on a
# second run because the columns were already gone.)
test_columns_to_drop = [c for c in excluded_features_columns if c in X_test.columns]
if len(test_columns_to_drop) > 0:
    print(f"Dropping features: {test_columns_to_drop}")
    X_test = X_test.drop(columns=test_columns_to_drop)

Dropping features: ['maxdeutsch1', 'maxdeutsch.Missing.', 'frau1']


In [17]:
# Remove the protected columns from the calibration features.
# Only columns that are still present are dropped and X_calib is rebound to the result,
# so re-running this cell is a no-op. (The previous in-place drop raised KeyError on a
# second run because the columns were already gone.)
calib_columns_to_drop = [c for c in excluded_features_columns if c in X_calib.columns]
if len(calib_columns_to_drop) > 0:
    print(f"Dropping features: {calib_columns_to_drop}")
    X_calib = X_calib.drop(columns=calib_columns_to_drop)

Dropping features: ['maxdeutsch1', 'maxdeutsch.Missing.', 'frau1']


In [18]:
# EXCLUDE CERTAIN SUBGROUPS
# ----------------------

mode = universe.get("exclude_subgroups", "keep-all") 
# Fetches the exclude_subgroups setting from the universe dict.
# Defaults to "keep-all" if the key is missing.

In [19]:
# Build a boolean row mask over org_train (the copy of the training features taken
# before protected columns were dropped). Unknown modes are rejected up front.
if mode not in ("keep-all", "drop-non-german"):
    raise ValueError(f"Unsupported mode for exclude_subgroups: {mode}")

keep_mask = (
    # "keep-all": an all-True mask, so no rows are removed
    pd.Series(True, index=org_train.index)
    if mode == "keep-all"
    # "drop-non-german": keep only rows where maxdeutsch1 equals 1
    # TODO(review): decide how rows with missing nationality should be treated here
    else org_train["maxdeutsch1"] == 1
)


In [20]:
# Count the rows the mask removes and report them; nothing is printed when no row is dropped
total_rows = len(keep_mask)
n_drop = total_rows - keep_mask.sum()
if n_drop > 0:
    pct = n_drop / total_rows * 100
    print(f"Dropping {n_drop} rows ({pct:.2f}%) where mode='{mode}'")

In [21]:
# Keep only the training rows selected by the subgroup mask
X_train = X_train[keep_mask]

In [22]:
# Filter the training labels with the same mask so they stay aligned with X_train
y_train = y_train[keep_mask]

# Model Training

In [23]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier

# Instantiate the estimator named by universe["model"].
# Supported values: "logreg", "penalized_logreg", "rf", "gbm", "elasticnet".
# Any other value raises ValueError. (The previous `raise "<string>"` raised a TypeError
# instead, because a string is not an exception in Python 3.)
model_name = universe["model"]

if model_name == "logreg":
    model = LogisticRegression() #penalty="none") #, solver="newton-cg", max_iter=1) # include random_state=19 ?
elif model_name == "penalized_logreg":
    model = LogisticRegression(penalty="l2", C=1.0) #, solver="newton-cg", max_iter=1)
elif model_name == "rf":
    model = RandomForestClassifier(n_estimators=100, n_jobs=-1)
elif model_name == "gbm":
    model = GradientBoostingClassifier()
elif model_name == "elasticnet":
    # The elastic-net penalty is combined with the 'saga' solver here
    model = LogisticRegression(penalty = 'elasticnet', solver = 'saga', l1_ratio = 0.5, max_iter=5000) # which solver to use?
else:
    raise ValueError(f"Unsupported universe.model: {model_name}")

In [24]:
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Wrap the selected estimator in a Pipeline. The preprocessing steps are currently
# commented out, so the pipeline consists of the single "model" step.
model = Pipeline([
    #("continuous_processor", continuous_processor),
    #("categorical_preprocessor", categorical_preprocessor),
    #("scale", StandardScaler() if universe["scale"] == "scale" else None),
    ("model", model),
])

In [25]:
# Fit on the training data; .values.ravel() flattens the label frame into a 1-D array
model.fit(X_train, y_train.values.ravel())

In [26]:
from fairness_multiverse.universe import predict_w_threshold

In [27]:
# Predicted class probabilities for the test set from the fitted pipeline
probs_test = model.predict_proba(X_test)

'''
Below code returns a boolean array (or binary 0/1 array depending on how it’s used) where each element 
is True if the probability of class 1 is greater than or equal to the threshold, and False otherwise.
'''
y_pred_default = predict_w_threshold(probs_test, 0.5)

# Naive prediction: accuracy of the 0.5-threshold predictions against the true test labels
accuracy_score(y_true = y_true, y_pred = y_pred_default)

0.8491472522572734

In [28]:
model.predict(X_test)

array([0, 0, 0, ..., 0, 0, 0])

# Conformal Prediction

In [29]:
# Miscoverage level for conformal prediction (10% allowed error rate => 90% target coverage)
# NOTE(review): alpha is assigned here rather than in the parameters cell, so a value
# supplied externally under the name `alpha` would be overwritten by this line — confirm
# whether alpha is meant to be configurable per run.
alpha = 0.1

In [30]:
probs_calib = model.predict_proba(X_calib)

In [31]:
# Flatten the calibration labels into a 1-D integer array.
# np.asarray accepts both the DataFrame read from CSV and an already-converted array, so
# re-running this cell no longer fails with "ndarray has no attribute 'values'".
y_calib = np.asarray(y_calib).ravel().astype(int)

In [32]:
from fairness_multiverse.conformal import compute_nc_scores

# Compute nonconformity scores on calibration set (1 - probability of true class)
nc_scores = compute_nc_scores(probs_calib, y_calib)

In [33]:
from fairness_multiverse.conformal import find_threshold

# Find conformal threshold q_hat for the given alpha (split conformal method)
q_hat = find_threshold(nc_scores, alpha)

In [34]:
q_hat

0.6872911580834083

In [35]:
from fairness_multiverse.conformal import predict_conformal_sets

# Generate prediction sets for each test example
pred_sets = predict_conformal_sets(model, X_test, q_hat)

In [36]:
# Collapse the label frame read from CSV via squeeze(); later cells use y_true as the
# label vector (evaluate_sets, build_cp_groups, fairness metrics).
y_true = y_true.squeeze()

In [37]:
from fairness_multiverse.conformal import evaluate_sets

# Evaluate coverage and average set size on test data
metrics = evaluate_sets(pred_sets, y_true)

In [38]:
metrics

{'coverage': 0.9065098651209452, 'avg_size': 1.1332292943930442}

In [39]:
# Work on a shallow copy of the universe dict and pull out the settings that are
# recorded alongside the conformal prediction metrics. .get() returns None for a missing key.
example_universe = universe.copy()
universe_model = example_universe.get("model")
universe_exclude_features = example_universe.get("exclude_features")
universe_exclude_subgroups = example_universe.get("exclude_subgroups")

In [40]:
cp_metrics_dict = {
    "universe_id": [universe_id],
    "universe_model": [universe_model],
    "universe_exclude_features": [universe_exclude_features],
    "universe_exclude_subgroups": [universe_exclude_subgroups],
    "q_hat": [q_hat],
    "coverage": [metrics["coverage"]],
    "avg_size": [metrics["avg_size"]],
}

In [41]:
cp_metrics_dict

{'universe_id': ['test'],
 'universe_model': ['elasticnet'],
 'universe_exclude_features': ['nationality-sex'],
 'universe_exclude_subgroups': ['keep-all'],
 'q_hat': [0.6872911580834083],
 'coverage': [0.9065098651209452],
 'avg_size': [1.1332292943930442]}

In [42]:
cp_metrics_df = pd.DataFrame(cp_metrics_dict)

In [43]:
cp_metrics_df.head()

Unnamed: 0,universe_id,universe_model,universe_exclude_features,universe_exclude_subgroups,q_hat,coverage,avg_size
0,test,elasticnet,nationality-sex,keep-all,0.687291,0.90651,1.133229


In [44]:
# Folder for the conformal prediction metrics CSV, created if it does not exist yet.
# NOTE(review): this path is independent of `output_dir` from the parameters cell — confirm
# that is intended.
output_folder = "results/cp_metrics_alpha_test"
os.makedirs(output_folder, exist_ok=True)

In [48]:
output_file = os.path.join(output_folder, f"metrics_{universe_id}.csv")

In [None]:
cp_metrics_df.to_csv(output_file, index=False)

In [None]:
# Conditional Coverage & looking at subgroups

In [49]:
def build_cp_groups(pred_sets, y_test, test_idx, X_test_full):
    """
    Build a DataFrame of conformal prediction results with subgroup info.

    Parameters
    ----------
    pred_sets : list
        Prediction set for each test sample, in the order of ``test_idx``.
    y_test : pandas.Series or array-like
        True labels of the test samples. A Series is aligned to ``test_idx`` by index;
        any other array-like is taken positionally (same order as ``test_idx``).
    test_idx : pandas.Index
        Index of the test samples corresponding to ``pred_sets``.
    X_test_full : pandas.DataFrame
        Test features **including protected attributes** ('frau1', 'maxdeutsch1' and,
        optionally, 'maxdeutsch.Missing.').

    Returns
    -------
    pandas.DataFrame
        Indexed by ``test_idx`` with columns 'pred_set' (set of ints), 'true_label',
        'frau1', 'nongerman' (float 0.0/1.0), 'nongerman_male' and 'nongerman_female'.
        Rows flagged in 'maxdeutsch.Missing.' are removed.
    """
    cp_df = pd.DataFrame(index=test_idx.copy())
    # Store prediction sets (convert each to a set of ints for consistency)
    cp_df['pred_set'] = pd.Series(pred_sets, index=test_idx).apply(lambda s: {int(x) for x in s})

    # True labels: a Series is aligned by index, a plain array/list is positional
    if isinstance(y_test, pd.Series):
        cp_df['true_label'] = y_test.reindex(test_idx)
    else:
        cp_df['true_label'] = np.asarray(y_test).ravel()

    # Bring in protected attributes from the full test set
    cp_df['frau1'] = X_test_full.loc[test_idx, 'frau1']          # 1 = female, 0 = male

    # Derive 'nongerman' flag ('maxdeutsch1' indicates German (1) vs non-German (0)).
    # The flag is built as float from the start so that missing nationality can be marked
    # with NaN without writing NaN into an integer column afterwards.
    nongerman = np.where(X_test_full.loc[test_idx, 'maxdeutsch1'] == 0, 1.0, 0.0)
    if 'maxdeutsch.Missing.' in X_test_full.columns:
        missing_mask = (X_test_full.loc[test_idx, 'maxdeutsch.Missing.'] == 1).to_numpy()
        nongerman = np.where(missing_mask, np.nan, nongerman)
    cp_df['nongerman'] = nongerman

    # Derive intersectional subgroup flags (NaN never equals 1, so such rows get 0 here)
    cp_df['nongerman_male']   = np.where((cp_df['nongerman'] == 1) & (cp_df['frau1'] == 0), 1, 0)
    cp_df['nongerman_female'] = np.where((cp_df['nongerman'] == 1) & (cp_df['frau1'] == 1), 1, 0)

    # Drop samples with missing subgroup info
    cp_df = cp_df.dropna(subset=['nongerman'])
    return cp_df

In [50]:
%whos

Variable                       Type                Data/Info
------------------------------------------------------------
GradientBoostingClassifier     ABCMeta             <class 'sklearn.ensemble.<...>dientBoostingClassifier'>
LogisticRegression             type                <class 'sklearn.linear_mo<...>stic.LogisticRegression'>
Pipeline                       ABCMeta             <class 'sklearn.pipeline.Pipeline'>
RandomForestClassifier         ABCMeta             <class 'sklearn.ensemble.<...>.RandomForestClassifier'>
StandardScaler                 type                <class 'sklearn.preproces<...>ng._data.StandardScaler'>
UniverseAnalysis               type                <class 'fairness_multiver<...>iverse.UniverseAnalysis'>
X_calib                        DataFrame                  employed_before  r<...>86692 rows x 157 columns]
X_test                         DataFrame                  employed_before  r<...>89710 rows x 157 columns]
X_train                        DataFrame  

In [51]:
cp_groups_df = build_cp_groups(pred_sets, y_true, X_test.index, org_test)
#needs universe_id and setting

In [53]:
cp_groups_df

Unnamed: 0,pred_set,true_label,frau1,nongerman,nongerman_male,nongerman_female
0,{0},0,1,0.0,0,0
1,{0},0,0,0.0,0,0
2,{0},0,1,1.0,0,1
3,{0},0,0,0.0,0,0
4,{0},0,1,0.0,0,0
...,...,...,...,...,...,...
89704,{0},0,0,0.0,0,0
89705,"{0, 1}",0,0,0.0,0,0
89706,{0},0,0,0.0,0,0
89708,{0},0,1,0.0,0,0


In [54]:
cp_groups_df[
    (cp_groups_df['frau1'] == 0) &
    (cp_groups_df['nongerman'] == 1)
]

Unnamed: 0,pred_set,true_label,frau1,nongerman,nongerman_male,nongerman_female
7,{0},1,0,1.0,1,0
12,{0},1,0,1.0,1,0
19,{0},0,0,1.0,1,0
50,{0},0,0,1.0,1,0
90,{0},0,0,1.0,1,0
...,...,...,...,...,...,...
89664,{0},0,0,1.0,1,0
89679,{0},0,0,1.0,1,0
89680,{0},0,0,1.0,1,0
89681,{0},0,0,1.0,1,0


In [55]:
# Coverage indicator per sample: 1 when the true label is a member of the prediction set, else 0
cp_groups_df['coverage'] = [
    int(label in pred_set)
    for label, pred_set in zip(cp_groups_df['true_label'], cp_groups_df['pred_set'])
]

In [56]:
cp_groups_df

Unnamed: 0,pred_set,true_label,frau1,nongerman,nongerman_male,nongerman_female,coverage
0,{0},0,1,0.0,0,0,1
1,{0},0,0,0.0,0,0,1
2,{0},0,1,1.0,0,1,1
3,{0},0,0,0.0,0,0,1
4,{0},0,1,0.0,0,0,1
...,...,...,...,...,...,...,...
89704,{0},0,0,0.0,0,0,1
89705,"{0, 1}",0,0,0.0,0,0,1
89706,{0},0,0,0.0,0,0,1
89708,{0},0,1,0.0,0,0,1


In [58]:
subgroups = ['frau1','nongerman','nongerman_male','nongerman_female']

# Mean coverage per subgroup indicator, computed separately for indicator value 0 and 1
coverage_by_flag = {
    flag: {
        g: cp_groups_df.loc[cp_groups_df[g] == flag, 'coverage'].mean()
        for g in subgroups
    }
    for flag in (0, 1)
}
coverage_neg = coverage_by_flag[0]   # samples where the indicator is 0
coverage_pos = coverage_by_flag[1]   # samples where the indicator is 1

# One row per subgroup indicator, one column per indicator value
pd.DataFrame({
    'cov_if_0': coverage_neg,
    'cov_if_1': coverage_pos
})



Unnamed: 0,cov_if_0,cov_if_1
frau1,0.908014,0.905513
nongerman,0.906896,0.907146
nongerman_male,0.904718,0.922183
nongerman_female,0.909011,0.882265


# (Fairness) Metrics

In [None]:
# TODO: decide whether 'maxdeutsch.Missing.' needs to be taken into account here.

import numpy as np

# The majority value is the most frequent value of maxdeutsch1 in the training copy
colname_to_bin = "maxdeutsch1"
majority_value = org_train[colname_to_bin].mode()[0]

# Adds a "majmin" column to org_test in place: "majority" where the test value equals the
# training majority value, "minority" for every other value.
org_test["majmin"] = np.where(org_test[colname_to_bin] == majority_value, "majority", "minority")

In [None]:
# Build a single example universe: copy the settings (shallow copy, so reassigning keys
# below leaves `universe` itself unchanged) and pick the first option for the two
# list-valued settings.
example_universe = universe.copy()
example_universe["cutoff"] = example_universe["cutoff"][0]
example_universe["eval_fairness_grouping"] = example_universe["eval_fairness_grouping"][0]
# Compute the metrics for this example universe from the test-set probabilities
fairness_dict, metric_frame = universe_analysis.compute_metrics(
    example_universe,
    y_pred_prob=probs_test,
    y_test=y_true,
    org_test=org_test,
)

# Overall

Fairness
Main fairness target: Equalized Odds. Seems to be a better fit than equal opportunity, since we're not only interested in Y = 1. Seems to be a better fit than demographic parity, since we also care about accuracy, not just equal distribution of preds.

Pick column for computation of fairness metrics

Performance
Overall performance measures, most interesting in relation to the measures split by group below

In [None]:
metric_frame.overall

By Group

In [None]:
metric_frame.by_group

In [None]:
# In a graphic: one bar-chart panel per metric, arranged on a 3x3 grid.
# NOTE(review): the 3x3 layout presumably needs by_group to have at most nine metric columns — confirm.
metric_frame.by_group.plot.bar(
    subplots=True,
    layout=[3, 3],
    legend=False,
    figsize=[12, 8],
    title="Show all metrics",
)

# Final Output

In [None]:
sub_universes = universe_analysis.generate_sub_universes()
len(sub_universes)

In [None]:
def filter_sub_universe_data(sub_universe, org_test):
    """Return an all-True boolean row mask for ``org_test``.

    ``sub_universe`` is accepted but not used: no filtering is applied, every row is kept.
    The number of retained rows is printed.
    """
    n_rows = org_test.shape[0]
    keep_rows_mask = np.full(n_rows, True, dtype=bool)

    print(f"[INFO] Keeping all rows: {keep_rows_mask.sum()} rows retained.")
    return keep_rows_mask

In [None]:
# Produce the final output from the test-set probabilities and true labels.
# save=True is passed through to generate_final_output; filter_data supplies the row-mask
# function defined in this notebook (which keeps all rows).
final_output = universe_analysis.generate_final_output(
    y_pred_prob=probs_test,
    y_test=y_true,
    org_test=org_test,
    save=True,
    filter_data=filter_sub_universe_data
)
final_output