In [9]:
from fairness.data import load_csv
from fairness.preprocess import add_age_group, map_binary_column, preprocess_tabular, make_train_test_split
from fairness.groups import create_intersectional_groups

# 1) Load raw data
df_raw = load_csv("fairness/data/heart.csv")

# 2) Create fairness-only protected attribute
df_raw = add_age_group(df_raw)

# Optional: map Sex if it's M/F (only if needed for your dataset)
# df_raw = map_binary_column(df_raw, col="Sex", mapping={"M": 1, "F": 0})

# 3) Build modelling dataframe: drop age_group BEFORE one-hot encoding
df_model = preprocess_tabular(df_raw, drop_cols=["age_group"])

# 4) Split (no need to drop age_group here, because df_model doesn't have it)
split = make_train_test_split(
    df_model,
    target_col="HeartDisease",
    drop_cols=[],
)

# 5) Create groups from RAW df, aligned to X_test indices
protected_test = df_raw.loc[split.X_test.index]
groups, _, counts = create_intersectional_groups(
    protected_test,
    ["Sex", "age_group"]
)

# 6) Fit a model (example: logistic regression)
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(max_iter=1000)
model.fit(split.X_train, split.y_train)
y_pred = model.predict(split.X_test)




STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=1000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [12]:
# intersectional group label per test sammple
print(groups)


# counts of each intersectional group in the test set
print("Intersectional group counts in the test set:")
for label, n in counts.items():
    print(f"{label}: {int(n)}")

# predicted labels
print("Predicted labels:")
print(y_pred)

# true labels
print("True labels:")
print(split.y_test)



['Sex=M|age_group=young', 'Sex=M|age_group=older', 'Sex=M|age_group=older', 'Sex=M|age_group=young', 'Sex=F|age_group=older', 'Sex=M|age_group=older', 'Sex=M|age_group=older', 'Sex=F|age_group=older', 'Sex=F|age_group=older', 'Sex=M|age_group=young', 'Sex=M|age_group=young', 'Sex=M|age_group=older', 'Sex=F|age_group=young', 'Sex=F|age_group=young', 'Sex=M|age_group=older', 'Sex=M|age_group=young', 'Sex=F|age_group=older', 'Sex=M|age_group=young', 'Sex=M|age_group=older', 'Sex=M|age_group=young', 'Sex=M|age_group=young', 'Sex=M|age_group=young', 'Sex=M|age_group=older', 'Sex=M|age_group=older', 'Sex=F|age_group=young', 'Sex=F|age_group=young', 'Sex=M|age_group=young', 'Sex=M|age_group=older', 'Sex=F|age_group=older', 'Sex=M|age_group=older', 'Sex=M|age_group=young', 'Sex=M|age_group=older', 'Sex=M|age_group=young', 'Sex=M|age_group=young', 'Sex=M|age_group=older', 'Sex=M|age_group=older', 'Sex=M|age_group=young', 'Sex=M|age_group=young', 'Sex=M|age_group=young', 'Sex=M|age_group=older',