# Detect unfairness in a loan-default classifier

In [1]:
import pandas as pd

# Load the loan training data; the first CSV column is used as the row index.
loan_dataset = pd.read_csv("../datasets/loan_train.csv", index_col=0)

# Feature columns, split by type (numeric vs. categorical).
numeric_features = [
    "education", "age", "income", "loan_size",
    "payment_timing", "year", "job_stability"
]
categorical_features = ["ZIP", "rent", "occupation"]

# Features: every column except the "minority" attribute and the "default"
# target. drop() already returns a new frame, so no explicit copy is needed.
X = loan_dataset.drop(columns=["minority", "default"])

# Target: copy just this one column so y stays independent of loan_dataset.
y = loan_dataset["default"].copy()

In [2]:
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# --- Preprocessing ---
# Numeric columns are standardised; the scaler lives in a one-step pipeline.
numeric_transformer = Pipeline([("scaler", StandardScaler())])

# Categorical columns are one-hot encoded; handle_unknown="ignore" keeps the
# encoder from raising on categories that were not seen during fit.
categorical_transformer = OneHotEncoder(handle_unknown="ignore")

# Route each group of columns to its transformer.
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, categorical_features),
])

# --- Model ---
# Preprocessing followed by a logistic regression with default settings.
clf = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression()),
])

# --- Evaluation ---
# Cross-validation strategy: 10 stratified folds.
cv = StratifiedKFold(n_splits=10)

# No `scoring` is passed, so every fold is scored with the estimator's
# default scorer (accuracy for a classifier).
cv_results = cross_validate(clf, X, y, cv=cv)

In [7]:
from sklearn.model_selection import cross_val_predict

# Cross-validated predictions for every row, produced with the same pipeline
# and the same fold splitter that were used for scoring.
y_pred = cross_val_predict(clf, X, y, cv=cv)

In [8]:
from sklearn.metrics import recall_score

# Recall over all rows, computed from the cross-validated predictions.
score = recall_score(y_true=y, y_pred=y_pred)

In [9]:
# Display the overall recall computed from the cross-validated predictions.
# NOTE(review): the recorded output is 1.0 — a perfect recall is worth
# double-checking (e.g. for target leakage among the features) before
# drawing fairness conclusions from it — TODO confirm.
score

1.0

In [11]:
from fairlearn.metrics import MetricFrame

# Recall broken down by the "minority" attribute. That column was dropped
# from X, so it is read from loan_dataset directly.
# Arguments are passed by keyword: recent fairlearn releases expect
# `metrics=`, `y_true=` and `y_pred=` rather than positional arguments.
grouped_metric = MetricFrame(
    metrics=recall_score,
    y_true=y,
    y_pred=y_pred,
    sensitive_features=loan_dataset["minority"],
)

# A bare expression in the middle of a cell is never rendered, so the overall
# value is printed explicitly; the per-group dict below is the cell's output.
print("Overall recall = ", grouped_metric.overall)
grouped_metric.by_group.to_dict()

{0: 1.0, 1: 1.0}

In [12]:
# Recall broken down by the "sex" column of loan_dataset. "sex" is not listed
# in numeric_features or categorical_features.
# Arguments are passed by keyword: recent fairlearn releases expect
# `metrics=`, `y_true=` and `y_pred=` rather than positional arguments.
gender_grouped_metric = MetricFrame(
    metrics=recall_score,
    y_true=y,
    y_pred=y_pred,
    sensitive_features=loan_dataset["sex"],
)

# A bare expression in the middle of a cell is never rendered, so the overall
# value is printed explicitly; the per-group dict below is the cell's output.
print("Overall recall = ", gender_grouped_metric.overall)
gender_grouped_metric.by_group.to_dict()

{0: 1.0, 1: 1.0}