## Homework 1 XAI Machine Learning UW
### Paweł Fijałkowski

In [132]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_sample_weight

import pandas as pd
import numpy as np

#### TASK 1

In [14]:
total_blue = 100
total_red = total_blue / 9

# model[is_enrolled][will_use_xai] = number of people in this group
model_red = {True: {True: total_red / 4, False: total_red / 4}, False: {True: total_red / 4, False: total_red / 4}}
model_blue = { True: {True: 60, False: 5}, False: {True: 20, False: 15} } 

def sum_dict(dict_: dict) -> int:
    return sum([v for d in dict_.values() for v in (d.values() if isinstance(d, dict) else [d])])

In [23]:
def demographic_parity():
    """
    How different is the probability of enrollment for blue/red groups?
    """
    red_fraction_enrolled = sum_dict(model_red[True]) / total_red
    blue_fraction_enrolled = sum_dict(model_blue[True]) / total_blue
    
    return red_fraction_enrolled / blue_fraction_enrolled

def equal_opportunity():
    """
    How different is the probability of enrollment for those who will benefit the training?
    """
    red_fraction_benefit = model_red[True][True] / (model_red[True][True] + model_red[False][True]) 
    blue_fraction_benefit = model_blue[True][True] / (model_blue[True][True] + model_blue[False][True]) 

    return red_fraction_benefit / blue_fraction_benefit

def predictive_rate_parity():
    """
    How different is the probability that a given enrolled individual will use XAI based on group?
    """
    red_fraction_will_use = model_red[True][True] / sum_dict(model_red[True])
    blue_fraction_will_use = model_blue[True][True] / sum_dict(model_blue[True])

    return red_fraction_will_use / blue_fraction_will_use


In [27]:
f'Demographic Parity: {demographic_parity():.4f}, Equal Opportunity: {equal_opportunity():.4f}, Preditctive Rate Parity: {predictive_rate_parity():.4f}'

'Demographic Parity: 0.7692, Equal Opportunity: 0.6667, Preditctive Rate Parity: 0.5417'

#### TASK 2

In [81]:
data = pd.read_csv('bank-additional-full.csv', delimiter=";")
data['young'] = data['age'] < 60
data.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y,young
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no,True
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no,True
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no,True
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no,True
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no,True


In [82]:
label_encoder = LabelEncoder()
for column in data.columns:
    if data[column].dtype == 'object':
        data[column] = label_encoder.fit_transform(data[column])

In [83]:
X, y = data.drop('y', axis=1), data['y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [84]:
rf_classifier = RandomForestClassifier(n_estimators=3, max_depth=5, max_features='sqrt')
rf_classifier.fit(X_train, y_train)

In [85]:
y_pred_train = rf_classifier.predict(X_train)
y_pred_test = rf_classifier.predict(X_test)

In [86]:
f'Accuracy train: {accuracy_score(y_train, y_pred_train):.4f}, test: {accuracy_score(y_test, y_pred_test):.4f}'

'Accuracy train: 0.9044, test: 0.9013'

In [88]:
df_model1 = X_test.copy()
df_model1['true'] = y_test
df_model1['predicted'] = y_pred_test

In [94]:
def calculate_fairness_metrics(confusion_matrix: np.array):
    return {
        'demographic_parity': confusion_matrix[0].sum() / confusion_matrix.sum(),
        'equal_opportunity': confusion_matrix[0, 0] / confusion_matrix[:, 0].sum(),
        'predictive_rate_parity': (confusion_matrix[0, 0] / confusion_matrix[0].sum()) / (confusion_matrix[1, 1] / confusion_matrix[1].sum())
    }

In [97]:
def get_fairness(df_model: pd.DataFrame, feature: str):
    results = []
    for value, group in df_model.groupby(feature):
        cm = confusion_matrix(group['true'], group['predicted'], labels=[1, 0])
        fm = calculate_fairness_metrics(cm)
        results.append({**{feature: value}, **fm})
        
    return pd.DataFrame.from_records(results)


In [98]:
get_fairness(df_model1, 'young')

Unnamed: 0,young,demographic_parity,equal_opportunity,predictive_rate_parity
0,False,0.378601,0.688889,0.37139
1,True,0.105441,0.723404,0.20351


In [None]:
lr = LogisticRegression()
lr.fit(X_train, y_train)

In [113]:
y_pred_train_lr = lr.predict(X_train)
y_pred_test_lr = lr.predict(X_test)

In [114]:
f'Accuracy train: {accuracy_score(y_train, y_pred_train_lr):.4f}, test: {accuracy_score(y_test, y_pred_test_lr):.4f}'

'Accuracy train: 0.9080, test: 0.9099'

In [115]:
df_model2 = X_test.copy()
df_model2['true'] = y_test
df_model2['predicted'] = y_pred_test_lr

In [116]:
get_fairness(df_model2, 'young')

Unnamed: 0,young,demographic_parity,equal_opportunity,predictive_rate_parity
0,False,0.378601,0.666667,0.444263
1,True,0.105441,0.667946,0.423044


In [134]:
sample_weights = compute_sample_weight({False: len(X_train) / np.sum(X_train['young'] == False), True: len(X_train) / np.sum(X_train['young'] == True)}, X_train["young"])
rf_classifier_bias_mit = RandomForestClassifier(n_estimators=3, max_depth=5, max_features='sqrt')
rf_classifier_bias_mit.fit(X_train, y_train, sample_weight=sample_weights)

In [135]:
y_pred_train_bias = rf_classifier_bias_mit.predict(X_train)
y_pred_test_bias = rf_classifier_bias_mit.predict(X_test)

In [136]:
f'Accuracy train: {accuracy_score(y_train, y_pred_train_bias):.4f}, test: {accuracy_score(y_test, y_pred_test_bias):.4f}'

'Accuracy train: 0.9052, test: 0.9025'

In [139]:
df_model3 = X_test.copy()
df_model3['true'] = y_test
df_model3['predicted'] = y_pred_test_bias

In [140]:
get_fairness(df_model3, 'young')

Unnamed: 0,young,demographic_parity,equal_opportunity,predictive_rate_parity
0,False,0.378601,0.683333,0.509799
1,True,0.105441,0.721774,0.214405


# Comments

### Statistical Parity
The first model showed disparity across groups. The second model (with different hyperparameters) improved this slightly, but biases remained.
### Equal Opportunity
The second model increased fairness in positive outcomes but still showed disparities.
### Predictive Parity
Minimal change between models, indicating limited impact of model changes on this metric.
### Bias Mitigation
Data balancing improved Statistical Parity and Equal Opportunity. Predictive Parity remained largely unchanged.
### Performance vs Fairness:
Fairer models (Statistical Parity, Equal Opportunity) showed a slight performance drop, revealing a trade-off between accuracy and fairness.
### Correlation:
Statistical Parity and Equal Opportunity improved together. Predictive Parity showed less correlation with the other metrics. Fairness improvements didn't always lead to better predictive accuracy.





