# Import Libraries

In [None]:
# Uncomment and run the lines below if one of the required libraries is missing.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.

# %pip install fairlearn
# %pip install dalex -U
# %pip install -U scikit-learn
# %pip install -U pandas
# %pip install -U aif360
# %pip install -U plotly

In [52]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import sklearn
import dalex as dx

from copy import copy

from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.tree import DecisionTreeClassifier

# Report the installed version of each core library, keyed by package name.
# Kept as the cell's final expression so the notebook displays the resulting dict.
{
    module.__name__: module.__version__
    for module in (np, pd, matplotlib, sns, sklearn, dx)
}

{'numpy': '2.3.3',
 'pandas': '2.3.2',
 'matplotlib': '3.10.6',
 'seaborn': '0.13.2',
 'sklearn': '1.7.2',
 'dalex': '1.7.2'}

In [53]:
# Column that the models in this notebook are trained to predict.
target = "Employed"

# Load the survey data; the first CSV column is used as the row index.
df = pd.read_csv(
    "./stackoverflow_full.csv",
    index_col=0,
)

### Insert here the "no changes model cells"

## ML prerequisites

In [54]:
# Split the data set in two parts: 70% for training and 30% for testing.
# The fixed random_state keeps the partition identical from one run to the next.
split_options = {"test_size": 0.3, "random_state": 42}

X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=[target]),
    df[target],
    **split_options,
)

In [55]:
def _protected_groups(features):
    """Build the protected-attribute label of every row in ``features``.

    The label has the form ``"<gender flag>_<age>"``:

    * the gender flag is '1' when ``Gender == "Woman"`` and '0' for every
      other value (men and non-binary respondents);
    * the ``Age`` column is appended unchanged.

    Returns a Series aligned on the index of ``features``.
    """
    gender_flag = pd.Series(
        np.where(features["Gender"] == "Woman", '1', '0'),
        index=features.index,
    )
    return gender_flag + '_' + features["Age"]


# Same encoding for both splits: `protected` is passed to the fairness checks on
# the test set, `protected_train` to the resampling of the training set.
protected = _protected_groups(X_test)
protected_train = _protected_groups(X_train)

# Privileged (reference) group: respondents not recorded as "Woman" whose Age is "<35".
privileged = '0_<35'

In [56]:
# Numeric columns are passed through unchanged; object (string) columns are
# one-hot encoded, and categories unseen during fit are ignored at transform time.
numeric_columns = make_column_selector(dtype_include=np.number)
categorical_columns = make_column_selector(dtype_include=object)

preprocessor = make_column_transformer(
    ("passthrough", numeric_columns),
    (OneHotEncoder(handle_unknown="ignore"), categorical_columns),
)

# The decision-tree hyperparameters (or the classifier itself) can be changed here.
decision_tree = DecisionTreeClassifier(max_depth=10, random_state=123)

clf_decisiontree = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", decision_tree),
    ]
)

In [57]:
# Fit on the training split only (not on the full data frame), so that the
# test rows stay unseen until evaluation.
clf_decisiontree.fit(X=X_train, y=y_train)

0,1,2
,steps,"[('preprocessor', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('passthrough', ...), ('onehotencoder', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,123
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [58]:
# Wrap the fitted pipeline in a dalex explainer, evaluated on the held-out
# split rather than on the full data frame. verbose=True prints the set-up report.
exp_decisiontree = dx.Explainer(
    model=clf_decisiontree,
    data=X_test,
    y=y_test,
    verbose=True,
)

Preparation of a new explainer is initiated

  -> data              : 22039 rows 13 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 22039 values
  -> model_class       : sklearn.tree._classes.DecisionTreeClassifier (default)
  -> label             : Not specified, model's class short name will be used. (default)
  -> predict function  : <function yhat_proba_default at 0x0000020519B09BC0> will be used (default)
  -> predict function  : Accepts only pandas.DataFrame, numpy.ndarray causes problems.
  -> predicted values  : min = 0.0, mean = 0.536, max = 1.0
  -> model type        : classification will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -1.0, mean = -0.000922, max = 1.0
  -> model_info        : package sklearn

A new explainer has been created!


In [59]:
# Performance metrics of the explainer's model on its evaluation data;
# the result table is displayed as the cell output.
performance_decisiontree = exp_decisiontree.model_performance()
performance_decisiontree.result

Unnamed: 0,recall,precision,f1,accuracy,auc
DecisionTreeClassifier,0.804868,0.788158,0.796425,0.779845,0.860599


In [60]:
# Build the fairness object: metrics are computed per protected group and
# compared against the privileged group.
fairness_decisiontree = exp_decisiontree.model_fairness(
    protected=protected,
    privileged=privileged,
)

In [61]:
# Print the fairness report. epsilon=0.8 is the default value: metric ratios
# must stay within (0.8, 1.25) for a group to pass.
fairness_decisiontree.fairness_check(epsilon=0.8)

Bias detected in 2 metrics: FPR, STP

Conclusion: your model is not fair because 2 or more criteria exceeded acceptable limits set by epsilon.

Ratios of metrics, based on '0_<35'. Parameter 'epsilon' was set to 0.8 and therefore metrics should be within (0.8, 1.25)
            TPR       ACC       PPV       FPR       STP
0_>35  0.969325  0.989770  0.982346  0.942085  0.937943
1_<35  0.947239  1.006394  0.979823  0.768340  0.833333
1_>35  0.900613  1.046036  1.034048  0.459459  0.673759


In [62]:
# Plot the fairness results held in fairness_decisiontree (rendered as the cell output).
fairness_decisiontree.plot(verbose=False)





In [28]:
# Retrain the model on the training split.
clf_decisiontree.fit(X=X_train, y=y_train)

# Dalex explainer (correct syntax), evaluated on the held-out split.
exp_decisiontree = dx.Explainer(
    model=clf_decisiontree,
    data=X_test,
    y=y_test,
    verbose=True,
)


Preparation of a new explainer is initiated

  -> data              : 22039 rows 13 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 22039 values
  -> model_class       : sklearn.tree._classes.DecisionTreeClassifier (default)
  -> label             : Not specified, model's class short name will be used. (default)
  -> predict function  : <function yhat_proba_default at 0x0000020519B09BC0> will be used (default)
  -> predict function  : Accepts only pandas.DataFrame, numpy.ndarray causes problems.
  -> predicted values  : min = 0.0, mean = 0.536, max = 1.0
  -> model type        : classification will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -1.0, mean = -0.00132, max = 1.0
  -> model_info        : package sklearn

A new explainer has been created!

In [29]:

# Fairness: recompute the per-group metrics for the retrained model.
fairness_decisiontree = exp_decisiontree.model_fairness(
    protected=protected,
    privileged=privileged,
)


In [30]:
# Print the fairness report; with epsilon=0.8 the metric ratios must lie within (0.8, 1.25).
fairness_decisiontree.fairness_check(
    epsilon=0.8,
)


No bias was detected!

Conclusion: your model is fair in terms of checked fairness criteria.

Ratios of metrics, based on '0_<35'. Parameter 'epsilon' was set to 0.8 and therefore metrics should be within (0.8, 1.25)
            TPR       ACC       PPV       FPR       STP
0_>35  1.015834  0.982669  0.958692  1.006954  1.007742
1_<35  1.042631  0.972270  0.898451  0.977747  1.000000
1_>35  1.031669  0.996534  0.860585  0.869263  0.927742


###

### Strategy 1: Pre-processing: Resampling

A natural next step is to resample the data. Dalex provides two resampling methods and one reweighting method. Only the basic resampling is shown in this tutorial.

#### Training

In [63]:
# Fresh, unfitted pipeline for this strategy: numeric columns are passed through,
# object (string) columns are one-hot encoded (unknown categories are ignored).
numeric_columns = make_column_selector(dtype_include=np.number)
categorical_columns = make_column_selector(dtype_include=object)

preprocessor = make_column_transformer(
    ("passthrough", numeric_columns),
    (OneHotEncoder(handle_unknown="ignore"), categorical_columns),
)

# The decision-tree hyperparameters (or the classifier itself) can be changed here.
decision_tree = DecisionTreeClassifier(max_depth=10, random_state=123)

clf_decisiontree = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", decision_tree),
    ]
)

In [64]:
# Split the data set in two parts (70% training, 30% testing), starting again
# from the unmodified `df`. The fixed random_state keeps the partition reproducible.
split_options = {"test_size": 0.3, "random_state": 42}

X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=[target]),
    df[target],
    **split_options,
)

In [41]:
# Band edges and names for ComputerSkills. With right=False the intervals are
# left-closed: [0, 2), [2, 10), [10, 25), [25, inf).
bins = [0, 2, 10, 25, float('inf')]
labels = ['null', 'faible', 'moyen', 'élevé']


def _with_skill_level(frame):
    """Return ``frame`` with ComputerSkills replaced by a banded ComputerSkills_level.

    * A new frame is returned (``assign``), so the frame produced by
      ``train_test_split`` is never written into.
    * The banded column is cast to ``object``. ``pd.cut`` yields a ``category``
      column, which the preprocessor's selectors (``np.number`` / ``object``)
      do not pick up, so the feature would be silently dropped from the model.
    * If ComputerSkills is already gone (cell re-run), ``frame`` is returned
      unchanged instead of raising ``KeyError``.
    """
    if "ComputerSkills" not in frame.columns:
        return frame
    skill_level = pd.cut(frame["ComputerSkills"], bins=bins, labels=labels, right=False)
    return (
        frame
        .assign(ComputerSkills_level=skill_level.astype(object))
        .drop(columns=["ComputerSkills"])
    )


# The sensitive columns are removed from the training features only.
# X_test keeps Gender/Age: the fitted column transformer selects the columns it
# saw during fit by name, so they are not fed to the model.
# NOTE(review): confirm whether a later cell still needs Gender/Age in X_test;
# if not, drop them here as well for symmetry.
X_train = _with_skill_level(X_train.drop(columns=["Gender", "Age"], errors="ignore"))
X_test = _with_skill_level(X_test)



In [65]:
# Fit on the training split only (not on the full data frame), so that the
# test rows stay unseen until evaluation.
clf_decisiontree.fit(X=X_train, y=y_train)

0,1,2
,steps,"[('preprocessor', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('passthrough', ...), ('onehotencoder', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,123
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [66]:
from dalex.fairness import resample
from sklearn.base import clone

# Unfitted clone with the same hyperparameters. A shallow `copy()` would share
# the pipeline's steps with `clf_decisiontree`, so refitting the copy would
# silently refit the main object as well.
clf_resampled = clone(clf_decisiontree)

# Resampling observations: positions of the training rows to keep or duplicate.
# NOTE(review): no seed is passed to `resample`; if it draws from NumPy's global
# RNG the selection differs between runs - confirm and seed if reproducibility matters.
indices_uniform = resample(protected_train, y_train, verbose=False)

# Re-fit the model on the resampled rows. The indices are positional, hence `.iloc`.
clf_resampled.fit(X_train.iloc[indices_uniform], y_train.iloc[indices_uniform])

0,1,2
,steps,"[('preprocessor', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('passthrough', ...), ('onehotencoder', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,123
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [68]:
# Dalex explainer for the model trained on resampled data, evaluated on the
# same held-out split. verbose=True prints the set-up report.
exp_decisiontree_resampled = dx.Explainer(
    model=clf_resampled,
    data=X_test,
    y=y_test,
    verbose=True,
)

Preparation of a new explainer is initiated

  -> data              : 22039 rows 13 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 22039 values
  -> model_class       : sklearn.tree._classes.DecisionTreeClassifier (default)
  -> label             : Not specified, model's class short name will be used. (default)
  -> predict function  : <function yhat_proba_default at 0x0000020519B09BC0> will be used (default)
  -> predict function  : Accepts only pandas.DataFrame, numpy.ndarray causes problems.
  -> predicted values  : min = 0.0, mean = 0.536, max = 1.0
  -> model type        : classification will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -1.0, mean = -0.00116, max = 1.0
  -> model_info        : package sklearn

A new explainer has been created!


#### Algorithmic performance

In [69]:
# Performance metrics of the resampled model on the evaluation data;
# the result table is displayed as the cell output.
performance_resampled = exp_decisiontree_resampled.model_performance()
performance_resampled.result

Unnamed: 0,recall,precision,f1,accuracy,auc
DecisionTreeClassifier,0.817419,0.778092,0.79727,0.777576,0.856868


#### Fairness performance

In [70]:
# Fairness object for the resampled model; the explicit label distinguishes it
# from the first model when both are plotted together.
fairness_decisiontree_resampled = exp_decisiontree_resampled.model_fairness(
    protected=protected,
    privileged=privileged,
    label='DecisionTreeClassifier_resampled',
)

# Print the fairness report; with epsilon=0.8 the metric ratios must lie within (0.8, 1.25).
fairness_decisiontree_resampled.fairness_check(epsilon=0.8)


Bias detected in 2 metrics: FPR, STP

Conclusion: your model is not fair because 2 or more criteria exceeded acceptable limits set by epsilon.

Ratios of metrics, based on '0_<35'. Parameter 'epsilon' was set to 0.8 and therefore metrics should be within (0.8, 1.25)
            TPR       ACC       PPV       FPR       STP
0_>35  1.035847  0.996144  0.957016  1.135135  1.030303
1_<35  0.969098  1.001285  0.958281  0.868726  0.871658
1_>35  0.927070  1.021851  0.963338  0.667954  0.745098


__Compare performance of the first model and the resampled one (visually)__

In [71]:
# Plot the fairness results of the first model together with those of the resampled model.
fairness_decisiontree.plot([fairness_decisiontree_resampled])







In [72]:
# Same comparison (first model vs. resampled model), drawn as a radar chart.
fairness_decisiontree.plot([fairness_decisiontree_resampled], type='radar')

- Is this strategy effective in terms of algorithmic performance?
- What comment can you make based on the fairness metric result?
- Can you think of a way to improve this strategy (not necessarily in Python, but as a complementary idea to this solution)?

If you have more time or wish to compare, feel free to try the other strategies and compare the results (you can plot the fairness metrics to have a visual comparison).