In [1]:
#! pip install fairlearn
#https://fairlearn.github.io/api_reference/index.html

In [2]:
from azureml.core import Experiment, Run, Workspace
import azureml.core

# Check core SDK version number: prints the VERSION attribute of azureml.core
# so the environment the notebook ran in is recorded in the output.
print("SDK version:", azureml.core.VERSION)

SDK version: 1.20.0


In [None]:
# Obtain the Azure ML workspace handle via Workspace.from_config().
# NOTE(review): presumably this reads a local workspace config file -- confirm
# one is available before running this cell.
ws = Workspace.from_config()

# Print the workspace name as a quick check that the handle was obtained.
print(ws.name)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# shap is imported only for its bundled dataset loader.
from shap.datasets import adult

# Show the first five rows of element 0 of what adult() returns (the feature table).
adult()[0].head(5)

In [None]:
# Element 1 of what adult() returns: whether the person is eligible for a loan.
# Show the first ten labels.
pd.DataFrame(adult()[1]).iloc[:10]

In [None]:
# Look at the function definition: print the source code of adult() so the
# way the dataset is loaded and encoded can be read directly.
import inspect
print(inspect.getsource(adult))

In [None]:
# Raw (display=True) form of the features, used only for this preview.
# It is bound to its own name instead of X_raw / y_true: those names are
# assigned from adult(display=False) in the following cell and used by the rest
# of the notebook, so re-running this preview must not overwrite them.
X_display = adult(display=True)[0]
X_display.head()

## We do not want sex or race to affect the model, so we remove them from the features to avoid introducing bias directly

In [None]:
# Load the dataset with display=False (factorized categorical columns).
X_raw, y_true = adult(display=False)

# Leave 'Sex' and 'Race' out of the model inputs so they cannot be used as features.
X = X_raw.drop(columns=['Sex', 'Race'])

X.head()


In [None]:
# Convert y to numeric 1 / 0 values by multiplying it by 1.
y_true = 1 * y_true
# Show the first three converted labels.
pd.DataFrame(y_true).iloc[:3]

### Train a model

In [None]:
#! pip install fairlearn

In [None]:
#from fairlearn.metrics import group_summary
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split


#experiment = Experiment(workspace=ws, name="FairlearnDemo1") 
#run = experiment.start_logging(snapshot_directory=None) #Don't create a snapshot

#Split the data
#X_train, X_test, y_train, y_test = train_test_split(X, y_true, test_size=0.3, random_state=1) # 70% training and 30% test

# Train the baseline decision tree on the full dataset.
# random_state is fixed so the fitted tree -- and every metric, plot and
# dashboard derived from y_pred further down -- is the same on each
# Restart & Run All.
classifier = DecisionTreeClassifier(random_state=0)
classifier.fit(X, y_true)

# Run the predictions on the same rows the tree was trained on (the
# train/test split above is commented out), so the score below is a
# training-set accuracy, not a held-out estimate.
y_pred = classifier.predict(X)

# See the accuracy
metrics.accuracy_score(y_true, y_pred)

#run.log('Accuracy', metrics.accuracy_score(y_true, y_pred))


In [None]:
#!pip install --upgrade scikit-learn
# Print the installed scikit-learn version so the environment is recorded in the output.
import sklearn
print(sklearn.__version__)

In [None]:
#from sklearn.metrics import confusion_matrix, plot_confusion_matrix
#print(confusion_matrix(y_true, y_pred))
#plot_confusion_matrix(classifier, X, y_true)

# Only confusion_matrix is used in this cell. plot_confusion_matrix was never
# called here, and importing it fails with ImportError on scikit-learn >= 1.2,
# where that function was removed -- so it is no longer imported.
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# Compute the matrix once and reuse it for both the printed and plotted views.
cm = confusion_matrix(y_true, y_pred)
print(cm)

# Render the matrix as a colour-coded image.
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Wistia)
#run.log_image("ConfusionMatrix", path=None, plot=plt)


## Bring the sex column into a dataset so we can later check for any bias that has worked its way into the dataset

In [None]:
# Translate the numeric 'Sex' codes into labels: 0 becomes "female",
# any other value becomes "male".
sex = X_raw['Sex'].map(lambda code: "female" if code == 0 else "male")
sex.to_frame().head()

## We can see that the original data contains more males than females

In [None]:
import matplotlib.pyplot as plt
# Count the rows for each sex label.
dfgroup = pd.DataFrame(sex).groupby(['Sex'])['Sex'].count()
# Take the bar labels from the grouped index instead of a hard-coded list, so
# each bar is always paired with its own count even if the set or order of
# groups changes.
plt.bar(dfgroup.index.tolist(), dfgroup.values)

In [None]:
#print('Accuracy of the model between females and males')
#group_summary(accuracy_score, y_true, y_pred, sensitive_features=sex)

#Model has greater accuracy for females
#'female': 0.9958221149382601, 'male': 0.9668196420376319

In [None]:
#from fairlearn.metrics import selection_rate_group_summary
##Percent of data points marked as True 
#print('female and male chance of getting a loan')
#selection_rate_group_summary(y_true, y_pred, sensitive_features=sex)
##Males have a higher percentage chance of getting a loan
##'female': 0.1065824900194968, 'male': 0.28347865993575033
##Bias in previous loans entering the model

## Fairlearn Dashboard

In [None]:
# jupyter nbextension enable --py widgetsnbextension --sys-prefix

In [None]:
from fairlearn.widget import FairlearnDashboard

# Open the dashboard for the baseline predictions, grouped by the sex labels.
FairlearnDashboard(
    y_true=y_true,
    y_pred={"initial model": y_pred},
    sensitive_features=sex,
    sensitive_feature_names=['sex'],
)

## /\ Is the disparity in who will get a loan a problem? We will need to look at explanations to understand more about the model

# Show with regression

In [None]:
# Regression set-up: 'Age' becomes the target and every other column of X
# stays as a predictor.
age = X['Age']
regressionageX = X.drop(columns=['Age'])

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn import metrics
from sklearn.metrics import mean_squared_error, r2_score

# Train a linear regression that predicts Age from the remaining features.
clin = LinearRegression()
clin.fit(regressionageX, age)

# Run the predictions on the same rows the model was fitted on.
y_age_pred = clin.predict(regressionageX)

#See the accuracy
#metrics.accuracy_score(y_true, y_pred)

# In-sample error metrics for the age regression.
print(mean_squared_error(age, y_age_pred, multioutput='raw_values'))
print(r2_score(age, y_age_pred))

fig = plt.figure()
ax = plt.axes()

# Plot the true and predicted ages, each sorted in ascending order.
# Series.sort_values() is called without a positional argument: the former
# `sort_values(0)` bound 0 to `axis`, which is keyword-only in pandas 2.x and
# raises TypeError there. The default sort is the same ascending sort.
ax.plot(age.sort_values().values, label = "TrueAge")
# For the DataFrame, 0 is the column label to sort by; it is now passed as `by=`.
ax.plot(pd.DataFrame(y_age_pred).sort_values(by=0).values, label = "PredAge")
plt.legend()

In [None]:
# Reference plot: actual age against itself for the first 50 rows. This is
# what a perfect prediction would look like (all points on the diagonal).
fig = plt.figure()
ax = plt.axes()
ax.plot(age.head(50), age.head(50), 'o')
ax.set_xlabel("Actual age")
ax.set_ylabel("Actual age")

# Actual vs predicted age for the same 50 rows.
fig = plt.figure()
ax = plt.axes()
ax.plot(age.head(50), y_age_pred[:50], 'o')
ax.set_xlabel("Actual age")
ax.set_ylabel("Predicted age");

In [None]:
from fairlearn.widget import FairlearnDashboard

# Dashboard for the age regression, limited to the first 500 rows of the
# sensitive feature, the true ages and the predicted ages.
FairlearnDashboard(
    y_true=age.head(500),
    y_pred={"initial model": pd.DataFrame(y_age_pred).head(500)},
    sensitive_features=sex.head(500),
    sensitive_feature_names=['sex'],
)

## Add more features to investigate (race)

In [None]:
# Reload the raw (display=True) dataset to get the race details; the second
# returned element is not needed here.
Xrace, delme = adult(display=True)
# Keep only the 'Race' column, still as a one-column DataFrame.
Xrace = Xrace.loc[:, ['Race']]
Xrace

In [None]:
from fairlearn.metrics import group_summary, selection_rate_group_summary

In [None]:
# Accuracy by race: evaluates accuracy_score on the baseline predictions
# (y_pred) against y_true, grouped by the values in Xrace.
group_summary(accuracy_score, y_true, y_pred, sensitive_features=Xrace)

In [None]:
# Selection rate by race: the share of data points predicted as True in each
# race group, for the baseline predictions (y_pred).
selection_rate_group_summary(y_true, y_pred, sensitive_features=Xrace)

In [None]:
# Put sex and race side by side in one DataFrame (column-wise concatenation).
sexrace = pd.concat(objs=[sex, Xrace], axis='columns')

In [None]:
from fairlearn.widget import FairlearnDashboard

# Dashboard for the baseline predictions with both sex and race as sensitive features.
FairlearnDashboard(
    y_true=y_true,
    y_pred={"initial model": y_pred},
    sensitive_features=sexrace,
    sensitive_feature_names=['sex', 'race'],
)

# Run the mitigator to get a less biased model

In [None]:
%%time
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, GridSearch
constraint = DemographicParity()
classifier = DecisionTreeClassifier()
mitigator = ExponentiatedGradient(classifier, constraint)
mitigator.fit(X, y_true, sensitive_features=sex)
y_pred_mitigated = mitigator.predict(X)
selection_rate_group_summary(y_true, y_pred_mitigated, sensitive_features=sex)

In [None]:
# View the results: baseline and mitigated predictions side by side, grouped by sex.
FairlearnDashboard(
    y_true=y_true,
    y_pred={
        "initial model": y_pred,
        "mitigated model": y_pred_mitigated,
    },
    sensitive_features=sex,
    sensitive_feature_names=['sex'],
)

## Create some more models

In [None]:
%%time
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, GridSearch
mitigatorExponentiatedGradient = ExponentiatedGradient(DecisionTreeClassifier(), DemographicParity())
mitigatorExponentiatedGradient.fit(X, y_true, sensitive_features=sex)
y_pred_mitigatedExponentiatedGradient = mitigatorExponentiatedGradient.predict(X)

In [None]:
%%time
mitigatorGridSearch = GridSearch(DecisionTreeClassifier(), DemographicParity())
mitigatorGridSearch.fit(X, y_true, sensitive_features=sex)
y_pred_mitigatedGridSearch = mitigatorGridSearch.predict(X)

In [None]:
# Basic logistic regression model, fitted and evaluated on the full dataset.
from sklearn.linear_model import LogisticRegression

lg = LogisticRegression(fit_intercept=True, solver='liblinear')
# fit() returns the fitted estimator, so fitting and predicting are chained.
y_pred_lg = lg.fit(X, y_true).predict(X)

In [None]:
%%time
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(hidden_layer_sizes=(20,20,20), activation='relu', solver='adam', max_iter=500)
clf.fit(X, y_true)
y_pred_MLP = clf.predict(X)

In [None]:
# Compare every model built in this section, grouped by sex.
# The grid-search predictions were computed above but were missing from the
# comparison; they are included here so the mitigation approaches can be
# compared with each other.
FairlearnDashboard(sensitive_features=sex,
                       sensitive_feature_names=['sex'],
                       y_true=y_true,
                       y_pred={"initial model": y_pred,
                               "mitigated model Exponentiated Gradient": y_pred_mitigatedExponentiatedGradient,
                               "mitigated model Grid Search": y_pred_mitigatedGridSearch,
                               "Logistic Regression": y_pred_lg,
                               "NN": y_pred_MLP})