In [47]:
from sklearn import svm
from sklearn.compose import ColumnTransformer
from sklearn.datasets import fetch_openml
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_selector as selector
from sklearn.pipeline import Pipeline
import pandas as pd

In [26]:
from shap.datasets import adult  # shap is only used its dataset utility
X, y_true = adult(display=False)


In [49]:
f = lambda x: 1 if x==True else 0
y_true = pd.DataFrame(y_true).applymap(f)

In [50]:
   
# Extract the items we want
X_raw = X
y = y_true #(y_true == '>50K') * 1

In [51]:
print(X_raw["Race"].value_counts().to_dict())

{4: 27816, 2: 3124, 1: 1039, 0: 311, 3: 271}


In [52]:
A = X_raw[['Sex','Race']]
X_raw = X_raw.drop(labels=['Sex', 'Race'],axis = 1)

In [53]:
(X_train, X_test, y_train, y_test, A_train, A_test) = train_test_split(
    X_raw, y, A, test_size=0.3, random_state=12345, stratify=y
)

# Ensure indices are aligned between X, y and A,
# after all the slicing and splitting of DataFrames
# and Series

#X_train = X_train.reset_index(drop=True)
#X_test = X_test.reset_index(drop=True)
#y_train = y_train.reset_index(drop=True)
#y_test = y_test.reset_index(drop=True)
#A_train = A_train.reset_index(drop=True)
#A_test = A_test.reset_index(drop=True)

In [54]:
lr_predictor = LogisticRegression(solver='liblinear', fit_intercept=True)

lr_predictor.fit(X_train, y_train)

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

In [55]:
svm_predictor = svm.SVC()

svm_predictor.fit(X_train, y_train)

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [56]:
from azureml.core import Workspace, Experiment, Model

ws = Workspace.from_config()
ws.get_details()

{'id': '/subscriptions/6a0ec27b-1203-46fe-ade0-8c3003d5e4bc/resourceGroups/MLServiceWrokspace/providers/Microsoft.MachineLearningServices/workspaces/MLServiceWrokspace',
 'name': 'MLServiceWrokspace',
 'identity': {'principal_id': '35e6df99-65b7-4ba4-a3fd-54d75cc01f59',
  'tenant_id': '72f988bf-86f1-41af-91ab-2d7cd011db47',
  'type': 'SystemAssigned'},
 'location': 'northeurope',
 'type': 'Microsoft.MachineLearningServices/workspaces',
 'tags': {},
 'sku': 'Basic',
 'workspaceid': 'b77f06f9-80dd-4231-9153-6a77c47edd72',
 'sdkTelemetryAppInsightsKey': 'aea5acff-0312-49cc-a3e2-8a4470df11d5',
 'description': '',
 'friendlyName': '',
 'creationTime': '2019-08-30T12:33:32.7059504+00:00',
 'containerRegistry': '/subscriptions/6a0ec27b-1203-46fe-ade0-8c3003d5e4bc/resourceGroups/MLServiceWrokspace/providers/Microsoft.ContainerRegistry/registries/mlservicewrobf2fd1fc',
 'keyVault': '/subscriptions/6a0ec27b-1203-46fe-ade0-8c3003d5e4bc/resourcegroups/mlservicewrokspace/providers/microsoft.keyvaul

In [57]:
#Register the models
import joblib
import os

os.makedirs('models', exist_ok=True)
def register_model(name, model):
    print("Registering ", name)
    model_path = "models/{0}.pkl".format(name)
    joblib.dump(value=model, filename=model_path)
    registered_model = Model.register(model_path=model_path,
                                      model_name=name,
                                      workspace=ws)
    print("Registered ", registered_model.id)
    return registered_model.id

In [58]:
model_dict = {}

lr_reg_id = register_model("fairness_linear_regression", lr_predictor)
model_dict[lr_reg_id] = lr_predictor
svm_reg_id = register_model("fairness_svm", svm_predictor)
model_dict[svm_reg_id] = svm_predictor

Registering  fairness_linear_regression
Registering model fairness_linear_regression
Registered  fairness_linear_regression:2
Registering  fairness_svm
Registering model fairness_svm
Registered  fairness_svm:2


In [59]:
ys_pred = {}
for n, p in model_dict.items():
    ys_pred[n] = p.predict(X_test)

In [62]:
from fairlearn.widget import FairlearnDashboard

FairlearnDashboard(sensitive_features=A_test, 
                   sensitive_feature_names=['Sex', 'Race'],
                   y_true=y_test,
                   y_pred=ys_pred)

FairlearnWidget(value={'true_y': [[0], [0], [0], [0], [0], [0], [1], [0], [0], [0], [1], [0], [0], [0], [0], […

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x7f30a5cd3940>

In [64]:
sf = { 'Race': A_test.Race, 'Sex': A_test.Sex }

from fairlearn.metrics._group_metric_set import _create_group_metric_set

dash_dict = _create_group_metric_set(y_true=y_test,
                                     predictions=ys_pred,
                                     sensitive_features=sf,
                                     prediction_type='binary_classification')

In [65]:
from azureml.contrib.fairness import upload_dashboard_dictionary, download_dashboard_by_upload_id

In [70]:
run

Experiment,Id,Type,Status,Details Page,Docs Page
Fairlearndemo1,715cbad4-e613-44f2-88e1-05d38f016478,,Running,Link to Azure Machine Learning studio,Link to Documentation


In [66]:
exp = Experiment(ws, "Fairlearndemo1")
print(exp)

run = exp.start_logging()
try:
    dashboard_title = "Sample fairlearn upload"
    upload_id = upload_dashboard_dictionary(run,
                                            dash_dict,
                                            dashboard_name=dashboard_title)
    print("\nUploaded to id: {0}\n".format(upload_id))

    downloaded_dict = download_dashboard_by_upload_id(run, upload_id)
finally:
    run.complete()

Experiment(Name: Fairlearndemo1,
Workspace: MLServiceWrokspace)


INFO:/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/contrib/fairness/_dashboard_validation.py:Starting validation of dashboard dictionary
INFO:/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/contrib/fairness/_dashboard_validation.py:Validation of dashboard dictionary successful
INFO:/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/contrib/fairness/_azureml_validation.py:Validating model ids exist
INFO:/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/contrib/fairness/_azureml_validation.py:Checking fairness_linear_regression:2
INFO:/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/contrib/fairness/_azureml_validation.py:Checking fairness_svm:2
INFO:/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/contrib/fairness/_azureml_validation.py:Validation of model ids complete
INFO:/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/contrib/fairness/_fairness_client.py:Uploading y_true
INFO:az


Uploaded to id: ad9d6066-8d41-42a6-a628-8ca957fd10ce



INFO:azureml.FairnessArtifactClient:Downloading from azureml.fairness/dashboard.metrics/ad9d6066-8d41-42a6-a628-8ca957fd10ce/y_pred/d9301368-fcd7-4afb-97ae-bf8d7cf343d1.json
INFO:azureml.FairnessArtifactClient:Downloading from azureml.fairness/dashboard.metrics/ad9d6066-8d41-42a6-a628-8ca957fd10ce/y_pred/a4464e83-4d4f-4504-8c29-1c365d07aa70.json
INFO:/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/contrib/fairness/_fairness_client.py:Populating sensitive features
INFO:azureml.FairnessArtifactClient:Downloading from azureml.fairness/dashboard.metrics/ad9d6066-8d41-42a6-a628-8ca957fd10ce/sensitive_features_column/b9323c34-3fc2-48fe-aaff-c53a88b6c345.json
INFO:azureml.FairnessArtifactClient:Downloading from azureml.fairness/dashboard.metrics/ad9d6066-8d41-42a6-a628-8ca957fd10ce/sensitive_features_column/c567f8d8-43ce-4790-8813-b805ec9cd87c.json
INFO:/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/contrib/fairness/_fairness_client.py:Populating metrics
INFO:

In [67]:
print(dash_dict == downloaded_dict)

True
