In [77]:
#!pip install raiwidgets

In [1]:
from sklearn import svm
import pandas as pd
import zipfile
from lightgbm import LGBMClassifier

# Explainer Used: Mimic Explainer
from interpret.ext.blackbox import MimicExplainer
from interpret.ext.glassbox import LinearExplainableModel
from interpret.ext.glassbox import LGBMExplainableModel

In [4]:
from raiwidgets import ResponsibleAIDashboard
from responsibleai import RAIInsights

In [5]:
from shap.datasets import adult
X, y_true = adult(display=False)

#train_data = pd.read_csv('adult-train.csv')
#test_data = pd.read_csv('adult-test.csv')

In [13]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

def split_label(dataset, target_feature):
    X = dataset.drop([target_feature], axis=1)
    y = dataset[[target_feature]]
    return X, y

def create_classification_pipeline(X, y, target_feature):
    pipe_cfg = {
        'num_cols': X.dtypes[X.dtypes == 'int64'].index.values.tolist(),
        'cat_cols': X.dtypes[X.dtypes == 'object'].index.values.tolist(),
    }
    num_pipe = Pipeline([
        ('num_imputer', SimpleImputer(strategy='median')),
        ('num_scaler', StandardScaler())
    ])
    cat_pipe = Pipeline([
        ('cat_imputer', SimpleImputer(strategy='constant', fill_value='?')),
        ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))
    ])
    feat_pipe = ColumnTransformer([
        ('num_pipe', num_pipe, pipe_cfg['num_cols']),
        ('cat_pipe', cat_pipe, pipe_cfg['cat_cols'])
    ])

    # Append classifier to preprocessing pipeline.
    # Now we have a full prediction pipeline.
    pipeline = Pipeline(steps=[('preprocessor', feat_pipe),
                               ('model', LGBMClassifier())])

    return pipeline

target_feature = 'income'
categorical_features = ['workclass', 'education', 'marital-status',
                        'occupation', 'relationship', 'race', 'gender', 'native-country']

train_data = pd.read_csv('adult-train.csv', skipinitialspace=True)
test_data = pd.read_csv('adult-test.csv', skipinitialspace=True)

X_train_original, y_train = split_label(train_data, target_feature)
X_test_original, y_test = split_label(test_data, target_feature)

pipeline = create_classification_pipeline(X_train_original, y_train, target_feature)

y_train = y_train[target_feature].to_numpy()
y_test = y_test[target_feature].to_numpy()


# Take 500 samples from the test data
test_data_sample = test_data.sample(n=500, random_state=5)


In [14]:
model = pipeline.fit(X_train_original, y_train)

In [15]:
from raiwidgets import ResponsibleAIDashboard
from responsibleai import RAIInsights

In [16]:
rai_insights = RAIInsights(model, train_data, test_data_sample, target_feature, 'classification',
                           categorical_features=categorical_features)

The feature name education-num contains `-` which has issues with pandas version 0.25.3. Please upgrade your pandas to 1.x
The feature name marital-status contains `-` which has issues with pandas version 0.25.3. Please upgrade your pandas to 1.x
The feature name capital-gain contains `-` which has issues with pandas version 0.25.3. Please upgrade your pandas to 1.x
The feature name capital-loss contains `-` which has issues with pandas version 0.25.3. Please upgrade your pandas to 1.x
The feature name hours-per-week contains `-` which has issues with pandas version 0.25.3. Please upgrade your pandas to 1.x
The feature name native-country contains `-` which has issues with pandas version 0.25.3. Please upgrade your pandas to 1.x


In [17]:
# Interpretability
rai_insights.explainer.add()
# Error Analysis
rai_insights.error_analysis.add()
# Counterfactuals: accepts total number of counterfactuals to generate, the label that they should have, and a list of 
                # strings of categorical feature names
rai_insights.counterfactual.add(total_CFs=10, desired_class='opposite')

In [18]:
rai_insights.compute()

100%|██████████| 500/500 [03:11<00:00,  2.62it/s]  
categorical_feature in Dataset is overridden.
New categorical_feature is [1, 3, 5, 6, 7, 8, 9, 13]


UndefinedVariableError: name 'education_' is not defined

In [None]:
ResponsibleAIDashboard(rai_insights)