## Explainable AI for Vertex AI models

This notebook covers two tools for explaining the predictions of Vertex AI models:
1. Explainable AI for feature attribution
2. What-If Tool

In [None]:
from typing import Dict

from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value

In [2]:
def predict_tabular_classification(
    project: str = "29437252490",
    endpoint_id: str = "5406193120658325504",
    location: str = "us-central1",
    instance_dict: dict = {
        "Step": 500,
        "Type": "DEBIT",
        "Amount": 500,
        "oldbalanceOrg": 1000,
        "newbalanceOrig": 500,
        "oldbalanceDest": 6500,
        "newbalanceDest": 7000,
    },
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
) -> None:
    """Send one instance to a Vertex AI endpoint and print its predictions.

    Args:
        project: Project identifier used to build the endpoint resource path.
        endpoint_id: Identifier of the endpoint to query.
        location: Region used to build the endpoint resource path.
        instance_dict: Feature name -> value mapping for the single instance
            to score. It is only read here, never modified, so the shared
            default dict is safe.
        api_endpoint: Host name passed to the client as its ``api_endpoint``
            option.

    Returns:
        None. The deployed model id and each prediction are printed.
    """
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.PredictionServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # for more info on the instance schema, please use get_model_sample.py
    # and look at the yaml found in instance_schema_uri
    instance = json_format.ParseDict(instance_dict, Value())

    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    # No prediction parameters are sent: the request carries the instance only.
    response = client.predict(endpoint=endpoint, instances=[instance])

    print("response")
    print(" deployed_model_id:", response.deployed_model_id)
    # See gs://google-cloud-aiplatform/schema/predict/prediction/tables_classification.yaml for the format of the predictions.
    for prediction in response.predictions:
        print(" prediction:", dict(prediction))

In [3]:
# Run with the default arguments: scores the built-in sample instance against
# the default endpoint and prints the result.
predict_tabular_classification()

response
 deployed_model_id: 7709977844104822784
 prediction: {'classes': ['false', 'true'], 'scores': [0.9998745322227478, 0.0001253637601621449]}


In [6]:
def explain_tabular_classification(
    project: str = "29437252490",
    endpoint_id: str = "5406193120658325504",
    location: str = "us-central1",
    instance_dict: dict = {
        "Step": 500,
        "Type": "DEBIT",
        "Amount": 500,
        "oldbalanceOrg": 1000,
        "newbalanceOrig": 500,
        "oldbalanceDest": 6500,
        "newbalanceDest": 7000,
    },
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
) -> None:
    """Request an explanation for one instance and print its attributions.

    Args:
        project: Project identifier used to build the endpoint resource path.
        endpoint_id: Identifier of the endpoint to query.
        location: Region used to build the endpoint resource path.
        instance_dict: Feature name -> value mapping for the single instance
            to explain. It is only read here, never modified, so the shared
            default dict is safe.
        api_endpoint: Host name passed to the client as its ``api_endpoint``
            option.

    Returns:
        None. Every attribution of every returned explanation is printed.
    """
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.PredictionServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # for more info on the instance schema, please use get_model_sample.py
    # and look at the yaml found in instance_schema_uri
    instance = json_format.ParseDict(instance_dict, Value())

    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    # No explanation parameters are sent: the request carries the instance only.
    response = client.explain(endpoint=endpoint, instances=[instance])

    for explanation in response.explanations:
        print(" explanation")
        # Feature attributions.
        for attribution in explanation.attributions:
            print(attribution)
#             print("  attribution")
#             print("   baseline_output_value:", attribution.baseline_output_value)
#             print("   instance_output_value:", attribution.instance_output_value)
#             print("   output_display_name:", attribution.output_display_name)
#             print("   approximation_error:", attribution.approximation_error)
#             print("   output_name:", attribution.output_name)
#             output_index = attribution.output_index
#             for output_index in output_index:
#                 print("   output_index:", output_index)

#     for prediction in response.predictions:
#         print(prediction)

In [7]:
# Run with the default arguments: explains the built-in sample instance against
# the default endpoint and prints the feature attributions.
explain_tabular_classification()

 explanation
baseline_output_value: 0.9999809265136719
instance_output_value: 0.9998745322227478
feature_attributions {
  struct_value {
    fields {
      key: "Amount"
      value {
        number_value: -0.2038215305656195
      }
    }
    fields {
      key: "Step"
      value {
        number_value: -0.05122543266043067
      }
    }
    fields {
      key: "Type"
      value {
        number_value: 0.03956622071564198
      }
    }
    fields {
      key: "newbalanceDest"
      value {
        number_value: -0.006619255989789963
      }
    }
    fields {
      key: "newbalanceOrig"
      value {
        number_value: 0.1480723987333477
      }
    }
    fields {
      key: "oldbalanceDest"
      value {
        number_value: -0.0006055310368537903
      }
    }
    fields {
      key: "oldbalanceOrg"
      value {
        number_value: 0.07452673651278019
      }
    }
  }
}
output_index: 0
output_display_name: "false"
approximation_error: 0.0055834491653066086
output_name: "sc

In [9]:
import warnings

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from google.cloud import storage
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (average_precision_score, classification_report,
                             confusion_matrix, f1_score)
from sklearn.model_selection import train_test_split
warnings.filterwarnings("ignore")
from witwidget.notebook.visualization import WitConfigBuilder, WitWidget

In [12]:
# Number of examples drawn per class for the What-If Tool, and the seed that
# makes the draw repeatable across kernel restarts.
N_SAMPLES_PER_CLASS = 50
SAMPLE_SEED = 42

# Load the dataset and drop the columns that are not used as features.
# Chaining `drop` avoids mutating the frame in place.
X = pd.read_csv(
    "gs://cloud-samples-data/vertex-ai/managed_notebooks/fraud_detection/fraud_detection_data.csv"
).drop(columns=["nameOrig", "nameDest", "isFlaggedFraud"])

# Separate the label (kept as a one-column DataFrame) from the features.
y = X[["isFraud"]]
X = X.drop(columns=["isFraud"])

# shuffle=False keeps the original row order when splitting.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, shuffle=False
)

# Draw a balanced, reproducible subset of the test split: the same number of
# rows from each class, selected by index label.
pos_samples = (
    y_test[y_test["isFraud"] == 1]
    .sample(N_SAMPLES_PER_CLASS, random_state=SAMPLE_SEED)
    .index
)
neg_samples = (
    y_test[y_test["isFraud"] == 0]
    .sample(N_SAMPLES_PER_CLASS, random_state=SAMPLE_SEED)
    .index
)
test_samples_y = pd.concat([y_test.loc[pos_samples], y_test.loc[neg_samples]])
# Copy so later edits to the sample cannot write through to X_test.
test_samples_X = X_test.loc[test_samples_y.index].copy()

In [13]:
# configure the target and class-labels
# TARGET_FEATURE is passed to set_target_feature() when the What-If Tool is
# configured; it names the label column of the examples.
TARGET_FEATURE = "isFraud"
# LABEL_VOCAB is passed to set_label_vocab().
# NOTE(review): presumably index 0 / 1 correspond to isFraud == 0 / 1 — confirm.
LABEL_VOCAB = ["not-fraud", "fraud"]
# Prediction callback registered with the What-If Tool configuration.
def endpoint_predict(instances: list):
    """Return a ``[1 - p, p]`` pair for every prediction of the deployed model.

    NOTE(review): ``endpoint`` is a notebook-level name that no visible cell
    defines — presumably an ``aiplatform.Endpoint``; confirm it is created
    before this function is called.
    NOTE(review): the arithmetic assumes each prediction is a single number;
    a prediction shaped like a dict of classes and scores would fail on
    ``1 - p`` — verify against the endpoint actually used.

    Args:
        instances: Examples forwarded unchanged to ``endpoint.predict``.

    Returns:
        A list with one two-element list per prediction.
    """
    response = endpoint.predict(instances=instances)
    score_pairs = []
    for score in response.predictions:
        score_pairs.append([1 - score, score])
    return score_pairs


# Build the example table for the What-If Tool: feature columns followed by
# the label as the last column.
feature_values = test_samples_X.to_numpy()
# reshape(-1, 1) forces the labels into a single column so they can be
# stacked next to the features.
label_values = test_samples_y.to_numpy().reshape(-1, 1)
test_examples = np.hstack((feature_values, label_values))

# Column names in the same order as the stacked array.
example_rows = test_examples.tolist()
column_names = test_samples_X.columns.tolist() + ["isFraud"]

# Configure the tool step by step: examples, prediction callback, target
# column and class labels.
config_builder = WitConfigBuilder(example_rows, column_names)
config_builder = config_builder.set_custom_predict_fn(endpoint_predict)
config_builder = config_builder.set_target_feature(TARGET_FEATURE)
config_builder = config_builder.set_label_vocab(LABEL_VOCAB)

# Last expression of the cell, so the widget is rendered as the cell output.
WitWidget(config_builder, height=400)

WitWidget(config={'model_type': 'classification', 'label_vocab': ['not-fraud', 'fraud'], 'feature_names': ['st…