In [None]:
import joblib
import pandas as pd
import json 

from azureml.core import Workspace
from azureml.core.dataset import Dataset

from azureml.interpret.scoring.scoring_explainer import TreeScoringExplainer, LinearScoringExplainer, ScoringExplainer, save
from azureml.train.automl.run import AutoMLRun
from azureml.train.automl.runtime.automl_explain_utilities import automl_setup_model_explanations
from azureml.interpret import MimicWrapper
from automl.client.core.common.constants import MODEL_PATH

from sklearn.model_selection import train_test_split

## Generate and pickle an Explainer Object

In [None]:
# Load the Azure ML workspace handle from the local workspace configuration.
# `ws` is used by later cells to look up the experiment and the dataset, and is
# passed to the MimicWrapper.
ws = Workspace.from_config()

Connect to the classification experiment and its AutoML run.

In [None]:
# Fill in the two placeholders below before running this cell.
# They are kept as quoted strings so the cell is valid Python as written.
experiment_name = '<EXPERIMENT NAME>'
automl_run_id = '<RUN ID>'

# Look up the experiment by name in the workspace, then attach to the AutoML run
# with the given run id inside that experiment.
experiment = ws.experiments[experiment_name]
automl_run = AutoMLRun(experiment, run_id=automl_run_id)

In [None]:
# Pick the best child run of the AutoML run. `best_run` is reused below to
# download the model, as the run passed to the MimicWrapper, and as the target
# for the scoring-explainer upload and registration.
best_run = automl_run.get_best_child()

Download the model locally. 

In [None]:
# Fetch the run's model artifact (MODEL_PATH) into the local Models/ folder.
best_run.download_file(
    name=MODEL_PATH,
    output_file_path='Models/model.pkl',
)

In [None]:
# Deserialize the model file downloaded in the previous cell.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code; only
# load model files that come from a run you trust.
fitted_model = joblib.load('Models/model.pkl')

### Load dataset as Pandas Dataframe
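Run **one** of the two cells below; both produce the same `df`:
* 1st cell: reads the data from a local .csv file (faster). It is commented out by default; uncomment it to use it.
* 2nd cell: reads the data from an Azure ML Dataset (slower, but does not require an additional .csv file).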

In [None]:
# Uncomment and run this cell to read data from file - faster than reading from Dataset in AML

# df = pd.read_csv('data/data.csv',index_col=0)

In [None]:
# Run this cell to read data from Dataset (takes longer than above cell, but doesn't require extra data.csv file)

# Fill in the registered dataset name; kept as a quoted string so the cell is
# valid Python as written.
dataset_name = '<DATA SET NAME>'

# Fetch the latest version of the named dataset and materialize it as a pandas DataFrame.
dataset = Dataset.get_by_name(ws, name=dataset_name, version="latest")
df = dataset.to_pandas_dataframe()

In [None]:
# Separate the label column (y) from the remaining feature columns (X).
target = '<TARGET COLUMN NAME>'
y = df[target]
X = df.drop(columns=target)

Split your data and set up the explainer.

In [None]:
# Hold out 20% of the rows as a test set; random_state is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [None]:
# Build the AutoML explanation setup object for a classification task from the
# fitted model and the train/test split. Later cells read its estimator,
# surrogate model, transformed data, feature names, feature map and classes.
automl_explainer_setup_obj = automl_setup_model_explanations(
    fitted_model,
    task='classification',
    X=X_train,
    X_test=X_test,
    y=y_train,
)

In [None]:
# Create the Mimic explainer around the AutoML estimator, using the surrogate
# model and transformed training data from the setup object, and tie it to best_run.
explainer = MimicWrapper(
    ws,
    automl_explainer_setup_obj.automl_estimator,
    explainable_model=automl_explainer_setup_obj.surrogate_model,
    init_dataset=automl_explainer_setup_obj.X_transform,
    run=best_run,
    features=automl_explainer_setup_obj.engineered_feature_names,
    feature_maps=[automl_explainer_setup_obj.feature_map],
    classes=automl_explainer_setup_obj.classes,
)


In [None]:
# Request local and global explanations on the transformed test set, asking for
# raw-feature results (get_raw=True), then print the feature-importance dict.
raw_explanations = explainer.explain(
    ['local', 'global'],
    get_raw=True,
    raw_feature_names=automl_explainer_setup_obj.raw_feature_names,
    eval_dataset=automl_explainer_setup_obj.X_test_transform,
)
print(raw_explanations.get_feature_importance_dict())

### Test local explanations

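Run **one** of the two cells below:
* 1st cell: initializes a new scoring explainer. It uses `LinearScoringExplainer`; use `TreeScoringExplainer` instead if the model is tree-based.
* 2nd cell: loads a pickled scoring explainer, if you have already saved one.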

In [None]:
# Build the scoring explainer (linear variant) on top of the Mimic explainer,
# initialized with the transformed test set and the setup object's feature map.
scoring_explainer = LinearScoringExplainer(
    explainer,
    initialization_examples=automl_explainer_setup_obj.X_test_transform,
    feature_maps=[automl_explainer_setup_obj.feature_map],
)

In [None]:
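# Uncomment this cell if you already have a pickled scoring explainer saved.
# Adjust the path to wherever that pickle lives (note: the upload cell at the end of
# this notebook reads the pickle from 'scoring_explainer.pkl' in the working directory).

# scoring_explainer = joblib.load('models/scoring_explainer.pkl')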

In [None]:
# To test, use either a hand-built input sample (as below) or rows pulled from
# the test data set. A hand-built sample is a one-row DataFrame keyed by feature
# name - presumably one key per raw feature column the model expects; replace
# the placeholder names and values with real ones.
input_sample = pd.DataFrame([
    {
        "<FEATURENAME1>": "VALUE1",
        "<FEATURENAME2>": "VALUE2"
    }
])
input_sample

In [None]:
# Setup for inferencing explanations.
# NOTE: this rebinds automl_explainer_setup_obj to a setup object built from
# `data` only, so cells above that read it will see this object if re-run afterwards.
data = input_sample  # Can change value to X_test[:5]
automl_explainer_setup_obj = automl_setup_model_explanations(
    fitted_model,
    X_test=data,
    task='classification',
)

# Explain the transformed sample with the scoring explainer, asking for raw-feature values.
raw_local_importance_values = scoring_explainer.explain(
    automl_explainer_setup_obj.X_test_transform,
    get_raw=True,
)
print(raw_local_importance_values)

In [None]:
# Pair each raw feature name with its local importance value, one dict per
# record, ordered from the largest value to the smallest (signed values, so
# negative importances come last).

num_records = data.shape[0]
explanations = [
    dict(
        sorted(
            dict(
                zip(automl_explainer_setup_obj.raw_feature_names,
                    raw_local_importance_values[row])
            ).items(),
            key=lambda pair: pair[1],
            reverse=True,
        )
    )
    for row in range(num_records)
]

print(explanations)

In [None]:
# Get explanations with feature names as a list of lists: one inner list per
# record, holding one single-key {feature name: importance} dict per raw feature.
# Built with a comprehension so the `explanations` name from the previous cell
# is not rebound to a differently shaped value.

num_records = data.shape[0]
num_features = len(automl_explainer_setup_obj.raw_feature_names)
explanations_all = [
    [
        # Index-based access is kept on purpose: a row with fewer values than
        # feature names raises IndexError instead of being silently truncated.
        {automl_explainer_setup_obj.raw_feature_names[j]: raw_local_importance_values[i][j]}
        for j in range(num_features)
    ]
    for i in range(num_records)
]
print(explanations_all)

### Pickle explainer and upload to be registered and accessed in scoring script


In [None]:
# Pickle scoring explainer locally.
# exist_ok=True is passed so that re-running this cell does not fail on an
# existing target. Presumably the pickle is written as 'scoring_explainer.pkl'
# in the current working directory (the upload cell below reads that path) -
# TODO confirm against the azureml-interpret `save` documentation.
save(scoring_explainer, exist_ok=True)

In [None]:
# Upload the scoring explainer to the automl run.
# First argument: the name the file gets inside the run ('outputs/scoring_explainer-1.pkl');
# second argument: the local file to upload ('scoring_explainer.pkl').
# The registration cell below refers to the same run-side name.
best_run.upload_file('outputs/scoring_explainer-1.pkl', 'scoring_explainer.pkl')

In [None]:
# Register the file uploaded to the run as a model named 'scoring_explainer'.
scoring_explainer_model = best_run.register_model(
    model_name='scoring_explainer',
    model_path='outputs/scoring_explainer-1.pkl',
)