# Model Explanation avec Azure ML service
Modélisation avec Auto Explain

In [1]:
import logging
import os
import random

import pandas as pd
import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun

> Aide : https://docs.microsoft.com/en-us/python/api/overview/azure/ml/intro?view=azure-ml-py#install

In [2]:
# Uncomment to install the Azure ML SDK together with its `explain` extra.
#!pip install azureml-sdk[explain]

In [3]:
# Print the version of the installed Azure ML SDK.
import azureml.core
print("Version Azure ML service :", azureml.core.VERSION)

Version Azure ML service : 1.0.17


In [4]:
# Python interpreter version of the running kernel (shown as the cell output).
import sys
sys.version

'3.6.7 |Anaconda, Inc.| (default, Oct 28 2018, 19:44:12) [MSC v.1915 64 bit (AMD64)]'

In [6]:
# Connect to the Azure ML workspace described by the local config file
# (the SDK reports which config.json it picked up).
ws = Workspace.from_config()

# Name under which the runs of this notebook are grouped in the workspace.
experiment_name = 'automl-local-classification'
# Local folder later passed to AutoMLConfig as its `path`.
project_folder = './sample_projects/automl-local-classification-model-explanation'

experiment = Experiment(ws, experiment_name)

# Summary of the session, displayed below as a one-column table.
output = {
    'SDK version': azureml.core.VERSION,
    'Workspace Name': ws.name,
    'Resource Group': ws.resource_group,
    'Location': ws.location,
    'Project Directory': project_folder,
    'Experiment Name': experiment.name,
}

# Do not truncate long cell values (e.g. the project path).
# `None` is the supported "unlimited" value since pandas 1.0; the legacy `-1`
# sentinel is rejected by recent pandas, while pandas < 1.0 only accepts an
# int here, hence the fallback.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)
pd.DataFrame(data=output, index=['']).T

Found the config file in: C:\Users\seretkow\notebooks\Labs Azure ML service\aml_config\config.json


Unnamed: 0,Unnamed: 1
SDK version,1.0.17
Workspace Name,MLServiceWorkspace
Resource Group,mlserviceresourcegroup
Location,westeurope
Project Directory,./sample_projects/automl-local-classification-model-explanation
Experiment Name,automl-local-classification


In [7]:
# Opt in to sending diagnostics to Azure ML; the SDK confirms with a message
# that diagnostics collection is turned on.
from azureml.telemetry import set_diagnostics_collection
set_diagnostics_collection(send_diagnostics=True)

Turning diagnostics collection on. 


In [8]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load the iris dataset: feature matrix, integer class labels, column names.
iris = datasets.load_iris()
X = iris.data
y = iris.target
features = iris.feature_names

# Hold out 10% of the rows, keeping class proportions (stratify) and a fixed
# seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=100, stratify=y
)

# Wrap both feature arrays in DataFrames so the columns carry feature names.
X_train, X_test = (
    pd.DataFrame(split, columns=features) for split in (X_train, X_test)
)

In [9]:
# Show the training labels (integer class codes) produced by the split.
print(y_train)

[1 2 2 1 0 2 2 0 1 1 1 2 2 2 1 0 1 0 1 0 1 2 2 2 1 0 1 0 0 2 0 1 0 1 0 1 2
 0 1 0 0 2 1 1 2 0 0 2 0 0 2 1 2 0 2 2 0 0 2 2 2 0 2 1 1 1 0 1 1 1 1 1 2 2
 1 2 1 1 1 0 0 0 1 2 2 1 1 1 1 0 0 0 0 2 0 1 0 0 2 2 0 2 1 0 1 1 2 2 2 2 0
 1 2 2 1 0 2 0 2 1 0 1 2 0 0 0 0 2 0 1 2 0 2 2 1]


## On recherche uniquement les SVM

In [10]:
# AutoML classification settings for a run restricted to SVM models.
automl_config = AutoMLConfig(
    task='classification',
    debug_log='automl_errors.log',
    primary_metric='AUC_weighted',
    # Budget: 10 iterations, each with a 10-minute timeout.
    iteration_timeout_minutes=10,
    iterations=10,
    verbosity=logging.INFO,
    # Training data, with the held-out split used as the validation set.
    # NOTE(review): scores reported by AutoML are therefore measured on
    # X_test/y_test, not on an independent test set.
    X=X_train,
    y=y_train,
    X_valid=X_test,
    y_valid=y_test,
    # Only SVM pipelines are explored.
    whitelist_models=['SVM'],
    # Presumably needed so explanations can be retrieved from the run
    # afterwards -- confirm against the SDK documentation.
    model_explainability=True,
    path=project_folder,
)

In [11]:
# Submit the AutoML configuration to the experiment; show_output=True streams
# the per-iteration progress table into the cell output.
local_run = experiment.submit(automl_config, show_output=True)

Running on local machine
Parent Run ID: AutoML_e90080d3-c3e9-4285-967d-22a10f0da104
********************************************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary description of the pipeline being evaluated.
SAMPLING %: Percent of the training data to sample.
DURATION: Time taken for the current iteration.
METRIC: The result of computing score on the fitted pipeline.
BEST: The best observed score thus far.
********************************************************************************************************************

 ITERATION   PIPELINE                                       SAMPLING %  DURATION      METRIC      BEST
         0   PCA SVM                                        100.0000    0:00:16       1.0000    1.0000
         1   RobustScaler SVM                               100.0000    0:00:10       0.5000    1.0000
         2   StandardScalerWrapper SVM                      100

In [12]:
# Display the run summary (rendered with links to the Azure portal and docs).
local_run

Experiment,Id,Type,Status,Details Page,Docs Page
automl-local-classification,AutoML_e90080d3-c3e9-4285-967d-22a10f0da104,automl,Completed,Link to Azure Portal,Link to Documentation


In [13]:
# Show the interactive run-details widget for the AutoML run.
from azureml.widgets import RunDetails
RunDetails(local_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO', 'sd…

In [14]:
# Get the best child run and its fitted model from the AutoML parent run,
# then print the run's summary.
best_run, fitted_model = local_run.get_output()
print(best_run)

Run(Experiment: automl-local-classification,
Id: AutoML_e90080d3-c3e9-4285-967d-22a10f0da104_9,
Type: None,
Status: Completed)


In [15]:
# Print the fitted model to inspect the steps of the selected pipeline.
print(fitted_model)

Pipeline(memory=None,
     steps=[('prefittedsoftvotingclassifier', PreFittedSoftVotingClassifier(classification_labels=None,
               estimators=[('SVM', Pipeline(memory=None,
     steps=[('StandardScalerWrapper', <automl.client.core.common.model_wrappers.StandardScalerWrapper object at 0x000001E7932F7B00>), ('SVCWrapp...01, verbose=False))]))],
               flatten_transform=None, weights=[0.6, 0.1, 0.1, 0.1, 0.1]))])


In [16]:
from azureml.train.automl.automlexplainer import retrieve_model_explanation

# Retrieve the model explanation associated with the best run. The call
# returns six values: SHAP values, expected values, and overall / per-class
# importance summaries with their matching feature-name lists.
shap_values, expected_values, overall_summary, overall_imp, per_class_summary, per_class_imp = \
    retrieve_model_explanation(best_run)

In [17]:
# Overall feature importance: feature names first, then the matching
# importance values in the same order.
print(overall_imp)
print(overall_summary)

['petal length (cm)', 'petal width (cm)', 'sepal length (cm)', 'sepal width (cm)']
[0.2767375251917168, 0.08345572767681432, 0.02455398826098376, 0.010993147956425822]


In [18]:
# Class-level feature importance: one list of feature names per class,
# then the matching importance values per class.
print(per_class_imp)
print(per_class_summary)

[['petal length (cm)', 'petal width (cm)', 'sepal length (cm)', 'sepal width (cm)'], ['petal length (cm)', 'petal width (cm)', 'sepal length (cm)', 'sepal width (cm)'], ['petal length (cm)', 'petal width (cm)', 'sepal width (cm)', 'sepal length (cm)']]
[[0.3095092339822719, 0.04699373644073957, 0.03160226029992267, 0.012453071618082564], [0.30375331060564215, 0.08950383958728395, 0.033901277735775066, 0.009312081638080881], [0.21695003098723625, 0.11386960700241947, 0.01121429061311402, 0.008158426747253552]]


In [19]:
from azureml.train.automl.automlexplainer import explain_model

# Compute the explanation directly from the fitted model instead of reading
# it from the run. Presumably X_train serves as reference data and X_test as
# the rows being explained -- confirm against the SDK documentation.
# Note: this rebinds the six names assigned by the earlier
# retrieve_model_explanation cell.
shap_values, expected_values, overall_summary, overall_imp, per_class_summary, per_class_imp = \
    explain_model(fitted_model, X_train, X_test)

100%|█████████████████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 137.15it/s]


In [20]:
# Overall feature importance from the locally computed explanation:
# feature names first, then the matching importance values.
print(overall_imp)
print(overall_summary)

['petal length (cm)', 'petal width (cm)', 'sepal length (cm)', 'sepal width (cm)']
[0.2767375251917168, 0.08345572767681432, 0.02455398826098376, 0.010993147956425822]


In [21]:
# Show the run-details widget again (same call as the earlier widget cell).
from azureml.widgets import RunDetails
RunDetails(local_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO', 'sd…