In [None]:
# Upgrade shap in the kernel's environment. No version is pinned, so the
# installed release depends on when this cell is run.
%pip install --upgrade shap

In [None]:
# Upgrade transformers in the kernel's environment. No version is pinned, so
# the installed release depends on when this cell is run.
%pip install --upgrade transformers

In [None]:
from azureml.core import Workspace

# Obtain the Azure ML workspace handle via Workspace.from_config().
ws = Workspace.from_config()

# Print the identifying details of the workspace, one per line.
print('\n'.join((
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
)))

In [None]:
from azureml.core import Dataset

# Fetch the dataset from the workspace by its name.
train_ds = Dataset.get_by_name(
    workspace=ws,
    name='Stackoverflow dataset',
)

# Convert it to a pandas DataFrame and assign working column names.
data = train_ds.to_pandas_dataframe()
data.columns = [
    'idx',
    'description',
    'classification',
]

# Preview the first three rows.
data.head(3)

In [None]:
import keras
import tensorflow as tf

from transformers import BertTokenizer, TFBertPreTrainedModel, TFBertMainLayer
from transformers.modeling_tf_utils import get_initializer
class TFBertForMultiClassification(TFBertPreTrainedModel):
    """BERT main layer followed by dropout and a softmax classification head.

    The head is a dense layer with ``config.num_labels`` units and a softmax
    activation, so the first element returned by ``call`` holds per-label
    probabilities rather than raw logits.
    """

    def __init__(self, config, *inputs, **kwargs):
        """Create the encoder, dropout and classifier layers from ``config``."""
        super().__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        self.bert = TFBertMainLayer(config, name='bert')
        self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
        self.classifier = tf.keras.layers.Dense(
            config.num_labels,
            activation='softmax',
            kernel_initializer=get_initializer(config.initializer_range),
            name='classifier',
        )

    def call(self, inputs, **kwargs):
        """Run the encoder and classify its pooled output.

        Args:
            inputs: Passed through unchanged to the BERT main layer.
            **kwargs: Forwarded to the BERT main layer; ``training`` (default
                ``False``) also controls whether dropout is active.

        Returns:
            A tuple whose first element is the classifier output, followed by
            any encoder outputs beyond the first two (hidden states and
            attentions, when the encoder returns them).
        """
        bert_outputs = self.bert(inputs, **kwargs)
        is_training = kwargs.get('training', False)

        # Index 1 of the encoder output is the pooled representation.
        pooled = self.dropout(bert_outputs[1], training=is_training)
        class_scores = self.classifier(pooled)

        return (class_scores,) + bert_outputs[2:]
    
# Sequence length setting; presumably the token cap used at training time
# (not consumed within this cell) — TODO confirm.
max_seq_length = 128

# Class names; the list length sets the size of the classifier head.
labels = [
    'azure-web-app-service',
    'azure-storage',
    'azure-devops',
    'azure-virtual-machine',
    'azure-functions',
]

# Load the classifier from the local ./model directory and the
# bert-base-cased tokenizer.
loaded_model = TFBertForMultiClassification.from_pretrained(
    './model',
    num_labels=len(labels),
)
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

print("Model loaded from disk.")

In [None]:
import copy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
import shap
import scipy as sp
# from datasets import load_dataset
# import torch

In [None]:
# Fast tokenizer matching the bert-base-cased vocabulary of the classifier.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased", use_fast=True)

# Reuse the fine-tuned TensorFlow classifier that was already loaded from
# ./model as `loaded_model`. AutoModelForCausalLM is a PyTorch
# language-modelling class: it cannot load the TensorFlow weights file
# ./model/tf_model.h5 and has the wrong head for a 5-way classifier.
model = loaded_model

In [None]:
import shap


def predict_proba(texts):
    """Return the classifier's per-label probabilities for a batch of texts.

    SHAP calls this repeatedly with masked variants of the input strings.

    Args:
        texts: Iterable of strings (SHAP passes a NumPy array of str).

    Returns:
        NumPy array with one row per text and one column per entry in
        ``labels`` (the classifier head applies softmax).
    """
    encoded = tokenizer(
        [str(text) for text in texts],
        padding=True,
        truncation=True,
        max_length=max_seq_length,
        return_tensors='tf',
    )
    # The model returns a tuple whose first element is the classifier output.
    probabilities = loaded_model(dict(encoded), training=False)[0]
    return probabilities.numpy()


# DeepExplainer needs tensor inputs and `data` has no 'Column2' column (the
# columns were renamed to idx/description/classification). For raw text, use
# the generic Explainer with the tokenizer as the text masker instead.
explainer = shap.Explainer(predict_proba, tokenizer, output_names=labels)

In [None]:
# The explainer has to be called with the inputs to explain. Use only a few
# descriptions: explaining a BERT model is slow because every sample requires
# many forward passes.
sample_texts = data['description'].dropna().astype(str).head(3).tolist()

shap_values = explainer(sample_texts)
shap.plots.text(shap_values)