In [1]:
#import shapX as shap
import transformers
import torch
import datasets
import yaml
import numpy as np
import scipy as sp
import pandas as pd
import shap
from tqdm import tqdm
torch.cuda.is_available()

True

# 3. Classification Perspective

## b) Explainable classification models

In [None]:
# Load the notebook configuration. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open for the kernel's lifetime.
with open("config.yaml") as config_file:
    config = yaml.safe_load(config_file)

In [2]:
# Dataset names read from the loaded config; the cells below load one classifier per entry.
dataset_names = config['datasets']
# Directory prefix of the per-dataset model folders ("<index>_<dataset>_model" is appended when loading).
path_models = './tmp2/models/'
# On-disk location of the combined test set (read with datasets.load_from_disk).
path_combined_test_set = './tmp2/datasets/combined_test'
# Hugging Face model id used to load the tokenizer.
MODEL_NAME = 'asafaya/bert-base-arabic'

### Load tokenizer and combined test data set

In [3]:
# tokenizer for MODEL_NAME; the same instance is used for every classifier below
tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)

# load combined test data set from disk
combined_test_dataset = datasets.load_from_disk(path_combined_test_set)
# NOTE(review): `selected_test_data` is rebound to a list of texts in the
# filtering cell further down before it is read anywhere visible in this
# notebook, so this slice looks like a leftover — confirm before removing.
selected_test_data = combined_test_dataset[:20]

### Define prediction function

In [4]:
def predict(x):
    """Return the positive-class (hate) probability for every text in ``x``.

    The function reads the module-level ``tokenizer`` and whichever ``model``
    is bound at module level when it is called, so rebinding ``model`` switches
    the classifier being queried.

    Parameters
    ----------
    x : iterable of str
        Texts to score, one forward pass per text.

    Returns
    -------
    numpy.ndarray
        1-D array with one probability per input text, in input order.
        An empty input yields an empty array.
    """
    val = []
    # Inference only: no autograd graph is needed for the forward passes.
    with torch.no_grad():
        for record in x:
            # Same truncation settings as the test-set prediction loop, so
            # over-long texts are cut to 512 tokens instead of being passed whole.
            inputs = tokenizer(record, return_tensors="pt", truncation=True, max_length=512)
            # No labels are passed: only the logits are used, so computing a
            # loss against a dummy label would be wasted work.
            logits = model(**inputs).logits
            # softmax the logits
            softmaxed = torch.softmax(logits, dim=1).cpu().numpy()
            # get the probability for the positive class (hate)
            val.append(softmaxed[0][1])
    return np.array(val)

### Prediction for combined test data set by each classifier

In [5]:
# One row per test document: the gold label plus, after the loop, one 0/1
# prediction column per classifier (column name = dataset name).
df = pd.DataFrame(combined_test_dataset['label'], columns=['Labels'])
for i, dataset_name in enumerate(tqdm(dataset_names)):
    # load the model stored for this dataset
    path_to_model = '{}{}_{}_model'.format(path_models, i, dataset_name)
    model = transformers.AutoModelForSequenceClassification.from_pretrained(path_to_model)

    predictions = []
    # Inference only: skip autograd bookkeeping, and do not pass dummy labels
    # (the loss they would produce was never used).
    with torch.no_grad():
        # predict test set
        for record in combined_test_dataset:
            inputs = tokenizer(record['text'], return_tensors="pt", truncation=True, max_length=512)
            # softmax the logits
            probabilities = torch.softmax(model(**inputs).logits, dim=1)
            # predict the positive class (hate) when its probability reaches 0.5
            predictions.append(1 if probabilities[0][1].item() >= 0.5 else 0)

    df[dataset_name] = predictions

100%|██████████| 6/6 [09:49<00:00, 98.26s/it]


### Count, for each document in the combined test set, how many classifiers misclassify it

In [6]:
def calculateError(x, columns=None):
    """Count the prediction columns of a row that disagree with its gold label.

    Parameters
    ----------
    x : mapping-like row (e.g. a pandas Series)
        Must provide ``x['Labels']`` and one entry per name in ``columns``.
    columns : iterable of str, optional
        Names of the prediction columns to compare against ``x['Labels']``.
        Defaults to the module-level ``dataset_names``, which keeps existing
        single-argument calls working unchanged.

    Returns
    -------
    int
        Number of columns whose value differs from ``x['Labels']``.
    """
    if columns is None:
        columns = dataset_names
    return sum(1 for column in columns if x['Labels'] != x[column])
    
# Row-wise count of classifiers whose prediction differs from the gold label.
df['Error'] = df.apply(calculateError, axis=1)
#df.head()

### Filter documents that have n errors

In [7]:
# Keep documents that exactly this many classifiers got wrong.
number_of_error = 2
# Upper bound on documents taken per (gold label, classifier) combination.
max_examples_per_group = 5

selected_test_data = []
for label in range(2):
    # the wrong prediction is the opposite of the gold label
    errorLabel = 0 if label == 1 else 1
    for dataset in dataset_names:
        # documents with this gold label, the requested number of errors,
        # and a wrong prediction from this particular classifier
        mask = (df['Labels'] == label) & (df['Error'] == number_of_error) & (df[dataset] == errorLabel)
        # Slicing takes up to `max_examples_per_group` matches and simply
        # yields fewer when the group is small, so no exception handling is
        # needed and genuine errors are no longer silently swallowed.
        for index in df.index[mask][:max_examples_per_group]:
            selected_test_data.append(combined_test_dataset[int(index)]['text'])

print(len(selected_test_data))

58


### Get explanations for filtered documents

In [8]:
# One SHAP result per classifier, appended in `dataset_names` order.
shap_values_of_models = []
for i, dataset_name in enumerate(dataset_names):
    # `predict` reads the module-level `model`, so rebinding it here selects
    # the classifier that the explainer queries in this iteration.
    model = transformers.AutoModelForSequenceClassification.from_pretrained(
        '{}{}_{}_model'.format(path_models, i, dataset_name)
    )

    explainer = shap.Explainer(predict, masker=tokenizer)

    # explain every selected document with the current classifier
    shap_values_of_models.append(explainer(selected_test_data))

explainers.Partition is still in an alpha state, so use with caution...
Partition explainer: 59it [02:56,  3.00s/it]                        
explainers.Partition is still in an alpha state, so use with caution...
Partition explainer: 59it [02:57,  3.01s/it]                        
explainers.Partition is still in an alpha state, so use with caution...
Partition explainer: 59it [02:55,  2.98s/it]                        
explainers.Partition is still in an alpha state, so use with caution...
Partition explainer: 59it [02:57,  3.01s/it]                        
explainers.Partition is still in an alpha state, so use with caution...
Partition explainer: 59it [02:56,  3.00s/it]                        
explainers.Partition is still in an alpha state, so use with caution...
Partition explainer: 59it [02:57,  3.01s/it]                        


### Print explanations of different classifiers for selected document

In [16]:
# Position (within selected_test_data) of the document to visualise.
selected_item_to_explain = 35
# Other indices previously noted in this cell: 30, 31, 34, 40, 13, 46

# Shared x-axis range across all classifiers so the plots are comparable.
global_min = 1
global_max = 0

for explanation in shap_values_of_models:
    base = explanation.base_values[selected_item_to_explain]
    lowest = base
    highest = base
    # Negative contributions extend the lower bound; all others extend the upper bound.
    for contribution in explanation.values[selected_item_to_explain]:
        if contribution < 0:
            lowest = lowest + contribution
        else:
            highest = highest + contribution
    global_min = min(lowest, global_min)
    global_max = max(highest, global_max)

# Render one text plot per classifier with a small margin around the shared range.
for explanation in shap_values_of_models:
    shap.plots.text(explanation[selected_item_to_explain], xmin=global_min-0.05, xmax=global_max+0.05)