## SVM with religion articles

Data and model are taken from https://github.com/marcotcr/lime-experiments by Marco T. Ribeiro et al., authors of the article *"Why Should I Trust You?": Explaining the Predictions of Any Classifier*.

---

In [1]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.datasets import fetch_20newsgroups
from sklearn import metrics
from sklearn import svm

from lime import lime_text

import os
import pickle
import sys
from functools import reduce
from pathlib import Path
import numpy as np
import pandas as pd

from IPython.display import clear_output

sys.path.append("../../python/tme/")


from tme.src.helper import highlight_summary
from tme.src import tme

### Get model

In [2]:
def load_model(random_state=0):
    """Build the (untrained) SVM classifier used throughout this notebook.

    Despite the name, nothing is read from disk: a fresh RBF-kernel
    ``svm.SVC`` with ``C=10`` and ``gamma=0.001`` is constructed.
    ``probability=True`` is needed because the prediction function handed
    to the explainers calls ``predict_proba``.

    Parameters
    ----------
    random_state : int, RandomState instance or None, default=0
        Seed forwarded to ``svm.SVC``. With ``probability=True`` the
        probability calibration involves random shuffling, so a fixed seed
        makes the predicted probabilities (and the explanations derived from
        them) repeatable across kernel restarts. Pass ``None`` to get the
        previous unseeded behaviour.

    Returns
    -------
    sklearn.svm.SVC
        The unfitted classifier.
    """
    return svm.SVC(
        probability=True,
        kernel='rbf',
        C=10,
        gamma=0.001,
        random_state=random_state,
    )


model = load_model()

### Train model with data from the 20 Newsgroups dataset

In [3]:
# Newsgroup categories to fetch, and the human-readable names shown for them
# in the explanations.
# NOTE(review): presumably class_names[i] labels target i (0 = atheism,
# 1 = christianity) — verify against the target_names of the fetched data.
cats = ['alt.atheism', 'soc.religion.christian']
class_names = ['Atheism', 'Christianity']

In [4]:
# Fetch the train and test subsets of 20 Newsgroups, restricted to the two
# categories listed in `cats`.
train = fetch_20newsgroups(subset='train', categories=cats)
test = fetch_20newsgroups(subset='test', categories=cats)

In [None]:
# Pull the raw documents and their integer labels out of the fetched datasets.
# `train` and `test` are intentionally kept alive: deleting them here made the
# cell fail with a NameError whenever it was executed a second time.
train_x = train.data
train_y = train.target
test_x = test.data
test_y = test.target

# Case-sensitive, binary (presence/absence) bag-of-words features; the
# vocabulary is learned from the training documents only.
vectorizer = CountVectorizer(lowercase=False, binary=True)
train_vector = vectorizer.fit_transform(train_x)

In [None]:
# Train the SVM on the vectorised training documents.
model.fit(train_vector, train_y)

<br> 

----

### Load LIME Religion dataset

In [None]:
datapath = "../../data/lime-religion/"  # dataset root; the "atheism" / "christianity" subfolder is appended when loading

In [None]:
def load_files(path_to_files, encoding=None):
    """Read every ``.txt`` file directly inside a directory into a list of strings.

    Only regular files whose name ends in ``.txt`` are read; sub-directories
    are not descended into. Newlines inside each file are replaced by single
    spaces so that every document becomes one line of text.

    Files are returned in sorted path order, so an index such as
    ``data[150]`` refers to the same document on every run and on every
    machine (``os.scandir`` itself yields entries in arbitrary order).

    Parameters
    ----------
    path_to_files : str or os.PathLike
        Directory to scan.
    encoding : str or None, default=None
        Encoding passed to ``Path.read_text``. ``None`` keeps the platform
        default, which is what this function always used before.

    Returns
    -------
    list of str
        One entry per ``.txt`` file; an empty list when there are none.

    Raises
    ------
    OSError
        If the directory or one of the files cannot be read.
    """
    # The context manager closes the directory handle even if an entry fails.
    with os.scandir(path_to_files) as entries:
        txt_paths = sorted(
            entry.path
            for entry in entries
            if entry.is_file() and entry.name.endswith('.txt')
        )

    return [
        Path(txt_path).read_text(encoding=encoding).replace('\n', ' ')
        for txt_path in txt_paths
    ]

In [None]:
# Load the documents of each class from its subfolder under `datapath`.
data_christianity = load_files(os.path.join(datapath, "christianity"))
data_atheism = load_files(os.path.join(datapath, "atheism"))

---

In [None]:
# Predict on the religion dataset: all atheism documents first, then all
# christianity documents (the same order is used to build y_true).
y_pred = model.predict(vectorizer.transform(data_atheism + data_christianity))

In [None]:
# Ground-truth labels in the same order as the prediction input:
# 0 for every atheism document, 1 for every christianity document.
y_true = [0]*len(data_atheism) + [1]*len(data_christianity)

In [None]:
# Fraction of religion-dataset documents classified correctly.
"accuracy: " + str(metrics.accuracy_score(y_true, y_pred))

In [None]:
# NOTE(review): with default arguments the positive class should be label 1
# (christianity here) — confirm against the scikit-learn documentation.
"precision: " + str(metrics.precision_score(y_true, y_pred))

In [None]:
# NOTE(review): with default arguments the positive class should be label 1
# (christianity here) — confirm against the scikit-learn documentation.
"recall: " + str(metrics.recall_score(y_true, y_pred))

In [None]:
# Display one raw document from the 20 Newsgroups test split.
test_x[16]

---

### Explanations

In [None]:
def pred(text):
    """Return the model's class probabilities for a batch of raw documents.

    This is the prediction callback handed to the explainers: it vectorises
    the raw strings with the notebook-level ``vectorizer`` and scores them
    with the notebook-level ``model``.

    Parameters
    ----------
    text : iterable of str
        Raw documents to score.

    Returns
    -------
    The result of ``model.predict_proba`` on the vectorised documents.
    """
    features = vectorizer.transform(text)
    probabilities = model.predict_proba(features)
    return probabilities

In [None]:
# TME explainer built around the `pred` callback.
# NOTE(review): the meaning of fm=1411 is not visible in this notebook —
# document it or move it to a named constant.
tm = tme.TextModelsExplainer(pred, classnames=class_names, fm=1411)

In [None]:
# LIME text explainer that labels the classes with `class_names`.
# NOTE(review): no seed is passed here, so explanations may differ between
# runs — confirm whether that is acceptable.
ex = lime_text.LimeTextExplainer(class_names=class_names)

In [None]:
# Load the list of previously kept instances; later cells append to it.
# SECURITY: pickle.load can execute arbitrary code — only load files that were
# produced by this project.
with open('../../data/experiments/saved-instances.pickle', 'rb') as f:
    saved_instances = pickle.load(f)

In [None]:
def explanation_sample(sample, i, category, num_features=10, fm=1411):
    """Show the LIME and TME explanations for one document and ask whether to keep it.

    The LIME explanation is rendered in the notebook, followed by the
    highlighted TME summary together with the model's predicted class. The
    user is then prompted with "Keep?"; on a positive answer the tuple
    ``(category, i, sample)`` is appended to the notebook-level
    ``saved_instances`` list.

    Parameters
    ----------
    sample : str
        Raw document to explain.
    i : int
        Index of the document in its source collection; used as the summary
        name and stored with the saved instance.
    category : str
        Tag identifying the source collection; stored with the saved instance.
    num_features : int, default=10
        Number of features requested from the LIME explanation.
    fm : int, default=1411
        Forwarded to ``tm.explanation_summaries``.
        NOTE(review): the meaning of this value is not visible here — confirm.

    Returns
    -------
    None

    Notes
    -----
    An empty answer, "n" or "no" (any case, surrounding whitespace ignored)
    means "do not keep"; every other answer keeps the sample. Previously any
    non-empty answer, including "n", kept it.
    """
    exl = ex.explain_instance(sample, pred, num_features=num_features)
    ext = tm.explanation_summaries(
        [sample],
        precomputed_explanations=[exl.as_list()],
        fm=fm,
    )[0]

    # Predicted class index of this single document.
    dec = model.predict(vectorizer.transform([sample]))[0]

    exl.show_in_notebook()
    highlight_summary(ext, summary_name=str(i), decision=dec, class_names=class_names)

    # The prompt is passed positionally: the builtin input() accepts no
    # keyword arguments, so `input(prompt=...)` fails outside a kernel that
    # substitutes its own implementation.
    response = input("Keep?")
    if response.strip().lower() not in ("", "n", "no"):
        saved_instances.append((category, i, sample))

In [None]:
# LIME explanation of a single atheism document, asking for up to 100 features.
expla = ex.explain_instance(data_atheism[150], pred, num_features=100)

In [None]:
# Persist the explanation object so it can be reloaded in the next cell.
with open('../../data/misc/explanation-test.pickle', 'wb') as f:
    pickle.dump(expla, f)

In [None]:
# Reload the explanation written by the previous cell.
# `a` starts as an empty-string placeholder and is replaced by the unpickled object.
# SECURITY: pickle.load can execute arbitrary code — only load trusted files.
a = ''
with open('../../data/misc/explanation-test.pickle', 'rb') as f:
    a = pickle.load(f)

In [None]:
# Render the reloaded explanation to check that it survived the pickle round trip.
a.show_in_notebook()

In [None]:
# Interactive selection: step through the three collections in parallel at
# indices 1, 11, 21, ... and show the explanations for each document until at
# least 15 instances have been kept.
i = 1

# (collection, category tag stored with a kept instance)
corpora = [(data_atheism, 'a'), (data_christianity, 'c'), (test_x, '20n')]

# The second condition stops the loop once `i` is past the end of every
# collection; without it the loop would spin forever whenever fewer than 15
# instances had been kept by then.
while len(saved_instances) < 15 and any(i < len(corpus) for corpus, _ in corpora):

    for corpus, tag in corpora:
        if i < len(corpus):
            explanation_sample(corpus[i], i, tag)

        # Wipe the rendered explanation before showing the next one.
        clear_output()

    i += 10

#### Example

In [None]:
# Predicted class index for the training document at index 1.
d = model.predict(vectorizer.transform([train_x[1]]))[0]

In [None]:
# TME explanation summaries for the same training document (one-element batch).
e = tm.explanation_summaries([train_x[1]])

In [None]:
# Highlight the summary using the model's actual prediction `d` for this
# document rather than a hard-coded class index, matching what
# explanation_sample does.
highlight_summary(e[0], decision=d, class_names=class_names)