## Explaining SMS Spam Classification

In [None]:
from __future__ import print_function

import lime
import numpy as np
import sklearn
import sklearn.feature_extraction.text
import sklearn.metrics
import sklearn.model_selection
import sklearn.pipeline
import sklearn.svm

### Load data

In [None]:
# Parse the tab-separated SMS corpus: each line is "<label>\t<message>".
# X collects the message texts and y the matching integer labels, where the
# integer is the label's index in class_names. A text is only appended
# together with its label, so X[i] and y[i] always describe the same line.
class_names = ['ham', 'spam']
label_ids = {name: label_id for label_id, name in enumerate(class_names)}

X = []
y = []
# NOTE(review): the file is opened with the platform default encoding --
# confirm that this matches the encoding of the dataset file.
with open('data/SMSSpamCollection') as f:
    for line in f:
        # Split on the first tab only so a message that itself contains a
        # tab is kept whole; drop the line terminator from the message.
        label, sep, text = line.rstrip('\r\n').partition('\t')
        if not sep or label not in label_ids:
            # Malformed line or unknown label: report it and skip the whole
            # line so that X and y stay the same length.
            print('ERROR: label not found')
            continue
        X.append(text)
        y.append(label_ids[label])

### Split train and test set

In [None]:
# Hold out 20% of the messages for evaluation. The seed is fixed so that the
# split -- and therefore which message a given test-set index (e.g. the `idx`
# used in the explanation cell) refers to -- is the same on every run.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, test_size=0.2, random_state=42)

### Choose learner

In [None]:
# Linear-kernel support vector classifier. probability=True is set because
# later cells call predict_proba on the fitted pipeline.
svm = sklearn.svm.SVC(
    C=1.0,
    kernel='linear',
    probability=True,
)

# `clf` is the learner plugged into the pipeline; point it at another
# estimator here to try a different model.
clf = svm

### Build pipeline

In [None]:
# Raw text -> token counts (original casing kept) -> TF-IDF weights -> learner.
classifier = sklearn.pipeline.Pipeline(steps=[
    (
        'vect',
        sklearn.feature_extraction.text.CountVectorizer(lowercase=False),
    ),
    (
        'tfidf',
        sklearn.feature_extraction.text.TfidfTransformer(),
    ),
    (
        'clf',
        clf,
    ),
])

### Fit model

In [None]:
# Train every pipeline stage on the training messages and their labels.
classifier.fit(X=X_train, y=y_train)

### Show classification report

In [None]:
# Predict labels for the held-out messages and print per-class metrics.
# Other quick checks that can be run here:
#   sklearn.metrics.f1_score(y_test, pred, average='binary')
#   classifier.predict_proba([X_test[0]])
pred = classifier.predict(X_test)
report = sklearn.metrics.classification_report(
    y_test,
    pred,
    target_names=class_names,
)
print(report)

### Build LIME explainer

In [None]:
# Create the LIME text explainer, passing it the class names defined when the
# data was loaded.
from lime.lime_text import LimeTextExplainer
explainer = LimeTextExplainer(class_names=class_names)

### Explain single instance

In [None]:
# Pick one held-out message, ask LIME for an explanation of the pipeline's
# prediction on it (limited to 10 features), and print the model's spam
# probability next to the true label.
idx = 83
document = X_test[idx]
exp = explainer.explain_instance(
    document,
    classifier.predict_proba,
    num_features=10,
)
# Row 0 is the single document passed in; column 1 is the 'spam' class.
spam_probability = classifier.predict_proba([document])[0, 1]
true_class = class_names[y_test[idx]]
print('Document id: %d' % idx)
print('Probability(spam) =', spam_probability)
print('True class: %s' % true_class)

### Show explanation as a list

In [None]:
# Bare expression: the notebook displays the returned list as the cell output.
exp.as_list()

### Show explanation as a pyplot figure

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Build a matplotlib figure from the explanation and keep a handle to it.
fig = exp.as_pyplot_figure()

### Show HTML explanation in the IPython notebook

In [None]:
# Display the explanation inside the notebook, with the text option enabled.
exp.show_in_notebook(text=True)

### Save HTML explanation to file

In [None]:
# Write the explanation to an HTML file. The target directory is created
# first if it is missing, so the cell also works on a fresh checkout where
# 'tmp/' does not exist yet.
import os

output_path = 'tmp/oi.html'
output_dir = os.path.dirname(output_path)
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)
exp.save_to_file(output_path)