# Text

## Methods
- LIME
- QII



Import the required packages.


'''TODO
1) Implement word deletion over the whole training data set
2) For now this only applies to a binary classifier; it needs to be modified for multi-class
3) Visualization: show the true label of each displayed instance
4) Why apply the influence score on test data rather than on training data?
5) ~~Something's wrong with equal_label method.~~
'''


In [11]:
from __future__ import print_function
import logging
from sklearn.datasets import fetch_20newsgroups

import sklearn
import sklearn.ensemble
import sklearn.metrics
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot

import itertools
import lime
from lime import lime_text
from sklearn.pipeline import make_pipeline
from lime.lime_text import LimeTextExplainer
#matplotlib inline

Load data set and train a classifier (random forest)


In [12]:
# Configure the root logger with the logging module's defaults.
logging.basicConfig()

# Restrict the 20 Newsgroups corpus to two groups, which makes this a binary
# classification task.
categories = ['alt.atheism', 'soc.religion.christian']
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)
# Display names handed to LimeTextExplainer in the explanation loop.
# NOTE(review): presumably ordered to match the target indices of
# `categories` -- confirm.
class_names = ['atheism', 'christian']

# lowercase=False: tokens keep their original casing, so vocabulary lookups
# are case-sensitive.
# NOTE(review): sklearn.feature_extraction.text is not imported explicitly in
# this file; presumably it is loaded as a side effect of another sklearn
# import -- confirm, or add an explicit import.
vectorizer = sklearn.feature_extraction.text.TfidfVectorizer(lowercase=False)
# Fit the vocabulary/IDF weights on the training documents only, then reuse
# the fitted vectorizer for the test documents.
train_vectors = vectorizer.fit_transform(newsgroups_train.data)
test_vectors = vectorizer.transform(newsgroups_test.data)

# Vocabulary mapping of the fitted vectorizer; the influence functions index
# it by token to obtain a feature column.
# NOTE: this name shadows the builtin `dict` for the rest of the notebook.
dict = vectorizer.vocabulary_
rf = sklearn.ensemble.RandomForestClassifier(n_estimators=500)
rf.fit(train_vectors, newsgroups_train.target)
pred = rf.predict(test_vectors)

# Pipeline chaining the vectorizer and the forest, so that c.predict_proba can
# be called on lists of raw strings (as done for word deletion and for LIME).
c = make_pipeline(vectorizer, rf)

Define the IndexedString class and the random sampling method for QII.

In [13]:
class IndexedString(object):
    """String with various indexes.

    The raw string is split into alternating word / separator pieces. For
    every word the class records where it occurs, so that words can be
    looked up by id, located in the raw string, or removed from it.
    """

    def __init__(self, raw_string, split_expression=r'\W+', bow=True):
        """Split and index raw_string.

        Args:
            raw_string: string to index.
            split_expression: regular expression matching the separators
                between words.
            bow: if True, every distinct word gets a single id that covers
                all of its occurrences (bag of words). If False, every
                occurrence gets its own id.
        """
        self.raw = raw_string
        splitter = re.compile(r'(%s)|$' % split_expression)
        # Since Python 3.7 re.split also splits on the empty match of `$`,
        # which adds None (the unmatched group) and '' pieces. Drop them so
        # every remaining piece is a non-empty str on any Python version;
        # joining the pieces still reproduces the raw string.
        self.as_list = [piece for piece in splitter.split(self.raw) if piece]
        self.as_np = np.array(self.as_list)
        non_word = splitter.match
        # Character offset in the raw string at which each piece starts.
        # dtype=int keeps the offsets integral even when there are no pieces.
        self.string_start = np.hstack(
            ([0], np.cumsum([len(x) for x in self.as_list[:-1]], dtype=int)))
        vocab = {}
        self.inverse_vocab = []
        self.positions = []
        self.bow = bow
        # Separator pieces already seen; cached to skip the regex match.
        non_vocab = set()
        for i, word in enumerate(self.as_np):
            if word in non_vocab:
                continue
            if non_word(word):
                non_vocab.add(word)
                continue
            if bow:
                if word not in vocab:
                    vocab[word] = len(vocab)
                    self.inverse_vocab.append(word)
                    self.positions.append([])
                idx_word = vocab[word]
                self.positions[idx_word].append(i)
            else:
                self.inverse_vocab.append(word)
                self.positions.append(i)
        if not bow:
            # An array allows fancy indexing with a list of ids in __get_idxs.
            self.positions = np.array(self.positions)

    def raw_string(self):
        """Returns the original raw string"""
        return self.raw

    def num_words(self):
        """Returns the number of tokens in the vocabulary for this document."""
        return len(self.inverse_vocab)

    def word(self, id_):
        """Returns the word that corresponds to id_ (int)"""
        return self.inverse_vocab[id_]

    def string_position(self, id_):
        """Returns a np array with indices to id_ (int) occurrences"""
        if self.bow:
            return self.string_start[self.positions[id_]]
        else:
            # Wrap the single position in a list so an array is returned.
            return self.string_start[[self.positions[id_]]]

    def inverse_removing(self, words_to_remove):
        """Returns a string after removing the appropriate words.

        If self.bow is false, replaces word with UNKWORDZ instead of removing
        it.

        Args:
            words_to_remove: list of ids (ints) to remove

        Returns:
            original raw string with appropriate words removed.
        """
        mask = np.ones(self.as_np.shape[0], dtype='bool')
        mask[self.__get_idxs(words_to_remove)] = False
        if not self.bow:
            return ''.join([self.as_list[i] if mask[i]
                            else 'UNKWORDZ' for i in range(mask.shape[0])])
        return ''.join([self.as_list[v] for v in mask.nonzero()[0]])

    def __get_idxs(self, words):
        """Returns indexes (into as_list) of the pieces belonging to words."""
        if self.bow:
            return list(itertools.chain.from_iterable(
                [self.positions[z] for z in words]))
        else:
            return self.positions[words]



In [14]:
def random_intervene_point(X, cols, x0):
    """Randomly intervene on a set of columns of x0 using values from X.

    Builds one copy of x0 per row of X and overwrites the chosen column(s)
    of each copy with the values of a randomly chosen row of X (rows are
    drawn without replacement through a random permutation).

    Args:
        X: matrix exposing `.shape` and `.toarray()` (e.g. a scipy sparse
            matrix) that supplies the replacement values.
        cols: a single column index, or a list/array of column indices.
        x0: single-row matrix exposing `.toarray()`; the point intervened on.

    Returns:
        Dense np.ndarray with X.shape[0] rows: copies of x0 whose `cols`
        entries come from randomly permuted rows of X.
    """
    n = X.shape[0]
    order = np.random.permutation(n)
    X_int = np.tile(x0.toarray(), (n, 1))
    # Select the columns first and permute the rows afterwards. Indexing with
    # [order, cols] would pair row and column indices elementwise, which only
    # works when cols is a scalar.
    X_int[:, cols] = X.toarray()[:, cols][order]
    return X_int

def unary_delete_words_abs_prob(dict, cls, x_ind, X, indexed_string):
    """Score every word by how much deleting it changes the prediction.

    For each word of the document, the word is removed from the raw text and
    the text is re-classified; the score is the absolute change of the
    probability in column 0 of predict_proba.

    NOTE: the perturbed raw texts are classified with the module-level
    pipeline `c` (vectorizer + classifier), not with `cls`, because `cls`
    takes vectorized input.

    Args:
        dict: unused; kept so all influence methods share one signature.
        cls: fitted classifier; `cls.predict_proba(x_ind)` gives the
            unperturbed prediction.
        x_ind: vectorized form of the document (a single row).
        X: unused; kept so all influence methods share one signature.
        indexed_string: IndexedString built from the raw document.

    Returns:
        Mapping word -> absolute change of the column-0 probability.
    """
    baseline = cls.predict_proba(x_ind)[0][0]
    num_words = len(indexed_string.inverse_vocab)
    if num_words == 0:
        # Nothing to delete; also avoids predicting on an empty batch.
        return {}
    # Classify all perturbed documents in one batch instead of issuing one
    # predict_proba call per word.
    perturbed_docs = [indexed_string.inverse_removing([i])
                      for i in range(num_words)]
    perturbed = c.predict_proba(perturbed_docs)
    res = {}
    for i in range(num_words):
        res[indexed_string.word(i)] = abs(perturbed[i][0] - baseline)
    return res

def unary_individual_influence_abs_prob(dict, cls, x_ind, X, indexed_string,
                                        iters=1):
    """Influence of each word as the mean absolute probability change.

    For every word of the document that has a feature column, that column of
    x_ind is replaced by values drawn from X (see random_intervene_point) and
    the intervened points are re-classified. The score is the mean absolute
    difference between the original and the intervened probability in
    column 0 of predict_proba. Words without a feature column score 0.

    Args:
        dict: mapping token -> feature column index.
        cls: fitted classifier exposing predict_proba on vectorized input.
        x_ind: vectorized form of the document (a single row).
        X: matrix the replacement feature values are sampled from.
        indexed_string: IndexedString built from the raw document.
        iters: number of random interventions averaged per word (>= 1).

    Returns:
        Mapping word -> influence score.

    Raises:
        ValueError: if iters is smaller than 1.
    """
    if iters < 1:
        raise ValueError('iters must be a positive integer')
    y_pred_proba = cls.predict_proba(x_ind)[:, 0]
    average_local_inf = {}

    for f in range(len(indexed_string.inverse_vocab)):
        vocab = indexed_string.word(f)
        if vocab not in dict:
            # The classifier has no feature for this word.
            average_local_inf[vocab] = 0
            continue
        col = dict[vocab]
        local_influence = np.zeros(y_pred_proba.shape[0])
        for _ in range(iters):
            X_inter = random_intervene_point(X, col, x_ind)
            y_pred_inter_proba = cls.predict_proba(X_inter)
            local_influence = local_influence + abs(
                y_pred_proba - y_pred_inter_proba[:, 0])
        average_local_inf[vocab] = (local_influence / iters).mean()
    return average_local_inf

def unary_individual_influence_equal_prob(dict, cls, x_ind, X, indexed_string,
                                          iters=1):
    """Influence of each word as the share of interventions changing the probability.

    For every word of the document that has a feature column, that column of
    x_ind is replaced by values drawn from X (see random_intervene_point) and
    the intervened points are re-classified. The score is one minus the
    fraction of intervened points whose column-0 probability is exactly equal
    to the original one. Words without a feature column score 0.

    Args:
        dict: mapping token -> feature column index.
        cls: fitted classifier exposing predict_proba on vectorized input.
        x_ind: vectorized form of the document (a single row).
        X: matrix the replacement feature values are sampled from.
        indexed_string: IndexedString built from the raw document.
        iters: number of random interventions averaged per word (>= 1).

    Returns:
        Mapping word -> influence score.

    Raises:
        ValueError: if iters is smaller than 1.
    """
    if iters < 1:
        raise ValueError('iters must be a positive integer')
    y_pred_proba = cls.predict_proba(x_ind)[:, 0]
    average_local_inf = {}

    for f in range(len(indexed_string.inverse_vocab)):
        vocab = indexed_string.word(f)
        if vocab not in dict:
            # The classifier has no feature for this word.
            average_local_inf[vocab] = 0
            continue
        col = dict[vocab]
        local_influence = np.zeros(y_pred_proba.shape[0])
        for _ in range(iters):
            X_inter = random_intervene_point(X, col, x_ind)
            y_pred_inter_proba = cls.predict_proba(X_inter)
            # Exact float equality is intended here: it counts the points
            # whose probability did not change at all.
            local_influence = local_influence + (
                y_pred_proba == y_pred_inter_proba[:, 0]) * 1.
        average_local_inf[vocab] = 1 - (local_influence / iters).mean()
    return average_local_inf

def unary_individual_influence_equal_label(dict, cls, x_ind, X, indexed_string,
                                           iters=1):
    """Influence of each word as the share of interventions flipping the label.

    For every word of the document that has a feature column, that column of
    x_ind is replaced by values drawn from X (see random_intervene_point) and
    the intervened points are re-classified. The score is the fraction of
    intervened points whose column-0 probability lies on the other side of
    0.5 than the original one. Words without a feature column score 0.

    Args:
        dict: mapping token -> feature column index.
        cls: fitted classifier exposing predict_proba on vectorized input.
        x_ind: vectorized form of the document (a single row).
        X: matrix the replacement feature values are sampled from.
        indexed_string: IndexedString built from the raw document.
        iters: number of random interventions averaged per word (>= 1).

    Returns:
        Mapping word -> influence score.

    Raises:
        ValueError: if iters is smaller than 1.
    """
    if iters < 1:
        raise ValueError('iters must be a positive integer')
    y_pred_proba = cls.predict_proba(x_ind)[:, 0]
    # Side of the 0.5 threshold of the original prediction; loop-invariant,
    # so it is evaluated once. Like the truth test it replaces, this needs
    # x_ind to be a single row.
    originally_above = bool(y_pred_proba > 0.5)
    average_local_inf = {}

    for f in range(len(indexed_string.inverse_vocab)):
        vocab = indexed_string.word(f)
        if vocab not in dict:
            # The classifier has no feature for this word.
            average_local_inf[vocab] = 0
            continue
        col = dict[vocab]
        local_influence = np.zeros(y_pred_proba.shape[0])
        for _ in range(iters):
            X_inter = random_intervene_point(X, col, x_ind)
            y_pred_inter_proba = cls.predict_proba(X_inter)
            if originally_above:
                flipped = y_pred_inter_proba[:, 0] <= 0.5
            else:
                flipped = y_pred_inter_proba[:, 0] > 0.5
            local_influence = local_influence + flipped * 1.
        average_local_inf[vocab] = (local_influence / iters).mean()
    return average_local_inf

In [15]:
# Names of the influence methods, in the order they are reported.
influence_name_list = ['delete words', 'influence_abs_prob', 'influence_equal_prob', 'influence_equal_label']
# Dispatch table from method name to the function computing it; all four
# functions are called with the same five positional arguments below.
influence_methods = {'delete words': unary_delete_words_abs_prob,
             'influence_abs_prob': unary_individual_influence_abs_prob,
             'influence_equal_prob': unary_individual_influence_equal_prob,
             'influence_equal_label': unary_individual_influence_equal_label}

# Explain every fifth test document among the first 30 (indices 0, 5, ..., 25).
for idx in range(0, 30, 5):
    test_instance = newsgroups_test.data[idx]
    x_individual = test_vectors[idx]
    indexed_string = IndexedString(test_instance, bow = True, split_expression=r'\W+')
    print (test_instance)
    for method in influence_name_list:
        # The test matrix is passed as X, i.e. it is the pool the replacement
        # feature values are sampled from.
        unary_individual_inf = influence_methods[method](dict, rf, x_individual, test_vectors, indexed_string)
        # Rank words by descending score and report the top five.
        t = sorted(unary_individual_inf.items(), key = lambda x: x[1], reverse=True)
        print ('Method ' + method)
        for item in t[:5]:
            print (item)
        print ()
    print ('Method LIME')
    # LIME explains the pipeline `c`, which takes raw text, so it is given
    # the raw document rather than its vectorized form.
    explainer = LimeTextExplainer(class_names=class_names)
    exp = explainer.explain_instance(newsgroups_test.data[idx], c.predict_proba, num_features=5)
    b = exp.as_list()
    for _ in b:
        print (_)
    print (); print ()

From: crackle!dabbott@munnari.oz.au (NAME)
Subject: "Why I am not Bertrand Russell" (2nd request)
Reply-To: dabbott@augean.eleceng.adelaide.edu.au (Derek Abbott)
Organization: Electrical & Electronic Eng., University of Adelaide
Lines: 4

Could the guy who wrote the article "Why I am not Bertrand Russell"
resend me a copy?

Sorry, I accidently deleted my copy and forgot your name.

Method delete words
(u'article', 0.068000000000000033)
(u'au', 0.044000000000000011)
(u'University', 0.021999999999999964)
(u'guy', 0.016000000000000014)
(u'wrote', 0.012000000000000011)

Method influence_abs_prob
(u'au', 0.044811715481171563)
(u'article', 0.043015341701534197)
(u'University', 0.019690376569037622)
(u'guy', 0.01379079497907951)
(u'wrote', 0.012295676429567654)

Method influence_equal_prob
(u'NAME', 0.99721059972105996)
(u'Sorry', 0.99581589958159)
(u'Why', 0.99581589958159)
(u'Eng', 0.99442119944211993)
(u'Could', 0.99442119944211993)

Method influence_equal_label
(u'and', 0.0)
(u'From', 0.0)

Method influence_equal_label
(u'among', 0.0)
(u'because', 0.0)
(u'From', 0.0)
(u'Russell', 0.0)
(u'be', 0.0)

Method LIME
(u'article', -0.067300005340944422)
(u'atheists', -0.059126166581495776)
(u'absurd', -0.029951704528000989)
(u'than', -0.029049745044082729)
(u'edu', -0.028055147165195506)


From: christen@astro.ocis.temple.edu (Carl Christensen)
Subject: Re: Books
Organization: Temple University
Lines: 4
Nntp-Posting-Host: astro.ocis.temple.edu
X-Newsreader: TIN [version 1.1 PL8]

[stuff about hard to find atheist books deleted]

Perhaps the infiltration of fundies onto school boards, city councils,
etc. has something to do with why you can't find alternative media?

Method delete words
(u'Posting', 0.14600000000000002)
(u'Host', 0.10600000000000009)
(u'Nntp', 0.06800000000000006)
(u'Re', 0.038000000000000034)
(u'TIN', 0.026000000000000023)

Method influence_abs_prob
(u'Posting', 0.12037935843793585)
(u'Host', 0.087054393305439404)
(u'Nntp', 0.06562343096234316)
(u'TIN', 0.0269400