In [None]:
import sys
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

sys.path.append('../..')
from slime import lime_tabular

In [None]:
# Load scikit-learn's breast cancer dataset and hold out 20% of the rows;
# the held-out rows are the instances explained in the cells below.
breast_cancer = load_breast_cancer()
train, test, labels_train, labels_test = train_test_split(
    breast_cancer.data,
    breast_cancer.target,
    train_size=0.80,
    # Fixed split: the stability cells pick rows from `test` by a seeded
    # index, which only identifies the same row across kernel restarts if
    # the split itself is reproducible.
    random_state=0,
)

# Black-box classifier whose predictions are being explained. Seeded so the
# model (and therefore the explanations' target) is the same on every run.
rf = RandomForestClassifier(random_state=0)
rf.fit(train, labels_train)

# The explainer's first positional argument is the training matrix. The model
# is never handed to the constructor; it enters only through the
# `rf.predict_proba` callable passed to each explain call.
explainer = lime_tabular.LimeTabularExplainer(
    train,
    feature_names=breast_cancer.feature_names,
    class_names=breast_cancer.target_names,
    discretize_continuous=False,
    feature_selection="lasso_path",
    sample_around_instance=True,
)

### LIME results in different explanations

In [None]:
# Explain the first held-out row with `explain_instance`, requesting
# 5 features and 1000 samples, then render the explanation inline.
exp = explainer.explain_instance(
    test[0],
    rf.predict_proba,
    num_features=5,
    num_samples=1000,
)
exp.show_in_notebook(show_table=True)

In [None]:
# Same row and same settings as the `explain_instance` call above, but routed
# through `if_explain_instance`.
# NOTE(review): how this method differs from `explain_instance` is not visible
# from this notebook -- confirm against the slime package.
exp = explainer.if_explain_instance(
    test[0],
    rf.predict_proba,
    num_features=5,
    num_samples=1000,
)
exp.show_in_notebook(show_table=True)

### S-LIME provides stable explanations

In [None]:
# Explain the first held-out row with the `slime` method. On top of the LIME
# settings (5 features, 1000 samples) it is given n_max=10000 and alpha=0.05.
# NOTE(review): presumably n_max caps the number of samples and alpha is a
# significance level -- confirm against the slime package.
exp = explainer.slime(
    test[0],
    rf.predict_proba,
    num_features=5,
    num_samples=1000,
    n_max=10000,
    alpha=0.05,
)
exp.show_in_notebook(show_table=True)

### Jaccard index for LIME

In [None]:
# Stability of `explain_instance`: for 20 seeded draws of a test row, explain
# the row 20 times and measure how well the leading-k feature lists of the
# repeated explanations agree, for k = 1..5.
jaccard_lime = []

for seed in range(20):
    # Seed NumPy's global generator, then draw the row index from it.
    np.random.seed(seed + 1)
    i = np.random.randint(0, test.shape[0])

    # Feature labels of each repeated explanation, in as_list() order.
    rankings = []
    for _ in range(20):
        exp = explainer.explain_instance(
            test[i], rf.predict_proba, num_features=5, num_samples=1000
        )
        rankings.append([entry[0] for entry in exp.as_list()])

    # Mean Jaccard index (|A & B| / |A | B|) over every unordered pair of
    # repeats, computed separately for each prefix length.
    per_depth = []
    for depth in range(1, 6):
        overlaps = []
        for first in range(20 - 1):
            for second in range(first + 1, 20):
                left = set(rankings[first][:depth])
                right = set(rankings[second][:depth])
                overlaps.append(len(left & right) / len(left | right))
        per_depth.append(np.mean(overlaps))

    jaccard_lime.append(per_depth)

# Average over the 20 drawn rows: one value per prefix length.
print(np.mean(jaccard_lime, axis=0))

In [None]:
# Same stability measurement as for `explain_instance`, but every explanation
# comes from `if_explain_instance`. The result reuses the name `jaccard_lime`.
# NOTE(review): how `if_explain_instance` differs from `explain_instance` is
# not visible from this notebook -- confirm against the slime package.
jaccard_lime = []

for seed in range(20):
    # Seed NumPy's global generator, then draw the row index from it.
    np.random.seed(seed + 1)
    i = np.random.randint(0, test.shape[0])

    # Collect the feature labels of 20 repeated explanations of the same row.
    runs = []
    for _ in range(20):
        exp = explainer.if_explain_instance(
            test[i], rf.predict_proba, num_features=5, num_samples=1000
        )
        runs.append([item[0] for item in exp.as_list()])

    # For each prefix length 1..5, average the Jaccard index of the leading
    # features over all unordered pairs of runs.
    scores = []
    for top in range(1, 6):
        pair_scores = [
            len(set(runs[p][:top]) & set(runs[q][:top]))
            / len(set(runs[p][:top]) | set(runs[q][:top]))
            for p in range(20 - 1)
            for q in range(p + 1, 20)
        ]
        scores.append(np.mean(pair_scores))

    jaccard_lime.append(scores)

# Average over the 20 drawn rows: one value per prefix length.
print(np.mean(jaccard_lime, axis=0))

### Jaccard index for S-LIME

In [None]:
# Build the explainer from the training matrix, using the dataset's feature
# and class names and the same options as the rest of the notebook.
explainer = lime_tabular.LimeTabularExplainer(
    train,
    feature_names=breast_cancer.feature_names,
    class_names=breast_cancer.target_names,
    discretize_continuous=False,
    feature_selection="lasso_path",
    sample_around_instance=True,
)

# Stability of `slime`: for 20 seeded draws of a test row, explain the row
# 20 times and compare the leading-k feature lists between repeats, k = 1..5.
jaccard_slime = []

for seed in range(20):
    # Seed NumPy's global generator, then draw the row index from it.
    np.random.seed(seed + 1)
    i = np.random.randint(0, test.shape[0])

    # Feature labels of each repeated explanation, in as_list() order.
    selections = []
    for _ in range(20):
        exp = explainer.slime(
            test[i],
            rf.predict_proba,
            num_features=5,
            num_samples=1000,
            n_max=20000,
            alpha=0.05,
        )
        selections.append([pair[0] for pair in exp.as_list()])

    # Mean pairwise Jaccard index (intersection size over union size) for
    # each prefix length.
    by_prefix = []
    for size in range(1, 6):
        ratios = []
        for lo in range(20 - 1):
            head_lo = set(selections[lo][:size])
            for hi in range(lo + 1, 20):
                head_hi = set(selections[hi][:size])
                shared = head_lo.intersection(head_hi)
                combined = head_lo.union(head_hi)
                ratios.append(len(shared) / float(len(combined)))
        by_prefix.append(np.mean(ratios))

    jaccard_slime.append(by_prefix)

# Average over the 20 drawn rows: one value per prefix length.
print(np.mean(jaccard_slime, axis=0))