# LIME

## Notes
- This work comes from [this link](https://towardsdatascience.com/interpretable-machine-learning-for-image-classification-with-lime-ea947e82ca13).

## Imports

In [None]:
import numpy as np
import skimage
import matplotlib.pyplot as plt
from skimage import io, transform, segmentation
from sklearn.linear_model import LinearRegression
import sklearn.metrics
import tensorflow as tf
from keras.applications.imagenet_utils import decode_predictions
import copy
from lime import lime_image

In [None]:
# Seed NumPy's global random generator so the random perturbations drawn
# further down are repeatable; the same seed is reused for the lime library.
rnd_seed = 222
np.random.seed(seed=rnd_seed)

## Data

In [None]:
# Load the input picture and resize it to 299x299, the size passed to the
# InceptionV3 model below.
raw_image = io.imread('images/input/Maula.jpg')
#raw_image = io.imread('images/cat-and-dog.jpeg')
resized_image = transform.resize(raw_image, (299, 299))

# Inception pre-processing: shift and scale the resized values as (x - 0.5) * 2.
Xi = (resized_image - 0.5) * 2
# Display the image with the pre-processing undone (Xi/2 + 0.5).
io.imshow(Xi/2+0.5)

In [None]:
# Inspect the dimensions of the pre-processed image array.
Xi.shape

## Model

In [None]:
# The InceptionV3 model is used to make the prediction that will be explained.
# The output of this model is a vector of length 1000 (one score per class).
inceptionV3_model = tf.keras.applications.inception_v3.InceptionV3() #Load pretrained model

In [None]:
# Classify the image; a leading batch axis of size one is added for the model.
preds = inceptionV3_model.predict(np.expand_dims(Xi, axis=0))
# Indices of the 5 highest-scoring classes, best first.
top_pred_classes = np.argsort(preds[0])[::-1][:5]

In [None]:
# Translate the raw ImageNet scores into labelled predictions.
# top=5 is the value the original call relied on implicitly; it is spelled out here.
decode_predictions(preds, top=5)

## Step 1: Generate random perturbations for input image
For the case of images, LIME generates perturbations by turning on and off some of the super-pixels in the image. The following script uses the quick-shift segmentation algorithm to compute the super-pixels in the image. In addition, it generates an array of 150 perturbations where each perturbation is a vector with zeros and ones that represent whether the super-pixel is on or off.

The output of this process is a data set of length 150 that will be used to fit an "interpretable" model.

In [None]:
# Segment the image into superpixels with quick-shift. Every entry of
# `superpixels` is the label of the segment that pixel was assigned to.
# NOTE(review): the segmentation runs on the Inception-scaled Xi, not on the
# display-range image Xi/2+0.5 — confirm this is intended.
superpixels = segmentation.quickshift(Xi, kernel_size=4, max_dist=200, ratio=0.2)
# Count the distinct segment labels.
num_superpixels = len(np.unique(superpixels))
boundary_overlay = segmentation.mark_boundaries(Xi/2+0.5, superpixels)
io.imshow(boundary_overlay)
print('Number of superpixels: ', num_superpixels)

In [None]:
# Save the segmentation overlay to disk, without axes, in a figure sized to
# 299 pixels per side at the figure's own DPI.
fig = plt.gcf()
DPI = fig.get_dpi()
side_inches = 299.0 / float(DPI)
fig.set_size_inches(side_inches, side_inches)
plt.imshow(segmentation.mark_boundaries(Xi/2+0.5, superpixels))
plt.axis('off')
fig.savefig('images/output/maula_segmentation.png')

In [None]:
# Build the perturbation data set: one row per perturbation, one column per
# superpixel, each entry an independent 0/1 draw with probability 0.5.
num_perturb = 150
perturbations = np.random.binomial(n=1, p=0.5, size=(num_perturb, num_superpixels))

In [None]:
def perturb_image(img, perturbation, segments):
    """Return a copy of ``img`` in which inactive superpixels are zeroed out.

    Args:
        img: Image array whose leading axes match ``segments`` and which has
            one trailing (channel) axis; the mask is broadcast over that axis.
        perturbation: 1-D sequence of 0/1 flags, one per superpixel. Entry
            ``i`` equal to 1 keeps the pixels labelled ``i`` in ``segments``.
        segments: Array mapping every pixel to the label of the superpixel it
            belongs to (299 x 299 in this notebook).

    Returns:
        A new array shaped like ``img``. Pixels of active superpixels keep
        their value; all other pixels are multiplied by zero. ``img`` itself
        is left untouched.
    """
    # np.asarray lets plain lists work too; `list == 1` would silently be False.
    active_pixels = np.where(np.asarray(perturbation) == 1)[0]
    # A single vectorised membership test replaces the per-superpixel loop.
    # The cast keeps the mask (and therefore the result) floating point.
    mask = np.isin(segments, active_pixels).astype(float)
    # The multiplication allocates a fresh array, so no explicit copy is needed.
    return img * mask[..., np.newaxis]

In [None]:
# Pick one perturbation as an example, print its on/off vector and show the
# image it produces.
pert = perturbations[1]
print(pert)
occluded_example = perturb_image(Xi/2+0.5, pert, superpixels)
io.imshow(occluded_example)

In [None]:
# Save the example perturbed image to disk, without axes, in a figure sized
# to 299 pixels per side at the figure's own DPI.
fig = plt.gcf()
DPI = fig.get_dpi()
side_inches = 299.0 / float(DPI)
fig.set_size_inches(side_inches, side_inches)
plt.imshow(perturb_image(Xi/2+0.5, pert, superpixels))
plt.axis('off')
fig.savefig('images/output/maula_occlusion.png')

## Step 2: Predict class for perturbations
The following script uses the inceptionV3_model to predict the class of each of the perturbed images. The shape of the predictions is (150,1000) which means that for each of the 150 images, we get the probability of belonging to the 1,000 classes in InceptionV3. From these 1,000 classes we will use only the Labrador class in further steps since it is the prediction we want to explain.

In [None]:
# Run the model on every perturbed image, one image per call, and collect
# the outputs along a new leading axis.
predictions = []
for pert in perturbations:
    perturbed_img = perturb_image(img=Xi, perturbation=pert, segments=superpixels)
    # The model expects a batch, so add a leading axis of size one.
    pred = inceptionV3_model.predict(np.expand_dims(perturbed_img, axis=0))
    predictions += [pred]

predictions = np.stack(predictions, axis=0)

In [None]:
# Drop the per-call batch axis, decode the scores and show the labels of the
# first two perturbed images.
per_image_scores = np.squeeze(predictions, axis=1)
decode_predictions(per_image_scores)[:2]

## Step 3: Compute weights (importance) for the perturbations
We use a distance metric to evaluate how far each perturbation is from the original image. The original image is just a perturbation with all the super-pixels active (all elements equal to one). Given that the perturbations are multidimensional vectors, the cosine distance is a metric that can be used for this purpose. After the cosine distance has been computed, a kernel function is used to translate that distance into a value between zero and one (a weight). At the end of this process we have a weight (importance) for each perturbation in the dataset.

In [None]:
# Measure how far each perturbation is from the original image, which in this
# representation is the vector with every superpixel switched on.
original_image = np.ones((1, num_superpixels))
print(original_image.shape)
distances = sklearn.metrics.pairwise_distances(
    perturbations, original_image, metric='cosine'
).ravel()
print(distances.shape)

# Transform the distances into values between 0 and 1 (weights) using a kernel function.
kernel_width = 0.25
weights = np.sqrt(np.exp(-(distances**2) / kernel_width**2))
print(weights.shape)

## Step 4: Fit an explainable linear model using the perturbations, predictions and weights
We fit a weighted linear model using the information obtained in the previous steps. We get a coefficient for each super-pixel in the image that represents how strong the effect of that super-pixel is on the prediction of Labrador.

In [None]:
# Explain the top-ranked class (the Labrador class for this image).
class_to_explain = top_pred_classes[0]

# Weighted linear surrogate: the on/off patterns are the inputs and the
# predicted probability of the explained class is the target.
simpler_model = LinearRegression()
target = predictions[:, :, class_to_explain]
simpler_model.fit(X=perturbations, y=target, sample_weight=weights)
# One coefficient per superpixel.
coeff = simpler_model.coef_[0]

# The superpixels with the largest coefficients are the top features.
num_top_features = 10
top_features = np.argsort(coeff)[-num_top_features:]

# Build an on/off vector that activates only the top superpixels, then show
# the image with just those superpixels visible.
mask = np.zeros(num_superpixels)
mask[top_features] = True
io.imshow(perturb_image(Xi/2+0.5, mask, superpixels))

In [None]:
# Save the explanation image (top superpixels only) to disk, without axes,
# in a figure sized to 299 pixels per side at the figure's own DPI.
fig = plt.gcf()
DPI = fig.get_dpi()
side_inches = 299.0 / float(DPI)
fig.set_size_inches(side_inches, side_inches)
plt.imshow(perturb_image(Xi/2+0.5, mask, superpixels))
plt.axis('off')
fig.savefig('images/output/maula_lime.png')

In [None]:
# Display the on/off vector marking the selected top superpixels.
mask

## Using lime library

In [None]:
# Create the lime library's image explainer, used for comparison with the manual steps above.
explainer = lime_image.LimeImageExplainer()

In [None]:
# Explain the same pre-processed image with the same model, using the same
# number of samples (150) and the same seed as the manual walkthrough.
# Only the single top label is requested.
explanation = explainer.explain_instance(image=Xi,
                                         classifier_fn=inceptionV3_model.predict,  
                                         top_labels=1,
                                         num_samples=150,
                                         random_seed=rnd_seed)

In [None]:
# Fetch the image and mask for the explained top label, requesting the same
# number of features (num_top_features) as the manual explanation.
# NOTE(review): positive_only / hide_rest presumably keep only the positively
# contributing superpixels and hide everything else — confirm against the lime docs.
temp_1, mask_1 = explanation.get_image_and_mask(label=explanation.top_labels[0],
                                                positive_only=True, 
                                                num_features=num_top_features,
                                                hide_rest=True)

In [None]:
# The explanation shown here may differ from the manual one above,
# because of the segmentation algorithm (superpixels) used.
# The returned image is mapped back with /2+0.5 for display, as elsewhere in this notebook.
io.imshow(segmentation.mark_boundaries(temp_1/2+0.5, mask_1))