## SHAP (SHapley Additive exPlanations)
Explaining model output

#### Necessary Python imports

In [7]:
import os
import keras
from keras.preprocessing import image
from skimage.io import imread
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import shap
import sys
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
import random
from keras.applications.resnet50 import preprocess_input
from keras.applications.resnet50 import ResNet50
from keras.layers import Input, Lambda
from keras import Model
import tensorflow as tf
import keras.backend as K
from scipy.ndimage.filters import gaussian_filter
import requests
np.set_printoptions(threshold=sys.maxsize)

In [8]:
def prep_image(url, preprocess, timeout=30):
    """Download an image and return it as a single-image 224x224 RGB batch.

    Args:
        url: HTTP(S) address of the image to fetch.
        preprocess: When truthy, the batch is passed through the ResNet50
            ``preprocess_input`` before being returned; otherwise the raw
            RGB pixel array is returned.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get`` so a stalled server cannot hang the caller.

    Returns:
        A ``(batch, size)`` tuple. ``batch`` has shape ``(1, 224, 224, 3)``;
        ``size`` is ``img.size`` of the downloaded image, i.e. its
        ``(width, height)`` before resizing.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here with the HTTP status instead of later inside PIL, which
    # would otherwise try to decode an error page as an image.
    response.raise_for_status()

    img = Image.open(BytesIO(response.content))
    if img.mode != 'RGB':
        img = img.convert('RGB')

    # Resize to the fixed network input size and add the batch axis.
    x = np.expand_dims(np.array(img.resize((224, 224))), axis=0)
    if preprocess:
        x = preprocess_input(x)
    return x, img.size

In [9]:
def setup_model():
    """Build the ResNet50 feature extractor and run an input tensor through it.

    Returns:
        A ``(keras_model, im1, f1)`` tuple: the ImageNet-weighted ResNet50
        without its top layers and with average pooling, a 224x224x3
        ``Input`` tensor, and the result of applying the model to that input.
    """
    backbone = ResNet50(
        input_shape=[224, 224, 3],
        weights='imagenet',
        include_top=False,
        pooling='avg',
    )
    image_input = Input([224, 224, 3])
    features = backbone(image_input)
    return backbone, image_input, features

def inv_logit(y):
    """Return ``log(y / (1 - y))`` for the tensor ``y``.

    Despite the name, this expression is the logit (log-odds) of ``y``,
    not its inverse. No guard is applied for ``y`` equal to 0 or 1.
    """
    odds = y / (1 - y)
    return tf.math.log(odds)

In [10]:
# Build the similarity model: the input image (im1) is scored against a
# fixed reference image supplied through the im2_const placeholder.
# NOTE: keras_model, im1 and f1 are the values returned by setup_model();
# the cell that calls it appears further down in this file and must have
# been executed before this one.

# Placeholder for the reference image batch; precompute() binds the
# preprocessed original image to it through its img_dict argument.
im2_const = tf.placeholder(tf.float32, [1, 224, 224, 3])
# This Lambda ignores its input and always yields the placeholder, which
# lets the reference image enter the Keras graph as a layer output.
im2 = Lambda(lambda im1: im2_const)(im1)
# Features of the reference image, from the same network that produced f1.
f2 = keras_model(im2)
# Dot product along axis 1 of the L2-normalised feature vectors.
d = keras.layers.Dot(1, normalize=True)([f1, f2])
# Rescale d with (d+1)/2 and pass the result through inv_logit().
logit = Lambda(lambda d: inv_logit((d+1)/2))(d)
# Model that SHAP explains: input image in, similarity logit out.
model = Model(inputs=[im1], outputs=[logit])

In [11]:
def precompute(original_url):
    """Create the SHAP explainer for one original (reference) image.

    Args:
        original_url: URL of the original image; it is downloaded and
            preprocessed through ``prep_image``.

    Returns:
        The ``shap.DeepExplainer`` built on the module-level ``model``,
        with a Gaussian-blurred copy of the image as background data and
        the preprocessed image bound to ``im2_const`` through ``img_dict``.
    """
    reference_batch, _ = prep_image(original_url, True)
    # NOTE(review): a scalar sigma is applied along every axis of the 4-D
    # batch, so this also smooths across the colour channels - confirm
    # that this is intended.
    background = np.array(gaussian_filter(reference_batch, sigma=10))
    return shap.DeepExplainer(
        model,
        background,
        img_dict={im2_const: reference_batch},
    )

In [19]:
def test_match(match_url, e):
    """Return the matched image with everything but its key regions blacked out.

    SHAP values are computed for the matched image, reduced to one
    magnitude per pixel, smoothed, and thresholded at one standard
    deviation above the mean. Pixels below the threshold are zeroed.

    Args:
        match_url: URL of the matched image to explain.
        e: Explainer returned by ``precompute`` for the original image.

    Returns:
        A PIL RGB image whose longer side is 224 pixels, with the aspect
        ratio of the downloaded image, in which pixels outside the
        high-attribution mask are black.
    """
    test_image, size = prep_image(match_url, True)
    # Raw RGB pixels for the picture that is returned. The preprocessed
    # batch is BGR and mean-centred (it contains negative values), so
    # casting it to uint8 would produce wrapped, channel-swapped colours.
    no_preprocess, _ = prep_image(match_url, False)

    x_test = np.array(test_image)
    shap_values = e.shap_values(x_test, check_additivity=False)

    # Collapse the last (colour) axis into one attribution magnitude per
    # pixel. axis=4 assumes shap_values stacks to a 5-D array, i.e. a
    # list of per-output arrays - TODO confirm for other explainers.
    shap_values_normed = np.linalg.norm(np.array(shap_values), axis=4)

    # Smooth the attribution map so the mask forms contiguous regions.
    blurred = gaussian_filter(shap_values_normed[0], sigma=4)
    threshold = np.mean(blurred) + np.std(blurred)
    mask = np.where(blurred > threshold, 1, 0).reshape(224, 224, 1)
    masked_pixels = np.multiply(mask, no_preprocess[0])

    # Scale back towards the original aspect ratio, longer side = 224.
    # max(1, ...) keeps very elongated images from truncating to a zero
    # dimension, which PIL cannot resize to.
    width, height = size
    if width > height:
        new_size = (224, max(1, int(height / width * 224)))
    else:
        new_size = (max(1, int(width / height * 224)), 224)

    return Image.fromarray(masked_pixels.astype(np.uint8), 'RGB').resize(new_size)

In [None]:
# def setup_model():
#   keras_model = ResNet50(input_shape=[224, 224, 3], weights='imagenet', include_top=False, pooling='avg')
#   im1 = Input([224, 224, 3])
#   f1 = keras_model(im1)
#   return keras_model, im1, f1

# def inv_logit(y):
#     return tf.math.log(y/(1-y))

In [None]:
# def precompute(original_url):
#   training_img, training_img_size = prep_image(original_url, True)
#   x_train = np.array(gaussian_filter(training_img, sigma=10))
  
#   query_img, query_img_size = prep_image(original_url, True)
#   im2_const = tf.constant(query_img, dtype=tf.float32)
#   im2 = Lambda(lambda im1: im2_const)(im1)

#   f2 = keras_model(im2)
#   d = keras.layers.Dot(1, normalize=True)([f1, f2])

#   logit = Lambda(lambda d: inv_logit((d+1)/2))(d)
#   model = Model(inputs=[im1], outputs=[logit])
  
#   e = shap.DeepExplainer(model, x_train)
#   return e

In [None]:
# def test_match(match_url, e):
#   start = time.time()
#   test_image, size = prep_image(match_url, True)
#   no_preprocess, _ = prep_image(match_url, False)
#   x_test = np.array(test_image)
#   shap_values = e.shap_values(x_test, check_additivity=False)
#   shap_values_normed = np.array(shap_values)
#   shap_values_normed = np.linalg.norm(shap_values_normed, axis=4)
  
#   blurred = gaussian_filter(shap_values_normed[0], sigma=4)
#   bflat = blurred.flatten()
#   shap_values_mask_qi = np.where(np.array(blurred) > np.mean(bflat) + np.std(bflat), 1, 0).reshape(224, 224, 1)
#   shap_values_qi = np.multiply(shap_values_mask_qi, x_test[0])
  
#   new_size = (224, int(size[1]/size[0]*224)) if size[0] > size[1] else (int(size[0]/size[1]*224), 224)
#   original_size = Image.fromarray(shap_values_qi.astype(np.uint8), 'RGB').resize(new_size)
  
#   end = time.time()
#   print(end-start)
#   imgs = shap.image_plot(shap_values, no_preprocess.astype(float))
#   return imgs
# #   return original_size

In [5]:
import time

In [6]:
# run before any queries
# Builds the ResNet50 backbone together with its input and feature tensors
# once, binds them to module-level names, and prints the elapsed seconds.
start = time.time()
keras_model, im1, f1 = setup_model()
end = time.time()
print(end-start)








18.252468824386597


In [35]:
# run only once for each original image
# Builds the SHAP explainer `e` for the original image and prints the
# elapsed seconds; `e` is then passed to test_match() for each match.
start = time.time()
original_url = "https://mmlsparkdemo.blob.core.windows.net/cknn/datasets/interpret/lex1.jpg" # replace with link to original image
e = precompute(original_url)
end = time.time()
print(end-start)

3.3403663635253906


In [37]:
# run once for each match
# Explains one matched image against the explainer `e` built above, keeps
# the resulting PIL image in `explained_pic`, and prints the elapsed seconds.
start = time.time()
match_url = "https://mmlsparkdemo.blob.core.windows.net/cknn/datasets/interpret/lex2.jpg" # replace with link to matched image
explained_pic = test_match(match_url, e)
end = time.time()
print(end-start)

0.5744669437408447


In [15]:
# Query the Keras TensorFlow backend for the GPUs it can see. The leading
# underscore marks this as a non-public helper, so it may not exist in
# other Keras versions.
K.tensorflow_backend._get_available_gpus()