In [1]:
import tensorflow as tf
import numpy as np

In [2]:
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [3]:
IMAGE_PATH = "./hummingbird.jpg"

In [4]:
import sys
import cv2

In [11]:
from keras_applications.vgg16 import preprocess_input

In [5]:
from utils.image import resize_activations, normalize_activations
from utils.display import grid_display, heatmap_display
from utils.saver import save_rgb

In [6]:
def image_to_uint_255(image):
    """
    Convert float images to int 0-255 images.
    Args:
        image (numpy.ndarray): Input image. Can be either [0, 255], [0, 1], [-1, 1]
    Returns:
        numpy.ndarray:
    """
    if image.dtype == np.uint8:
        return image

    if image.min() < 0:
        image = (image + 1.0) / 2.0

    return (image * 255).astype("uint8")

In [7]:
def heatmap_display(
    heatmap, original_image, colormap=cv2.COLORMAP_JET, image_weight=0.7
):
    """
    Apply a heatmap (as an np.ndarray) on top of an original image.

    Args:
        heatmap (numpy.ndarray): Array corresponding to the heatmap
        original_image (numpy.ndarray): Image on which we apply the heatmap
        colormap (int): OpenCV Colormap to use for heatmap visualization
        image_weight (float): An optional `float` value in range [0,1] indicating the weight of
            the input image to be overlaying the calculated attribution maps. Defaults to `0.7`

    Returns:
        np.ndarray: Original image with heatmap applied
    """
    heatmap = cv2.resize(heatmap, (original_image.shape[1], original_image.shape[0]))

    image = image_to_uint_255(original_image)

    heatmap = (heatmap - np.min(heatmap)) / (heatmap.max() - heatmap.min())

    heatmap = cv2.applyColorMap(
        cv2.cvtColor((heatmap * 255).astype("uint8"), cv2.COLOR_GRAY2BGR), colormap
    )

    output = cv2.addWeighted(
        cv2.cvtColor(image, cv2.COLOR_RGB2BGR), image_weight, heatmap, 1, 0
    )

    return cv2.cvtColor(output, cv2.COLOR_BGR2RGB)

In [18]:
def explain(
        self,
        validation_data,
        model,
        class_index,
        input_shape=None,
        colormap=cv2.COLORMAP_JET,
        image_weight=0.7,
        display_in_grid=True
    ):
        """
        Compute GradCAM for a specific class index.

        Args:
            validation_data (Tuple[np.ndarray, Optional[np.ndarray]]): Validation data
                to perform the method on. Tuple containing (x, y).
            model (tf.keras.Model): tf.keras model to inspect
            class_index (int): Index of targeted class
            input_shape (Tuple[int, int]): Shape of input data, e.g. (224, 224)
            colormap (int): OpenCV Colormap to use for heatmap visualization
            image_weight (float): An optional `float` value in range [0,1] indicating the weight of
                the input image to be overlaying the calculated attribution maps. Defaults to `0.7`.
            display_in_grid (bool): Whether display images on grid or separately.

        Returns:
            numpy.ndarray: Grid of all the GradCAM or 4D array (batch_size, height, width, channels)
        """
        assert input_shape != None, "Pass input shape argument"

        images, _ = validation_data
        batch_size = images.shape[0]

        # according to section 4.1 of paper, we need the last convolutional layer
        layer_name = self.get_last_convolutional_layer_name(model)

        # normalize feature maps, calculate masks and compute the
        # output score
        weights, maps = ScoreCAM.get_filters(
            model, images, layer_name, class_index, input_shape
        )

        weights = weights.reshape((-1, 1, 1, batch_size)) # shape (K, 1, 1, 1)
        weights = tf.reshape(weights, (batch_size, 1, 1, weights.shape[0])) # shape (1, 1, 1, K)

        cam = ScoreCAM.generate_cam(weights, maps)

        heatmaps = np.array(
            [
                # not showing the actual image if image_weight=0
                heatmap_display(cam.numpy()[0], images[0], colormap, image_weight)
            ]
        )

        if display_in_grid:
            return grid_display(heatmaps)
        else:
            return heatmaps

def get_last_convolutional_layer_name(model):
        """
        Search for the last convolutional layer to perform Score-CAM, as stated
        in section 4.1 in the original paper.

        Args:
            model (tf.keras.Model): tf.keras model to inspect

        Returns:
            str: Name of the target layer
        """
        for layer in reversed(model.layers):
            # Select closest 4D layer to the end of the network.
            if len(layer.output_shape) == 4 and layer.name.count('conv') > 0:
                return layer.name

        raise ValueError(
            "Model does not seem to contain 4D layer. Grad CAM cannot be applied."
        )

def generate_cam(weights, maps):
        """
        Generate the Score-CAM

        Inputs are the weights (shape Kx1x1xbatch_size) generated by the foward computing F(Mk)
        followed by softmax activation and normalized maps (shape KxHxWx3)

        Args:
            weights (numpy.ndarray): Output score with shape (K, 1, 1, batch_size) where
            K is the number of filters in the last convolutional layer
            maps (tf.Tensor): 4D-Tensor with shape (K, H, W, batch_size) where K is the number
            of filters in the last convolutional layer and H,W are the input image size

        Returns:
            tf.Tensor: 4D-Tensor of linear weighted combination of all activation maps
            with shape (batch_size, H, W, 1)
        """

        cam = tf.math.reduce_sum(tf.math.multiply(weights, maps), axis=-1, keepdims=True)

        return cam

def save(self, grid, output_dir, output_name):
        """
        Save the output to a specific dir.

        Args:
            grid (numpy.ndarray): Grid of all the heatmaps
            output_dir (str): Output directory path
            output_name (str): Output name
        """

        save_rgb(grid, output_dir, output_name)

In [13]:
with tf.device('/GPU:0'):
    model = tf.keras.applications.vgg16.VGG16(weights="imagenet", include_top=True)

    input_shape = (224, 224)

    img = tf.keras.preprocessing.image.load_img(IMAGE_PATH, target_size=input_shape)
    img = tf.keras.preprocessing.image.img_to_array(img)
    img = np.expand_dims(img, axis=0)

    data = (img, None)

    tabby_cat_class_index = 94
    explainer = ScoreCAM()
    # Compute ScoreCAM on VGG16
    image = explainer.explain(
        data, model, tabby_cat_class_index, input_shape, display_in_grid=False
    )[0]

In [19]:
layer_name = get_last_convolutional_layer_name(model)

In [20]:
layer_name

'block5_conv3'

In [30]:
def get_filters(model, images, layer_name, class_index, input_shape):
    """
    Generate guided gradients and convolutional outputs with an inference.

    Args:
        model (tf.keras.Model): tf.keras model to inspect
        images (numpy.ndarray): 4D-Tensor with shape (batch_size, H, W, 3)
        layer_name (str): Last convolutional layer
        class_index (int): Index of targeted class
        input_shape (Tuple[int, int]): Shape of input data, e.g. (224, 224)
        
    Returns:
        Tuple[numpy.ndarray, tf.Tensor]: (Output score of given class, Normalized last conv outputs)
    """

    conv_model = tf.keras.models.Model(
        [model.inputs], [model.get_layer(layer_name).output]
    )
    softmax_model = tf.keras.models.Model(
        [model.inputs], [model.outputs]
    )

    inputs = tf.cast(images, tf.float32)

    conv_output = conv_model(inputs)
    resized_conv_output = resize_activations(conv_output, input_shape)
    normalized_maps = normalize_activations(resized_conv_output) # shape (batch_size, H, W, K)
    shape = normalized_maps.shape

    # reshape normalized_maps tensor to shape (K, H, W, batch_size)
    reshaped_normalized_maps = tf.reshape(normalized_maps, (shape[3], shape[1], shape[2], shape[0]))

    masked_images = tf.math.multiply(reshaped_normalized_maps, inputs)
        
    classes_activation_scale = softmax_model.predict(masked_images)

    # return the output only for the given class
    weights = classes_activation_scale[:, class_index] # shape (K,)

    return weights, normalized_maps

In [31]:
weights, maps = get_filters(model, data[0], layer_name, 94, input_shape)

In [34]:
ass = tf.math.multiply(weights, maps)

In [43]:
weights = weights.reshape((-1, 1, 1, 1)) # shape (K, 1, 1, 1)
weights = tf.reshape(weights, (1, 1, 1, weights.shape[0])) # shape (1, 1, 1, K)

In [44]:
weights

<tf.Tensor: shape=(1, 1, 1, 512), dtype=float32, numpy=
array([[[[0.0020314 , 0.00202176, 0.00204512, 0.00202735, 0.00204637,
          0.00203198, 0.0020267 , 0.00202486, 0.00201975, 0.00203711,
          0.00202327, 0.00204954, 0.00203055, 0.00203785, 0.00203112,
          0.00201535, 0.0020314 , 0.00202176, 0.00204365, 0.00202559,
          0.00204531, 0.0020252 , 0.00202273, 0.00201243, 0.00201079,
          0.00202624, 0.00201206, 0.00203238, 0.00201442, 0.00202326,
          0.00200511, 0.00199878, 0.00199938, 0.00200157, 0.0020158 ,
          0.00200618, 0.00201757, 0.00200114, 0.00199789, 0.00198292,
          0.00199281, 0.00200073, 0.00200099, 0.00200172, 0.00200132,
          0.00199734, 0.00198845, 0.00198914, 0.00197858, 0.0019963 ,
          0.00199572, 0.00200743, 0.00200036, 0.00200242, 0.00198828,
          0.00197847, 0.00199017, 0.00199101, 0.00200674, 0.00198585,
          0.00200504, 0.00198516, 0.00199759, 0.00198898, 0.00198008,
          0.00199814, 0.00197898, 