# Score-CAM

This notebook implements the saliency algorithm described in the article "Score-CAM: Score-Weighted Visual Explanations for Convolutional Neural Networks", which is summarized by the following equations:

$$
L^c_\text{Score-CAM} = \text{ReLU}\left(\sum_k C(A_l^k) A_l^k\right)
$$
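where
$$
C(A_l^k) = f(X \odot s(\text{Up}(A_l^k))) - f(X_b)
$$

Here $X$ is the input image, $X_b$ is a baseline input, $A_l^k$ is the $k$-th activation map of layer $l$, $s$ is the normalization function (it rescales each map to $[0, 1]$) and $\text{Up}$ is the upsampling function (it resizes the map to the spatial size of the input).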


The main idea of this approach is to build the saliency map by weighting each activation map of the last convolutional layer by its local contribution factor, so that the map focuses solely on the regions that contribute to maximizing the output classification score, while nullifying unrelated regions.

References:

- Wang H, Du M, Yang F, Zhang Z. Score-cam: Improved visual explanations via score-weighted class activation mapping. [arXiv preprint arXiv:1910.01279](https://arxiv.org/pdf/1910.01279.pdf), 2019.

In [None]:
# Name of the layer whose output provides the activation maps to be weighted
# (it is looked up with `get_layer(LAST_CONV_LAYER)` further below).
LAST_CONV_LAYER = 'block14_sepconv2_act'

# NOTE(review): neither constant below is referenced by the code visible in
# this notebook -- presumably shared settings; confirm before removing.
NUM_SAMPLES, NOISE = 50, 0.2

In [1]:
import tensorflow as tf

from config import config, data, model, utils
from config.utils import to_image, normalize

# Apply the project's plotting configuration before any figure is drawn.
# NOTE(review): presumably adjusts plotting defaults for displaying images --
# confirm against config.utils.
utils.setup_clean_image_plotting()

In [None]:
# Load the sample images and derive the network-ready batch `x` from them.
images = data.load_images()
x = model.preprocess(images)

# Display the loaded images, titled with `data.class_names`.
utils.plot(to_image(images), titles=data.class_names)

In [None]:
# Build the network and run it, in inference mode, on the preprocessed batch.
nn = model.build()
logits = nn(x, training=False)

# Class probabilities and, for each sample, the index of its top-scoring unit.
probs = tf.nn.softmax(logits)
preds = tf.argmax(logits, axis=1)

utils.print_predictions(model, images, top=2)

## Score-CAM

In [9]:
def activation_loss(y, units):
    """Pick, for every sample, the entries of `y` indexed by `units`.

    The gather runs along axis 1 with one leading batch dimension, so sample
    `b` of the result holds the entries of `y[b]` selected by `units[b]`.

    Args:
        y: per-sample scores; the call sites in this notebook pass softmax
            outputs.
        units: per-sample indices into axis 1 of `y`.

    Returns:
        The gathered scores.
    """
    selected = tf.gather(y, units, batch_dims=1, axis=1)
    return selected

def standardize(x, axis=(1, 2)):
    """Min-max rescale `x` along `axis`.

    The minimum over `axis` is subtracted first; the result is then divided
    by its own maximum over `axis` plus 1e-07, which avoids a division by
    zero. A slice that is constant along `axis` therefore becomes all zeros.

    Args:
        x: values to rescale.
        axis: axes over which the minimum and maximum are taken.

    Returns:
        The rescaled values, lying between 0 and (just under) 1.
    """
    floor = tf.reduce_min(x, axis=axis, keepdims=True)
    x -= floor

    ceiling = tf.reduce_max(x, axis=axis, keepdims=True)
    x /= ceiling + 1e-07

    return x

In [10]:
# Auxiliary model that returns, in a single forward pass, the network's
# outputs followed by the output of the layer named by LAST_CONV_LAYER.
# It is built from `nn` (the network returned by `model.build()`), because
# `model` is the imported `config.model` module, and through `tf.keras.Model`,
# because no bare `Model` name is imported in this notebook.
model_s = tf.keras.Model(
    nn.inputs,
    nn.outputs + [nn.get_layer(LAST_CONV_LAYER).output])

In [11]:
def scorecam(inputs, units, activations=None):
    """Compute Score-CAM saliency maps for `units` over `inputs`.

    Every activation map returned by `model_s` is resized to the spatial size
    of `inputs`, min-max normalized and used as a multiplicative mask over
    the inputs. The softmax score that the masked inputs obtain for `units`
    weights the resized activation map; the weighted maps are summed,
    rectified and divided by their per-sample maximum.

    NOTE(review): the weight is the raw score of the masked inputs; no
    baseline score is subtracted from it. The previous code computed the
    score of the unmasked inputs but never used it, so that dead computation
    was dropped.

    Args:
        inputs: batch of network inputs, indexed here as (B, H, W, C).
        units: per-sample index of the output unit to explain, forwarded to
            `activation_loss`.
        activations: maximum number of activation maps to evaluate, taken in
            decreasing order of spatial standard deviation. A falsy value
            (the default) evaluates every map; values above the number of
            available maps are clamped to it.

    Returns:
        A (B, H, W) tensor of saliency maps with values between 0 and 1.
    """
    # `model_s` returns (scores, activation maps); only the maps are needed.
    _, a = model_s(inputs, training=False)

    # Clamp the request: slicing past the last channel would produce empty
    # masks that cannot be broadcast against `inputs`.
    available = a.shape[-1]
    activations = min(activations, available) if activations else available

    # Always sort so we can break on the following loop.
    std = tf.math.reduce_std(a, axis=(1, 2))
    order = tf.argsort(std, axis=-1, direction='DESCENDING')[:, :activations]
    a = tf.gather(a, order, axis=-1, batch_dims=-1)

    # The masks must match the inputs spatially, so the target size is read
    # from `inputs` itself; `tf.image.resize` already handles 4-D batches.
    a = tf.image.resize(a, tf.shape(inputs)[1:3])
    an = standardize(a)

    s = tf.zeros(a.shape[:-1])

    for i in range(activations):
        mask = an[..., i:i+1]

        mask_min = tf.reduce_min(mask, axis=(1, 2))
        mask_max = tf.reduce_max(mask, axis=(1, 2))

        if (mask_min == mask_max).numpy().all():
            break  # Maps are sorted by std: the NEXT ones are flat as well.

        x = inputs * mask  # (B, H, W, C) x (B, H, W, 1)

        # Score the masked inputs with the same network that produced the
        # activation maps (first output of `model_s`).
        y, _ = model_s(x, training=False)
        y = tf.nn.softmax(y)
        y = activation_loss(y, units)
        y = tf.reshape(y, (-1, 1, 1))

        s += y * a[..., i]

    s = tf.nn.relu(s)
    return s / (tf.reduce_max(s, axis=(1, 2), keepdims=True) + 1e-07)

In [12]:
# Maximum number of images to explain.
SAMPLES = 10

# Explain, one image per call, the class predicted for each of the first
# SAMPLES images. The preprocessed batch `x` is used because `preds` were
# computed from it; the raw `images` are not what the network was run on.
maps = tf.concat(
    [scorecam(x[ix:ix+1], preds[ix:ix+1], activations=512)
     for ix in range(min(SAMPLES, len(images)))],
    axis=0)

In [16]:
#@title

def plot_saliency(image, map, rows=1, cols=4, i0=0):
    """Draw the saliency visualizations of a single image.

    Three panels are drawn through `plot`: the image, its saliency map and
    the image multiplied by the map (cast to uint8). A fourth one is drawn by
    `draw_heatmap` on the subplot at position `i0 + 4`.

    Args:
        image: image being explained.
        map: saliency map of `image`; a trailing axis is appended to it
            before it is multiplied with `image`.
        rows: number of rows of the subplot grid.
        cols: number of columns of the subplot grid.
        i0: offset forwarded to `plot` and added to the heatmap position.
    """
    masked = (image * map[..., np.newaxis]).astype('uint8')
    plot([image, map, masked], rows=rows, cols=cols, i0=i0)

    plt.subplot(rows, cols, i0 + 4)
    draw_heatmap(image, map)


plt.figure(figsize=(12, 30))

# One row of four panels per explained image: each row starts four subplot
# positions after the previous one.
sample_images = as_image_vector(images[:SAMPLES])
for row, (image, saliency) in enumerate(zip(sample_images, maps.numpy())):
    plot_saliency(image, saliency, rows=SAMPLES, i0=row * 4)

plt.tight_layout()

Output hidden; open in https://colab.research.google.com to view.

### Vectorized Implementation

I have also written a vectorized version of the method.
Unfortunately, it is not very useful, as the statement `inputs * b` quickly consumes all available memory.

P.S.: it can probably still be used with a reduced number of activation maps, e.g. by passing `activations=256`.

```python
@tf.function
def scorecam(inputs, units, activations=None):
    """Vectorized Score-CAM: score every masked input in one forward pass.

    Every selected activation map returned by `model_s` is resized to the
    spatial size of `inputs`, min-max normalized and multiplied with the
    inputs. All masked inputs are stacked into a single batch of B*A samples,
    which is what makes this version memory hungry. The softmax scores
    obtained for `units` weight the resized maps; the weighted maps are
    summed, rectified and divided by their per-sample maximum.

    NOTE(review): the weight is the raw score of the masked inputs; no
    baseline score is subtracted from it. The previous code computed the
    score of the unmasked inputs but never used it, so that dead computation
    was dropped.

    Args:
        inputs: batch of network inputs, indexed here as (B, H, W, C).
        units: per-sample index of the output unit to explain.
        activations: number of activation maps to keep, chosen by decreasing
            spatial standard deviation. A falsy value, or a value not smaller
            than the number of available maps, keeps every map.

    Returns:
        A (B, H, W) tensor of saliency maps with values between 0 and 1.
    """
    # `model_s` returns (scores, activation maps); only the maps are needed.
    _, a = model_s(inputs, training=False)

    # Never ask for more maps than exist: the final reshape relies on
    # `activations` being the actual number of maps kept.
    available = a.shape[-1]
    if activations and activations < available:
        std = tf.math.reduce_std(a, axis=(1, 2))
        s = tf.argsort(std, axis=-1, direction='DESCENDING')[:, :activations]
        a = tf.gather(a, s, axis=-1, batch_dims=-1)
    else:
        activations = available

    # The masks must match the inputs, so sizes are read from `inputs`
    # itself; `tf.image.resize` already handles 4-D batches.
    input_shape = tf.shape(inputs)[1:]   # (H, W, C)
    a = tf.image.resize(a, input_shape[:2])
    b = standardize(a)
    b = tf.transpose(b, (0, 3, 1, 2))
    b = b[..., tf.newaxis]               # (B, A, H, W, 1)
    inputs = inputs[:, tf.newaxis, ...]  # (B, 1, H, W, C)

    # batchify (B*A, H, W, C)
    b = tf.reshape(inputs * b, tf.concat([[-1], input_shape], axis=0))

    # Score the masked inputs with the same network that produced the
    # activation maps (first output of `model_s`).
    fm, _ = model_s(b, training=False)
    fm = tf.nn.softmax(fm)
    fm = activation_loss(fm, tf.repeat(units, activations, axis=0))

    fm = tf.reshape(fm, (-1, 1, 1, activations))  # restore (B, A)
    s = tf.reduce_sum(fm * a, axis=-1)
    s = tf.nn.relu(s)

    return s / (tf.reduce_max(s, axis=(1, 2), keepdims=True) + 1e-07)
```