# ConvNet

Implementing Guided Backpropagation (https://arxiv.org/abs/1412.6806) on top of the gradient-based saliency algorithm described in https://arxiv.org/pdf/1312.6034.pdf.

In [1]:
# NOTE(review): neither constant is referenced in the cells visible in this notebook;
# presumably shared settings kept for noise-averaged variants — confirm or remove.
REPETITIONS = 50
NOISE = .2

## Setup

In [2]:
# Mount Google Drive (Colab only) so the project's helper package can be reached.
from google.colab import drive
drive.mount('/content/drive')

# Copy the `config` package from Drive into the working directory so that the
# `from config import ...` statements in the next cell can resolve it.
! cp -r '/content/drive/MyDrive/Colab Notebooks/cs-no/6.grads/config' .

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import tensorflow as tf
from config import config, data, model, utils

from config.utils import to_image, normalize

# Apply the project's plotting setup (helper from `config.utils`) before any figure is drawn.
utils.setup_clean_image_plotting()

In [4]:
# `images` keeps the loaded samples for plotting; `x` is what gets fed to the network.
# NOTE(review): presumably `model.preprocess` applies the architecture-specific input
# scaling — confirm against `config.model`.
images = data.load_images()
x = model.preprocess(images)

## Guided Functions

Architectures defined in `tf.keras.applications` will load activation layers using one of the following forms:

- They will add an `Activation('relu')` layer, which will keep the activation function in its `activation` attribute
- They will add a custom concrete-class layer such as `tf.keras.layers.ReLU`

For the first case, we simply replace the layer's `activation` attribute with its guided version. As for the second form, we instantiate an object of the class `VersionAwareLayers` and override the object in the network's module (e.g. `tf.python.keras.applications.mobilenet_v2`). The module will instantiate the updated ReLU layer when building.

In [5]:
from tensorflow.keras import backend as K
from tensorflow.python.keras.layers import VersionAwareLayers

def guided_relu(x, max_value=None):
    """ReLU (optionally capped at ``max_value``) with a guided-backprop gradient.

    Forward pass: ``max(x, 0)``, additionally limited to ``max_value`` when one
    is given.

    Backward pass: the upstream gradient ``dy`` is propagated only where
    ``dy > 0`` AND the layer *input* lies in the active range of the unit,
    i.e. ``x > 0`` and, when capped, ``x <= max_value``.

    Args:
        x: input tensor.
        max_value: optional upper bound of the activation (e.g. ``6`` for
            ReLU6). ``None`` disables the cap.

    Returns:
        The activated tensor, with the same shape and dtype as ``x``.
    """
    # `max_value` is a hyper-parameter, not a differentiable input. It is closed
    # over so that the custom gradient has exactly one input (and returns
    # exactly one gradient) whether the function is called as `guided_relu(x)`
    # or `guided_relu(x, max_value)`.
    @tf.custom_gradient
    def _guided(inputs):
        outputs = tf.nn.relu(inputs)

        if max_value is not None:
            # relu already guarantees outputs >= 0, so only the upper bound is
            # needed. The cap is cast to the activation dtype so that inputs
            # other than float32 are supported.
            outputs = tf.minimum(outputs, tf.cast(max_value, outputs.dtype))

        def grad(dy):
            # The mask is built from the raw `inputs`, not from the clipped
            # `outputs`: comparing the clipped value against `max_value` would
            # always be true and let saturated units leak gradient.
            active = inputs > 0
            if max_value is not None:
                active = active & (inputs <= tf.cast(max_value, inputs.dtype))

            return dy * tf.cast((dy > 0) & active, dy.dtype)

        return outputs, grad

    return _guided(x)

@tf.custom_gradient
def guided_swish(x):
    """Swish activation (``x * sigmoid(x)``) with a guided-backprop gradient.

    The backward pass multiplies the upstream gradient by the analytical swish
    derivative, but only where both the upstream gradient and the input ``x``
    are strictly positive; everywhere else the gradient is zeroed.

    Args:
        x: input tensor.

    Returns:
        Tensor holding ``x * sigmoid(x)``.
    """
    def grad(upstream):
        # sigmoid(x) is recomputed here, with an explicit dependency on the
        # upstream gradient, instead of reusing the forward-pass value.
        with tf.control_dependencies([upstream]):
            sig = tf.math.sigmoid(x)

        # d/dx [x * sigmoid(x)] = sigmoid(x) * (1 + x * (1 - sigmoid(x)))
        swish_slope = (sig * (1.0 + x * (1.0 - sig)))

        # Guided rule: keep positions with positive gradient and positive input.
        keep = tf.cast((upstream > 0) & (x > 0), upstream.dtype)
        return upstream * keep * swish_slope

    activated = x * tf.math.sigmoid(x)
    return activated, grad

class GuidedReLU(tf.keras.layers.ReLU):
    """Drop-in replacement for ``tf.keras.layers.ReLU`` using ``guided_relu``.

    Only the layer's ``max_value`` is forwarded to ``guided_relu``; no other
    option of the parent layer is passed along by this ``call``.
    """

    def call(self, inputs):
        """Apply the guided ReLU to ``inputs``, capped at ``self.max_value``."""
        cap = self.max_value
        return guided_relu(inputs, cap)

# Layer registry handed to the model builder below. Its `ReLU` entry is swapped for
# the guided variant so that an architecture instantiating `layers.ReLU` gets GuidedReLU.
layers = VersionAwareLayers()
layers.ReLU = GuidedReLU

In [6]:
nn = model.build(layers=layers)

# Layers that keep their non-linearity in an `activation` attribute get it
# replaced by the matching guided version; every other layer is left untouched.
guided_versions = (
    (tf.keras.activations.relu, guided_relu),
    (tf.keras.activations.swish, guided_swish),
)

for layer in nn.layers:
    for stock, guided in guided_versions:
        if getattr(layer, 'activation', None) is stock:
            layer.activation = guided
            break

# Inference-mode forward pass: predicted class index and class probabilities per image.
logits = nn(x, training=False)
probs = tf.nn.softmax(logits)
preds = tf.argmax(logits, axis=1)

## Guided-Backpropagation

In [7]:
def activation_loss(y, units):
    """Select, for each sample of the batch, the output of its chosen unit.

    Args:
        y: network outputs; axis 0 is treated as the batch axis and axis 1 is
            the axis being indexed.
        units: indices into axis 1 of ``y``, batched along axis 0 like ``y``.

    Returns:
        The gathered outputs (``tf.gather`` with ``axis=1, batch_dims=1``).
    """
    selected = tf.gather(y, units, axis=1, batch_dims=1)
    return selected

@tf.function
def gradients(inputs, units):
    """Compute the gradient of the selected output units with respect to ``inputs``.

    Args:
        inputs: batch of preprocessed inputs fed to the global model ``nn``.
        units: per-sample index of the output unit to explain
            (see ``activation_loss``).

    Returns:
        A ``(loss, grads)`` tuple: the selected unit activations and their
        gradient with respect to ``inputs`` (zeros where the two are not
        connected).
    """
    # Only the gradient w.r.t. `inputs` is requested, so the model variables do
    # not need to be tracked by the tape; `inputs` is watched explicitly.
    with tf.GradientTape(watch_accessed_variables=False) as tape:
        tape.watch(inputs)
        # Explicit inference mode, consistent with the forward pass that
        # produced the predictions being explained.
        y = nn(inputs, training=False)
        loss = activation_loss(y, units)

    grads = tape.gradient(loss, inputs, unconnected_gradients=tf.UnconnectedGradients.ZERO)

    return loss, grads

In [20]:
# Guided gradients of each image's predicted class with respect to its pixels.
_, g = gradients(x, preds)

# Three maps, each summed over the last axis of the gradients and then normalized:
#   a: magnitude of the gradients
#   p: positive part only
#   n: negative part only (sign flipped)
a, p, n = (
    normalize(tf.reduce_sum(part, axis=-1))
    for part in (tf.abs(g), tf.nn.relu(g), tf.nn.relu(-g))
)

In [9]:
# One title per plotted column.
titles = [
    'original',
    'absolute gradients',
    'positive gradients',
    'negative gradients',
    'original * abs(gradients)',
]

# One entry per column; each entry holds one item per input image.
columns = [
    to_image(images),
    a.numpy(),
    p.numpy(),
    n.numpy(),
    to_image(images * a[..., tf.newaxis]),  # original image weighted by the absolute-gradient map
]

# Interleave the columns into a flat tuple: all five panels of the first image,
# then all five panels of the second image, and so on.
utils.plot(
    tuple(panel for per_image in zip(*columns) for panel in per_image),
    titles=titles,
    rows=len(images),
)

Output hidden; open in https://colab.research.google.com to view.