# Attribution Maps for FashionMNIST
In this notebook, you will investigate why the neural network assigns a given label to a particular fashion item.

The start is the same as in the `CNN_Fashion` notebook.

## Preparations

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist

# Fetch the FashionMNIST training and test splits (images with integer labels)
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Divide the pixel values by 255 and append a trailing channel axis in one
# step, giving the (28, 28, 1) layout the network's first layer is declared with
train_images = (train_images / 255.0)[..., tf.newaxis]
test_images = (test_images / 255.0)[..., tf.newaxis]

The code below displays a few images from the training and test sets.

In [None]:
import matplotlib.pyplot as plt

# Human-readable class names, listed in the order of their integer label (0-9)
fashion_mnist_labels = dict(enumerate([
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]))

# Function to display images
def display_images(images, labels, num_images=5):
    """Show the first images of a collection in one row, captioned with their class name.

    Args:
        images: indexable collection of images; each entry must be reshapeable
            to 28x28 pixels.
        labels: integer class labels aligned with `images`; each one is looked
            up in `fashion_mnist_labels`.
        num_images: maximum number of images to show. Fewer are shown when
            `images` or `labels` holds fewer entries.
    """
    # Never index past the end of the data (e.g. a selection with few examples)
    num_images = min(num_images, len(images), len(labels))

    plt.figure(figsize=(10,10))
    for i in range(num_images):
        plt.subplot(1, num_images, i+1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        # Drop the channel axis for plotting
        plt.imshow(images[i].reshape(28, 28), cmap=plt.cm.binary)
        # Cast to a plain int so the dictionary lookup does not depend on the
        # label's array scalar type
        plt.xlabel(fashion_mnist_labels[int(labels[i])])
    plt.show()

# First few training examples with their class names
display_images(images=train_images, labels=train_labels)

# First few test examples with their class names
display_images(images=test_images, labels=test_labels)

## The network
We will use the network below to predict the labels of the fashion items.

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

# The images were divided by 255 and given a trailing channel axis in the
# preprocessing cell above, so every input has shape (28, 28, 1).

# Define the CNN model
model = models.Sequential([
    # Declare the input shape with an explicit Input object; passing
    # `input_shape=` to the first layer is discouraged by recent Keras versions
    layers.Input(shape=(28, 28, 1)),

    # Convolutional base
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),

    # Dense layers
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # One output per class; softmax turns them into class probabilities
    layers.Dense(10, activation='softmax')
])

# Compile the model
# The "sparse" loss variant takes the labels as integer class ids (not one-hot)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Model summary
model.summary()

# Train the model
# NOTE: the test set is also passed as validation data, so the validation
# metrics reported during training are measured on the same images as the
# final evaluation below.
history = model.fit(train_images, train_labels, epochs=10,
                    validation_data=(test_images, test_labels))

# Evaluate the model
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('\nTest accuracy:', test_acc)


## Visualising Internal Parameters
Looping through the layers of the network, we can visualise the convolution filters the network has learned:

In [None]:
import numpy as np

# Iterate through all layers of the model and plot the kernels of the
# convolutional ones
for layer in model.layers:
    if 'conv' not in layer.name:
        continue

    # The last axis of the kernel tensor indexes the filters; the bias vector
    # is not visualised.
    weights, _bias = layer.get_weights()
    print(layer.name, weights.shape)

    nFiltersInLayer = weights.shape[3]
    print(nFiltersInLayer)

    # Only the slice belonging to the first input channel of each filter is shown
    filters = weights[:, :, 0, :]

    # Use one colour scale for all panels of the layer. Without it every panel
    # is scaled individually and the colourbar would describe the last one only.
    f_min, f_max = filters.min(), filters.max()

    # Near-square grid that is large enough for all filters
    figCols = int(np.ceil(np.sqrt(nFiltersInLayer)))
    figRows = int(np.ceil(nFiltersInLayer/figCols))
    fig, axs = plt.subplots(figRows, figCols, squeeze=False)
    fig.suptitle(layer.name)

    # plotting all the filters
    for i, ax in enumerate(axs.flat):
        if i >= nFiltersInLayer:
            # grid cells beyond the last filter stay empty
            ax.axis('off')
            continue
        ax.set_xticks([])
        ax.set_yticks([])
        im = ax.imshow(filters[:, :, i], vmin=f_min, vmax=f_max)

    # Reserve a strip on the right for the shared colourbar
    fig.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
    cax = fig.add_axes([0.85, 0.1, 0.025, 0.8])
    fig.colorbar(im, cax=cax)
    plt.show()

## Feature Maps
We can also loop through the network layers and look at what an input image looks like after each layer:

In [None]:
# The single training image whose internal representations are visualised
sample_image = train_images[1:2]

for layer in model.layers:
    print(layer.name)
    # Skip the flattening layer. Checking the type instead of the name keeps
    # this working when the model cell is re-run and the layer gets a
    # numbered name.
    if isinstance(layer, layers.Flatten):
        continue

    # Build a new model 'model_internal' that consists of all layers of the original model until the current layer
    model_internal = models.Model(inputs=model.inputs, outputs=layer.output)
    # The predictions of the 'model_internal' model are how a given input 'looks' for the model:
    feature_map_layer = model_internal.predict(sample_image)

    nFiltersInLayer = feature_map_layer.shape[-1]
    print(nFiltersInLayer)

    # visualize the internal representation, depending on its shape:
    if feature_map_layer.ndim == 4:
        # Convolution / pooling output: one small image per channel
        figCols = int(np.ceil(np.sqrt(nFiltersInLayer)))
        figRows = int(np.ceil(nFiltersInLayer/figCols))
        fig, axs = plt.subplots(figRows, figCols, squeeze=False)

        # One colour scale for all channels, so the colourbar is valid for
        # every panel and channel activations can be compared directly
        f_min, f_max = feature_map_layer.min(), feature_map_layer.max()

        for i, ax in enumerate(axs.flat):
            if i >= nFiltersInLayer:
                # grid cells beyond the last channel stay empty
                ax.axis('off')
                continue
            ax.set_xticks([])
            ax.set_yticks([])
            im = ax.imshow(feature_map_layer[0, :, :, i], vmin=f_min, vmax=f_max)

        # Reserve a strip on the right for the shared colourbar
        fig.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
        cax = fig.add_axes([0.85, 0.1, 0.025, 0.8])
        fig.colorbar(im, cax=cax)
    elif feature_map_layer.ndim == 2:
        # Dense output: one bar per unit
        fig, ax = plt.subplots()
        ax.bar(range(feature_map_layer.size), np.squeeze(feature_map_layer))
    else:
        # No visualisation defined for other output shapes
        continue

    # Title the whole figure (a plain axes title would end up on the colourbar)
    fig.suptitle(layer.name)
    plt.show()

## Examples with highest likelihood
For better interpretability, it can also be helpful to find out which examples of a given class the model assigns to that class with the highest (and the lowest) confidence:

In [None]:
# Softmax outputs for every test image: one row per image, one column per class
model_preds = model.predict(test_images)
# The estimated label is the class with the largest output
test_labels_est = model_preds.argmax(axis=1)

In [None]:
# Class whose examples are inspected (label 1 is "Trouser")
target_class = 1
# Tuple holding one array with the positions of all test images of that class
target_idx = np.nonzero(test_labels == target_class)

In [None]:
# Output the model assigns to `target_class` for each test image of that class.
# Indexing with the index array itself (rather than with the tuple stored in
# `target_idx`) gives a 1-D result directly: no extra axis has to be squeezed
# away, and a class with a single example still yields a length-1 array.
model_preds_4target = model_preds[target_idx[0], target_class]
model_preds_4target

In [None]:
# Positions that order the target-class outputs from lowest to highest
extreme_vals = model_preds_4target.argsort()
# Show the outputs in that ascending order
model_preds_4target[extreme_vals]

In [None]:
# Number of examples to show at each end of the confidence ranking
num_images = 6

# `extreme_vals` orders the examples by ascending output for the target class,
# so its head holds the least confident and its tail the most confident ones.
# `target_idx[0]` maps those positions back to indices into the test set.
least_likely_idx = target_idx[0][extreme_vals[:num_images]]
most_likely_idx = target_idx[0][extreme_vals[-num_images:]]

In [None]:
# Target-class images for which the model's output for that class is lowest
display_images(
    images=test_images[least_likely_idx],
    labels=test_labels[least_likely_idx],
    num_images=num_images,
)

In [None]:
# Target-class images for which the model's output for that class is highest
display_images(
    images=test_images[most_likely_idx],
    labels=test_labels[most_likely_idx],
    num_images=num_images,
)

## Saliency Map
The calculation of the saliency map is rather technical. We provide the code below with a few explanations. Some of the material here is from https://usmanr149.github.io/urmlblog/cnn/2020/05/01/Salincy-Maps.html.

We will be calculating the saliency map for a few misclassified images. To do so, we first determine the indices of all images that have been misclassified:

In [None]:
# Test-set indices at which the estimated label differs from the true label
misclassifications = np.flatnonzero(test_labels.squeeze() != test_labels_est)
# Number of misclassified test images
len(misclassifications)

Now we choose the indices of the images we want to get the saliency map for. Here we take the first 12 misclassified images - feel free to change this!

In [None]:
# Number of misclassified test images to inspect - change this to explore more
num_misclassified = 12
misclass_idx = misclassifications[:num_misclassified]

The code below calculates the saliency map for each image whose index is listed in `misclass_idx` and displays it beside the original image.

In [None]:
nMisClass = len(misclass_idx)

# Every example occupies two neighbouring panels (image, saliency map). The
# column count is forced to be even so that a pair never wraps onto two rows.
# At least one pair of panels is reserved so that an empty `misclass_idx`
# produces an empty figure instead of a division by zero.
nPanels = 2 * max(nMisClass, 1)
figCols = int(2*np.ceil(np.sqrt(nPanels)/2))
figRows = int(np.ceil(nPanels/figCols))
mc_count = 0

# squeeze=False keeps `axs` two-dimensional even when the grid has one row
fig, axs = plt.subplots(figRows, figCols, figsize=(10, 10), squeeze=False)
fig.suptitle('Misclassifications')

for mc in misclass_idx:
    # define image as variable, so the gradient tape tracks it
    # (float32 is what `dtype=float` resolves to in TensorFlow)
    my_img = tf.Variable(test_images[mc:(mc+1)], dtype=tf.float32)

    # calculate gradient of the output predictions with respect to the image
    with tf.GradientTape() as tape:
        pred = model(my_img, training=False)
        # Differentiate the output of the predicted class; argmax is the same
        # rule that produced `test_labels_est`, which is shown in the title
        top_class = int(np.argmax(pred.numpy()[0]))
        loss = pred[0][top_class]

    grads = tape.gradient(loss, my_img)
    dgrad_abs = tf.math.abs(grads)
    # largest absolute gradient over the channel axis, for the single image
    dgrad_max_ = np.max(dgrad_abs.numpy(), axis=3)[0]

    # normalize to range between 0 and 1 (the epsilon avoids 0/0 for a
    # constant gradient map)
    arr_min, arr_max = np.min(dgrad_max_), np.max(dgrad_max_)
    grad_eval = (dgrad_max_ - arr_min) / (arr_max - arr_min + 1e-18)

    ax_img = axs[mc_count // figCols, mc_count % figCols]
    ax_sal = axs[(mc_count+1) // figCols, (mc_count+1) % figCols]

    # display the input image
    ax_img.set_xticks([])
    ax_img.set_yticks([])
    title_str = str(mc) + ': est: ' + fashion_mnist_labels[test_labels_est[mc]] \
                + '\ntrue:  ' + fashion_mnist_labels[test_labels[mc]]
    ax_img.set_title(title_str)
    ax_img.imshow(my_img.numpy().squeeze(), cmap=plt.cm.binary)

    # display the gradient
    ax_sal.set_xticks([])
    ax_sal.set_yticks([])
    ax_sal.imshow(grad_eval, cmap='jet')

    mc_count += 2

# hide the panels that are left over at the end of the grid
for mc_c in range(mc_count, figRows*figCols):
    axs[mc_c // figCols, mc_c % figCols].axis('off')

plt.show()

**Exercises:**
- Looking at the original images and the saliency maps above, can you understand why the model misclassified these images?
- You might also look at the saliency map of some of the *correctly* classified images. Do these classification results look reliable?