<a href="https://colab.research.google.com/github/phaninandula/ERA-Session11/blob/main/Grad_CAM_Understanding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# GRAD-CAM STEPS
Steps of Grad-CAM
1. Capture the output of the last convolution layer of the network.
2. Take the gradient of the prediction probability with respect to the output of the last convolution layer. (We can use the prediction for any class we want. In our case, we'll take the prediction with the highest probability. We can look at other probabilities as well.)
3. Average the gradients calculated in the previous step over the spatial (height and width) axes, keeping the axis that corresponds to the output channels of the last convolution layer. The output of this step will be a 1D array whose length equals the number of output channels of the last convolution layer.
4. Multiply convolution layer output with averaged gradients from the previous step at output channel level, i.e. first channel output should be multiplied with first averaged value, second should be multiplied with the second value, and so on.
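5. Average the output from the previous step across the channel axis to create a 2D heatmap that has the same spatial dimensions as the image (in this notebook every convolution uses `padding='same'`, so the feature maps stay 28x28 like the input).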
6. Normalize heatmap (Optional step but recommended as it helps improve results).

In [1]:
import torch

# Show which PyTorch build this notebook is running against.
print(f"PyTorch Version : {torch.__version__}")

PyTorch Version : 2.0.1+cu118


In [2]:
from tensorflow import keras
from sklearn.model_selection import train_test_split

# Fetch Fashion-MNIST through Keras; the four names are rebound to torch tensors below.
(X_train, Y_train), (X_test, Y_test) = keras.datasets.fashion_mnist.load_data()

# Images: float32 tensors reshaped to (N, 1, 28, 28), then divided by 255.0
# (presumably to bring 8-bit pixel intensities into [0, 1]).
X_train = torch.tensor(X_train, dtype=torch.float32).reshape(-1, 1, 28, 28) / 255.0
X_test = torch.tensor(X_test, dtype=torch.float32).reshape(-1, 1, 28, 28) / 255.0

# Labels: torch.long tensors, the dtype nn.CrossEntropyLoss expects for class indices.
Y_train = torch.tensor(Y_train, dtype=torch.long)
Y_test = torch.tensor(Y_test, dtype=torch.long)

# Pair each distinct label id found in the training set (unique() returns them sorted)
# with the human-readable name at the same position.
class_labels = ["T-shirt/top","Trouser","Pullover","Dress","Coat","Sandal","Shirt","Sneaker","Bag","Ankle boot"]
classes = Y_train.unique()
mapping = dict(zip(classes.numpy(), class_labels))

X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


(torch.Size([60000, 1, 28, 28]),
 torch.Size([10000, 1, 28, 28]),
 torch.Size([60000]),
 torch.Size([10000]))

## Data Loader

In [3]:
from torch.utils.data import TensorDataset, DataLoader

BATCH_SIZE = 64  # samples per mini-batch, shared by the train and test loaders

# Wrap the image/label tensors so each dataset item is an (image, label) pair.
train_dataset = TensorDataset(X_train, Y_train)
test_dataset = TensorDataset(X_test, Y_test)

# Shuffle only the training data. Evaluation metrics do not depend on sample order,
# so the test loader iterates in a fixed order, which keeps evaluation deterministic.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

## CNN Model

In [4]:
from torch import nn
import torch.nn as nn

class ConvNetModel(nn.Module):
  """Small CNN classifier: three 3x3 conv + ReLU stages followed by one linear layer.

  Channel widths go 1 -> 48 -> 32 -> 16. Every convolution uses 'same' padding and
  no bias, so the spatial size is unchanged. The final Linear layer is sized for
  16 feature maps of 28x28 and emits 10 raw scores per sample.
  """

  def __init__(self):
    super().__init__()
    # Settings shared by all three convolutions.
    conv_kwargs = dict(kernel_size=(3, 3), padding='same', bias=False)
    layers = [
        nn.Conv2d(1, 48, **conv_kwargs),
        nn.ReLU(),
        nn.Conv2d(48, 32, **conv_kwargs),
        nn.ReLU(),
        nn.Conv2d(32, 16, **conv_kwargs),
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(16 * 28 * 28, 10),
    ]
    # A single Sequential named `seq` keeps every layer addressable by its index.
    self.seq = nn.Sequential(*layers)

  def forward(self, x_batch):
    """Run x_batch through all layers and return the resulting class scores."""
    return self.seq(x_batch)

# Instantiate an untrained model so the cell output shows its architecture.
# Note: the training cell further down builds its own instance named `conv_net`.
convnet = ConvNetModel()
convnet

ConvNetModel(
  (seq): Sequential(
    (0): Conv2d(1, 48, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
    (1): ReLU()
    (2): Conv2d(48, 32, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
    (3): ReLU()
    (4): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
    (5): ReLU()
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=12544, out_features=10, bias=True)
  )
)

## Train Network

In [5]:
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import gc

def CalcValLoss(model, loss_func, val_loader):
    """Print and return the mean per-batch loss of ``model`` over ``val_loader``.

    Args:
        model: callable mapping an input batch to predictions.
        loss_func: callable ``loss_func(preds, targets)`` returning a scalar tensor.
        val_loader: iterable yielding ``(X_batch, Y_batch)`` pairs.

    Returns:
        float: unweighted mean of the batch losses (``nan`` for an empty loader).
    """
    val_losses = []
    with torch.no_grad(): ## Prevents calculation of gradients
        for X_batch, Y_batch in val_loader:
            preds = model(X_batch)
            # .item() keeps a plain Python float rather than a tensor per batch.
            val_losses.append(loss_func(preds, Y_batch).item())

    mean_loss = sum(val_losses) / len(val_losses) if val_losses else float("nan")
    print("Valid CategoricalCrossEntropy : {:.3f}".format(mean_loss))
    return mean_loss

def MakePredictions(model, loader):
    """Collect targets and predicted class indices for every batch in ``loader``.

    Targets are gathered in the same order the loader yields them, so the two
    returned tensors stay aligned even when the loader shuffles.

    Args:
        model: callable mapping an input batch to per-class scores.
        loader: iterable yielding ``(X_batch, Y_batch)`` pairs.

    Returns:
        tuple: ``(targets, predicted_classes)``, both concatenated over all batches;
        predictions are the argmax over the last dimension of the model output.
    """
    preds, Y_shuffled = [], []
    # Inference only: without no_grad every batch output would keep its autograd
    # graph alive until the final concatenation.
    with torch.no_grad():
        for X_batch, Y_batch in loader:
            preds.append(model(X_batch))
            Y_shuffled.append(Y_batch)

    preds = torch.cat(preds).argmax(dim=-1)
    Y_shuffled = torch.cat(Y_shuffled)
    return Y_shuffled, preds

def TrainModelInBatchesV1(model, loss_func, optimizer, train_loader, val_loader, epochs=5):
    """Train ``model`` for ``epochs`` passes over ``train_loader``, reporting metrics per epoch.

    After each epoch this prints the mean training loss, the validation loss
    (via ``CalcValLoss``) and the validation accuracy (via ``MakePredictions``
    and ``accuracy_score``).

    Args:
        model: network to optimise; called as ``model(X_batch)``.
        loss_func: callable ``loss_func(preds, targets)`` returning a scalar tensor.
        optimizer: optimizer already bound to the model's parameters.
        train_loader: iterable of ``(X_batch, Y_batch)`` training batches.
        val_loader: iterable of ``(X_batch, Y_batch)`` validation batches.
        epochs: number of full passes over ``train_loader``.
    """
    for _ in range(epochs):
        losses = [] ## Record loss of each batch
        for X_batch, Y_batch in tqdm(train_loader):
            preds = model(X_batch) ## Make Predictions by forward pass through network

            loss = loss_func(preds, Y_batch) ## Calculate Loss
            # Store a plain float; keeping the tensor would keep a reference to its
            # autograd graph for the rest of the epoch.
            losses.append(loss.item()) ## Record Loss

            optimizer.zero_grad() ## Clear gradients left over from the previous batch
            loss.backward() ## Calculate Gradients
            optimizer.step() ## Update Weights

        print("Train CategoricalCrossEntropy : {:.3f}".format(torch.tensor(losses).mean()))
        CalcValLoss(model, loss_func, val_loader)

        Y_test_shuffled, test_preds = MakePredictions(model, val_loader)
        val_acc = accuracy_score(Y_test_shuffled, test_preds)
        print("Val  Accuracy : {:.3f}".format(val_acc))

In [None]:
from torch.optim import SGD, RMSprop, Adam

#torch.manual_seed(42) ##For reproducibility.This will make sure that same random weights are initialized each time.
epochs = 3
# Plain Python float: optimizers take a float learning rate, and a tensor lr is not
# supported by every Adam implementation.
learning_rate = 1e-3 # 0.001

# Fresh model, loss and optimizer for this training run.
conv_net = ConvNetModel()
cross_entropy_loss = nn.CrossEntropyLoss()
optimizer = Adam(params=conv_net.parameters(), lr=learning_rate)

# The test split doubles as the validation set for the per-epoch report.
TrainModelInBatchesV1(conv_net, cross_entropy_loss, optimizer, train_loader, test_loader, epochs)

 62%|██████▏   | 577/938 [02:06<26:35,  4.42s/it]

## Grad-CAM implementation

## Step1: Capture Output Of Last Convolution Layer

In [None]:
# Direct sub-modules of the trained model. ConvNetModel registers a single one:
# the nn.Sequential stored as `seq`.
list(conv_net.children())

In [None]:
# First child of conv_net: the nn.Sequential `seq` that holds the individual layers.
list(conv_net.children())[0]

In [None]:
class Conv_layer_Selection(nn.Module):
  """Replays a trained network layer by layer and remembers one intermediate output.

  The layers are taken from the first child of the wrapped model (for ConvNetModel
  that is the nn.Sequential stored as `seq`). They are the same module objects as in
  the wrapped model, so they share its trained weights.

  Attributes:
    layers: flat list of the wrapped model's layers, in execution order.
    conv_layer_output: output of the layer selected in the most recent forward
      call. It is still attached to the autograd graph, so gradients with respect
      to it can be requested afterwards. None before the first forward call.
  """
  def __init__(self, model=None):
    """Wrap `model`, or the notebook-level `conv_net` when `model` is None."""
    super(Conv_layer_Selection,self).__init__()
    source_model = conv_net if model is None else model
    self.layers = list(list(source_model.children())[0].children())
    self.conv_layer_output = None

  def forward(self,layer_num,X_batch):
    """Run X_batch through every layer and capture the output of layer `layer_num`.

    Args:
      layer_num: index into `self.layers` of the layer whose output is stored.
      X_batch: input batch accepted by the first layer.

    Returns:
      The output of the final layer (the network's prediction scores).

    Raises:
      IndexError: if `layer_num` is not a valid index into `self.layers`.
    """
    if not 0 <= layer_num < len(self.layers):
      raise IndexError("layer_num must be in [0, {}), got {}".format(len(self.layers), layer_num))

    # Drop any capture left over from a previous call.
    self.conv_layer_output = None

    # Start from the raw input: the loop below applies every layer, including the
    # first, exactly once.
    x = X_batch
    for i,layer in enumerate(self.layers):
      x = layer(x)
      if i == layer_num:
        # Remember this activation, then keep going so the caller still receives
        # the final prediction.
        self.conv_layer_output = x
    return x

## Passing one random image through the trained network and capturing the output of the last convolution layer

In [None]:
import numpy as np

# Index of the last Conv2d (32 -> 16 channels) inside conv_net.seq.
LAST_CONV_LAYER_IDX = 4

conv_model = Conv_layer_Selection()

# Pick one random test image; the slice keeps the batch dimension.
idx = np.random.randint(len(X_test))
pred = conv_model(layer_num=LAST_CONV_LAYER_IDX, X_batch=X_test[idx:idx+1])

# Predicted class index and its softmax probability.
probs = torch.softmax(pred, dim=-1)
probs.argmax(), probs.max()

In [None]:
# When the captured layer is the last Conv2d (16 output channels, 'same' padding) and the
# input is a single 1x28x28 image, the expected shape is torch.Size([1, 16, 28, 28]).
conv_model.conv_layer_output.shape # Should have 16 filters at the end