In [1]:
!pip install gradio --quiet
!pip install grad-cam --quiet
!pip install torch-lr-finder --quiet

Collecting grad-cam
  Using cached grad-cam-1.5.4.tar.gz (7.8 MB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting torch>=1.7.1 (from grad-cam)
  Using cached torch-2.5.1-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision>=0.8.2 (from grad-cam)
  Using cached torchvision-0.20.1-cp311-cp311-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting ttach (from grad-cam)
  Using cached ttach-0.0.3-py3-none-any.whl.metadata (5.2 kB)
Collecting opencv-python (from grad-cam)
  Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting matplotlib (from grad-cam)
  Using cached matplotlib-3.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting scikit-learn (from grad-cam)
  Using cached scikit_learn-1.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x

In [17]:
# Third-Party Imports
import numpy as np
import gradio as gr
import torch
from torchvision import transforms
import torchvision.models as models
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image

In [5]:
# Local Imports
from model.resnets import ResNet50
# from utils.visualize import FeatureMapVisualizer

In [12]:
example_directory = './assets/examples/'
dir = '/'
model_path = '/assets/models/resnet50_v2_imagenet.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [42]:
# Get ImageNet class names from ResNet50 weights
classes = models.ResNet50_Weights.IMAGENET1K_V2.meta["categories"]

# model = ResNet50(num_classes=1000)
# state_dict = torch.load(model_path, map_location=torch.device('cpu'))
# model.load_state_dict(state_dict, strict=False)
# model.eval()
# model.to(device)

# Load the pretrained ResNet-50 v2 model
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
model.eval()
model.to(device)


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [43]:
print(classes)

['tench', 'goldfish', 'great white shark', 'tiger shark', 'hammerhead', 'electric ray', 'stingray', 'cock', 'hen', 'ostrich', 'brambling', 'goldfinch', 'house finch', 'junco', 'indigo bunting', 'robin', 'bulbul', 'jay', 'magpie', 'chickadee', 'water ouzel', 'kite', 'bald eagle', 'vulture', 'great grey owl', 'European fire salamander', 'common newt', 'eft', 'spotted salamander', 'axolotl', 'bullfrog', 'tree frog', 'tailed frog', 'loggerhead', 'leatherback turtle', 'mud turtle', 'terrapin', 'box turtle', 'banded gecko', 'common iguana', 'American chameleon', 'whiptail', 'agama', 'frilled lizard', 'alligator lizard', 'Gila monster', 'green lizard', 'African chameleon', 'Komodo dragon', 'African crocodile', 'American alligator', 'triceratops', 'thunder snake', 'ringneck snake', 'hognose snake', 'green snake', 'king snake', 'garter snake', 'water snake', 'vine snake', 'night snake', 'boa constrictor', 'rock python', 'Indian cobra', 'green mamba', 'sea snake', 'horned viper', 'diamondback', 

## GradCam

In [56]:
from collections import OrderedDict
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget


def inference(input_img,
              transparency=0.5,
              number_of_top_classes=3,
              target_layer_number=4):
    """
    Function to run inference on the input image
    :param input_img: Image provided by the user
    :parma transparency: Percentage of cam overlap over the input image
    :param number_of_top_classes: Number of top predictions for the input image
    :param target_layer_number: Layer for which GradCam to be shown
    """
    # Save a copy of input img
    org_img = input_img.copy()

    # Calculate mean over each channel of input image
    mean_r, mean_g, mean_b = np.mean(input_img[:, :, 0]/255.), np.mean(input_img[:, :, 1]/255.), np.mean(input_img[:, :, 2]/255.)

    # Calculate Standard deviation over each channel
    std_r, std_g, std_b = np.std(input_img[:, :, 0]/255.), np.std(input_img[:, :, 1]/255.), np.std(input_img[:, :, 2]/255.)
    
    # Convert img to tensor and normalize it
    _transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((mean_r, mean_g, mean_b), (std_r, std_g, std_b))
        ])

    # Preprocess the input image
    input_tensor = _transform(input_img)
    
    # Create a mini-batch as expected by the model
    input_tensor = input_tensor.unsqueeze(0)
    
    # Move the input and model to GPU if available
    input_tensor = input_tensor.to(device)
    model.to(device)

    # Get Model Predictions
    with torch.no_grad():
        outputs = model(input_tensor)
        probabilities = torch.softmax(outputs, dim=1)[0]
        confidences = {classes[i]: float(probabilities[i]) for i in range(1000)}

    # Select the top classes based on user input
    sorted_confidences = sorted(confidences.items(), key=lambda val: val[1], reverse=True)
    show_confidences = OrderedDict(sorted_confidences[:number_of_top_classes])

    # Map layer numbers to meaningful parts of the ResNet architecture
    _layers = {
        1: model.conv1,          # Initial convolution layer
        2: model.layer1[-1],     # Last bottleneck of first residual block
        3: model.layer2[-1],     # Last bottleneck of second residual block
        4: model.layer3[-1],     # Last bottleneck of third residual block
        5: model.layer4[-1],     # Last bottleneck of fourth residual block
        6: model.layer4[-1]      # Changed from fc to last conv layer for better visualization
    }
    
    # Ensure valid layer selection
    target_layer_number = min(max(target_layer_number, 1), 6)
    target_layers = [_layers[target_layer_number]]

    # Get the class activations from the selected layer
    cam = GradCAM(model=model, target_layers=target_layers)
    
    # Get the most probable class index
    top_class = max(confidences.items(), key=lambda x: x[1])[0]
    class_idx = classes.index(top_class)
    
    # Generate GradCAM for the top predicted class
    grayscale_cam = cam(input_tensor=input_tensor, 
                       targets=[ClassifierOutputTarget(class_idx)],
                       aug_smooth=True,
                       eigen_smooth=True)
    grayscale_cam = grayscale_cam[0, :]

    # Overlay input image with Class activations
    visualization = show_cam_on_image(org_img/255., grayscale_cam, use_rgb=True, image_weight=transparency)
    return show_confidences, visualization

## Feature Maps and Kernels

In [57]:
# Create an object of the Class
viz = FeatureMapVisualizer(model)

def feature_maps(input_img, kernel_number=32):
    """
    Function to return feature maps for the selected image
    :param kernel_number: Number of kernel in all 6 layers
    """
    # Calculate mean over each channel of input image
    mean_r, mean_g, mean_b = np.mean(input_img[:, :, 0]/255.), np.mean(input_img[:, :, 1]/255.), np.mean(input_img[:, :, 2]/255.)

    # Calculate Standard deviation over each channel
    std_r, std_g, std_b = np.std(input_img[:, :, 0]/255.), np.std(input_img[:, :, 1]/255.), np.std(input_img[:, :, 2]/255.)

    # Convert img to tensor and normalize it
    _transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((mean_r, mean_g, mean_b), (std_r, std_g, std_b))
        ])

    # Apply transforms on the input image
    input_img = _transform(input_img)

    # Visualize feature maps for kernel number 32
    plt = viz.visualize_feature_map_of_kernel(image=input_img, kernel_number=kernel_number)
    return plt

def get_kernels(layer_number):
    """
    Function to get the kernels from the layer
    :param layer_number: Number of layer from which kernels to be visualized
    """
    # Visualize kernels from layer
    plt = viz.visualize_kernels_from_layer(layer_number=layer_number)
    return plt

NameError: name 'FeatureMapVisualizer' is not defined

## App

In [59]:
# with gr.Blocks() as demo:
#     gr.Markdown(
#         """
#         # ImageNet-1K trained on ResNet50v2
#         """
#     )

#     # #############################################################################
#     # ################################ GradCam Tab ################################
#     # #############################################################################
#     with gr.Tab("GradCam"):
#         gr.Markdown(
#             """
#             Visualize Class Activations Maps generated by the model's layer for the predicted class.
#             This is used to see what the model is actually looking at in the image.
#             """
#         )
#         with gr.Row():
#             # Update the image input dimensions
#             img_input = [gr.Image(label="Input Image", type="numpy", height=224)]  # Changed dimensions
#             gradcam_outputs = [
#                 gr.Label(label="Predictions"),
#                 gr.Image(label="GradCAM Output", height=224)  # Match input image height
#             ]

#         with gr.Row():
#             gradcam_inputs = [
#                 gr.Slider(0, 1, value=0.5, label="Activation Map Transparency"),
#                 gr.Slider(1, 10, value=3, step=1, label="Number of Top Predictions"),
#                 gr.Slider(1, 6, value=4, step=1, label="Target Layer Number")
#             ]

#         gradcam_button = gr.Button("Generate GradCAM")
#         gradcam_button.click(inference, inputs=img_input + gradcam_inputs, outputs=gradcam_outputs)

#         gr.Markdown("## Examples")
#         gr.Examples(
#         [
#             "./assets/examples/dog.jpg",  # Update paths to be relative
#             "./assets/examples/cat.jpg",
#             "./assets/examples/frog.jpg",
#             "./assets/examples/bird.jpg",
#             "./assets/examples/shark-plane.jpg",
#             "./assets/examples/car.jpg",
#             "./assets/examples/truck.jpg",
#             "./assets/examples/horse.jpg",
#             "./assets/examples/plane.jpg",
#             "./assets/examples/ship.png"
#         ],
#         inputs=img_input,
#         fn=inference
#     )

#     # ################################################################################################
#     # ################################ Feature Maps Visualization Tab ################################
#     # ################################################################################################
#     # with gr.Tab("Feature Map Visualization"):
#     #     gr.Markdown(
#     #         """
#     #         The model has 6 convolutional blocks. Each block has two or three convolutional layers
#     #         From each block's first convolutional layer, output of specific kernel number is visualized
#     #         In the below images `l1` represents first block and `kx` represents the number of kerenel from the first convolutional layer of that block
#     #         """
#     #     )
#     #     with gr.Column():
#     #         feature_map_input = [gr.Image(shape=(32, 32), label="Feature Map Input Image"),
#     #                                 gr.Slider(1, 32, value=16, step=1,
#     #                                         label="Select a Kernel number whose Features Maps from all 6 block's to be shown")]
#     #         map = gr.Plot()
#     #         feature_map_button = gr.Button("Visualize FeatureMaps")
#     #     feature_map_button.click(feature_maps, inputs=feature_map_input, outputs=map)

#     # # ##########################################################################################
#     # # ################################ Kernel Visualization Tab ################################
#     # # ##########################################################################################
#     # with gr.Tab("Kernel Visualization"):
#     #     gr.Markdown(
#     #         """
#     #         The model has 6 convolutional blocks. Each block has two or three convolutional layers
#     #         Some of the Kernels from the first convolutional layer of selected block number are visualized below
#     #         """
#     #     )
#     #     with gr.Column():
#     #         kernel_input = [
#     #             gr.Slider(1, 4, value=2, step=1, label="Select a block number whose first convolutional layer's Kernels to be shown")]
#     #         map = gr.Plot()
#     #         kernel_button = gr.Button("Visualize Kernels")

#     #     kernel_button.click(get_kernels, inputs=kernel_input, outputs=map)

# gr.close_all()
# demo.launch(debug=True)

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.
