# CAM and Object Detection

## Authors: 
Sat Arora \
Richard Fan

### Project Goal:
"CAM and object detection". First, you should implement some standard method for CAM for some (simple) classification network trained on image-level tags. You should also obtain object detection (spatial localization of the object's approximate "center"). You should apply your approach to one specific object type (e.g. faces, or anything else). Training should be done on image-level tags (e.g. face, no face). You can come up with your specialized dataset, but feel free to use subsets of standard data. You can also test the ideas on real datasets where label noise is present.

In [1]:
import torch
import torchvision.models as models
import cv2
import numpy as np
from torchvision import transforms
import torch.nn as nn


In [None]:
# Pre-trained model used: ResNet 18
# NOTE: `models.resnet` is the torchvision submodule, not a constructor;
# the ResNet-18 factory function is `models.resnet18`.
model = models.resnet18(pretrained=True)
# Name of the last convolutional stage, whose output feature maps are the
# ones a class activation map is built from.
final_convolution_layer = 'layer4'
# Inference mode: freezes batch-norm statistics and disables dropout.
model.eval()




In [None]:
# cv2.imread does not raise on a missing/unreadable file; it returns None.
img = cv2.imread('river_hand.jpeg')

# Fail fast with a clear message instead of letting cv2.cvtColor crash on
# a None input with an opaque OpenCV assertion error.
if img is None:
    raise FileNotFoundError("Unable to load the image. Please check the file path.")
print("Image loaded successfully!")

# OpenCV decodes images as BGR; convert to RGB for the rest of the pipeline.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
import torch
import torchvision.models as models
import cv2
import numpy as np
from torchvision import transforms
import torch.nn as nn

# Load pre-trained model
full_model = models.resnet50(pretrained=True)
full_model.eval()

# Keep a handle on the classification head: CAM weights every final feature
# map by the fully connected weight that links it to the target class.
classifier = full_model.fc

# Remove the last two children (global pooling and the fully connected layer
# in torchvision's ResNet) so `model` outputs spatial feature maps.
model = nn.Sequential(*list(full_model.children())[:-2])
model.eval()

# Load and preprocess the image
img_bgr = cv2.imread('image_1.jpg')

# cv2.imread returns None instead of raising; stop here with a clear error
# rather than crashing later inside cv2.cvtColor.
if img_bgr is None:
    raise FileNotFoundError("Unable to load the image. Please check the file path.")
print("Image loaded successfully!")

# OpenCV decodes to BGR. The network input is built from the RGB copy, while
# the BGR original is kept for blending/display with OpenCV.
img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_img = preprocess(img).unsqueeze(0)

# Forward pass to get feature maps, then finish the classification pass
# (pooling + fully connected layer) to find the predicted class.
with torch.no_grad():
    feature_maps = model(input_img)
    pooled = torch.flatten(full_model.avgpool(feature_maps), 1)
    logits = classifier(pooled)

class_idx = int(logits.argmax(dim=1).item())
print(f"Predicted class index: {class_idx}")

# Compute the class activation map (CAM):
#   cam[y, x] = sum_k  w[class_idx, k] * feature_maps[k, y, x]
# where w is the weight matrix of the fully connected layer.
class_weights = classifier.weight[class_idx].detach().cpu().numpy()
activations = feature_maps.squeeze(0).cpu().numpy()
cam = np.tensordot(class_weights, activations, axes=1).astype(np.float32)

cam = np.maximum(cam, 0)  # ReLU activation
cam = cv2.resize(cam, (img.shape[1], img.shape[0]))
cam = cam - np.min(cam)
cam_peak = np.max(cam)
if cam_peak > 0:  # an all-zero map would otherwise divide by zero
    cam = cam / cam_peak

# Apply heatmap on the original image. applyColorMap returns BGR, so it is
# blended with the BGR image to keep the colour channels consistent.
heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
print(heatmap.shape)
superimposed_img = heatmap * 0.4 + img_bgr.astype('float32') * 0.6
superimposed_img = superimposed_img / superimposed_img.max()

# Display the original image and the image with the heatmap
# (cv2.imshow interprets arrays as BGR).
cv2.imshow('Original Image', img_bgr)
cv2.imshow('CAM', np.uint8(255 * superimposed_img))
cv2.waitKey(0)
cv2.destroyAllWindows()
