In [496]:
import cv2
import torch
import torch.nn as nn
from torchvision import models
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.autograd import Variable
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [497]:
img_transform = transforms.Compose([transforms.Resize((224, 224)),transforms.ToTensor(),
                                    transforms.Normalize((0.5,), (0.5,))])

In [498]:
IMAGE_PATH = "../datasets/UCMerced_LandUse/Images"
BATCH_SIZE = 32

In [499]:
dataset = datasets.ImageFolder(root=IMAGE_PATH, 
                               transform=img_transform,
                               target_transform=None)

len_dataset = len(dataset)

class_to_idx = dataset.class_to_idx
idx_to_class = {v: k for k, v in class_to_idx.items()}
class_to_idx, idx_to_class
num_classes = len(class_to_idx)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

In [500]:
class CNN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU())
        self.layer2 = nn.Sequential(
            # nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            # nn.BatchNorm2d(64),
            # nn.ReLU(), 
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            # nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            # nn.BatchNorm2d(128),
            # nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer5 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.layer6 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.layer7 = nn.Sequential(
            # nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            # nn.BatchNorm2d(256),
            # nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer8 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer9 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer10 = nn.Sequential(
            # nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            # nn.BatchNorm2d(512),
            # nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer11 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer12 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer13 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(7*7*512, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        out = self.layer8(out)
        out = self.layer9(out)
        out = self.layer10(out)
        out = self.layer11(out)
        out = self.layer12(out)
        out = self.layer13(out)
        print(out.shape)
        out = out.reshape(out.size(0), -1)
        print(out.shape)
        out = self.fc(out)
        print(out.shape)
        out = self.fc1(out)
        print(out.shape)
        out = self.fc2(out)
        print(out.shape)
        return out

In [501]:
# from cnn import CNN
model = CNN(num_classes=num_classes)
model.load_state_dict(torch.load('output_cnn/models/model_20250309_153723_9.pth'))

<All keys matched successfully>

In [502]:
model.eval()
with torch.inference_mode():
    out = model(torch.randn(32, 3, 224, 224))
    print(out)

torch.Size([32, 512, 7, 7])
torch.Size([32, 25088])
torch.Size([32, 4096])
torch.Size([32, 4096])
torch.Size([32, 21])
tensor([[-7.7499e+00,  9.8024e+00, -9.9454e+00, -1.0742e+01,  1.4080e+01,
         -1.8480e+01, -6.2377e+00, -1.4196e+01, -9.1115e+00, -1.3843e+01,
          1.8074e+01, -1.7902e+00, -2.5353e+00,  2.4369e+01,  2.9831e+00,
          3.1216e+00, -1.3741e+01, -9.3245e+00, -1.1815e+01,  1.3154e+01,
          1.6855e-01],
        [-7.8158e+00,  9.5003e+00, -9.9902e+00, -1.0983e+01,  1.4282e+01,
         -1.8640e+01, -6.0242e+00, -1.4356e+01, -9.3774e+00, -1.4100e+01,
          1.8679e+01, -1.6097e+00, -2.2638e+00,  2.5084e+01,  2.4221e+00,
          3.1875e+00, -1.3916e+01, -9.6320e+00, -1.2253e+01,  1.3199e+01,
          3.0897e-01],
        [-7.7135e+00,  9.5603e+00, -9.9049e+00, -1.0722e+01,  1.3939e+01,
         -1.8371e+01, -6.0899e+00, -1.4136e+01, -9.1754e+00, -1.3824e+01,
          1.8072e+01, -1.6492e+00, -2.3941e+00,  2.4634e+01,  2.8131e+00,
          3.0298e+00,

In [503]:
mod = nn.Sequential(*list(model.children())[:-3])


In [504]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net,self).__init__() 
        #img = images
        self.fc=nn.Linear(512,num_classes)

    
    def forward(self,x):   
        # print(x.shape)  
        x=x.view(-1, 512, 7*7)
        # print(x.shape)
        x = x.mean(2)
        # print(x.shape)
        x=self.fc(x)
        # print(x.shape)
        return  F.softmax(x,dim=1)

In [505]:
model=nn.Sequential(mod,Net(num_classes=num_classes))

In [506]:
trainable_parameters = []
for name, p in model.named_parameters():
    if "fc" in name:
        trainable_parameters.append(p)
optimizer = torch.optim.SGD(params=trainable_parameters, lr=0.1, momentum=1e-5)  
criterion = nn.CrossEntropyLoss()

In [507]:
num_epochs = 10

In [508]:
total_step = len(dataloader)
loss_list = []
acc_list = []

for epoch in range(num_epochs):
    for i, data in enumerate(dataloader):
        images, labels = data
        # Run the forward pass
        # if images.shape[0] != BATCH_SIZE:
        #     continue
        outputs = model(images)
        # print(outputs.shape)
        loss = criterion(outputs, labels)
        loss_list.append(loss.item())

        # Backprop 
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track the accuracy
        total = labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        correct = (predicted == labels).sum().item()
        acc_list.append(correct / total)

        if (i + 1) % 10 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item(),
                          (correct / total) * 10))

Epoch [1/10], Step [10/66], Loss: 2.9874, Accuracy: 2.81%
Epoch [1/10], Step [20/66], Loss: 2.8929, Accuracy: 2.50%
Epoch [1/10], Step [30/66], Loss: 3.0568, Accuracy: 0.31%
Epoch [1/10], Step [40/66], Loss: 2.8964, Accuracy: 2.50%
Epoch [1/10], Step [50/66], Loss: 2.7709, Accuracy: 4.06%
Epoch [1/10], Step [60/66], Loss: 2.8789, Accuracy: 2.81%
Epoch [2/10], Step [10/66], Loss: 2.9277, Accuracy: 1.88%
Epoch [2/10], Step [20/66], Loss: 2.9378, Accuracy: 1.56%
Epoch [2/10], Step [30/66], Loss: 2.7754, Accuracy: 3.75%
Epoch [2/10], Step [40/66], Loss: 2.9084, Accuracy: 2.19%
Epoch [2/10], Step [50/66], Loss: 2.5622, Accuracy: 5.62%
Epoch [2/10], Step [60/66], Loss: 2.8019, Accuracy: 3.44%
Epoch [3/10], Step [10/66], Loss: 2.7082, Accuracy: 4.38%
Epoch [3/10], Step [20/66], Loss: 2.8606, Accuracy: 2.50%
Epoch [3/10], Step [30/66], Loss: 2.8735, Accuracy: 2.50%


KeyboardInterrupt: 

In [509]:
torch.save(model.state_dict(), 'cam.pth')

In [510]:
params = list(Net(num_classes=num_classes).parameters())
weight = np.squeeze(params[-1].data.numpy())

In [511]:
def return_CAM(feature_conv, weight, class_idx):
    # generate the class -activation maps upsample to 256x256
    size_upsample = (256, 256)
    bz, nc, h, w = feature_conv.shape
    output_cam = []
    for idx in class_idx:
        beforeDot =  feature_conv.reshape((nc, h*w))
        print("beforeDot", beforeDot.shape)
        cam = np.matmul(weight[idx], beforeDot)
        print("cam", cam.shape)
        cam = cam.reshape(h, w)
        print("cam2", cam.shape)
        print(cam)
        min = torch.min(cam)
        cam = cam - 1
        print("cam3", cam.shape)
        cam_img = cam / torch.max(cam)
        print("cam_img", cam_img.shape)
        cam_img = (255 * cam_img).to(torch.uint8)  
        print("cam_img2", cam_img.shape)
        print(cam_img)
        cam_img = cam_img.cpu().numpy()  # Convert to NumPy
        cam_img = cv2.resize(cam_img, size_upsample)  # Resize
        output_cam.append(cam_img)
    return output_cam

In [512]:
normalize = transforms.Normalize(
   mean=[0.485, 0.456, 0.406],
   std=[0.229, 0.224, 0.225]
)
preprocess = transforms.Compose([
   transforms.Resize((224,224)),
   transforms.ToTensor(),
   normalize
])

In [517]:
import glob

IMG_URL = glob.glob("../datasets/UCMerced_LandUse/Images/**/*.tif", recursive=True)


In [518]:
IMG_URL

['../datasets/UCMerced_LandUse/Images/forest/forest11.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest05.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest39.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest38.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest04.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest10.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest06.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest12.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest13.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest07.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest03.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest17.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest16.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest02.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest28.tif',
 '../datasets/UCMerced_LandUse/Images/forest/forest14.tif',
 '../datasets/UCMerced_LandUse/Images/fo

In [519]:
predicted_labels = []

In [520]:
from torchsummary import summary
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
       BatchNorm2d-2         [-1, 64, 224, 224]             128
              ReLU-3         [-1, 64, 224, 224]               0
         MaxPool2d-4         [-1, 64, 112, 112]               0
            Conv2d-5        [-1, 128, 112, 112]          73,856
       BatchNorm2d-6        [-1, 128, 112, 112]             256
              ReLU-7        [-1, 128, 112, 112]               0
         MaxPool2d-8          [-1, 128, 56, 56]               0
            Conv2d-9          [-1, 256, 56, 56]         295,168
      BatchNorm2d-10          [-1, 256, 56, 56]             512
             ReLU-11          [-1, 256, 56, 56]               0
           Conv2d-12          [-1, 256, 56, 56]         590,080
      BatchNorm2d-13          [-1, 256, 56, 56]             512
             ReLU-14          [-1, 256,

In [524]:
from pathlib import Path
import os
save_path = Path("output_cam/")
os.makedirs(save_path, exist_ok=True)


In [528]:
import torch.nn.functional as F
import numpy as np
import cv2
from PIL import Image

predicted_labels = []

for i, fname in enumerate(IMG_URL):
    img_pil = Image.open(fname)
    img_tensor = img_transform(img_pil)
    print(img_tensor.shape)
    img_variable = img_tensor.unsqueeze(0)  # Add batch dimension
    print(img_variable.shape)
    
    logit = model(img_variable)
    h_x = F.softmax(logit, dim=1).data.squeeze()

    probs, idx = h_x.sort(0, True)
    probs = probs.cpu().detach().numpy()
    idx = idx.cpu().numpy()
    print(idx)
    print(idx.shape)

    predicted_labels.append(idx[0])
    predicted = dataloader.dataset.classes[idx[0]]
    
    print(f"Target: {fname} | Predicted: {predicted}")

    # Feature extraction and CAM generation
    features_blobs = mod(img_variable)
    print(features_blobs.shape)
    features_blobs = features_blobs  # Apply Global Average Pooling (GAP)
    print(features_blobs.shape)
    features_blobs1 = features_blobs.cpu().detach().numpy()
    print(features_blobs1.shape)
    print(weight.shape)
    # Correct way to access the fully connected layer
    last_layer = list(model.modules())[-1]  # Works for nested Sequential models
    if isinstance(last_layer, nn.Linear):
        weight = last_layer.weight.data
        print(weight.shape)
    
    CAMs = return_CAM(features_blobs1, weight, [idx[0]])

    # Load original image
    img = cv2.imread(fname)
    height, width, _ = img.shape

    # Apply heatmap
    heatmap = cv2.applyColorMap(cv2.resize(CAMs[0], (width, height)), cv2.COLORMAP_JET)
    result = heatmap * 0.5 + img * 0.5

    relative_path = Path(fname).relative_to("../datasets/UCMerced_LandUse/Images").with_suffix(".png")
    path = save_path / relative_path  # Construct new save path
    os.makedirs(path.parent, exist_ok=True)
    cv2.imwrite(str(path), result)


torch.Size([3, 224, 224])
torch.Size([1, 3, 224, 224])
[ 0  7 16  3  9 18  5  8 17 11 20 13  2 19 14 12 15  1  6 10  4]
(21,)
Target: ../datasets/UCMerced_LandUse/Images/forest/forest11.tif | Predicted: agricultural
torch.Size([1, 512, 7, 7])
torch.Size([1, 512, 7, 7])
(1, 512, 7, 7)
torch.Size([21, 512])
torch.Size([21, 512])
beforeDot (512, 49)
cam torch.Size([49])
cam2 torch.Size([7, 7])
tensor([[4.6179, 5.3135, 5.4558, 5.5335, 5.2692, 4.7758, 4.2874],
        [4.6983, 5.4250, 5.8176, 5.9768, 5.6731, 4.9110, 4.4570],
        [4.4896, 5.0121, 5.3995, 5.5338, 5.4373, 4.9998, 4.6816],
        [4.6793, 5.2287, 5.2869, 5.1230, 5.1964, 5.1371, 4.7991],
        [4.8996, 5.5182, 5.4684, 5.1012, 4.9731, 4.5473, 3.8820],
        [4.6474, 5.2443, 5.3725, 5.0810, 4.9112, 4.8099, 4.2946],
        [3.9471, 4.3829, 4.6117, 4.4843, 4.6177, 4.6735, 4.1741]])
cam3 torch.Size([7, 7])
cam_img torch.Size([7, 7])
cam_img2 torch.Size([7, 7])
tensor([[185, 221, 228, 232, 218, 193, 168],
        [189, 226, 