In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import torch.optim as optim
import albumentations as A
from albumentations.pytorch import ToTensorV2
# Verbosity switch read by train_one_epoch: when True, per-batch losses and
# tensor shapes are printed in addition to the progress bar.
DEBUG = True


In [None]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions with batch normalisation.

    The main branch is conv -> BN -> ReLU -> conv -> BN. Its result is added
    to the skip branch and passed through a final ReLU. The skip branch is an
    identity unless the block changes the spatial stride or the channel count,
    in which case a strided 1x1 convolution followed by batch normalisation
    projects the input to the matching shape.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the block.
        stride: Stride of the first convolution (and of the projection).
    """

    # Channel multiplier; MyResNet reads it when sizing its linear head.
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Main branch: only the first convolution carries the stride.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip branch: an empty Sequential acts as the identity.
        projection = []
        if stride != 1 or in_channels != out_channels:
            projection = [
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        main = self.conv1(x)
        main = F.relu(self.bn1(main))
        main = self.bn2(self.conv2(main))
        return F.relu(main + self.shortcut(x))


In [None]:
class MyResNet(nn.Module):
    """ResNet-style image classifier assembled from residual blocks.

    Layout: 7x7 stride-2 convolution -> BN -> ReLU -> 3x3 stride-2 max pool,
    four residual stages with 64/128/256/512 base channels (stages 2-4 start
    with a stride-2 block), global average pooling and a linear head.

    Args:
        block: Residual block class. It is called as
            ``block(in_channels, out_channels, stride=...)`` and must expose
            an ``expansion`` class attribute.
        num_blocks: Sequence giving the number of blocks in each of the four
            stages.
        num_classes: Size of the output logit vector.
        in_channels: Number of channels of the input image.
    """

    # A tuple default avoids sharing one mutable list between all instances.
    def __init__(self, block=BasicBlock, num_blocks=(2, 2, 2, 2), num_classes=2, in_channels=1):
        super().__init__()
        # Channel count entering the next block; updated by _make_layer.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(
            in_channels, 64, kernel_size=7, stride=2,
            padding=3, bias=False
        )
        self.bn1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64,  num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride):
        """Build one stage: a first block with ``stride``, then stride-1 blocks.

        At least one block is always created, even when ``blocks`` is below 1.
        """
        layers = []
        for block_stride in [stride] + [1] * (blocks - 1):
            layers.append(block(self.in_planes, out_channels, stride=block_stride))
            # A block emits out_channels * expansion channels. Tracking that
            # keeps the following block (and the linear head, which is sized
            # with the same factor) consistent for blocks with expansion > 1.
            self.in_planes = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward_features(self, x):
        """Return the feature map produced by ``layer4`` (before pooling)."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.pool1(out)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        return out

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        out = self.forward_features(x)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        return self.fc(out)

def build_model():
    """Create the default network used by this notebook.

    Returns:
        A ``MyResNet`` built from ``BasicBlock`` with two blocks per stage,
        a two-class output and a single input channel.
    """
    config = {
        'block': BasicBlock,
        'num_blocks': [2, 2, 2, 2],
        'num_classes': 2,
        'in_channels': 1,
    }
    return MyResNet(**config)


In [None]:
# Training pipeline: resize to 224x224, light geometric and photometric
# augmentation, normalisation with mean 0.5 / std 0.25, then conversion to a
# tensor. Boxes are passed in 'pascal_voc' format and are transformed together
# with the image; their labels travel in the 'class_labels' field.
train_transform_alb = A.Compose(
    [
        A.Resize(224, 224),
        A.RandomRotate90(p=0.2),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.Normalize(mean=(0.5,), std=(0.25,)),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['class_labels'],
    ),
)

# Validation pipeline: the same resize and normalisation without any random
# augmentation.
val_transform_alb = A.Compose(
    [
        A.Resize(224, 224),
        A.Normalize(mean=(0.5,), std=(0.25,)),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(
        format='pascal_voc',
        label_fields=['class_labels'],
    ),
)


In [None]:

class PneumoniaDataset(Dataset):
    """Per-patient image dataset with a binary label and bounding boxes.

    The CSV at ``csv_path`` must contain the columns ``patientId``,
    ``Target``, ``x``, ``y``, ``width`` and ``height``. Rows are grouped by
    ``patientId`` (groups are visited in sorted key order). A patient gets
    label 1 when at least one of its rows has ``Target == 1``, and each such
    row contributes one box stored as ``[x_min, y_min, x_max, y_max]``.

    Args:
        csv_path: Path of the metadata CSV.
        img_dir: Directory holding one ``<patientId>.png`` per patient.
        transform: Optional callable invoked as
            ``transform(image=..., bboxes=..., class_labels=...)`` that
            returns a mapping with ``'image'`` and ``'bboxes'`` entries.
        is_train: Stored on the instance; not used by this class itself.
    """

    def __init__(self, csv_path, img_dir, transform=None, is_train=True):
        super().__init__()
        self.df = pd.read_csv(csv_path)
        self.img_dir = img_dir
        self.transform = transform
        self.is_train = is_train

        self.records = []
        for pid, group in self.df.groupby('patientId'):
            # Only rows flagged Target == 1 carry a box; select their
            # coordinates in one step instead of iterating every row.
            positive_rows = group.loc[group['Target'] == 1, ['x', 'y', 'width', 'height']]
            bboxes = [
                # Plain floats keep the records independent of numpy scalars.
                [float(x), float(y), float(x + w), float(y + h)]
                for x, y, w, h in positive_rows.values.tolist()
            ]
            self.records.append({
                'patientId': pid,
                'label': int(len(bboxes) > 0),
                'bboxes': bboxes
            })

    def __len__(self):
        """Return the number of patients."""
        return len(self.records)

    def __getitem__(self, idx):
        """Load one patient.

        Returns:
            ``(image, label, boxes)`` where ``image`` is whatever the
            transform produced (or a float tensor scaled to [0, 1] with the
            channel first when no transform is set), ``label`` is 0 or 1, and
            ``boxes`` is a list of ``(x_min, y_min, width, height)`` tuples in
            the coordinate system of the returned image.
        """
        record = self.records[idx]
        label = record['label']
        bboxes = record['bboxes']

        img_path = os.path.join(self.img_dir, str(record['patientId']) + '.png')
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the array.
        with Image.open(img_path) as image_pil:
            image_np = np.array(image_pil.convert('L'))

        # Add a trailing channel axis: (H, W) -> (H, W, 1).
        image_np = np.expand_dims(image_np, axis=-1)

        class_labels = [1] * len(bboxes)
        if self.transform:
            transformed = self.transform(
                image=image_np,
                bboxes=bboxes,
                class_labels=class_labels
            )
            aug_image = transformed['image']
            aug_bboxes = transformed['bboxes']
        else:
            aug_image = torch.tensor(image_np).permute(2, 0, 1).float() / 255.0
            aug_bboxes = bboxes

        # Convert corner format to (x_min, y_min, width, height). Only the
        # first four entries are read so boxes carrying extra trailing fields
        # are accepted as well.
        final_bboxes = []
        for box in aug_bboxes:
            x_min, y_min, x_max, y_max = box[:4]
            final_bboxes.append((x_min, y_min, x_max - x_min, y_max - y_min))

        return aug_image, label, final_bboxes


In [None]:
def compute_gradcam(model, input_tensor, class_idx=1):
    """Compute normalised Grad-CAM maps from ``model.layer4`` for one class.

    The model is run in eval mode on ``input_tensor``. The gradient of the
    summed ``class_idx`` logit with respect to the ``layer4`` output is
    averaged over the spatial axes to obtain one weight per channel. The
    weighted channel sum is passed through ReLU and divided by its per-sample
    maximum.

    The channel weights are constants, but the returned map stays connected
    to the ``layer4`` activations, so a loss built on it can be
    back-propagated into the network.

    Side effects are limited on purpose:
      * parameter ``.grad`` buffers are never touched,
      * the model's train/eval mode is restored to what it was on entry,
      * the temporary hook is removed even if the forward pass raises.

    Args:
        model: Module exposing a ``layer4`` sub-module and returning logits
            of shape ``(batch, num_classes)``.
        input_tensor: Batch fed to ``model``.
        class_idx: Index of the logit to explain.

    Returns:
        Tensor of shape ``(batch, 1, H, W)`` with values in ``[0, 1]``, where
        ``H`` and ``W`` are the spatial dimensions of the ``layer4`` output.
    """
    was_training = model.training
    captured = []

    def forward_hook(module, inputs, output):
        captured.append(output)

    handle = model.layer4.register_forward_hook(forward_hook)
    model.eval()
    try:
        # enable_grad makes the function usable from inside torch.no_grad().
        with torch.enable_grad():
            logits = model(input_tensor)
            features = captured[0]
            chosen_logit = logits[:, class_idx].sum()

            # autograd.grad returns d(logit)/d(features) directly. Unlike
            # backward(), it does not accumulate anything into parameter
            # .grad, so a training step that follows is not contaminated.
            # retain_graph keeps the graph alive for the caller's backward.
            (grads,) = torch.autograd.grad(chosen_logit, features, retain_graph=True)

            # One weight per (sample, channel): spatial mean of the gradient.
            weights = grads.mean(dim=(2, 3), keepdim=True)
            gradcam = torch.relu((weights * features).sum(dim=1, keepdim=True))

            batch_size = gradcam.size(0)
            max_vals = gradcam.reshape(batch_size, -1).max(dim=1, keepdim=True)[0]
            # The epsilon avoids 0/0 for samples whose map is zero everywhere.
            gradcam = gradcam / (max_vals.view(batch_size, 1, 1, 1) + 1e-8)
    finally:
        handle.remove()
        model.train(was_training)

    return gradcam


In [None]:
def bboxes_to_mask(bboxes, img_size=(224,224), feature_size=(7,7)):
    """Rasterise image-space boxes onto a coarse feature grid.

    Every grid cell that a box overlaps, even partially, is set to 1. The
    near edge of a box is rounded down and the far edge is rounded up, so a
    box smaller than one cell still marks the cell it lies in instead of
    vanishing.

    Args:
        bboxes: Iterable of ``(x, y, w, h)`` boxes in image pixels, with
            ``(x, y)`` the top-left corner.
        img_size: ``(height, width)`` of the image the boxes refer to.
        feature_size: ``(height, width)`` of the output grid.

    Returns:
        ``float32`` array of shape ``feature_size`` holding 0.0 or 1.0.
        Boxes with non-positive (or NaN) width or height, and boxes lying
        entirely outside the image, leave the mask unchanged.
    """
    feat_h, feat_w = feature_size
    mask = np.zeros((feat_h, feat_w), dtype=np.float32)

    scale_x = feat_w / img_size[1]
    scale_y = feat_h / img_size[0]

    for (x, y, w, h) in bboxes:
        if not (w > 0 and h > 0):
            continue

        x1 = max(0, int(np.floor(x * scale_x)))
        y1 = max(0, int(np.floor(y * scale_y)))
        x2 = min(feat_w, int(np.ceil((x + w) * scale_x)))
        y2 = min(feat_h, int(np.ceil((y + h) * scale_y)))

        # Guard against empty or inverted spans; a negative far edge would
        # otherwise be read as a from-the-end index by the slice below.
        if x2 > x1 and y2 > y1:
            mask[y1:y2, x1:x2] = 1.0

    return mask


In [None]:

def alignment_loss(gradcam, bbox_mask):
    """Penalise Grad-CAM activation that falls outside the box mask.

    For each sample the loss is ``1 - sum(cam * mask) / (sum(cam) + 1e-6)``,
    i.e. one minus the fraction of activation lying inside the mask. The
    per-sample values are averaged over the batch. A sample with an all-zero
    mask therefore contributes exactly 1.

    Args:
        gradcam: 4-D tensor whose first dimension is the batch.
        bbox_mask: Tensor with the same number of elements per sample.

    Returns:
        Scalar tensor with the mean loss.
    """
    batch_size, _, _, _ = gradcam.shape
    flat_cam = gradcam.view(batch_size, -1)
    flat_mask = bbox_mask.view(batch_size, -1)

    inside = (flat_cam * flat_mask).sum(dim=1)
    everything = flat_cam.sum(dim=1) + 1e-6
    per_sample = 1.0 - inside / everything
    return per_sample.mean()


In [None]:
def train_one_epoch(model, loader, optimizer, alpha=0.1, device='cuda'):
    """Train ``model`` for one pass over ``loader``.

    The objective per batch is ``cross_entropy + alpha * alignment_loss``,
    where the alignment term compares the class-1 Grad-CAM map with a mask
    rasterised from the ground-truth boxes. Samples whose label is not 1 get
    an all-zero mask.

    Args:
        model: Network to train; must be usable with ``compute_gradcam``.
        loader: Iterable yielding ``(images, labels, bboxes)`` batches, with
            ``labels`` a tensor and ``bboxes`` one list of
            ``(x, y, w, h)`` boxes per sample.
        optimizer: Optimizer holding the model parameters.
        alpha: Weight of the alignment term.
        device: Device the batch tensors are moved to.

    Returns:
        ``(mean_loss_per_batch, accuracy_percent)``; both are 0.0 when the
        loader yields nothing.
    """
    # Imported here because tqdm is not among the imports at the top of the
    # notebook.
    from tqdm.notebook import tqdm

    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    pbar = tqdm(loader, desc="Training", unit="batch")

    for batch_idx, (images, labels, bboxes) in enumerate(pbar):
        images = images.to(device)
        labels = labels.to(device)
        # One transfer for the whole batch instead of one tensor comparison
        # per sample inside the mask loop.
        is_positive = (labels == 1).tolist()

        logits = model(images)
        ce_loss = F.cross_entropy(logits, labels)
        ce_value = ce_loss.item()
        if DEBUG:
            print(f"Batch {batch_idx}: CE Loss = {ce_value:.4f}")

        gradcam = compute_gradcam(model, images, class_idx=1)
        B, _, Hf, Wf = gradcam.shape
        if DEBUG:
            print(f"Batch {batch_idx}: Grad-CAM shape = {gradcam.shape}")

        # Boxes live in the coordinate system of the images actually fed to
        # the model, so take the size from the batch rather than assuming a
        # fixed resolution.
        img_size = tuple(images.shape[-2:])
        masks_list = []
        for i in range(B):
            if is_positive[i]:
                mask_np = bboxes_to_mask(bboxes[i], img_size=img_size, feature_size=(Hf, Wf))
            else:
                mask_np = np.zeros((Hf, Wf), dtype=np.float32)
            masks_list.append(mask_np)

        bbox_mask_np = np.array(masks_list)
        bbox_mask_tensor = torch.from_numpy(bbox_mask_np).to(device).unsqueeze(1)

        if DEBUG:
            print(f"Batch {batch_idx}: BBox Mask tensor shape = {bbox_mask_tensor.shape}, sum = {torch.sum(bbox_mask_tensor).item()}")

        align_loss = alignment_loss(gradcam, bbox_mask_tensor)
        align_value = align_loss.item()
        if DEBUG:
            print(f"Batch {batch_idx}: Alignment Loss = {align_value:.4f}")

        total_loss = ce_loss + alpha * align_loss
        total_value = total_loss.item()
        if DEBUG:
            print(f"Batch {batch_idx}: Total Loss = {total_value:.4f}")

        # Clear gradients immediately before the real backward pass. Doing it
        # here, after the Grad-CAM computation, discards anything a backward
        # pass inside compute_gradcam may have accumulated into the
        # parameters, so the step only follows total_loss.
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        running_loss += total_value
        num_batches += 1
        preds = logits.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

        pbar.set_postfix({
            "CE Loss": f"{ce_value:.4f}",
            "Align": f"{align_value:.4f}",
            "Total": f"{total_value:.4f}"
        })

        if DEBUG:
            print(f"Batch {batch_idx} complete: CE Loss = {ce_value:.4f}, "
                  f"Align Loss = {align_value:.4f}, Total Loss = {total_value:.4f}\n")

    # Guard the averages so an empty loader reports zeros instead of raising
    # ZeroDivisionError.
    epoch_loss = running_loss / num_batches if num_batches else 0.0
    epoch_acc = 100.0 * correct / total if total else 0.0
    return epoch_loss, epoch_acc



def train_model(model, train_loader, num_epochs=5, alpha=0.1, lr=1e-3, device='cuda'):
    """Train ``model`` with Adam for ``num_epochs`` epochs.

    The model is moved to ``device`` first; each epoch is delegated to
    ``train_one_epoch`` and its loss and accuracy are printed.

    Args:
        model: Network to optimise.
        train_loader: Batches passed through to ``train_one_epoch``.
        num_epochs: Number of passes over ``train_loader``.
        alpha: Alignment-loss weight forwarded to ``train_one_epoch``.
        lr: Adam learning rate.
        device: Target device for the model and the batches.

    Returns:
        The same ``model`` object after training.
    """
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    epoch_options = {'alpha': alpha, 'device': device}

    for epoch in range(num_epochs):
        print(f"=== EPOCH {epoch+1}/{num_epochs} ===")
        epoch_loss, epoch_acc = train_one_epoch(model, train_loader, optimizer, **epoch_options)
        print(f"Epoch {epoch+1} | Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.2f}%\n")

    return model


In [14]:
def custom_collate_fn(batch):
    """Collate ``(image, label, boxes)`` samples into one batch.

    Images are stacked along a new leading dimension and labels become a
    ``torch.long`` tensor. The boxes stay a plain Python list with one entry
    per sample, because the number of boxes differs between samples.

    Returns:
        ``(images, labels, all_bboxes)``.
    """
    # Unpack once so every sample is checked to be a 3-tuple.
    samples = [(img, lbl, bxs) for (img, lbl, bxs) in batch]

    images = torch.stack([sample[0] for sample in samples], dim=0)
    labels = torch.tensor([sample[1] for sample in samples], dtype=torch.long)
    all_bboxes = [sample[2] for sample in samples]
    return images, labels, all_bboxes


In [None]:

if __name__ == "__main__":
    # Not used in this cell; kept so the global name `tqdm` stays defined in
    # case other cells rely on it (NOTE(review): confirm and drop if unused).
    from tqdm.notebook import tqdm

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Dataset locations are resolved relative to the current working directory.
    base_dir = os.path.abspath(".")
    train_csv_path = os.path.join(base_dir, "dataset", "stage2_train_metadata.csv")
    train_img_dir = os.path.join(base_dir, "dataset", "Training", "Images")

    print("Train CSV path:", train_csv_path, flush=True)
    print("Train image directory:", train_img_dir, flush=True)

    train_dataset = PneumoniaDataset(
        csv_path=train_csv_path,
        img_dir=train_img_dir,
        transform=train_transform_alb,
        is_train=True
    )

    # On Windows, DataLoader workers are started as fresh processes that have
    # to re-import the dataset class and the collate function. Objects
    # defined in notebook cells cannot be imported there, which leaves the
    # loader stuck before the first batch, so load in the main process on
    # that platform.
    num_workers = 0 if os.name == 'nt' else 2

    train_loader = DataLoader(
        train_dataset,
        batch_size=8,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=custom_collate_fn
    )

    model = build_model()

    model = train_model(
        model,
        train_loader,
        num_epochs=5,
        alpha=0.05,
        lr=1e-4,
        device=device
    )

    print("Training complete!")


Train CSV path: f:\Final Year\Final Project\Clinician-Guided-Grad-CAM\dataset\stage2_train_metadata.csv
Train image directory: f:\Final Year\Final Project\Clinician-Guided-Grad-CAM\dataset\Training\Images
=== EPOCH 1/5 ===


Training:   0%|          | 0/3336 [00:00<?, ?batch/s]