In [8]:
import os
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from PIL import Image
import torch
from torchvision import transforms

class_map = {"Trophozoite": 0, "NEG": 1, "WBC": 2}
class_names = {v: k for k, v in class_map.items()}

class MalariaDataset(Dataset):
    def __init__(self, csv_file, image_dir, s=7, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.s = s
        self.c = len(class_map)
        self.transform = transform

    def __len__(self):
        return len(self.annotations["Image_ID"].unique())

    def __getitem__(self, idx):
        image_id = self.annotations["Image_ID"].unique()[idx]
        image_path = os.path.join(self.image_dir, image_id)
        image = Image.open(image_path).convert("RGB")
        
        # Resize the image to (448, 448)
        image = image.resize((448, 448))
        
        if self.transform:
            image = self.transform(image)

        target = torch.zeros((self.s, self.s, self.c + 5))

        image_annotations = self.annotations[self.annotations["Image_ID"] == image_id]
        for _, row in image_annotations.iterrows():
            x_min = row["xmin"]
            x_max = row["xmax"]
            y_min = row["ymin"]
            y_max = row["ymax"]

            x_centers = (x_min + x_max) / 2 / 448
            y_centers = (y_min + y_max) / 2 / 448
            box_width = (x_max - x_min) / 448
            box_height = (y_max - y_min) / 448

            i = min(int(y_centers * self.s), self.s - 1)
            j = min(int(x_centers * self.s), self.s - 1)

            if target[i, j, self.c] == 0:
                target[i, j, self.c] = 1
                target[i, j, self.c+1:self.c+5] = torch.tensor(
                    [x_centers, y_centers, box_width, box_height]
                )
                class_label = class_map[row["class"]]
                target[i, j, class_label] = 1
        assert not torch.isnan(target).any(), "Target contains NaN!"
        assert not (target < 0).any(), "Target contains negative values!"
        assert target.shape == (self.s, self.s, self.c + 5), "Incorrect target shape!"

        return image, target


def collate_fn(batch):
    images, targets = zip(*batch)
    images = torch.stack(images)
    targets = torch.stack([t for t in targets])
    return images, targets


# Define transformations with normalization
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Instantiate the dataset and dataloader
dataset_train = MalariaDataset(
    "/kaggle/input/lacuna-malaria-detection-dataset/Train.csv",
    "/kaggle/input/lacuna-malaria-detection-dataset/images",
    transform=transform
)

train_dataloader = DataLoader(dataset_train, batch_size=8, collate_fn=collate_fn)


In [9]:
def visualize_samples(dataloader, num_samples=5, skip_samples=20):
    fig, axes = plt.subplots(1, num_samples, figsize=(15, 6))
    
    for idx, (image, target) in enumerate(dataloader):
        if idx < skip_samples:
            continue
        if idx >= skip_samples + num_samples:
            break
            
        # Handle batch dimension
        if image.dim() == 4:
            img_np = image[0].permute(1, 2, 0).numpy()
        else:
            img_np = image.permute(1, 2, 0).numpy()
            
        ax = axes[idx - skip_samples]
        ax.imshow(img_np)
        
        # Handle batch dimension in target
        if isinstance(target, list):
            target = target[0]
            
        boxes = target["boxes"]
        labels = target["lables"]  # Fixed ty

        if boxes.dim() == 1:
            boxes = boxes.unsqueeze(0)
        if labels.dim() == 0:
            labels = labels.unsqueeze(0)
            
        for box, label in zip(boxes, labels):
            x_min, y_min, x_max, y_max = box.numpy()
            
            # Skip invalid boxes
            if (x_min == 0 and x_max == 0 and y_min == 0 and y_max == 0):
                continue
                
            width = x_max - x_min
            height = y_max - y_min
            #label_value = label.item()
            #print("Label value:", label_value)
            label_value = label.item()
           # if label_value in class_names:
            #    print("Class name:", class_names[label_value])
            #else:
             #   print("Label value not found in class_names:", label_value)
            
            rect = patches.Rectangle(
                (x_min, y_min), width, height,
                edgecolor="red",
                facecolor="none"
            )
            ax.add_patch(rect)

          #  print(label)
            
            # Add label
          #  print(label.item())
          #  print(class_names.get(label.item()))
         #   print(class_names.get(label.item(), "Unknown"),)
            label_value = label.item()
            class_name = class_names.get(label_value)

            ax.text(
                x_min, y_min,
                class_name,
                color="red",
                fontsize=12,
                bbox=dict(facecolor="white", alpha=0.7)
            )
        ax.set_title(f"Sample {idx + 1}")
        ax.axis("off")
        
    plt.tight_layout()
    plt.show()

yolov1

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class YOLOv1(nn.Module):
    def __init__(self, s=7, b=2, c=3):
        super(YOLOv1, self).__init__()
        self.s = s
        self.b = b
        self.c = c

        # Convolutional layers
        self.conv_layers = nn.Sequential(
            # Layer 1

            
            
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.5),
            nn.MaxPool2d(2, 2),
            
            # Layer 2
            nn.Conv2d(64, 192, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(192),
            nn.Dropout(0.5),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(2, 2),
            
            # Layers 3-5
            nn.Conv2d(192, 128, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.1),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.1),
            nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.5),
            nn.MaxPool2d(2, 2),

            # Layers 6-10
            nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.1),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.1),
            nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.1),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.1),
            nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.1),
            nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.5),
            nn.MaxPool2d(2, 2),
            
            # Layers 11-15
            nn.Conv2d(1024, 512, kernel_size=1, stride=1, padding=0),
            nn.Dropout(0.5),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.1),
            nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.1),
            nn.Conv2d(1024, 512, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.5),
            nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.1),
            nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.1),
            nn.Conv2d(1024, 1024, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(1024),
            nn.Dropout(0.5),
            nn.LeakyReLU(0.1),
            
            # Final convolutional layers before FC layers
            nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.1),
            nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(1024),
            nn.Dropout(0.5),
            nn.LeakyReLU(0.1),
        )
        
        # Fully connected layers
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * 7 * 7, 4096),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.5),
            nn.Linear(4096, s * s * (b * 5 + c)),
        )
        
        # Initialize weights
        self._initialize_weights()
    
    def forward(self, x):
        # Add activation checking after each major block
        for i, layer in enumerate(self.conv_layers):
            x = layer(x)
            if torch.isnan(x).any():
                print(f"NaN detected in conv layer {i}")
                print(f"Layer type: {type(layer)}")
                return None
            
            # Print statistics periodically
       #     if i % 10 == 0:
        #        print(f"Layer {i} stats:")
         #       print(f"Mean: {x.mean().item():.4f}")
          #      print(f"Std: {x.std().item():.4f}")
           #     print(f"Min: {x.min().item():.4f}")
            #    print(f"Max: {x.max().item():.4f}")
        
        # Check before FC layers
      #  print("Before FC layers:")
       # print(f"Shape: {x.shape}")
        #print(f"Range: [{x.min().item():.4f}, {x.max().item():.4f}]")

      #  print("Shape before FC layers:", x.shape)

        for i, layer in enumerate(self.fc_layers):
            x = layer(x)
           # print(x.shape)
            if torch.isnan(x).any():
                print(f"NaN detected in fc layer {i}")
                print(f"Layer type: {type(layer)}")
                return None
        
        return x.view(-1, self.s, self.s, self.b * 5 + self.c)
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
            # Use more conservative initialization for first layer
                if m is self.conv_layers[0]:
                    nn.init.normal_(m.weight, mean=0, std=0.01)
                else:
                    nn.init.kaiming_normal_(m.weight, a=0.1, mode='fan_out', nonlinearity='leaky_relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0, std=0.01)
                nn.init.constant_(m.bias, 0)




In [11]:
model = YOLOv1()

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

YOLOv1(
  (conv_layers): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.1)
    (3): Dropout(p=0.5, inplace=False)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Dropout(p=0.5, inplace=False)
    (8): LeakyReLU(negative_slope=0.1)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): LeakyReLU(negative_slope=0.1)
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): BatchNorm2d(256, eps=1e-05, m

In [13]:
class YOLOLoss(nn.Module):
    def __init__(self, s=7, b=2, c=3, lambda_coord=5e-4, lambda_noobj=0.5,lambda_class=0.1):
        super().__init__()
        self.s = s
        self.b = b
        self.c = c
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        self.lambda_class = lambda_class

    def forward(self, predictions, targets):
        predictions = predictions.float()
        targets = targets.float()

        predictions = predictions.reshape(-1, self.s, self.s, self.c + self.b * 5)

        # Split predictions
        pred_classes = torch.sigmoid(predictions[..., :self.c])

        # Box 1
        pred_box_1_xy = torch.sigmoid(predictions[..., self.c:self.c + 2])
        pred_box_1_wh = torch.exp(torch.clamp(predictions[..., self.c + 2:self.c + 4], -100, 4.6))
        pred_box_1_conf = torch.sigmoid(predictions[..., self.c + 4:self.c + 5])
        pred_box_1 = torch.cat([pred_box_1_xy, pred_box_1_wh], dim=-1)

        # Box 2
        pred_box_2_xy = torch.sigmoid(predictions[..., self.c + 5:self.c + 7])
        pred_box_2_wh = torch.exp(torch.clamp(predictions[..., self.c + 7:self.c + 9], -100, 4.6))
        pred_box_2_conf = torch.sigmoid(predictions[..., self.c + 9:self.c + 10])
        pred_box_2 = torch.cat([pred_box_2_xy, pred_box_2_wh], dim=-1)

        

        # Target components
        target_classes = targets[..., :self.c]
        target_box_1 = targets[..., self.c:self.c + 4]
        target_box_2 = targets[..., self.c + 5:self.c + 9]
        target_box_1_conf = targets[..., self.c + 4:self.c + 5]
        target_box_2_conf = targets[..., self.c + 9:self.c + 10]
        

        # Loss components
        class_loss = torch.sum((target_classes - pred_classes) ** 2) * self.lambda_class


        box_1_loss = torch.sum((target_box_1 - pred_box_1) ** 2) * self.lambda_coord
        if target_box_2.size(-1) > 0:
            box_2_loss = torch.sum((target_box_2 - pred_box_2) ** 2) * self.lambda_coord
        else:
            target_box_2 = torch.zeros_like(pred_box_2)
            box_2_loss = 0 


        obj_loss = torch.sum((target_box_1_conf - pred_box_1_conf) ** 2) * self.lambda_coord
        noobj_loss = torch.sum((target_box_2_conf - pred_box_2_conf) ** 2) * self.lambda_noobj

        total_loss = class_loss + box_1_loss + box_2_loss + obj_loss + noobj_loss
        return total_loss


In [14]:
import os
import numpy as np
from torchvision.ops import box_iou
from torch.nn.functional import mse_loss , binary_cross_entropy_with_logits ,binary_cross_entropy
import torch.nn.functional as F

In [15]:
import torch.optim as optim
import time
from torch.utils.data import DataLoader
from tqdm import tqdm  # Import tqdm for progress bar

# Instantiate the YOLOv1 model and YOLOLoss
model = YOLOv1(s=7, b=2, c=3).to(device)
criterion = YOLOLoss(s=7, b=2, c=3).to(device)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-4)


# Set the number of epochs
epochs = 5

# Set device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the dataset and dataloader
dataset_train = MalariaDataset(
    "/kaggle/input/lacuna-malaria-detection-dataset/Train.csv",
    "/kaggle/input/lacuna-malaria-detection-dataset/images",
    transform=transform
)
train_dataloader = DataLoader(dataset_train, batch_size=8, collate_fn=collate_fn)

# Training loop
def train():
    model.train()
    running_loss = 0.0
    start_time = time.time()

    for epoch in range(epochs):
        epoch_loss = 0.0
        # Wrap the DataLoader with tqdm to show progress
        with tqdm(train_dataloader, desc=f"Epoch [{epoch+1}/{epochs}]", unit="batch") as pbar:
            for i, (images, targets) in enumerate(pbar):
                images = images.to(device)
                targets = targets.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass
                predictions = model(images)

                

                # Compute loss
                loss = criterion(predictions, targets)

                # Backward pass and optimization
                loss.backward()
                optimizer.step()

                # Accumulate the loss
                epoch_loss += loss.item()

                # Update progress bar description with loss
                pbar.set_postfix(loss=loss.item())

        # Print epoch loss
        print(f"Epoch [{epoch+1}/{epochs}], Epoch Loss: {epoch_loss / len(train_dataloader):.4f}")

        # Calculate elapsed time for each epoch
        elapsed_time = time.time() - start_time
        print(f"Time elapsed for epoch {epoch+1}: {elapsed_time:.2f} seconds")

    print("Training complete!")

# Run training
train()


Epoch [1/5]: 100%|██████████| 344/344 [09:18<00:00,  1.62s/batch, loss=0.244]


Epoch [1/5], Epoch Loss: 2.5443
Time elapsed for epoch 1: 558.23 seconds


Epoch [2/5]: 100%|██████████| 344/344 [08:03<00:00,  1.41s/batch, loss=0.122] 


Epoch [2/5], Epoch Loss: 1.9883
Time elapsed for epoch 2: 1041.57 seconds


Epoch [3/5]: 100%|██████████| 344/344 [08:00<00:00,  1.40s/batch, loss=0.12]  


Epoch [3/5], Epoch Loss: 2.4606
Time elapsed for epoch 3: 1521.77 seconds


Epoch [4/5]: 100%|██████████| 344/344 [07:49<00:00,  1.36s/batch, loss=0.119] 


Epoch [4/5], Epoch Loss: 2.5703
Time elapsed for epoch 4: 1990.89 seconds


Epoch [5/5]: 100%|██████████| 344/344 [07:36<00:00,  1.33s/batch, loss=0.124] 

Epoch [5/5], Epoch Loss: 2.5493
Time elapsed for epoch 5: 2447.60 seconds
Training complete!



