In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("iarunava/cell-images-for-detecting-malaria")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/iarunava/cell-images-for-detecting-malaria?dataset_version_number=1...


100%|██████████| 675M/675M [00:09<00:00, 75.3MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1


In [2]:
import os

def walk_through(dir_path):
  for dirpath, dirnames, filenames in os.walk(dir_path):
    print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")

In [3]:
walk_through(path)

There are 1 directories and 0 images in '/root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1'.
There are 3 directories and 0 images in '/root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1/cell_images'.
There are 0 directories and 13780 images in '/root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1/cell_images/Parasitized'.
There are 0 directories and 13780 images in '/root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1/cell_images/Uninfected'.
There are 2 directories and 0 images in '/root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1/cell_images/cell_images'.
There are 0 directories and 13780 images in '/root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1/cell_images/cell_images/Parasitized'.
There are 0 directories and 13780 images in '/root/.cache/kagglehub/datasets/iarunava/cell-imag

In [4]:
import numpy as np
import torch
import torch.nn as nn

In [5]:
 root_dir = '/root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1'
 data_dir = os.path.join(root_dir, 'cell_images')

In [6]:
import shutil
import os

nested_cell_images = "/root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1/cell_images/cell_images"

# Check if the directory exists before deleting
if os.path.exists(nested_cell_images):
    shutil.rmtree(nested_cell_images)
    print(f"Deleted: {nested_cell_images}")
else:
    print("Directory does not exist or has already been deleted.")


Deleted: /root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1/cell_images/cell_images


In [7]:
walk_through(path)

There are 1 directories and 0 images in '/root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1'.
There are 2 directories and 0 images in '/root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1/cell_images'.
There are 0 directories and 13780 images in '/root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1/cell_images/Parasitized'.
There are 0 directories and 13780 images in '/root/.cache/kagglehub/datasets/iarunava/cell-images-for-detecting-malaria/versions/1/cell_images/Uninfected'.


In [8]:
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, random_split

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


In [9]:
from torchvision import datasets

full_dataset = datasets.ImageFolder(root=data_dir,transform=train_transform)

total_size = len(full_dataset)
train_size = int(0.8 * total_size)
val_size = total_size - train_size

In [10]:
train_dataset, val_dataset = random_split(
        full_dataset,
        [train_size, val_size],
        generator=torch.Generator().manual_seed(42)
    )

val_dataset.dataset.transform = val_transform

In [11]:
len(train_dataset), len(val_dataset), full_dataset.classes

(22046, 5512, ['Parasitized', 'Uninfected'])

In [12]:
classes = full_dataset.classes

classes

['Parasitized', 'Uninfected']

In [13]:
class_idx = full_dataset.class_to_idx

class_idx

{'Parasitized': 0, 'Uninfected': 1}

In [14]:
import os
from torch.utils.data import DataLoader
BATCH_SIZE = 32
train_dataloader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              num_workers=0,
                              shuffle=True)

test_dataloader = DataLoader(dataset=val_dataset,
                             batch_size=BATCH_SIZE,
                             num_workers=4,
                             shuffle=False)

train_dataloader, test_dataloader



(<torch.utils.data.dataloader.DataLoader at 0x79a3d3968c10>,
 <torch.utils.data.dataloader.DataLoader at 0x79a4ce3a8e50>)

In [15]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [16]:
import torchvision

weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
model = torchvision.models.efficientnet_b0(weights=weights)

model = model.to(device)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 82.0MB/s]


In [17]:
for param in model.features.parameters():
    param.requires_grad = False

In [18]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)


output_shape = len(full_dataset.classes)

model.classifier = torch.nn.Sequential(
    torch.nn.Dropout(p=0.2, inplace=True),
    torch.nn.Linear(in_features=1280,
                    out_features=output_shape,
                    bias=True)).to(device)


In [19]:
import torch.optim as optim

criterion=nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0003, weight_decay=0.1)

In [20]:
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=2
)

In [21]:
import torch

from tqdm.auto import tqdm
from typing import Dict, List, Tuple
def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device,
               scheduler: torch.optim.lr_scheduler._LRScheduler = None) -> Tuple[float, float]:

    model.train()

    train_loss, train_acc = 0.0, 0.0
    total_samples = 0

    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        y_pred = model(X)

        loss = loss_fn(y_pred, y)
        train_loss += loss.item() * X.size(0)  # Scale loss by batch size

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if scheduler:
            scheduler.step(loss)  # Step the scheduler if provided

        y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
        train_acc += (y_pred_class == y).sum().item()
        total_samples += y.size(0)

    train_loss /= total_samples  # Normalize by total dataset size
    train_acc /= total_samples  # Normalize accuracy

    return train_loss, train_acc

In [22]:
def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device) -> Tuple[float, float]:


    model.eval()


    test_loss, test_acc = 0, 0


    with torch.inference_mode():

        for batch, (X, y) in enumerate(dataloader):

            X, y = X.to(device), y.to(device)



            test_pred_logits = model(X)


            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()

            test_pred_labels = test_pred_logits.argmax(dim=1)
            test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))


    test_loss = test_loss / len(dataloader)
    test_acc = test_acc / len(dataloader)
    return test_loss, test_acc

In [23]:
def train_and_validate(model,
                       train_loader,
                       val_loader,
                       criterion,
                       optimizer,
                       device,
                       num_epochs=50,
                       patience=5,
                       checkpoint_dir='./checkpoints'):
    """
    Train and validate the model with early stopping and checkpointing

    Args:
    - model: PyTorch model
    - train_loader: DataLoader for training data
    - val_loader: DataLoader for validation data
    - criterion: Loss function
    - optimizer: Optimizer
    - device: Computing device (cuda/cpu)
    - num_epochs: Maximum number of training epochs
    - patience: Number of epochs with no improvement after which training will be stopped
    - checkpoint_dir: Directory to save model checkpoints

    Returns:
    - Dictionary containing training history
    """
    # Create checkpoint directory if it doesn't exist
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Training history tracking
    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': []
    }

    # Early stopping variables
    best_val_loss = float('inf')
    epochs_no_improve = 0

    # Training loop
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss, train_acc = 0, 0

        train_progress_bar = tqdm(train_loader,
                                  desc=f'Epoch {epoch+1}/{num_epochs}',
                                  unit='batch')

        for batch, (X, y) in enumerate(train_progress_bar):
            X, y = X.to(device), y.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(X)
            loss = criterion(outputs, y)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Compute metrics
            train_loss += loss.item()
            train_pred = torch.argmax(torch.softmax(outputs, dim=1), dim=1)
            train_acc += (train_pred == y).float().mean().item()

            # Update progress bar
            train_progress_bar.set_postfix({
                'Train Loss': loss.item(),
                'Train Acc': train_acc / (batch + 1)
            })

        # Average epoch metrics
        train_loss /= len(train_loader)
        train_acc /= len(train_loader)

        # Validation phase
        model.eval()
        val_loss, val_acc = 0, 0

        with torch.inference_mode():
            for X, y in val_loader:
                X, y = X.to(device), y.to(device)

                outputs = model(X)
                loss = criterion(outputs, y)

                val_loss += loss.item()
                val_pred = torch.argmax(torch.softmax(outputs, dim=1), dim=1)
                val_acc += (val_pred == y).float().mean().item()

        # Average validation metrics
        val_loss /= len(val_loader)
        val_acc /= len(val_loader)

        # Store history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        # Print epoch summary
        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        # Early stopping and model checkpointing
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0

            # Save best model
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
                'val_loss': val_loss
            }, os.path.join(checkpoint_dir, 'best_model.pth'))
        else:
            epochs_no_improve += 1

        # Early stopping
        if epochs_no_improve >= patience:
            print(f'Early stopping triggered after {epoch+1} epochs')
            break

    return history

In [24]:
history = train_and_validate(
        model=model,
        train_loader=train_dataloader,
        val_loader=test_dataloader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
        num_epochs=6,
        patience=3
    )


Epoch 1/6:   0%|          | 0/689 [00:00<?, ?batch/s]

Epoch 1/6:
Train Loss: 0.3160, Train Acc: 0.8829
Val Loss: 0.2079, Val Acc: 0.9261


Epoch 2/6:   0%|          | 0/689 [00:00<?, ?batch/s]

Epoch 2/6:
Train Loss: 0.2333, Train Acc: 0.9126
Val Loss: 0.1897, Val Acc: 0.9335


Epoch 3/6:   0%|          | 0/689 [00:00<?, ?batch/s]

Epoch 3/6:
Train Loss: 0.2184, Train Acc: 0.9188
Val Loss: 0.1775, Val Acc: 0.9398


Epoch 4/6:   0%|          | 0/689 [00:00<?, ?batch/s]

Epoch 4/6:
Train Loss: 0.2130, Train Acc: 0.9210
Val Loss: 0.1717, Val Acc: 0.9411


Epoch 5/6:   0%|          | 0/689 [00:00<?, ?batch/s]

Epoch 5/6:
Train Loss: 0.2104, Train Acc: 0.9231
Val Loss: 0.1710, Val Acc: 0.9408


Epoch 6/6:   0%|          | 0/689 [00:00<?, ?batch/s]

Epoch 6/6:
Train Loss: 0.2092, Train Acc: 0.9226
Val Loss: 0.1713, Val Acc: 0.9420


In [25]:
torch.save(model.state_dict(), 'malaria_model.pth')