In [1]:
import pandas as pd
import numpy as np
import os
from PIL import Image
import torch
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split

In [2]:
# Load the annotation tables shipped with the dataset
train_df = pd.read_csv('/kaggle/input/lacuna-malaria-detection-dataset/Train.csv')
test_df = pd.read_csv('/kaggle/input/lacuna-malaria-detection-dataset/Test.csv')

# Set the image directory
img_dir = '/kaggle/input/lacuna-malaria-detection-dataset/images/'

# Create image paths for BOTH frames: the test frame is wrapped in the same
# Dataset class later on, which reads the 'image_path' column for every row.
for frame in (train_df, test_df):
    frame['image_path'] = frame['Image_ID'].apply(lambda x: os.path.join(img_dir, x))

# Split training data into train and validation sets (stratified on the label
# so both splits keep the same class proportions).
# NOTE(review): if Train.csv holds several rows per Image_ID, a row-level split
# can place the same image in both splits -- confirm and group by Image_ID if so.
train_data, val_data = train_test_split(
    train_df,
    test_size=0.2,
    random_state=42,
    stratify=train_df['class'],
)

# Data augmentation and normalization for training
train_transforms = transforms.Compose([
    transforms.RandomRotation(15),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet normalization
])

# Deterministic preprocessing for validation and test (no augmentation)
val_test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

class MalariaDataset(Dataset):
    """Image dataset backed by a DataFrame with an 'image_path' column.

    When the frame has a 'class' column, items are ``(image, label)`` pairs
    where ``label`` is a ``torch.long`` tensor holding the class index.
    When it has no 'class' column, items are the image alone.

    Args:
        dataframe: DataFrame with an 'image_path' column and, optionally,
            a 'class' column.
        transform: Optional callable applied to the loaded RGB PIL image.
        class_to_idx: Optional mapping from class name to integer index.
            Pass the training dataset's mapping to guarantee identical label
            indices across splits. When omitted, the mapping is built from
            the sorted unique values of the 'class' column.
    """

    def __init__(self, dataframe, transform=None, class_to_idx=None):
        self.dataframe = dataframe
        self.transform = transform
        self.has_labels = 'class' in dataframe.columns

        if class_to_idx is not None:
            # Copy so later edits to the caller's dict do not leak in
            self.class_to_idx = dict(class_to_idx)
        elif self.has_labels:
            # Sort the names: unique() follows order of first appearance, which
            # differs between independently shuffled splits and would give each
            # split its own label indices.
            class_names = sorted(dataframe['class'].dropna().unique())
            self.class_to_idx = {class_name: idx for idx, class_name in enumerate(class_names)}
        else:
            self.class_to_idx = {}

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]

        # convert() forces the pixel data to load, so the file can be closed
        # as soon as the RGB copy exists.
        with Image.open(row['image_path']) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)

        if not self.has_labels:
            return image

        # Map the class name to its index and wrap it as a long tensor
        label = torch.tensor(self.class_to_idx[row['class']], dtype=torch.long)

        return image, label



# Create Datasets and DataLoaders
batch_size = 32

train_dataset = MalariaDataset(train_data, transform=train_transforms)
val_dataset = MalariaDataset(val_data, transform=val_test_transforms)
test_dataset = MalariaDataset(test_df, transform=val_test_transforms)

# Every split must encode labels with the mapping the model is trained on;
# reuse the training mapping instead of letting each dataset derive its own.
val_dataset.class_to_idx = train_dataset.class_to_idx
test_dataset.class_to_idx = train_dataset.class_to_idx

# Only the training loader is shuffled; evaluation order stays fixed so that
# predictions line up with the rows of the source frames.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Print some information about the data
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")
print(f"Number of test samples: {len(test_dataset)}")

# Check class balance
print("\nClass distribution in training data:")
print(train_data['class'].value_counts(normalize=True))
print("\nClass distribution in validation data:")
print(val_data['class'].value_counts(normalize=True))


Number of training samples: 18824
Number of validation samples: 4706
Number of test samples: 1178

Class distribution in training data:
class
Trophozoite    0.673077
WBC            0.297652
NEG            0.029271
Name: proportion, dtype: float64

Class distribution in validation data:
class
Trophozoite    0.673183
WBC            0.297705
NEG            0.029112
Name: proportion, dtype: float64


In [3]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [4]:
import torch.nn as nn
import torch.optim as optim
from torchvision.models import efficientnet_b0

# The weights enum lives next to the model builder in torchvision.models
from torchvision.models import EfficientNet_B0_Weights

# Size the classifier from the training labels rather than a hard-coded 3
num_classes = int(train_df['class'].nunique())

# Load pretrained EfficientNet model. `pretrained=True` is deprecated in
# torchvision >= 0.13; the explicit enum pins the ImageNet checkpoint.
model = efficientnet_b0(weights=EfficientNet_B0_Weights.IMAGENET1K_V1)

# Read the feature width of the stock head before replacing it
in_features = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2),
    nn.Linear(in_features, num_classes)
)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# **Training and Validation**

def _model_device(model):
    """Return the device of the model's first parameter (CPU if it has none)."""
    first_param = next(model.parameters(), None)
    return first_param.device if first_param is not None else torch.device("cpu")


def train_model(model, train_loader, val_loader, num_epochs=10, lr=0.001):
    """Train ``model`` with Adam and cross-entropy, printing metrics per epoch.

    Args:
        model: Network to optimise in place; batches are moved to its device.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to Adam.

    Returns:
        None. One summary line is printed per epoch with the mean per-sample
        training loss, training accuracy and validation accuracy.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    device = _model_device(model)

    for epoch in range(num_epochs):
        # evaluate_model() switches to eval mode, so re-enable training mode
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # criterion returns the batch MEAN; weight it by the batch size so
            # that dividing by the sample count yields a true per-sample mean.
            batch_count = labels.size(0)
            running_loss += loss.item() * batch_count
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += batch_count

        # Guard the divisions against an empty training loader
        seen = max(total, 1)
        train_accuracy = 100 * correct / seen
        val_accuracy = evaluate_model(model, val_loader)

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/seen:.4f}, Train Acc: {train_accuracy:.2f}%, Val Acc: {val_accuracy:.2f}%')


def evaluate_model(model, val_loader):
    """Return the top-1 accuracy of ``model`` on ``val_loader`` as a percentage.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        val_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        Accuracy in the range 0-100, or 0.0 when the loader yields no samples.
    """
    model.eval()
    device = _model_device(model)
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        return 0.0
    return 100 * correct / total

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 112MB/s] 


In [None]:
# Fine-tune the network on the training split, validating after every epoch
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=10,
)

In [None]:
def test_model(model, test_loader):
    model.eval()
    predictions = []
    with torch.no_grad():
        for images in test_loader:
            images = images.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            predictions.extend(predicted.cpu().numpy())
    return predictions

# Predicted class index for every test row, in test_loader order
test_predictions = test_model(model=model, test_loader=test_loader)

In [None]:
# Invert the training label mapping so predicted indices can be read as names
idx_to_class = {idx: class_name for class_name, idx in train_dataset.class_to_idx.items()}

# Keep the raw index column and add the decoded class name next to it
results_df = pd.DataFrame({
    'Image_ID': test_df['Image_ID'],
    'Predicted_Class': test_predictions,
    'Predicted_Label': [idx_to_class[int(pred)] for pred in test_predictions],
})

results_df.head()