In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, WeightedRandomSampler, random_split
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_curve, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from collections import Counter

In [None]:
# Configurations
# Dataset root. Defaults to the Kaggle mount; set MALARIA_DATA_DIR to point the
# notebook at a copy stored elsewhere.
data_dir = os.environ.get("MALARIA_DATA_DIR", "/kaggle/input/malariacells/Cells")
batch_size = 32
num_epochs = 25
learning_rate = 0.0001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the torch and numpy RNGs up front so the stochastic steps further down
# (dataset split, shuffling, random augmentation, new-layer initialisation)
# repeat from one "Restart & Run All" to the next.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

In [None]:
# Pipeline for the under-represented classes: random rotation (up to 30 degrees),
# random horizontal flip and colour jitter, followed by resize and tensor conversion.
minority_transform = transforms.Compose(
    [
        transforms.RandomRotation(degrees=30),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.2,
        ),
        transforms.Resize(size=(160, 170)),
        transforms.ToTensor(),
    ]
)

# Pipeline for the well-represented classes: resize and tensor conversion only.
majority_transform = transforms.Compose(
    [
        transforms.Resize(size=(160, 170)),
        transforms.ToTensor(),
    ]
)

In [None]:
# Index the images under data_dir. No transform is attached here; the per-class
# transforms are applied later by CustomDataset.
dataset = datasets.ImageFolder(data_dir)

In [None]:
# Take the labels from the index ImageFolder already holds. Iterating the
# dataset itself would load every image from disk only to throw it away.
labels = list(dataset.targets)

# Count the occurrences of each label (class)
class_counts = Counter(labels)

# Print the class counts
for class_idx, count in class_counts.items():
    class_name = dataset.classes[class_idx]  # Map the integer label back to its class name
    print(f"Class: {class_name}, Count: {count}")

In [None]:
# Display the class-name -> integer-label mapping; the labels compared against
# minority_classes come from this dataset.
dataset.class_to_idx

In [None]:
# Integer labels whose samples receive the augmenting minority_transform.
# NOTE(review): hard-coded indices — confirm they match dataset.class_to_idx
# and the class counts printed for this dataset.
minority_classes = [1,4,5,6]

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Wrap an indexable ``(image, label)`` dataset with class-dependent transforms.

    Samples whose label is listed in ``minority_classes`` go through
    ``minority_transform``; every other sample goes through
    ``majority_transform``. Labels are returned unchanged.
    """

    def __init__(self, dataset, minority_classes, minority_transform, majority_transform):
        # Keep references only; nothing is loaded or transformed until indexing.
        self.dataset = dataset
        self.minority_classes = minority_classes
        self.minority_transform = minority_transform
        self.majority_transform = majority_transform

    def __len__(self):
        """Number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the sample at ``idx``."""
        sample, target = self.dataset[idx]

        # Select the pipeline from the sample's class, then apply it once.
        if target in self.minority_classes:
            pipeline = self.minority_transform
        else:
            pipeline = self.majority_transform

        return pipeline(sample), target

In [None]:
# Wrap the raw image folder so each sample is transformed according to its class.
custom_dataset = CustomDataset(
    dataset=dataset,
    minority_classes=minority_classes,
    minority_transform=minority_transform,
    majority_transform=majority_transform,
)

In [None]:
# 80% train / 5% validation / remainder test.
train_size = int(0.8 * len(custom_dataset))
val_size = int(0.05 * len(custom_dataset))
test_size = len(custom_dataset) - train_size - val_size

# Split with a dedicated, seeded generator so the same samples land in the same
# subset on every run; otherwise metrics are not comparable between runs.
# NOTE(review): all three subsets share custom_dataset, so the random
# minority_transform is also applied to validation and test samples.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    custom_dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

In [None]:
# Report the split sizes one per line, in the order: train, test, validation.
print(train_size, test_size, val_size, sep="\n")

In [None]:
# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=batch_size,
    shuffle=False,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Summarise how many batches each loader yields, one line per loader.
print(
    "\n".join(
        [
            f"Number of batches in train_loader: {len(train_loader)}",
            f"Number of batches in val_loader: {len(val_loader)}",
            f"Number of batches in test_loader: {len(test_loader)}",
        ]
    )
)


In [None]:
from torchvision.models import ResNet18_Weights

# ResNet-18 initialised with torchvision's default pretrained weights.
model = models.resnet18(weights=ResNet18_Weights.DEFAULT)

# Replace the final fully connected layer so it emits one output per dataset class.
model.fc = nn.Linear(
    in_features=model.fc.in_features,
    out_features=len(dataset.classes),
)
model = model.to(device)

In [None]:
criterion = nn.CrossEntropyLoss()
# Take the step size from the configuration cell rather than repeating the
# literal, so changing learning_rate there actually affects training.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Per-epoch metric log filled in by train_model; every key starts with its own
# empty list.
history = {
    metric_name: []
    for metric_name in ("train_loss", "val_loss", "train_acc", "val_acc")
}

In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    """Train ``model`` and record per-epoch metrics in the module-level ``history``.

    Each epoch performs one optimisation pass over ``train_loader`` and then
    scores the model on ``val_loader`` through ``evaluate_model``.

    Args:
        model: Network to optimise; batches are moved to the global ``device``.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss callable. Assumed to return the mean loss of a batch
            (the default reduction of ``nn.CrossEntropyLoss``).
        optimizer: Optimiser already bound to ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.

    Raises:
        ValueError: If ``train_loader`` yields no samples.

    Side effects:
        Appends to the four lists in ``history`` and prints a summary after
        every epoch. Returns nothing.
    """
    for epoch in range(epochs):
        model.train()
        loss_sum = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass, backward pass, parameter update
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            n_samples = labels.size(0)
            # Weight the batch mean by the batch size so that a short final
            # batch does not count as much as a full one.
            loss_sum += loss.item() * n_samples
            _, predicted = outputs.max(1)
            total += n_samples
            correct += (predicted == labels).sum().item()

        if total == 0:
            raise ValueError("train_loader produced no samples")

        train_loss = loss_sum / total
        train_acc = correct / total
        val_acc, val_loss, _, _ = evaluate_model(model, val_loader, criterion)

        # Save metrics
        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["train_acc"].append(train_acc)
        history["val_acc"].append(val_acc)

        print(f"Epoch {epoch+1}/{epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}")

In [None]:
def evaluate_model(model, data_loader, criterion):
    """Score ``model`` on ``data_loader`` without updating its parameters.

    Args:
        model: Network to evaluate; switched to eval mode by this call.
        data_loader: Iterable of ``(images, labels)`` batches, moved to the
            global ``device`` before the forward pass.
        criterion: Loss callable. Assumed to return the mean loss of a batch
            (the default reduction of ``nn.CrossEntropyLoss``).

    Returns:
        Tuple ``(accuracy, mean_loss, all_preds, all_labels)`` where
        ``accuracy`` and ``mean_loss`` are per-sample averages and the two
        lists hold the predicted and true class index of every sample, in
        loader order.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            n_samples = labels.size(0)
            # Weight the batch mean by the batch size so that a short final
            # batch does not count as much as a full one.
            loss_sum += loss.item() * n_samples
            _, predicted = outputs.max(1)
            total += n_samples
            correct += (predicted == labels).sum().item()

            # Collect all predictions and labels
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total == 0:
        raise ValueError("data_loader produced no samples")

    accuracy = correct / total
    return accuracy, loss_sum / total, all_preds, all_labels

In [None]:
# Run the full training schedule; per-epoch metrics accumulate in `history`.
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=num_epochs,
)

In [None]:
# Score the trained model on the held-out test split; the loss (second return
# value) is not needed here.
test_acc, _, test_preds, test_labels = evaluate_model(
    model=model,
    data_loader=test_loader,
    criterion=criterion,
)
print(f"Test Accuracy: {test_acc:.4f}")

In [None]:
# Pass the full label list explicitly: if a rare class is missing from the test
# split, classification_report otherwise rejects target_names for having more
# entries than the labels it found. zero_division=0 reports 0 without a
# warning for classes that have no predicted or no true samples.
report = classification_report(
    test_labels,
    test_preds,
    labels=list(range(len(dataset.classes))),
    target_names=dataset.classes,
    zero_division=0,
)
print("\nClassification Report:")
print(report)

In [None]:
import pandas as pd

# Same report as a dict. The explicit label list keeps the call valid when a
# rare class is absent from the test split; zero_division=0 reports 0 without
# a warning for classes that have no predicted or no true samples.
report_dict = classification_report(
    test_labels,
    test_preds,
    labels=list(range(len(dataset.classes))),
    target_names=dataset.classes,
    zero_division=0,
    output_dict=True,
)
report_df = pd.DataFrame(report_dict).transpose()

# Keep only the per-class rows; whichever summary rows are present are dropped.
report_df = report_df.drop(
    ["accuracy", "micro avg", "macro avg", "weighted avg"], errors="ignore"
)

# "support" is a raw sample count. On a colour scale shared with metrics that
# live in [0, 1] it dominates the range and washes the metrics out, so only
# the three metric columns are drawn, on a fixed 0..1 scale.
metric_df = report_df[["precision", "recall", "f1-score"]]

plt.figure(figsize=(10, 6))
sns.heatmap(
    metric_df,
    annot=True,
    cmap="Blues",
    fmt=".2f",
    cbar=False,
    vmin=0.0,
    vmax=1.0,
)
plt.title("Classification Report Heatmap")
plt.ylabel("Classes")
plt.xlabel("Metrics")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
def plot_metrics(history):
    """Draw the loss and accuracy curves side by side.

    ``history`` is indexed with the keys "train_loss", "val_loss", "train_acc"
    and "val_acc"; the x-axis runs from 1 to the number of entries in
    ``history["train_loss"]``.
    """
    epoch_axis = range(1, len(history["train_loss"]) + 1)

    # One row per panel:
    # (train key, val key, train legend, val legend, title, y-axis label)
    panels = [
        ("train_loss", "val_loss", "Train Loss", "Validation Loss", "Loss Over Epochs", "Loss"),
        ("train_acc", "val_acc", "Train Accuracy", "Validation Accuracy", "Accuracy Over Epochs", "Accuracy"),
    ]

    plt.figure(figsize=(12, 5))
    for slot, (train_key, val_key, train_legend, val_legend, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        plt.plot(epoch_axis, history[train_key], label=train_legend)
        plt.plot(epoch_axis, history[val_key], label=val_legend)
        plt.title(title)
        plt.xlabel("Epochs")
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

plot_metrics(history)

In [None]:
# Persist the model's state_dict (not the whole module object) to disk.
model_save_path = "cell_classification_resnet.pth"
torch.save(obj=model.state_dict(), f=model_save_path)
print(f"Model saved to {model_save_path}")