In [1]:
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm

In [9]:
# Run on the GPU when CUDA is usable, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Device assigned: {device}")

Device assigned: cuda


In [2]:
# Locations of the three dataset splits; all of them live under the same
# chest_xray folder and differ only in the final path component.
ROOT_DIR_TRAIN, ROOT_DIR_TEST, ROOT_DIR_VAL = (
    f'/content/drive/MyDrive/data/chest_xray/{split}'
    for split in ('train', 'test', 'val')
)

In [3]:
# --- DATASET CLASS ---
class PneumoniaDataset(Dataset):
    """Binary image dataset read from a ``NORMAL`` / ``PNEUMONIA`` folder pair.

    File paths are collected once at construction time; pixel data is only
    read when an item is requested.

    Args:
        root_dir: Directory expected to contain ``NORMAL`` and ``PNEUMONIA``
            sub-directories. If it does not exist, an error message is printed
            and the dataset is left empty (no exception is raised).
        transform: Optional callable applied to the RGB PIL image before it is
            returned from ``__getitem__``.

    Attributes:
        image_paths: Paths of every discovered image, grouped by class
            (``NORMAL`` first) and sorted by file name within each class.
        labels: Integer label for each entry of ``image_paths``
            (0 = NORMAL, 1 = PNEUMONIA).
    """

    # Sub-directory name -> integer class label. Insertion order defines the
    # order in which the class folders are scanned.
    CLASS_TO_LABEL = {'NORMAL': 0, 'PNEUMONIA': 1}
    # File extensions (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Check if path exists to prevent crashing
        if not os.path.exists(root_dir):
            print(f"❌ Error: Path '{root_dir}' does not exist. Check your folder structure.")
            return

        for class_name, class_label in self.CLASS_TO_LABEL.items():
            class_dir = os.path.join(root_dir, class_name)
            if not os.path.isdir(class_dir):
                # A missing class folder simply contributes no samples.
                continue
            # os.listdir() returns entries in arbitrary order; sorting makes the
            # sample order (and therefore unshuffled evaluation order and
            # seeded shuffles) reproducible across runs and machines.
            for img_name in sorted(os.listdir(class_dir)):
                if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.image_paths.append(os.path.join(class_dir, img_name))
                    self.labels.append(class_label)

    def __len__(self):
        """Return the number of images discovered under ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Position of the sample in ``image_paths`` / ``labels``.

        Returns:
            Tuple ``(image, label)``: the image converted to RGB (and passed
            through ``transform`` when one was given) and its integer label.
        """
        # Open inside a context manager so the file handle is released
        # deterministically; convert() yields a loaded image that no longer
        # needs the open file.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert('RGB')
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

In [14]:
# 1. Training pipeline: resize, light random augmentation, then tensor
#    conversion and per-channel normalisation.
train_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),      # mirror left/right half of the time
    transforms.RandomRotation(degrees=10),       # random angle in [-10, +10] degrees
    transforms.ColorJitter(brightness=0.1),      # mild brightness perturbation
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# 2. Validation/test pipeline: same resize and normalisation, no random steps.
val_test_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [15]:
# --- LOAD DATA ---
# The training split gets train_transform; the test and validation splits
# both use val_test_transform.
train_dataset = PneumoniaDataset(ROOT_DIR_TRAIN, train_transform)
test_dataset = PneumoniaDataset(ROOT_DIR_TEST, val_test_transform)
val_dataset = PneumoniaDataset(ROOT_DIR_VAL, val_test_transform)

In [16]:
# Safety check: report whether the training split actually contains images.
if len(train_dataset) > 0:
    print(f"✅ Data loaded! Training images: {len(train_dataset)}")
else:
    print("❌ No images found! Please check the 'ROOT_DIR' paths at the top of the script.")

✅ Data loaded! Training images: 5216


In [19]:
# Colab allows num_workers=2 safely. pin_memory=True speeds up GPU transfer.
# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

In [20]:
# --- MODEL SETUP ---
# ResNet-18 initialised from the IMAGENET1K_V1 weights, with its final fully
# connected layer swapped for a fresh 2-output linear head.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2)
model = model.to(device)

In [21]:
# Cross-entropy over the two class logits; Adam updates every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [22]:
# --- TRAINING LOOP ---
# Each epoch runs one optimisation pass over train_loader, then scores the
# model on val_loader and prints both results.
num_epochs = 15

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0   # sum of per-sample losses seen so far this epoch
    correct_train = 0
    total_train = 0

    # TQDM creates a progress bar
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=True)

    for images, labels in loop:
        # non_blocking lets the host->GPU copy overlap with other work when the
        # loader hands out pinned memory; it is a no-op otherwise.
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        batch_loss = loss.item()
        # nn.CrossEntropyLoss() reduces to the batch mean by default. Weight it
        # by the batch size so a smaller final batch does not get the same
        # influence on the epoch average as a full one.
        running_loss += batch_loss * batch_size

        # Calculate training accuracy on the fly. detach() replaces the legacy
        # `.data` attribute, and argmax avoids binding `_`, which IPython uses
        # for the last cell output.
        predicted = outputs.detach().argmax(dim=1)
        total_train += batch_size
        correct_train += (predicted == labels).sum().item()

        # Update progress bar
        loop.set_postfix(loss=batch_loss, acc=100 * correct_train / total_train)

    # Mean loss per training sample for this epoch.
    epoch_loss = running_loss / total_train
    train_acc = 100 * correct_train / total_train
    print(f"Epoch {epoch+1} Result: Loss: {epoch_loss:.4f} | Train Acc: {train_acc:.2f}%")

    # --- VALIDATION ---
    # NOTE(review): the recorded validation accuracies move in steps of 1/16,
    # which suggests the validation split is very small and this metric is
    # noisy — confirm before using it for model selection.
    model.eval()
    val_labels = []
    val_preds = []

    with torch.no_grad():
        for images, labels in val_loader:
            # Only the images are needed on the device; labels are consumed on
            # the CPU by accuracy_score.
            images = images.to(device, non_blocking=True)
            outputs = model(images)
            preds = outputs.argmax(dim=1)
            val_labels.extend(labels.cpu().numpy())
            val_preds.extend(preds.cpu().numpy())

    val_accuracy = accuracy_score(val_labels, val_preds)
    print(f"Validation Accuracy: {val_accuracy:.4f}")
    print("-" * 50)

Epoch 1/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 1 Result: Loss: 0.1071 | Train Acc: 95.82%
Validation Accuracy: 1.0000
--------------------------------------------------


Epoch 2/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 2 Result: Loss: 0.0560 | Train Acc: 98.03%
Validation Accuracy: 1.0000
--------------------------------------------------


Epoch 3/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 3 Result: Loss: 0.0459 | Train Acc: 98.29%
Validation Accuracy: 0.6250
--------------------------------------------------


Epoch 4/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 4 Result: Loss: 0.0361 | Train Acc: 98.54%
Validation Accuracy: 1.0000
--------------------------------------------------


Epoch 5/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 5 Result: Loss: 0.0307 | Train Acc: 98.85%
Validation Accuracy: 0.8750
--------------------------------------------------


Epoch 6/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 6 Result: Loss: 0.0268 | Train Acc: 99.04%
Validation Accuracy: 0.8125
--------------------------------------------------


Epoch 7/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 7 Result: Loss: 0.0234 | Train Acc: 99.18%
Validation Accuracy: 0.8750
--------------------------------------------------


Epoch 8/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 8 Result: Loss: 0.0196 | Train Acc: 99.29%
Validation Accuracy: 0.9375
--------------------------------------------------


Epoch 9/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 9 Result: Loss: 0.0158 | Train Acc: 99.46%
Validation Accuracy: 0.8125
--------------------------------------------------


Epoch 10/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 10 Result: Loss: 0.0163 | Train Acc: 99.46%
Validation Accuracy: 0.9375
--------------------------------------------------


Epoch 11/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 11 Result: Loss: 0.0197 | Train Acc: 99.33%
Validation Accuracy: 0.8125
--------------------------------------------------


Epoch 12/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 12 Result: Loss: 0.0160 | Train Acc: 99.42%
Validation Accuracy: 0.8125
--------------------------------------------------


Epoch 13/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 13 Result: Loss: 0.0107 | Train Acc: 99.71%
Validation Accuracy: 0.9375
--------------------------------------------------


Epoch 14/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 14 Result: Loss: 0.0098 | Train Acc: 99.67%
Validation Accuracy: 1.0000
--------------------------------------------------


Epoch 15/15:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 15 Result: Loss: 0.0058 | Train Acc: 99.79%
Validation Accuracy: 0.8750
--------------------------------------------------


In [23]:
# --- TESTING ---
# Single pass over test_loader in eval mode with gradients disabled; the
# predicted class is the index of the largest logit.
print("Starting Final Evaluation on Test Set...")
model.eval()
test_labels, test_preds = [], []

with torch.no_grad():
    for batch_images, batch_labels in tqdm(test_loader, desc="Testing"):
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        logits = model(batch_images)
        _, preds = logits.max(dim=1)
        test_labels.extend(batch_labels.cpu().numpy())
        test_preds.extend(preds.cpu().numpy())

test_accuracy = accuracy_score(test_labels, test_preds)
print(f"🏆 Final Test Accuracy: {test_accuracy:.4f}")

Starting Final Evaluation on Test Set...


Testing:   0%|          | 0/20 [00:00<?, ?it/s]

🏆 Final Test Accuracy: 0.7853


In [24]:
# Save Model
# Only the state_dict (parameters and buffers) is written, not the model
# object itself; the file goes to the current working directory.
torch.save(obj=model.state_dict(), f='pneumonia_resnet18_colab.pth')
print("Model saved to Colab files.")

Model saved to Colab files.
