Importing required libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import pandas as pd
import numpy as np
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

In [2]:
# Configuration
# Root folder that contains the extracted "CheXpert-v1.0-small" directory.
# Set the CHEXPERT_BASE_DIR environment variable to run on another machine;
# the original local path is kept as the default.
CHEXPERT_BASE_DIR = os.environ.get(
    "CHEXPERT_BASE_DIR",
    "C:\\Users\\Mohammed Arshan\\Downloads\\archive",
)
# Two separate paths are needed: the label CSV is read from inside the dataset
# folder, while image paths are joined onto CHEXPERT_BASE_DIR (its parent).
# NOTE(review): presumably the CSV 'Path' column already starts with
# "CheXpert-v1.0-small/" - confirm against the file.
CHEXPERT_BASE_DIR_LABEL = os.path.join(CHEXPERT_BASE_DIR, "CheXpert-v1.0-small")
TRAIN_CSV_PATH = os.path.join(CHEXPERT_BASE_DIR_LABEL, 'train.csv')

In [3]:
# Small settings chosen so a full run finishes quickly while prototyping.
SUBSAMPLE_SIZE = 5_000   # number of rows sampled from the training CSV
BATCH_SIZE = 32          # samples per DataLoader batch
TARGET_EPOCHS = 1        # passes over the training split
LEARNING_RATE = 1e-3     # step size handed to the optimizer
IMAGE_SIZE = 224         # images are resized to IMAGE_SIZE x IMAGE_SIZE

In [4]:
# The five label columns this notebook trains on and evaluates
# (the official CheXpert benchmark labels).
TARGET_LABELS = [
    'Atelectasis',
    'Cardiomegaly',
    'Edema',
    'Consolidation',
    'Pleural Effusion',
]

# Output width of the classifier: one unit per target label.
NUM_CLASSES = len(TARGET_LABELS)

In [5]:
# Labels handled with the "U-Ones" policy: an uncertain annotation (-1) is
# counted as positive (1). Any target label not listed here gets "U-Zeros".
U_ONES_LABELS = [
    'Edema',
    'Cardiomegaly',
    'Atelectasis',
    'Pleural Effusion',
]

In [17]:
# All tensors and the model are placed on the CPU.
device = torch.device("cpu")
print("Using device: {}".format(device))

Using device: cpu


Custom Dataset Class and Data Preparation

In [6]:
class CheXpertDataset(Dataset):
    """Map-style dataset yielding ``(image, label_vector)`` pairs.

    Each sample is built from one row of ``dataframe``: the row's ``'Path'``
    entry is joined onto ``base_dir`` to locate the image, and the row's
    ``TARGET_LABELS`` columns form the float32 label vector.

    Args:
        dataframe: Table with a ``'Path'`` column and every column named in
            ``TARGET_LABELS``.
        base_dir: Directory that the ``'Path'`` entries are relative to.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, dataframe, base_dir, transform=None):
        self.df = dataframe
        self.base_dir = base_dir
        self.transform = transform
        # Pull the label columns out once, as a float32 array, so that
        # __getitem__ only needs a cheap positional lookup.
        label_columns = dataframe[TARGET_LABELS]
        self.labels = label_columns.values.astype(np.float32)

    def __len__(self):
        """Return the number of rows (samples) in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the sample at positional index ``idx``.

        Returns:
            Tuple ``(image, target)`` where ``image`` is the (optionally
            transformed) RGB image and ``target`` is a float32 tensor holding
            the row's label values.
        """
        # Positional lookup, so the DataFrame's own index labels do not matter.
        row = self.df.iloc[idx]
        full_path = os.path.join(self.base_dir, row['Path'])

        # Force three channels even when the file on disk is grayscale.
        picture = Image.open(full_path).convert('RGB')
        if self.transform:
            picture = self.transform(picture)

        target = torch.tensor(self.labels[idx], dtype=torch.float32)
        return picture, target

Data Loading and U-Ones Strategy Application

In [7]:
# Read the full training label table from TRAIN_CSV_PATH.
df_train = pd.read_csv(filepath_or_buffer=TRAIN_CSV_PATH)

In [8]:
# Draw a reproducible random subset of SUBSAMPLE_SIZE rows (fixed seed).
df_train_subsampled = df_train.sample(
    n=SUBSAMPLE_SIZE,
    random_state=42,
)

In [10]:
# Clean every target column in two steps:
#   1. missing annotations (NaN) become 0;
#   2. uncertain annotations (-1) become 1 for labels in U_ONES_LABELS
#      (U-Ones) and 0 for every other target label (U-Zeros).
for label in TARGET_LABELS:
    df_train_subsampled[label] = (
        df_train_subsampled[label]
        .fillna(0)
        .replace({-1: 1 if label in U_ONES_LABELS else 0})
    )

In [11]:
# Hold out 20% of the subsample for validation; `test_df` is the validation part.
# NOTE(review): rows are split at random, so images of the same patient could
# land on both sides - confirm whether a patient-level split is needed.
train_df, test_df = train_test_split(
    df_train_subsampled,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Preprocessing applied to every image: resize to a fixed square, convert to a
# tensor, then normalize with the ImageNet channel statistics, because the
# backbone used later was pre-trained on ImageNet.
resize_to_square = transforms.Resize((IMAGE_SIZE, IMAGE_SIZE))
imagenet_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

data_transforms = transforms.Compose([
    resize_to_square,
    transforms.ToTensor(),
    imagenet_normalize,
])

In [13]:
# Wrap each split in a CheXpertDataset; both use the same image root and the
# same preprocessing pipeline.
train_dataset = CheXpertDataset(
    dataframe=train_df,
    base_dir=CHEXPERT_BASE_DIR,
    transform=data_transforms,
)
val_dataset = CheXpertDataset(
    dataframe=test_df,
    base_dir=CHEXPERT_BASE_DIR,
    transform=data_transforms,
)

In [14]:
# Batch both datasets; only the training batches are shuffled.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

Model Setup (DenseNet121 Transfer Learning)

In [18]:
def setup_densenet_model(num_classes):
    """Build a DenseNet121 with a frozen backbone and a new multi-label head.

    The network is loaded with torchvision's DEFAULT pre-trained weights, all
    of its parameters are frozen, and its ``classifier`` is replaced by a
    small head whose (freshly created) parameters remain trainable.

    Args:
        num_classes: Number of output units of the new head.

    Returns:
        The model, moved to the module-level ``device``.
    """
    backbone = models.densenet121(weights=models.DenseNet121_Weights.DEFAULT)

    # Freeze everything that exists so far; the head built below is created
    # afterwards and therefore stays trainable.
    backbone.requires_grad_(False)

    # Width of the features feeding the original classifier layer.
    feature_dim = backbone.classifier.in_features

    # Hidden layer -> ReLU -> dropout -> one sigmoid output per class.
    head_layers = [
        nn.Linear(feature_dim, 512),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(512, num_classes),
        nn.Sigmoid(),
    ]
    backbone.classifier = nn.Sequential(*head_layers)

    return backbone.to(device)

In [19]:
# Instantiate the network with one output per target label.
model = setup_densenet_model(num_classes=NUM_CLASSES)

In [20]:
# Multi-label objective: binary cross-entropy evaluated per label.
# nn.BCELoss takes probabilities, which matches the nn.Sigmoid that ends the
# model's classifier head.
criterion = nn.BCELoss()

# Adam receives only the classifier head's parameters, so the frozen backbone
# is never updated.
optimizer = optim.Adam(
    params=model.classifier.parameters(),
    lr=LEARNING_RATE,
)

Training and Evaluation Loop

In [21]:
def train_model(model, criterion, optimizer, train_loader, valid_loader, epochs):
    """Train ``model`` and report a macro-averaged validation ROC AUC per epoch.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        model: Network to optimise; called as ``model(inputs)``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the trainable parameters.
        train_loader: Iterable of ``(inputs, labels)`` training batches; must
            expose ``.dataset`` with a length (used to average the loss).
        valid_loader: Iterable of ``(inputs, labels)`` batches scored after
            every epoch.
        epochs: Number of passes over ``train_loader``.

    Returns:
        dict with two lists holding one entry per epoch:
        ``'train_loss'`` (average training loss) and ``'val_auc'``
        (macro ROC AUC on ``valid_loader``, or 0.5 when it cannot be computed).
    """
    history = {'train_loss': [], 'val_auc': []}

    for epoch in range(epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by the batch size so that, when the
            # criterion returns a per-batch mean, epoch_loss is a per-sample
            # average even if the last batch is smaller.
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)

        # Validation phase
        model.eval()
        all_labels = []
        all_preds = []

        with torch.no_grad():  # no gradients are needed for scoring
            # Iterate the loader supplied by the caller.
            for inputs, labels in valid_loader:
                inputs = inputs.to(device)
                outputs = model(inputs)

                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(outputs.cpu().numpy())

        all_labels = np.array(all_labels)
        all_preds = np.array(all_preds)

        # roc_auc_score accepts the (samples, labels) matrices directly and
        # averages the per-label AUCs when average='macro'.
        try:
            val_auc = roc_auc_score(all_labels, all_preds, average='macro')
        except ValueError as err:
            # Typically a label with only one class present in a small
            # validation subsample. Keep the chance-level placeholder, but
            # say so instead of silently reporting it as a real score.
            print(f"Warning: validation AUC could not be computed ({err}); reporting 0.5")
            val_auc = 0.5

        history['train_loss'].append(epoch_loss)
        history['val_auc'].append(val_auc)

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {epoch_loss:.4f} | Val AUC (Macro): {val_auc:.4f}")

    return history

In [22]:
# Kick off training for TARGET_EPOCHS epochs and keep the per-epoch metrics.
history = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    valid_loader=val_loader,
    epochs=TARGET_EPOCHS,
)

Epoch 1/1 | Train Loss: 0.5104 | Val AUC (Macro): 0.6416


In [23]:
# Persist the trained model.
# Destination file for the weights (relative to the working directory).
MODEL_SAVE_PATH = 'densenet121_chexpert_tl_weights.pth'

# Store only the state dict (parameter and buffer tensors), not the pickled
# model object.
torch.save(obj=model.state_dict(), f=MODEL_SAVE_PATH)