# 3. Classify gender of Chest X-ray images

In [1]:
import torch
import torchvision
from torch.utils.data import Dataset
from torchvision.io import read_image
from torchvision.utils import save_image
from torchvision.transforms import ToTensor, Compose, Resize, Grayscale, Normalize, Lambda
import os
import torchvision.transforms as T
import pydicom
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import DataLoader
from torch import optim
import warnings
warnings.filterwarnings("ignore")
from sklearn.metrics import roc_auc_score

In [2]:
device = torch.device('cuda')

In [3]:
# Define the training image directory and annotations csv
train_img_dir = '/kaggle/input/minijsrtgender/dataset/Gender01'
train_annotations_file = '/kaggle/input/minijsrtgender/dataset/Gender01/list_train.txt'

# Define the testing image directory and annotations csv
test_img_dir = '/kaggle/input/minijsrtgender/dataset/Gender01'
test_annotations_file = '/kaggle/input/minijsrtgender/dataset/Gender01/list_test.txt'

# Define the output classes here
num_classes = 1

# Defining the label mapping for each output
label_mapping = {
    'female': 0,
    'male': 1
}

In [4]:
# Define a custom class for dataset 
class CustomDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = self.img_dir + self.img_labels.iloc[idx, 0]
        image = read_image(img_path)
        image = T.ToPILImage() (image)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [5]:
# Definining the resnet18 model
from torchvision.models import resnet18, ResNet18_Weights
import torch.nn as nn
class Resnet18Classifier(nn.Module):
    def __init__(self, num_classes):
        super(Resnet18Classifier, self).__init__()
        self.resnet18 = resnet18(pretrained=True)
        self.resnet18.fc = nn.Linear(self.resnet18.fc.in_features, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.sigmoid(self.resnet18(x))
#         return self.resnet18(x)

In [6]:
# Defining the transform for loading the data
transform = Compose([
    ToTensor(),
    Lambda(lambda x: x.repeat(3,1,1)),
    Normalize((0.5,), (0.5,))
])

# Defining the training dataset and data loader
train_dataset = CustomDataset(
    annotations_file=train_annotations_file,
    img_dir=train_img_dir,
    transform=transform,
)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Defining the testing dataset and data loader
test_dataset = CustomDataset(
    annotations_file=test_annotations_file,
    img_dir=test_img_dir,
    transform=transform
)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=True)

In [7]:
# Initializing the resnet18 classifier with number of output models
resnet_18_classifier = Resnet18Classifier(num_classes).to(device)

# Defining the loss_function and the optimizer
loss_func = nn.BCELoss()
optimizer = optim.Adam(resnet_18_classifier.parameters(), lr = 0.001)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 204MB/s]


In [8]:
resnet_18_classifier

Resnet18Classifier(
  (resnet18): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=Tru

In [9]:
# Training the model
num_epochs = 100
for epoch in range(num_epochs):
    for images, labels in train_loader:
        numeric_labels = [label_mapping[label] for label in labels]
        numeric_labels_tensor = torch.tensor(numeric_labels)
        images, labels = images.to(device), numeric_labels_tensor.to(device)
        optimizer.zero_grad()
        outputs = resnet_18_classifier(images)
        outputs = outputs.float().squeeze()
        labels = labels.float()
#         print(f"Outputs are {outputs}")                
#         print(f"Labels are {labels}")
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

print("Training finished")

Epoch [1/100], Loss: 0.5170
Epoch [2/100], Loss: 0.2898
Epoch [3/100], Loss: 0.1058
Epoch [4/100], Loss: 0.0242
Epoch [5/100], Loss: 0.0050
Epoch [6/100], Loss: 0.0046
Epoch [7/100], Loss: 0.0004
Epoch [8/100], Loss: 0.0010
Epoch [9/100], Loss: 0.0011
Epoch [10/100], Loss: 0.0011
Epoch [11/100], Loss: 0.0005
Epoch [12/100], Loss: 0.0006
Epoch [13/100], Loss: 0.0004
Epoch [14/100], Loss: 0.0006
Epoch [15/100], Loss: 0.0291
Epoch [16/100], Loss: 0.0046
Epoch [17/100], Loss: 0.0005
Epoch [18/100], Loss: 0.0094
Epoch [19/100], Loss: 0.0354
Epoch [20/100], Loss: 0.3425
Epoch [21/100], Loss: 0.4231
Epoch [22/100], Loss: 0.0161
Epoch [23/100], Loss: 0.0235
Epoch [24/100], Loss: 0.6612
Epoch [25/100], Loss: 0.1824
Epoch [26/100], Loss: 0.0082
Epoch [27/100], Loss: 0.4943
Epoch [28/100], Loss: 0.2190
Epoch [29/100], Loss: 0.0193
Epoch [30/100], Loss: 0.1273
Epoch [31/100], Loss: 0.2497
Epoch [32/100], Loss: 0.4035
Epoch [33/100], Loss: 0.0220
Epoch [34/100], Loss: 0.2471
Epoch [35/100], Loss: 0

In [10]:
# Testing the model
correct = 0
total_auc = 0
total_counts = 0
# Don't calculate gradients during evaluation
with torch.no_grad():
    resnet_18_classifier.eval()
    for images, labels in test_loader:
        numeric_labels = [label_mapping[label] for label in labels]
        numeric_labels_tensor = torch.tensor(numeric_labels)
        images, labels = images.to(device), numeric_labels_tensor.to(device)
        outputs = resnet_18_classifier(images)
        labels = labels.cpu()
        outputs = outputs.cpu()
        auc = roc_auc_score(labels, outputs)
        total_auc += auc
        total_counts += 1
        print(f'Auc for batch: {auc:.2f}')

final_auc = total_auc / total_counts
print(f"Test auc for gender classification: {final_auc:.2f}")

Auc for batch: 0.98
Auc for batch: 0.73
Auc for batch: 0.89
Auc for batch: 1.00
Auc for batch: 0.83
Auc for batch: 1.00
Test auc for gender classification: 0.91
