# Age Prediction of Patients by Chest X-rays

In [1]:
import torch
import torchvision
from torch.utils.data import Dataset
from torchvision.io import read_image
from torchvision.utils import save_image
from torchvision.transforms import ToTensor, Compose, Resize, Grayscale, Normalize, Lambda
import os
import torchvision.transforms as T
import pydicom
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import DataLoader
from torch import optim
from sklearn.metrics import roc_auc_score
import warnings
warnings.filterwarnings("ignore")
import numpy as np



In [2]:
device = torch.device('cuda')

In [3]:
# Define the training image directory and annotations csv
train_img_dir = '/kaggle/input/minijsrtageprediction/XPAge01_RGB/XP/JPGs/'
train_annotations_file = '/kaggle/input/minijsrtageprediction/XPAge01_RGB/XP/trainingdata.csv'

# Define the testing image directory and annotations csv
test_img_dir = '/kaggle/input/minijsrtageprediction/XPAge01_RGB/XP/JPGs/'
test_annotations_file = '/kaggle/input/minijsrtageprediction/XPAge01_RGB/XP/testdata.csv'

# Define the output classes here
num_classes = 100

In [4]:
df = pd.read_csv(test_annotations_file)
df['age'].max()

89

In [5]:
# Define a custom class for dataset 
class CustomDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = self.img_dir + self.img_labels.iloc[idx, 0]
        image = read_image(img_path)
        image = T.ToPILImage() (image)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [6]:
# Definining the resnet18 model
from torchvision.models import resnet18, ResNet18_Weights
import torch.nn as nn
class Resnet18Classifier(nn.Module):
    def __init__(self, num_classes):
        super(Resnet18Classifier, self).__init__()
        self.resnet18 = resnet18(pretrained=True).eval()
        num_ftrs = self.resnet18.fc.in_features
        self.resnet18.fc = nn.Linear(num_ftrs, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.softmax(self.resnet18(x))

In [7]:
# Defining the transform for loading the data
transform = Compose([
    ToTensor(),
    Lambda(lambda x: x.repeat(3,1,1)),
    Resize(224)
])

# Defining the training dataset and data loader
train_dataset = CustomDataset(
    annotations_file=train_annotations_file,
    img_dir=train_img_dir,
    transform=transform,
)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Defining the testing dataset and data loader
test_dataset = CustomDataset(
    annotations_file=test_annotations_file,
    img_dir=test_img_dir,
    transform=transform
)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=True)

In [8]:
# Initializing the resnet18 classifier with number of output models
resnet_18_classifier = Resnet18Classifier(num_classes).to(device)

# Defining the loss_function and the optimizer
loss_func = nn.BCELoss()
optimizer = optim.Adam(resnet_18_classifier.parameters(), lr = 0.001)
resnet_18_classifier

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 182MB/s]


Resnet18Classifier(
  (resnet18): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=Tru

In [9]:
def vectorize(x):
    one_hot_vec = torch.zeros(100, dtype=torch.int8)
    one_hot_vec[x] = 1
    return one_hot_vec

In [10]:
# Training the model
num_epochs = 50
for epoch in range(num_epochs):
    for images, labels in train_loader:
        one_hot_labels = torch.cat(list(map(vectorize, labels)))
#         print(f"Label is {labels[0]}")
        one_hot_labels = one_hot_labels.view((-1, 100))
        one_hot_labels = one_hot_labels.to(torch.float32)
#         print(f"one_hot_labels is {one_hot_labels[0]}")
        images, labels = images.to(device), one_hot_labels.to(device)
        optimizer.zero_grad()
        outputs = resnet_18_classifier(images)
#         print(f"Output is {outputs[0]}")
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

print("Training finished")

Epoch [1/50], Loss: 0.0564
Epoch [2/50], Loss: 0.0547
Epoch [3/50], Loss: 0.0540
Epoch [4/50], Loss: 0.0530
Epoch [5/50], Loss: 0.0496
Epoch [6/50], Loss: 0.0483
Epoch [7/50], Loss: 0.0464
Epoch [8/50], Loss: 0.0492
Epoch [9/50], Loss: 0.0494
Epoch [10/50], Loss: 0.0468
Epoch [11/50], Loss: 0.0478
Epoch [12/50], Loss: 0.0458
Epoch [13/50], Loss: 0.0466
Epoch [14/50], Loss: 0.0483
Epoch [15/50], Loss: 0.0475
Epoch [16/50], Loss: 0.0483
Epoch [17/50], Loss: 0.0468
Epoch [18/50], Loss: 0.0481
Epoch [19/50], Loss: 0.0488
Epoch [20/50], Loss: 0.0476
Epoch [21/50], Loss: 0.0471
Epoch [22/50], Loss: 0.0483
Epoch [23/50], Loss: 0.0472
Epoch [24/50], Loss: 0.0465
Epoch [25/50], Loss: 0.0462
Epoch [26/50], Loss: 0.0477
Epoch [27/50], Loss: 0.0464
Epoch [28/50], Loss: 0.0471
Epoch [29/50], Loss: 0.0465
Epoch [30/50], Loss: 0.0476
Epoch [31/50], Loss: 0.0474
Epoch [32/50], Loss: 0.0507
Epoch [33/50], Loss: 0.0473
Epoch [34/50], Loss: 0.0467
Epoch [35/50], Loss: 0.0476
Epoch [36/50], Loss: 0.0472
E

In [11]:
# Testing the model
total = 0
auc = 0
with torch.no_grad():
    for images, labels in test_loader:
        one_hot_labels = torch.cat(list(map(vectorize, labels)))
        one_hot_labels = one_hot_labels.view((-1, 100))
        one_hot_labels = one_hot_labels.to(torch.float32)
        images, labels = images.to(device), one_hot_labels.to(device)
        outputs = resnet_18_classifier(images)
        labels = labels.cpu().numpy()
        outputs = outputs.cpu().numpy()
        for i, row in enumerate(labels):
            auc += roc_auc_score(row, outputs[i])
            total += 1

accuracy = auc / total
print(f"Test Accuracy for age: {accuracy:.2f}")

Test Accuracy for age: 0.74
