# RSNA 2024 Deep Learning Lab

# https://tinyurl.com/c43js5va

## December 3, 2024
## Authors: Ian Pan, MD; Felipe Kitamura, MD, PhD

## Objectives:
*   Train a simple deep learning model to predict bone age from pediatric hand radiographs
*   Deploy the model using Gradio and Hugging Face

## Prerequisites:
*   Basic Python programming using standard libraries (e.g., NumPy, Pandas) and the PyTorch deep learning library
*   Basic understanding of convolutional neural networks and deep learning

## Let's get started!



## Install Packages

In [None]:
!pip -qq install lightning gradio

## Import Modules

In [None]:
import albumentations as A
import cv2
import kagglehub
import lightning
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import os
import pandas as pd
import timm
import torch
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader

## Download Data

This dataset contains a subset of 1,200 images from the RSNA Pediatric Bone Age Challenge. The images are downsampled to half their original resolutions to further decrease the size of the dataset for this demonstration. Learn more about the challenge here: https://www.rsna.org/rsnai/ai-image-challenge/rsna-pediatric-bone-age-challenge-2017

In [None]:
# Fetch the dataset with kagglehub; `path` is used below as the directory
# that contains train.csv and the images/ folder.
path = kagglehub.dataset_download("vaillant/rsna-pediatric-bone-age-challenge-n1200")
print(f"Dataset saved to: {path}")

## Install Packages

## Explore Data

Note that the original bone age labels are in units of months. We will convert them to years.

In [None]:
# Load the label table shipped with the dataset.
df = pd.read_csv(os.path.join(path, "train.csv"))
# Labels are stored in months; dividing by 12 converts them to years.
df["bone_age"] = df.bone_age / 12.
# Build the full path of each image from its patient id (<path>/images/<pid>.png).
df["filepath"] = df.pid.apply(lambda x: os.path.join(path, "images", f"{x}.png"))
df.head()

In [None]:
# Count how many rows fall under each value of the `female` column.
df.female.value_counts()

In [None]:
# Summary statistics of the bone age labels (in years after the conversion above).
df.bone_age.describe()

In [None]:
# Display four randomly chosen radiographs in a 2x2 grid as a visual sanity check.
sample = np.random.choice(df.filepath, size=4, replace=False)
for position, image_path in enumerate(sample, start=1):
  plt.subplot(2, 2, position)
  # flag 0 == cv2.IMREAD_GRAYSCALE: read the image as a single-channel array
  plt.imshow(cv2.imread(image_path, cv2.IMREAD_GRAYSCALE), cmap="gray")

plt.show()

## Create Training, Validation, and Test Sets

In [None]:
def train_val_test_split(df, train_pct, test_pct, seed=None):
  """Randomly partition `df` into disjoint training, validation, and test sets.

  Args:
    df: DataFrame to split; rows are selected by position.
    train_pct: Fraction of rows assigned to the training set.
    test_pct: Fraction of rows assigned to the test set. Whatever remains
      after the training and test rows are taken forms the validation set.
    seed: Optional integer seed for a reproducible split. When None, NumPy's
      global random state is used.

  Returns:
    Tuple `(train_df, val_df, test_df)`.

  Raises:
    ValueError: If a fraction is negative or the two fractions sum to more
      than 1.
  """
  # Small tolerance so that fractions such as 0.7 + 0.3 are not rejected
  # because of floating-point round-off.
  if train_pct < 0 or test_pct < 0 or train_pct + test_pct > 1 + 1e-9:
    raise ValueError(
      "train_pct and test_pct must be non-negative and sum to at most 1, "
      f"got train_pct={train_pct}, test_pct={test_pct}"
    )

  n_train = int(len(df) * train_pct)
  n_test = int(len(df) * test_pct)
  n_val = len(df) - n_train - n_test
  print(f"TRAIN : n={n_train} {train_pct*100:0.1f}%")
  print(f"VAL   : n={n_val} {(1-train_pct-test_pct)*100:0.1f}%")
  print(f"TEST  : n={n_test} {test_pct*100:0.1f}%")

  # One shuffle of all row positions, then three contiguous slices: the
  # subsets are disjoint and together cover every row exactly once.
  rng = np.random if seed is None else np.random.RandomState(seed)
  shuffled = rng.permutation(len(df))
  train_indices = shuffled[:n_train]
  test_indices = shuffled[n_train:n_train + n_test]
  val_indices = shuffled[n_train + n_test:]
  return df.iloc[train_indices], df.iloc[val_indices], df.iloc[test_indices]


# 70% training, 20% test; the remaining rows become the validation set.
train_df, val_df, test_df = train_val_test_split(df, train_pct=0.7, test_pct=0.2)

## Create PyTorch Datasets and Data Loaders

In [None]:
class BoneAgeDataset(Dataset):
  """Dataset of grayscale hand radiographs with bone age and sex labels.

  Args:
    filepaths: Indexable sequence of image file paths.
    labels: Indexable sequence of bone age labels, aligned with `filepaths`.
    female: Indexable sequence of sex indicators, aligned with `filepaths`.
    transforms: Optional callable invoked as `transforms(image=img)` that
      returns a mapping with the transformed array under the key "image".
  """

  def __init__(self,
               filepaths,
               labels,
               female,
               transforms=None):
    self.filepaths = filepaths
    self.labels = labels
    self.female = female
    self.transforms = transforms

  def __len__(self):
    """Return the number of samples."""
    return len(self.filepaths)

  def __getitem__(self, i):
    """Load sample `i`.

    Returns:
      Dict with "x" (float image tensor of shape (1, H, W), divided by 255),
      "y" (float tensor of shape (1,) holding the label) and "female"
      (tensor holding the sex indicator).

    Raises:
      FileNotFoundError: If the image cannot be read from disk.
    """
    filepath = self.filepaths[i]
    img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals an unreadable file by returning None instead of
    # raising; fail here with the offending path rather than further down.
    if img is None:
      raise FileNotFoundError(f"Could not read image: {filepath}")
    label = self.labels[i]

    # Explicit None check: a transform object may define its own truthiness.
    if self.transforms is not None:
      img = self.transforms(image=img)["image"]

    img = torch.from_numpy(img).float()
    # simple normalization from 8-bit [0, 255] -> float [0, 1]
    img = img / 255.
    img = img.unsqueeze(0) # add channel dimension H, W -> C, H, W

    return {"x": img, "y": torch.Tensor([label]), "female": torch.tensor(self.female[i])}


Specify the image dimensions and batch size here. Larger image dimensions and batch size require more GPU memory. While increasing image size can often lead to improved performance (up to a certain point), training and inference times will also be increased, and batch sizes will need to be decreased accordingly.

In [None]:
# Target size every radiograph is resized to before entering the network.
IMAGE_HEIGHT, IMAGE_WIDTH = 512, 512
# Training batch size; the validation loader below uses twice this value.
BATCH_SIZE = 16

In [None]:
# Training pipeline: resize to the target size, then flip horizontally with probability 0.5.
train_transforms = A.Compose([
    A.Resize(IMAGE_HEIGHT, IMAGE_WIDTH, p=1),
    A.HorizontalFlip(p=0.5)
])

# Validation pipeline: resize only, no random augmentation.
val_transforms = A.Compose([
    A.Resize(IMAGE_HEIGHT, IMAGE_WIDTH, p=1)
])

train_dataset = BoneAgeDataset(filepaths=train_df.filepath.values,
                               labels=train_df.bone_age.values,
                               female=train_df.female.values,
                               transforms=train_transforms)

val_dataset = BoneAgeDataset(filepaths=val_df.filepath.values,
                             labels=val_df.bone_age.values,
                             female=val_df.female.values,
                             transforms=val_transforms)

# Training batches are shuffled and the final incomplete batch is dropped.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=2, pin_memory=True, persistent_workers=True)

# Validation keeps every sample in a fixed order and uses a batch twice as large.
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE * 2, shuffle=False, drop_last=False, num_workers=2, pin_memory=True, persistent_workers=True)

## Create Your Model

The PyTorch Image Models library (https://github.com/huggingface/pytorch-image-models) has an extensive selection of neural nets, including convolutional neural nets and transformers, with pretrained weights.

Because our radiographs are grayscale, we must specify the number of input channels as 1 (`in_chans=1`). We are predicting bone age as a regression task, so we will also specify `num_classes=1`.

To avoid writing too much training code from scratch, we will be using PyTorch Lightning (https://lightning.ai).

In [None]:
class BoneAgeModel(lightning.LightningModule):
  """Lightning wrapper tying together a network, optimizer, LR scheduler and loss.

  Batches are dicts; the image tensor is read from "x" and the target from "y".
  """

  def __init__(self, net, optimizer, scheduler, loss_fn):
    super().__init__()
    self.net = net
    self.optimizer, self.scheduler = optimizer, scheduler
    self.loss_fn = loss_fn

    # per-batch validation losses, emptied at the end of every validation epoch
    self.val_losses = []

  def training_step(self, batch, batch_index):
    """Return the training loss for one batch."""
    prediction = self.net(batch["x"])
    return self.loss_fn(prediction, batch["y"])

  def validation_step(self, batch, batch_index):
    """Compute the loss for one validation batch and remember it."""
    batch_loss = self.loss_fn(self.net(batch["x"]), batch["y"])
    self.val_losses.append(batch_loss.item())

  def on_validation_epoch_end(self, *args, **kwargs):
    """Print the mean of the collected batch losses, then reset the list."""
    val_loss, self.val_losses = np.mean(self.val_losses), []
    print(f"Validation Loss : {val_loss:0.3f}")

  def configure_optimizers(self):
    """Hand the optimizer and a per-step scheduler config to Lightning."""
    return {
      "optimizer": self.optimizer,
      "lr_scheduler": {"scheduler": self.scheduler, "interval": "step"},
    }

In [None]:
BACKBONE = "resnet18d"
LEARNING_RATE = 3e-4
NUM_EPOCHS = 10

# Single-channel input (in_chans=1) and one regression output (num_classes=1).
net = timm.create_model(BACKBONE, pretrained=True, in_chans=1, num_classes=1)
optimizer = torch.optim.AdamW(net.parameters(), LEARNING_RATE)
# T_max is the total number of training batches over all epochs, because
# BoneAgeModel.configure_optimizers steps the scheduler once per batch
# (interval="step").
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                       T_max=NUM_EPOCHS * len(train_loader),
                                                       eta_min=0.0)
loss_fn = nn.L1Loss() # equivalent to mean absolute error

model = BoneAgeModel(net, optimizer, scheduler, loss_fn)

## Train Model

In [None]:
# Checkpointing: weights only, written to ./checkpoints/ and named after the epoch.
# NOTE(review): no `monitor` metric is configured and the model does not log one,
# so "best" is presumably just the most recently saved checkpoint -- confirm.
callbacks = [
  lightning.pytorch.callbacks.ModelCheckpoint(
    dirpath="./checkpoints/",
    filename="{epoch:03d}",
    save_last=True,
    save_weights_only=True,
    save_top_k=1
  )
]


# Validation runs every second epoch (check_val_every_n_epoch=2).
trainer = lightning.Trainer(max_epochs=NUM_EPOCHS, check_val_every_n_epoch=2, callbacks=callbacks)
trainer.fit(model, train_loader, val_loader)

In [None]:
# Reads the path from the last callback registered on the trainer.
# NOTE(review): assumes that callback is the ModelCheckpoint passed above -- confirm.
print(f"Trained model weights saved to : {trainer.callbacks[-1].best_model_path}")

## Create Your Demo Using Gradio

In [None]:
# Copy the final checkpoint next to the notebook. The file name is hard-coded:
# it matches filename="{epoch:03d}" with NUM_EPOCHS = 10 (last epoch index 9)
# and must be updated if either of those changes.
!cp /content/checkpoints/epoch=009.ckpt /content/epoch=009.ckpt

In [None]:
import gradio as gr
import lightning
import numpy as np
import os
import pandas as pd
import timm
import torch
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader

# Must match the values used during training.
BACKBONE = "resnet18d"
IMAGE_HEIGHT, IMAGE_WIDTH = 512, 512

# Relative path: the checkpoint is expected in the current working directory.
trained_weights_path = "epoch=009.ckpt"
# Load on CPU and keep only the "state_dict" entry of the checkpoint.
trained_weights = torch.load(trained_weights_path, map_location=torch.device('cpu'))["state_dict"]

# recreate the model
class BoneAgeModel(lightning.LightningModule):
  """Lightning wrapper around the network; redefined here so the demo cell is self-contained.

  Batches are dicts; the image tensor is read from "x" and the target from "y".
  """

  def __init__(self, net, optimizer, scheduler, loss_fn):
    super().__init__()
    self.net = net
    self.optimizer, self.scheduler = optimizer, scheduler
    self.loss_fn = loss_fn

    # per-batch validation losses, emptied at the end of every validation epoch
    self.val_losses = []

  def training_step(self, batch, batch_index):
    """Return the training loss for one batch."""
    prediction = self.net(batch["x"])
    return self.loss_fn(prediction, batch["y"])

  def validation_step(self, batch, batch_index):
    """Compute the loss for one validation batch and remember it."""
    batch_loss = self.loss_fn(self.net(batch["x"]), batch["y"])
    self.val_losses.append(batch_loss.item())

  def on_validation_epoch_end(self, *args, **kwargs):
    """Print the mean of the collected batch losses, then reset the list."""
    val_loss, self.val_losses = np.mean(self.val_losses), []
    print(f"Validation Loss : {val_loss:0.3f}")

  def configure_optimizers(self):
    """Hand the optimizer and a per-step scheduler config to Lightning."""
    return {
      "optimizer": self.optimizer,
      "lr_scheduler": {"scheduler": self.scheduler, "interval": "step"},
    }

# Rebuild the architecture only. pretrained=False skips the ImageNet weight
# download: every parameter is replaced by the trained checkpoint just below.
net = timm.create_model(BACKBONE, pretrained=False, in_chans=1, num_classes=1)
# Optimizer, scheduler and loss are not needed for inference.
trained_model = BoneAgeModel(net, None, None, None)
trained_model.load_state_dict(trained_weights)
trained_model.eval()


def format_bone_age(bone_age):
  """Format a bone age given in years as the demo's output string.

  The value is rounded to whole months first and only then split into years
  and months, so the month part is always in 0..11 (e.g. 5.97 years becomes
  "6 years, 0 months" rather than "5 years, 12 months"). Negative
  predictions are clamped to zero.
  """
  total_months = max(0, round(bone_age * 12))
  years, months = divmod(total_months, 12)
  return f"Predicted Bone Age: {years} years, {months} months"


def predict_bone_age(Radiograph):
  """Predict bone age from a grayscale radiograph.

  Args:
    Radiograph: 2-D NumPy array (H, W) of 8-bit pixel values as delivered by
      the Gradio image input, or None when no image was provided.

  Returns:
    String of the form "Predicted Bone Age: Y years, M months".

  Raises:
    gr.Error: If no image was provided.
  """
  if Radiograph is None:
    raise gr.Error("Please upload a radiograph first.")
  img = torch.from_numpy(Radiograph).float()
  img = img.unsqueeze(0).unsqueeze(0) # add channel and batch dimensions
  # Resize to the resolution the model was trained on; the height/width set on
  # the Gradio component do not resize the array passed to this function.
  img = nn.functional.interpolate(img, size=(IMAGE_HEIGHT, IMAGE_WIDTH),
                                  mode="bilinear", align_corners=False)
  img = img / 255. # use same normalization as in the PyTorch dataset
  with torch.inference_mode():
    bone_age = trained_model.net(img)[0].item()
  return format_bone_age(bone_age)


# NOTE(review): height/width here presumably control only the on-screen size of
# the component, not the size of the array handed to the callback -- confirm
# against the Gradio Image documentation.
image = gr.Image(height=IMAGE_HEIGHT, width=IMAGE_WIDTH, image_mode="L") # L for grayscale
label = gr.Label(show_label=True, label="Bone Age Prediction")

# Wire the prediction function to one image input and one label output.
demo = gr.Interface(fn=predict_bone_age,
                    inputs=[image],
                    outputs=label)

demo.launch(debug=True)

To deploy the gradio interface as a separate website, you will need a requirements.txt with the following content:


```
lightning
gradio
timm
```



A few things to note:
*   The image input box shows "Radiograph" in the top left corner, which corresponds to the name of the argument in the `predict_bone_age` function which was passed to `gr.Interface`.
*   We use `gr.Label` as our output because we are not displaying any images or videos, just the bone age prediction as a string. If we wanted to display an image, we could use `gr.Image`.
*   Though we only provided 1 input, note that this was passed as a list to `gr.Interface` and thus multiple inputs can be provided (see bonus section below).



## **Bonus:**

## Improve Performance by Incorporating Patient Sex

Image classification can be oversimplified into 2 steps: feature extraction followed by linear classification (or regression).

The neural net is doing the bulk of the work by essentially compressing the original high-dimensional input (the image) into a 1-dimensional feature vector. The final linear layer then maps this vector to predict whatever target you have specified.

Bone age depends on the sex of the patient, and thus this information will likely be useful for the model to have.

Here, we add on the patient sex to the 1-dimensional feature vector by passing the binary value (female or not) through an embedding layer (a glorified lookup table) before the final regression step through the linear layer.

In [None]:
class Net(nn.Module):
  """Image backbone whose features are joined with a sex embedding before regression.

  Args:
    backbone: Name of the timm model used as the feature extractor.
    pretrained: Whether timm should load pretrained weights for the backbone.
      Pass False when the weights are restored from a checkpoint afterwards.
  """

  def __init__(self, backbone, pretrained=True):
    super().__init__()
    self.backbone = timm.create_model(backbone, pretrained=pretrained, in_chans=1, num_classes=0)
    # Find the feature width with a dummy batch. The probe runs in eval mode
    # and without autograd so that layers which track running statistics
    # (e.g. BatchNorm) are not updated with random noise.
    was_training = self.backbone.training
    self.backbone.eval()
    with torch.no_grad():
      dim_feats = self.backbone(torch.randn((2, 1, 64, 64))).size(1)
    self.backbone.train(was_training)
    self.embed = nn.Embedding(2, 32)
    self.regressor = nn.Linear(dim_feats + 32, 1)

  def forward(self, x, female):
    """Return one regression value per sample.

    Args:
      x: Image batch passed to the backbone.
      female: Per-sample sex indicator (0 or 1); cast to long and used as the
        embedding index.
    """
    feat = self.backbone(x)
    feat = torch.cat([feat, self.embed(female.long())], dim=1)
    return self.regressor(feat)


class BoneAgeModelV2(BoneAgeModel):
  """BoneAgeModel variant whose network also receives the batch's "female" entry."""

  def training_step(self, batch, batch_index):
    """Return the training loss for one batch."""
    prediction = self.net(batch["x"], batch["female"])
    return self.loss_fn(prediction, batch["y"])

  def validation_step(self, batch, batch_index):
    """Compute the loss for one validation batch and remember it."""
    batch_loss = self.loss_fn(self.net(batch["x"], batch["female"]), batch["y"])
    self.val_losses.append(batch_loss.item())

In [None]:
# Same optimizer, schedule and loss as the image-only model, now with the
# network that also takes patient sex as input.
net = Net(BACKBONE)
optimizer = torch.optim.AdamW(net.parameters(), LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                       T_max=NUM_EPOCHS * len(train_loader),
                                                       eta_min=0.0)
loss_fn = nn.L1Loss()

model = BoneAgeModelV2(net, optimizer, scheduler, loss_fn)

# No callbacks or validation interval are passed here, so the Trainer's
# defaults apply (unlike the first training run above).
trainer = lightning.Trainer(max_epochs=NUM_EPOCHS)
trainer.fit(model, train_loader, val_loader)

In [None]:
# Report the checkpoint path held by the trainer's last callback, then copy
# that file into /content/ under the same file name.
print(f"Trained model weights saved to : {trainer.callbacks[-1].best_model_path}")
!cp {trainer.callbacks[-1].best_model_path} /content/{trainer.callbacks[-1].best_model_path.split("/")[-1]}

In [None]:
import gradio as gr
import lightning
import numpy as np
import os
import pandas as pd
import timm
import torch
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader

# Must match the values used during training.
BACKBONE = "resnet18d"
IMAGE_HEIGHT, IMAGE_WIDTH = 512, 512

# Hard-coded checkpoint file name, expected in the current working directory;
# update it if the number of epochs or steps of the training run changes.
trained_weights_path = "epoch=9-step=520.ckpt"
# Load on CPU and keep only the "state_dict" entry of the checkpoint.
trained_weights = torch.load(trained_weights_path, map_location=torch.device('cpu'))["state_dict"]

# recreate the model

class Net(nn.Module):
  """Image backbone whose features are joined with a sex embedding before regression.

  Args:
    backbone: Name of the timm model used as the feature extractor.
    pretrained: Whether timm should load pretrained weights for the backbone.
      Pass False when the weights are restored from a checkpoint afterwards.
  """

  def __init__(self, backbone, pretrained=True):
    super().__init__()
    self.backbone = timm.create_model(backbone, pretrained=pretrained, in_chans=1, num_classes=0)
    # Find the feature width with a dummy batch. The probe runs in eval mode
    # and without autograd so that layers which track running statistics
    # (e.g. BatchNorm) are not updated with random noise.
    was_training = self.backbone.training
    self.backbone.eval()
    with torch.no_grad():
      dim_feats = self.backbone(torch.randn((2, 1, 64, 64))).size(1)
    self.backbone.train(was_training)
    self.embed = nn.Embedding(2, 32)
    self.regressor = nn.Linear(dim_feats + 32, 1)

  def forward(self, x, female):
    """Return one regression value per sample.

    Args:
      x: Image batch passed to the backbone.
      female: Per-sample sex indicator (0 or 1); cast to long and used as the
        embedding index.
    """
    feat = self.backbone(x)
    feat = torch.cat([feat, self.embed(female.long())], dim=1)
    return self.regressor(feat)

class BoneAgeModel(lightning.LightningModule):
  """Lightning wrapper around the network; redefined here so the demo cell is self-contained.

  Batches are dicts; the image tensor is read from "x" and the target from "y".
  """

  def __init__(self, net, optimizer, scheduler, loss_fn):
    super().__init__()
    self.net = net
    self.optimizer, self.scheduler = optimizer, scheduler
    self.loss_fn = loss_fn

    # per-batch validation losses, emptied at the end of every validation epoch
    self.val_losses = []

  def training_step(self, batch, batch_index):
    """Return the training loss for one batch."""
    prediction = self.net(batch["x"])
    return self.loss_fn(prediction, batch["y"])

  def validation_step(self, batch, batch_index):
    """Compute the loss for one validation batch and remember it."""
    batch_loss = self.loss_fn(self.net(batch["x"]), batch["y"])
    self.val_losses.append(batch_loss.item())

  def on_validation_epoch_end(self, *args, **kwargs):
    """Print the mean of the collected batch losses, then reset the list."""
    val_loss, self.val_losses = np.mean(self.val_losses), []
    print(f"Validation Loss : {val_loss:0.3f}")

  def configure_optimizers(self):
    """Hand the optimizer and a per-step scheduler config to Lightning."""
    return {
      "optimizer": self.optimizer,
      "lr_scheduler": {"scheduler": self.scheduler, "interval": "step"},
    }

class BoneAgeModelV2(BoneAgeModel):
  """BoneAgeModel variant whose network also receives the batch's "female" entry."""

  def training_step(self, batch, batch_index):
    """Return the training loss for one batch."""
    prediction = self.net(batch["x"], batch["female"])
    return self.loss_fn(prediction, batch["y"])

  def validation_step(self, batch, batch_index):
    """Compute the loss for one validation batch and remember it."""
    batch_loss = self.loss_fn(self.net(batch["x"], batch["female"]), batch["y"])
    self.val_losses.append(batch_loss.item())


# Rebuild the architecture and restore the trained weights. Optimizer,
# scheduler and loss are not needed for inference, so None is passed.
# NOTE(review): Net creates its backbone with pretrained=True, so pretrained
# weights are fetched here even though load_state_dict replaces them.
net = Net(BACKBONE)
trained_model = BoneAgeModelV2(net, None, None, None)
trained_model.load_state_dict(trained_weights)
trained_model.eval()


def format_bone_age(bone_age):
  """Format a bone age given in years as the demo's output string.

  The value is rounded to whole months first and only then split into years
  and months, so the month part is always in 0..11 (e.g. 5.97 years becomes
  "6 years, 0 months" rather than "5 years, 12 months"). Negative
  predictions are clamped to zero.
  """
  total_months = max(0, round(bone_age * 12))
  years, months = divmod(total_months, 12)
  return f"Predicted Bone Age: {years} years, {months} months"


def sex_is_female(Sex):
  """Return True when the radio selection denotes "Female".

  Accepts both forms the Gradio radio input can deliver for the choices
  ["Male", "Female"]: the selected label itself, or its index (1 for
  "Female") when the component is created with type="index".
  """
  return Sex in (1, "Female")


def predict_bone_age(Radiograph, Sex):
  """Predict bone age from a grayscale radiograph and the patient's sex.

  Args:
    Radiograph: 2-D NumPy array (H, W) of 8-bit pixel values as delivered by
      the Gradio image input, or None when no image was provided.
    Sex: Radio selection, either the label ("Male"/"Female") or its index
      (0/1); None when nothing was selected.

  Returns:
    String of the form "Predicted Bone Age: Y years, M months".

  Raises:
    gr.Error: If the image or the sex selection is missing.
  """
  if Radiograph is None:
    raise gr.Error("Please upload a radiograph first.")
  if Sex is None:
    raise gr.Error("Please select the patient's sex.")
  img = torch.from_numpy(Radiograph).float()
  img = img.unsqueeze(0).unsqueeze(0) # add channel and batch dimensions
  # Resize to the resolution the model was trained on; the height/width set on
  # the Gradio component do not resize the array passed to this function.
  img = nn.functional.interpolate(img, size=(IMAGE_HEIGHT, IMAGE_WIDTH),
                                  mode="bilinear", align_corners=False)
  img = img / 255. # use same normalization as in the PyTorch dataset
  # Comparing the raw selection with "Female" alone is always False when the
  # radio delivers an index, which would silently treat every patient as male.
  binary_sex = torch.tensor([sex_is_female(Sex)])
  with torch.inference_mode():
    bone_age = trained_model.net(img, binary_sex)[0].item()
  return format_bone_age(bone_age)


# NOTE(review): height/width here presumably control only the on-screen size of
# the component, not the size of the array handed to the callback -- confirm.
image = gr.Image(height=IMAGE_HEIGHT, width=IMAGE_WIDTH, image_mode="L") # L for grayscale
# additional input
# NOTE(review): with type="index" the callback presumably receives the position
# of the selected choice (0 for "Male", 1 for "Female") rather than the label
# text, so comparisons against the label strings will not match -- confirm.
sex = gr.Radio(["Male", "Female"], type="index")
label = gr.Label(show_label=True, label="Bone Age Prediction")

demo = gr.Interface(fn=predict_bone_age,
                    inputs=[image, sex], # <- adding sex as an input
                    outputs=label)

demo.launch(debug=True)

### Bonus 2: Deploy on HuggingFace Spaces

[Instructions here.](https://scribehow.com/shared/Create_and_Deploy_a_Model_on_Hugging_Face__1JLjq96aRGqyqf1N3O7Yjg)
