In [None]:
import pandas as pd
import numpy as np
import cv2
import os
import re
import pydicom
import gc
from tqdm import tqdm

from PIL import Image

import albumentations
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision

from sklearn import model_selection

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler

from matplotlib import pyplot as plt


import warnings
warnings.filterwarnings("ignore")

In [None]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU
from torch import device as device_

cuda_ready = torch.cuda.is_available()
device = device_("cuda") if cuda_ready else device_("cpu")
print(device)

In [None]:
# Load the training annotations CSV into the `train` dataframe and preview it
train = pd.read_csv(
    filepath_or_buffer="../input/vinbigdata-chest-xray-abnormalities-detection/train.csv"
)
train.head()

In [None]:
# Replace every missing value in the annotations with 0 (modifies `train` in place)
train.fillna(value=0, inplace=True)
train.head(5)

In [None]:
# Rows with class_id 14 (class_name "No finding") get x_max and y_max set to 1.0
no_finding_rows = train["class_id"].eq(14)
train.loc[no_finding_rows, ['x_max', 'y_max']] = 1.0
train.head()

In [None]:
# FasterRCNN treats class_id == 0 as the background,
# so as a first step every class id is shifted up by 1.
# NOTE: this cell is not idempotent - running it twice shifts the ids twice.
train["class_id"] = train["class_id"].add(1)
train.head()

In [None]:
# Second step: rows whose shifted class_id is 15 are remapped to class_id 0
shifted_to_15 = train["class_id"].eq(15)
train.loc[shifted_to_15, ["class_id"]] = 0
train.head()

In [None]:
# Display names keyed by the remapped class_id (key 0 is "No finding").
label_dict = dict(enumerate([
    "No finding",
    "Aortic enlargement",
    "Atelectasis",
    "Calcification",
    "Cardiomegaly",
    "Consolidation",
    "ILD",
    "Infiltration",
    "Lung Opacity",
    "Nodule/Mass",
    "Other lesion",
    "Pleural effusion",
    "Pleural thickening",
    "Pneumothorax",
    "Pulmonary fibrosis",
]))

In [None]:
# Preview the first 20 annotation rows ordered by image_id
train.sort_values('image_id').head(n=20)

In [None]:
# Read one sample DICOM file and display it in grayscale
sample_path = "../input/vinbigdata-chest-xray-abnormalities-detection/train/0007d316f756b3fa0baea2ff514ce945.dicom"
dicom = pydicom.dcmread(sample_path)

# Apply the file's rescale slope and intercept to the raw pixel array
raw_pixels = dicom.pixel_array
image = raw_pixels * dicom.RescaleSlope + dicom.RescaleIntercept

plt.imshow(image, cmap='gray')

In [None]:
# create folds
#
# The dataset class serves one sample per image_id, using every annotation row
# of that image. Folds therefore have to be assigned per image: splitting the
# annotation rows independently lets the same image appear in both the training
# and the validation split. GroupKFold keeps all rows of an image_id together.
# (Trade-off: the split is no longer stratified on class_id.)
df = train.copy()          # work on a copy so `train` does not gain a kfold column
no_of_folds = 5

df["kfold"] = -1
# Fixed seed so Restart & Run All reproduces the same row order
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
y = df.class_id.values
kf = model_selection.GroupKFold(n_splits=no_of_folds)

for f, (t_, v_) in enumerate(kf.split(X=df, y=y, groups=df.image_id.values)):
    df.loc[v_, 'kfold'] = f

# Sanity check: every image_id belongs to exactly one fold
assert df.groupby('image_id')['kfold'].nunique().max() == 1

fold = 0
df_train = df[df.kfold != fold].reset_index(drop=True)
df_valid = df[df.kfold == fold].reset_index(drop=True)

df_valid.head()

In [None]:
from torch.utils.data import Dataset

class xray_dataset(Dataset):
    """Detection dataset that yields one chest X-ray DICOM image per item.

    Each item is the pair ``(image, target)`` where ``target`` is a dict with
    the keys ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.
    All five entries are torch tensors and always describe the same set of
    boxes, including after augmentation has dropped some of them.

    Args:
        df: Annotation dataframe with the columns ``image_id``, ``class_id``,
            ``x_min``, ``y_min``, ``x_max`` and ``y_max`` (one row per box).
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            dict with the same three keys (the albumentations convention).
        image_dir: Directory holding ``<image_id>.dicom`` files. Defaults to
            the competition's training directory.
    """

    # Directory the DICOM files are read from when `image_dir` is not given.
    DEFAULT_IMAGE_DIR = '../input/vinbigdata-chest-xray-abnormalities-detection/train/'

    def __init__(self, df, transforms = None, image_dir = None):
        self.df = df
        self.image_ids = df['image_id'].unique()
        self.transforms = transforms
        self.image_dir = self.DEFAULT_IMAGE_DIR if image_dir is None else image_dir

    def __len__(self):
        """Number of distinct images (not annotation rows)."""
        return len(self.image_ids)

    @staticmethod
    def _normalize_pixels(pixels, slope=1.0, intercept=0.0, invert=False):
        """Turn a 2-D pixel array into an (H, W, 3) float32 image in [0, 255].

        The arithmetic is done in float32 so that an unsigned pixel array with
        a negative intercept can neither wrap around nor trip numpy's in-place
        casting rules. A constant image maps to all zeros instead of NaN.
        """
        image = np.asarray(pixels).astype(np.float32)
        if invert:
            # MONOCHROME1 stores inverted intensities; flip them back
            image = image.max() - image
        image = image * float(slope) + float(intercept)

        image = image - image.min()
        peak = image.max()
        if peak > 0:
            image = image / peak
        image = image * 255.0

        # Replicate the single channel three times, channels last
        return np.stack([image, image, image], axis=-1)

    @staticmethod
    def _build_target(boxes, labels, index):
        """Assemble the target dict from the final boxes and labels.

        When no box is left, a single 1x1 box with label 0 is substituted so
        that the target is never empty. Area and iscrowd are derived from the
        final boxes, so all entries have matching lengths.
        """
        boxes = np.asarray(boxes, dtype=np.float32).reshape(-1, 4)
        labels = np.asarray(labels, dtype=np.int64).reshape(-1)

        if boxes.shape[0] == 0:
            # Albumentation cuts the target (class 14, 1x1px in the corner)
            boxes = np.array([[0.0, 0.0, 1.0, 1.0]], dtype=np.float32)
            labels = np.array([0], dtype=np.int64)

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        return {
            'boxes': torch.as_tensor(boxes, dtype=torch.float32),
            'labels': torch.as_tensor(labels, dtype=torch.int64),
            'image_id': torch.tensor([index]),
            'area': torch.as_tensor(area, dtype=torch.float32),
            # suppose all instances are not crowd
            'iscrowd': torch.zeros((boxes.shape[0],), dtype=torch.int64),
        }

    def __getitem__(self, index):
        """Load, normalise and (optionally) augment the image at ``index``.

        Returns:
            ``(image, target)``. Without transforms ``image`` is the
            (H, W, 3) float32 numpy array; with transforms it is whatever the
            transform pipeline returns under the ``image`` key.
        """
        image_id = self.image_ids[index]
        records = self.df[self.df['image_id'] == image_id].reset_index(drop=True)

        dicom = pydicom.dcmread(os.path.join(self.image_dir, image_id + '.dicom'))
        inverted = (
            "PhotometricInterpretation" in dicom
            and dicom.PhotometricInterpretation == "MONOCHROME1"
        )
        intercept = dicom.RescaleIntercept if "RescaleIntercept" in dicom else 0.0
        slope = dicom.RescaleSlope if "RescaleSlope" in dicom else 1.0
        image = self._normalize_pixels(dicom.pixel_array, slope, intercept, inverted)

        # An image whose first row has class_id 0 keeps only that single row
        if records.loc[0, "class_id"] == 0:
            records = records.loc[[0], :]

        boxes = records[['x_min', 'y_min', 'x_max', 'y_max']].values.astype(np.float32)
        labels = records["class_id"].values.astype(np.int64)

        if self.transforms:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels.tolist())
            image = sample['image']
            # Take boxes AND labels from the transform output: augmentation
            # may drop boxes, and the two must stay aligned.
            boxes = sample['bboxes']
            labels = sample['labels']

        return image, self._build_target(boxes, labels, index)

In [None]:
def get_train_transform():
    """Build the training augmentation pipeline.

    Steps, in order: random flip, random shift/scale/rotate, resize so the
    longest side is 800 px, pixel normalisation, conversion to a tensor.
    Boxes are expected in ``pascal_voc`` format with their class ids passed
    in the ``labels`` field.

    Returns:
        The composed albumentations pipeline.
    """
    return albumentations.Compose([
        # The probability must be passed by keyword: in albumentations releases
        # where `always_apply` is the first constructor argument, `Flip(0.5)`
        # sets always_apply=0.5 (truthy), so the flip would fire on every sample.
        albumentations.Flip(p=0.5),
        albumentations.ShiftScaleRotate(scale_limit=0.1, rotate_limit=45, p=0.25),
        albumentations.LongestMaxSize(max_size=800, p=1.0),

        # FasterRCNN will normalize.
        albumentations.Normalize(mean=(0, 0, 0), std=(1, 1, 1), max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_valid_transform():
    """Build the validation pipeline: pixel normalisation, then tensor conversion.

    No geometric augmentation is applied. Boxes are expected in ``pascal_voc``
    format with their class ids passed in the ``labels`` field.

    Returns:
        The composed albumentations pipeline.
    """
    steps = [
        # mean 0 / std 1 / max 255 - presumably a plain rescale; verify against albumentations docs
        albumentations.Normalize(mean=(0, 0, 0), std=(1, 1, 1), max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    box_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    return albumentations.Compose(steps, bbox_params=box_settings)

In [None]:
# Wrap each split in a dataset paired with its own transform pipeline
train_tfms = get_train_transform()
valid_tfms = get_valid_transform()
train_data = xray_dataset(df_train, transforms=train_tfms)
val_data = xray_dataset(df_valid, transforms=valid_tfms)

In [None]:
idx = 100

# Fetch the sample exactly once. Every `train_data[idx]` call re-reads the DICOM
# and re-rolls the random augmentations, so indexing separately for the image,
# the boxes and the labels would draw boxes that belong to a different
# augmented version of the image.
sample_image, sample_target = train_data[idx]

# permute() yields a non-contiguous view; OpenCV drawing needs a contiguous array
img = np.ascontiguousarray(sample_image.permute(1, 2, 0).cpu().numpy())
boxes = sample_target["boxes"].cpu().numpy().astype(np.int32)
labels = sample_target["labels"].cpu().numpy().astype(np.int32)

fig, ax = plt.subplots(1, 1, figsize = (12,6))

for box, label in zip(boxes, labels):
    # OpenCV expects plain Python ints for point coordinates
    x_min, y_min, x_max, y_max = (int(v) for v in box)
    cv2.rectangle(img,
                (x_min, y_min),
                (x_max, y_max),
                (220, 0, 0),
                 1)
    cv2.putText(img,
                label_dict[int(label)],
                (x_min, y_min - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                (220, 0, 0),
                3)

ax.set_axis_off()
# The drawn colour (220) exceeds the float image range, so clip before display
ax.imshow(np.clip(img, 0.0, 1.0))

In [None]:
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked.
    """
    columns = zip(*batch)
    return tuple(columns)

# Training loader: shuffled; the last incomplete batch is dropped
training_dataloader = DataLoader(
    train_data,
    batch_size=8,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn,
    drop_last=True
)

# Validation loader: fixed order, and every sample is kept. With
# drop_last=True up to batch_size - 1 validation images would be silently
# excluded from the validation loss.
val_dataloader = DataLoader(
    val_data,
    batch_size=8,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn,
    drop_last=False
)

In [None]:
# number of output classes for the replacement box-predictor head
num_classes = 15

# Faster R-CNN (ResNet-50 FPN backbone), loaded pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# swap the pre-trained head for a new one that takes the same number of
# input features but predicts `num_classes` classes
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [None]:
# Move the model to the selected device, then optimise only trainable parameters
model.to(device)
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# StepLR configured with step_size=4 and gamma=0.1
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)

num_epochs = 5

In [None]:
# defining the training loop
def train_loop_fn(data_loader, model, optimizer, device, scheduler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        data_loader: Iterable of ``(images, targets)`` batches, where
            ``images`` is a sequence of tensors and ``targets`` a sequence of
            dicts of tensors.
        model: Module called as ``model(images, targets)`` that returns a dict
            of loss tensors.
        optimizer: Optimizer updating the model's parameters.
        device: Device the batch tensors are moved to.
        scheduler: Optional learning-rate scheduler, stepped once after the
            pass. ``ReduceLROnPlateau`` receives the epoch loss; every other
            scheduler is stepped without arguments.

    Returns:
        The mean per-batch loss of this pass (0.0 for an empty loader).
    """
    model.train()
    running_loss = 0.0
    num_batches = 0

    for images, labels in tqdm(data_loader):

        images = [image.to(device) for image in images]
        labels = [{k: v.to(device) for k, v in l.items()} for l in labels]

        optimizer.zero_grad()

        loss_dict = model(images, labels)
        losses = sum(loss for loss in loss_dict.values())

        losses.backward()
        optimizer.step()

        running_loss += losses.item()
        num_batches += 1

        # Release the batch eagerly to keep GPU memory usage low
        del images, labels
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Average over the batches of THIS loader. `running_loss` is a sum of
    # per-batch losses, so dividing by a dataset size would be the wrong scale.
    train_loss = running_loss / num_batches if num_batches else 0.0

    if scheduler is not None:
        if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(train_loss)
        else:
            # For epoch-based schedulers such as StepLR the positional
            # argument of step() is an epoch index, not a loss value.
            scheduler.step()

    return train_loss

In [None]:
def eval_loop_fn(data_loader, model, device):
    """Compute the mean per-batch loss of ``model`` over ``data_loader``.

    No gradients are tracked and no parameters are updated.

    Args:
        data_loader: Iterable of ``(images, targets)`` batches, where
            ``images`` is a sequence of tensors and ``targets`` a sequence of
            dicts of tensors.
        model: Module called as ``model(images, targets)`` that returns a dict
            of loss tensors.
        device: Device the batch tensors are moved to.

    Returns:
        The mean per-batch loss (0.0 for an empty loader).
    """
    running_loss = 0.0
    num_batches = 0

    # torchvision detection models return the loss dict only in training mode
    # (in eval mode they return detections), so the mode is set explicitly for
    # the duration of the loop and restored afterwards.
    was_training = model.training
    model.train()
    try:
        # Gradients are not needed here; skipping them saves memory and time
        with torch.no_grad():
            for images, labels in tqdm(data_loader):

                images = [image.to(device) for image in images]
                labels = [{k: v.to(device) for k, v in l.items()} for l in labels]

                loss_dict = model(images, labels)
                losses = sum(loss for loss in loss_dict.values())

                running_loss += losses.item()
                num_batches += 1

                # Release the batch eagerly to keep GPU memory usage low
                del images, labels
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
    finally:
        model.train(was_training)

    # Average over the batches of THIS loader rather than a global dataset size
    valid_loss = running_loss / num_batches if num_batches else 0.0

    return valid_loss

In [None]:
def _make_fold_loader(fold_df, transforms, shuffle):
    """Wrap the rows of one split in a dataset and a batched loader."""
    return DataLoader(
        xray_dataset(fold_df, transforms),
        batch_size=8,
        shuffle=shuffle,
        num_workers=4,
        collate_fn=collate_fn,
        # Only the shuffled (training) loader drops its incomplete last batch
        drop_last=shuffle
    )


def _make_fold_model():
    """Build a fresh pre-trained Faster R-CNN with a `num_classes` head on `device`."""
    fold_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = fold_model.roi_heads.box_predictor.cls_score.in_features
    fold_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return fold_model.to(device)


def _run():
    """Train one model per fold and save the one with the lowest validation loss.

    For every fold ``i`` the rows of ``df`` with ``kfold == i`` form the
    validation split and all other rows the training split. Each fold starts
    from a fresh model, optimizer and scheduler so that no fold is validated
    on data the model has already been trained on. The model is snapshotted
    whenever an epoch achieves a new lowest validation loss, and the best
    snapshot is written to ``./xray_FRCNN_model.bin``.

    Raises:
        ValueError: If no epoch was run, so there is nothing to save.
    """
    import copy  # `copy` is not among the file-level imports

    no_of_folds = 5
    best_loss = float("inf")
    best_model = None

    for i in range(no_of_folds):
        a_string = "*" * 20

        print(a_string, " FOLD NUMBER ", i, a_string)

        # Split on the current fold index (not on the fold count)
        df_train = df[df.kfold != i].reset_index(drop=True)
        df_valid = df[df.kfold == i].reset_index(drop=True)

        train_loader = _make_fold_loader(df_train, get_train_transform(), shuffle=True)
        valid_loader = _make_fold_loader(df_valid, get_valid_transform(), shuffle=False)

        fold_model = _make_fold_model()
        fold_params = [p for p in fold_model.parameters() if p.requires_grad]
        fold_optimizer = torch.optim.SGD(fold_params, lr=0.005, momentum=0.9, weight_decay=0.0005)
        fold_scheduler = torch.optim.lr_scheduler.StepLR(fold_optimizer, step_size=4, gamma=0.1)

        for epoch in range(num_epochs):
            print(f"Epoch --> {epoch+1} / {num_epochs}")
            print(f"-------------------------------")

            train_loss = train_loop_fn(train_loader, fold_model, fold_optimizer, device, fold_scheduler)
            print('training Loss: {:.4f}'.format(train_loss))

            valid_loss = eval_loop_fn(valid_loader, fold_model, device)
            print('validation Loss: {:.4f}'.format(valid_loss))

            # Snapshot at the epoch that achieved the loss, so the saved
            # weights really are the ones that produced `best_loss`.
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = copy.deepcopy(fold_model)
        print()

        # Free this fold's model before the next one is built
        del fold_model, fold_params, fold_optimizer, fold_scheduler
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    if best_model is None:
        raise ValueError("no epoch was run, so there is no model to save")

    print('\n======================Saving the best model==============================')
    torch.save(best_model,'./xray_FRCNN_model.bin')
    print()
    print('& The lowest loss across we got among all the folds is : {:.4f}'.format(best_loss))

In [None]:
# Start the cross-validation training run when this file is executed directly
if __name__ == "__main__":
    _run()