# WORK IN PROGRESS

# Imports

In [None]:
!pip install pycocotools timm albumentations omegaconf

In [None]:
!git clone https://github.com/rwightman/efficientdet-pytorch.git

In [None]:
import os
import numpy as np 
import pandas as pd
import glob
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
import tqdm
import torch
from itertools import combinations
import albumentations as A
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from albumentations.pytorch.transforms import ToTensorV2
import warnings
warnings.filterwarnings('ignore')


import pydicom 
import cv2
from pydicom.pixel_data_handlers.util import apply_voi_lut # voi = value of interest, lut = lookup table

In [None]:
# Root folder of the competition data and the sub-folders holding the train/test DICOM files.
DATA_DIR = '/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/'
TRAIN_DIR = os.path.join(DATA_DIR, 'train')
TEST_DIR = os.path.join(DATA_DIR, 'test')

# 15 plotly colour strings sampled from seaborn's "Spectral" palette; indexed by class id further below.
LABEL_COLORS = [px.colors.label_rgb(px.colors.convert_to_RGB_255(x)) for x in sns.color_palette("Spectral", 15)]
LABEL_COLORS

In [None]:
# Strip the leading 'rgb(' and trailing ')' from every colour string and
# turn the comma-separated channels into a tuple of ints (used for OpenCV drawing).
LABEL_COLORS_TUPLES = [
    tuple(int(channel) for channel in rgb_string[4:-1].split(","))
    for rgb_string in LABEL_COLORS
]
LABEL_COLORS_TUPLES

## Helper Functions

In [None]:
def read_dicom(path: str, voi_lut=True, fix_monochrome=True) -> np.ndarray:
    """Read a DICOM file and return its pixel data as an 8-bit image.

    Args:
        path: Location of the DICOM file.
        voi_lut: If True, apply the VOI LUT (value-of-interest lookup table)
            to turn the raw pixel data into a human friendly view.
        fix_monochrome: If True, invert images whose photometric
            interpretation is MONOCHROME1.

    Returns:
        Array of dtype uint8 with values rescaled to the range [0, 255].
    """
    # `dcmread` is the current name of the reader; `read_file` is its deprecated alias.
    dicom = pydicom.dcmread(path)
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
    # MONOCHROME1: greyscale ranges from bright to dark with ascending pixel values,
    # MONOCHROME2: greyscale ranges from dark to bright with ascending pixel values.
    if dicom.PhotometricInterpretation == 'MONOCHROME1' and fix_monochrome:
        data = np.amax(data) - data  # np.amax() -> maximum of the flattened array
    data = data - np.min(data)
    peak = np.max(data)
    # A constant image has peak == 0 after the shift; skip the division so the
    # result is an all-zero image instead of NaNs cast to uint8.
    if peak > 0:
        data = data / peak
    return (data * 255).astype(np.uint8)

In [None]:
def plot_image(img, title="", figsize=(10,10), cmap=None):
    """Display `img` in a new matplotlib figure with a title and hidden axes.

    `cmap` is only forwarded to `imshow` when it is truthy.
    """
    plt.figure(figsize=figsize)
    imshow_kwargs = {'cmap': cmap} if cmap else {}
    plt.imshow(img, **imshow_kwargs)
    plt.title(title)
    plt.axis(False)
    plt.show()

In [None]:
def get_annotations(df: pd.DataFrame, image_id, rad_id=True) -> dict:
    """Collect the box annotations of one or several images.

    Args:
        df: Annotation table with the columns image_id, class_id, x_min,
            y_min, x_max, y_max (and rad_id when `rad_id` is truthy).
        image_id: A single image id (str) or an iterable of image ids.
        rad_id: If truthy, append the radiologist id to every annotation.

    Returns:
        Dict mapping each image id to a list of
        [class_id, x_min, y_min, x_max, y_max(, rad_id)] lists.
        Rows with class_id 14 are skipped.
    """
    image_ids = [image_id] if isinstance(image_id, str) else image_id

    fields = ['class_id', 'x_min', 'y_min', 'x_max', 'y_max']
    if rad_id:
        fields.append('rad_id')

    collected = {}
    for current_id in image_ids:
        rows = df[df['image_id'] == current_id]
        collected[current_id] = [
            [rows.loc[row_label, field] for field in fields]
            for row_label in rows.index
            if rows.loc[row_label, 'class_id'] != 14
        ]

    return collected

In [None]:
def draw_boxes(df, img_id, id_to_classes, annotations, rad_id=True, plot_rad=True):
    """Draw bounding boxes and class labels onto a training image.

    Args:
        df: Not used by this function.
        img_id: Id of the image; the file is read from TRAIN_DIR.
        id_to_classes: Mapping from class id to class name.
        annotations: Dict whose values are lists of
            [class_id, x_min, y_min, x_max, y_max(, rad_id)] entries.
        rad_id: Whether the annotations carry a radiologist id at index 5.
        plot_rad: Whether to append that radiologist id to the label text.

    Returns:
        The RGB image with all boxes and labels drawn in.
    """
    canvas = cv2.cvtColor(read_dicom(TRAIN_DIR + "/" + img_id + ".dicom"),
                          cv2.COLOR_GRAY2RGB)
    show_rad = plot_rad and rad_id
    for boxes in annotations.values():
        for box in boxes:
            top_left = (int(box[1]), int(box[2]))
            bottom_right = (int(box[3]), int(box[4]))
            colour = LABEL_COLORS_TUPLES[box[0]]
            canvas = cv2.rectangle(canvas, top_left, bottom_right, colour, 1)

            label_text = id_to_classes[box[0]]
            if show_rad:
                label_text = label_text + f"({box[5]})"

            # The label is placed slightly above the top-left corner of the box.
            canvas = cv2.putText(canvas, label_text,
                                 (top_left[0], top_left[1] - 5),
                                 cv2.FONT_HERSHEY_SIMPLEX, 1.5, colour, 2)

    return canvas

In [None]:
def _box_corners(boxes, box_format):
    """Return the (x1, y1, x2, y2) slices, each of shape (..., 1), of `boxes`."""
    if box_format == "midpoint":
        half_w = boxes[..., 2:3] / 2
        half_h = boxes[..., 3:4] / 2
        return (boxes[..., 0:1] - half_w,
                boxes[..., 1:2] - half_h,
                boxes[..., 0:1] + half_w,
                boxes[..., 1:2] + half_h)
    if box_format == "corners":
        return (boxes[..., 0:1], boxes[..., 1:2],
                boxes[..., 2:3], boxes[..., 3:4])
    raise ValueError(
        f"Unknown box_format {box_format!r}, expected 'midpoint' or 'corners'")


def intersection_over_union(boxes_1, boxes_2, box_format="corners"):
    """
    Calculate the intersection over union of two bounding boxes
    Parameters:
        boxes_1 (tensor): shape (batch_size, 4)
        boxes_2 (tensor): shape (batch_size, 4)
        box_format (str): midpoint (x,y,w,h) or corners (x1,y1,x2,y2)
    Returns:
        tensor: IoU for all inputs, with a trailing dimension of size 1
    Raises:
        ValueError: if box_format is neither "midpoint" nor "corners"
    """
    box1_x1, box1_y1, box1_x2, box1_y2 = _box_corners(boxes_1, box_format)
    box2_x1, box2_y1, box2_x2, box2_y2 = _box_corners(boxes_2, box_format)

    # Corner points of the intersection: the larger of the two top-left
    # corners and the smaller of the two bottom-right corners.
    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # clamp(0) yields an empty intersection when the boxes do not overlap
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    box1_area = abs((box1_x2 - box1_x1) * (box1_y1 - box1_y2))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y1 - box2_y2))

    # The small epsilon avoids a division by zero for degenerate boxes.
    return intersection / (box1_area + box2_area - intersection + 1e-6)

In [None]:
def non_max_suppression(boxes, iou_threshold, threshold, box_format='corners'):
    """Greedy per-class non-maximum suppression.

    Args:
        boxes: List of [class, confidence, x1, y1, x2, y2] entries.
        iou_threshold: A box of the same class as an already chosen box is
            only kept while its IoU with that box is below this value.
        threshold: Boxes whose confidence is not above this value are dropped.
        box_format: Forwarded to `intersection_over_union`.

    Returns:
        The surviving boxes, ordered by descending confidence.
    """
    assert type(boxes) == list
    remaining = sorted(
        (candidate for candidate in boxes if candidate[1] > threshold),
        key=lambda candidate: candidate[1],
        reverse=True,
    )
    kept = []

    while remaining:
        best, others = remaining[0], remaining[1:]
        survivors = []
        for other in others:
            # The IoU is only evaluated for boxes of the same class.
            if other[0] != best[0] or intersection_over_union(
                    torch.tensor(best[2:]),
                    torch.tensor(other[2:]),
                    box_format=box_format) < iou_threshold:
                survivors.append(other)
        remaining = survivors
        kept.append(best)

    return kept

In [None]:
def merge_similar_annotations(df, image_id: list, threshold=0.75):
    """Merge similar annotations made by different radiologists.

    For every image and class, annotations whose IoU with a seed box is at
    least `threshold` are fused into one box that encloses all of them; the
    radiologist ids of the fused boxes are joined with spaces.

    Args:
        df: Annotation table accepted by `get_annotations`.
        image_id: A single image id (str) or a list of image ids.
        threshold: Minimum IoU for two boxes of the same class to be merged.

    Returns:
        Dict mapping each image id to a list of
        [class_id, x_min, y_min, x_max, y_max, rad_id] lists.
    """
    if isinstance(image_id, str):
        image_id = [image_id]

    merged = {}
    for im, annos in get_annotations(df, image_id).items():
        merged_for_image = []
        for class_id in set(anno[0] for anno in annos):
            pending = [anno for anno in annos if anno[0] == class_id]
            # Process until the list is empty, so that a last annotation that
            # was not merged into any other box is still kept.
            while pending:
                seed = pending.pop(0)
                seed_box = torch.tensor(seed[1:-1]).unsqueeze(0)
                similar = [
                    pos for pos, other in enumerate(pending)
                    if float(intersection_over_union(
                        seed_box,
                        torch.tensor(other[1:-1]).unsqueeze(0))) >= threshold
                ]

                # Grow the seed box so that it encloses all similar boxes.
                x1, y1, x2, y2, rad_id = seed[1], seed[2], seed[3], seed[4], seed[5]
                for pos in similar:
                    x1 = min(pending[pos][1], x1)
                    y1 = min(pending[pos][2], y1)
                    x2 = max(pending[pos][3], x2)
                    y2 = max(pending[pos][4], y2)
                    rad_id += f" {pending[pos][5]}"
                merged_for_image.append([seed[0], x1, y1, x2, y2, rad_id])

                # Delete from the back so the remaining positions stay valid.
                for pos in reversed(similar):
                    del pending[pos]

        merged[im] = merged_for_image

    return merged

In [None]:
def filter_df(df, include_no_findings=True):
    """Reduce train df (merge similar boxes, keep one row per no-finding image).

    Args:
        df: Annotation table with the columns image_id, class_name, class_id,
            rad_id, x_min, y_min, x_max, y_max.
        include_no_findings: If True, append one class-14 row per image that
            has such rows to the merged annotations.

    Returns:
        DataFrame with the merged annotations (plus the no-finding rows when
        requested).
    """
    no_findings = df[df['class_id'] == 14].drop_duplicates(subset=['image_id'])
    print(f"Num annotations before: {len(df[df['class_id'] != 14])}")
    print(f"{len(no_findings)} images with no findings")

    # Derive the id -> name mapping from the frame itself, so the function
    # does not depend on a notebook-level global being defined beforehand.
    class_names = dict(zip(df['class_id'], df['class_name']))
    # Explicit names instead of positional `df.columns`, so that a different
    # column order or extra columns in `df` cannot mislabel the values.
    columns = ['image_id', 'class_name', 'class_id', 'rad_id',
               'x_min', 'y_min', 'x_max', 'y_max']

    annotations_merged = merge_similar_annotations(df, list(df['image_id'].unique()))
    data = [
        [image_id, class_names[int(box[0])], box[0], box[5],
         box[1], box[2], box[3], box[4]]
        for image_id, boxes in annotations_merged.items()
        for box in boxes
    ]
    new_df = pd.DataFrame(data, columns=columns)
    print(f"Num annotations after: {len(new_df)} (without no findings images)")

    if include_no_findings:
        print("Add no findings to annotations dataframe...")
        result = pd.concat([new_df, no_findings], ignore_index=True)
        print(f"Length of resulting dataframe: {len(result)}")
        return result

    return new_df

In [None]:
def add_size_aspect_ratio(df, drop_nan=False):
    """Add the img height and width as well as the aspect ratio to the dataframe.

    Args:
        df: Annotation table with an image_id column (and class_id when
            `drop_nan` is True).
        drop_nan: If True, rows with class_id 14 are removed first.

    Returns:
        A new DataFrame with the additional columns img_height, img_width and
        aspect_ratio (height / width). The input is left unmodified.
    """
    # Explicit copy: `pd.DataFrame(df)` alone is not guaranteed to be
    # independent of `df`, so adding columns could leak into the caller's frame.
    data = pd.DataFrame(df).copy()
    if drop_nan:
        data = data[data['class_id'] != 14].reset_index(drop=True)

    # Every image is read once; its size is then broadcast to all of its rows.
    heights = {}
    widths = {}
    for image_id in tqdm.tqdm(list(data['image_id'].unique())):
        img = read_dicom(os.path.join(TRAIN_DIR, image_id + '.dicom'))
        heights[image_id] = img.shape[0]
        widths[image_id] = img.shape[1]

    data['img_height'] = data['image_id'].map(heights)
    data['img_width'] = data['image_id'].map(widths)
    data['aspect_ratio'] = data['img_height'] / data['img_width']

    return data

In [None]:
def create_train_df(path):
    """Load the annotation csv at `path` and run the preprocessing steps.

    The annotations are filtered with `filter_df` and then extended with the
    image size columns from `add_size_aspect_ratio`.
    """
    raw = pd.read_csv(path)

    print("Filter data...")
    filtered = filter_df(raw)

    print("Add size and aspect ratios of imgs...")
    enriched = add_size_aspect_ratio(filtered)

    print("Preprocessing finished")
    return enriched

# Data Exploration

In [None]:
# Paths of all *.dicom files in the training folder, and how many there are.
dicoms_train = glob.glob(TRAIN_DIR + '/*.dicom')
len(dicoms_train)

In [None]:
# Paths of all *.dicom files in the test folder, and how many there are.
dicoms_test = glob.glob(TEST_DIR + '/*.dicom')
len(dicoms_test)

In [None]:
# Load the sample submission file and preview its first rows.
sample_submission = pd.read_csv(os.path.join(DATA_DIR, 'sample_submission.csv'))
sample_submission.head()

In [None]:
# Load the training annotations and preview the first rows.
train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
train.head()

In [None]:
# image_id of every annotation row with class_id 14 (one entry per row, so ids can repeat).
no_findings = train[train['class_id'] == 14]['image_id']
len(no_findings)

In [None]:
# image_id of every annotation row with a class_id other than 14 (ids can repeat).
findings = train[train['class_id'] != 14]['image_id']
len(findings)

In [None]:
# Do we have images where one radiologist found something and another did not?
# First reduce the class-14 image ids to a set of unique ids.
no_findings = set(list(no_findings))
len(no_findings)

In [None]:
# Unique ids of the images that have at least one annotation with a class other than 14.
findings = set(list(findings))
len(findings)

In [None]:
# Image ids that appear in both sets, i.e. images with class-14 rows and rows of other classes.
intersection = list(findings & no_findings)
len(intersection)

In [None]:
# Total number of annotation rows.
len(train)

In [None]:
# Number of distinct images that the annotation rows refer to.
len(train['image_id'].unique())

As you can see, we have 67,914 annotations and 15,000 different images in the training dataset, which means there can be more than one annotation per image.

In [None]:
# Sorted class ids, the class name of every annotation row, and a lookup
# from class id to the name found in the first row carrying that id.
class_ids = sorted(train['class_id'].unique())
class_names = train['class_name'].tolist()

id_to_classes = {
    class_id: train.loc[train['class_id'] == class_id, 'class_name'].iloc[0]
    for class_id in class_ids
}
id_to_classes

In [None]:
# Reverse lookup: class name (taken from the first row of each class id) -> class id.
classes_to_id = {
    train.loc[train['class_id'] == class_id, 'class_name'].iloc[0]: class_id
    for class_id in class_ids
}
classes_to_id

In [None]:
# Colour lookups per class id: plotly colour strings and integer tuples.
id_to_color = {class_id: LABEL_COLORS[class_id] for class_id in class_ids}
id_to_color_tuples = {
    class_id: LABEL_COLORS_TUPLES[class_id] for class_id in class_ids
}
id_to_color_tuples

We are dealing with 14 different abnormality classes that we want to detect (class id 14 is used for images without findings).

In [None]:
# Distinct radiologist ids that occur in the annotations.
radiologists = list(train['rad_id'].unique())
len(radiologists)

17 different radiologists produced the findings from the training dataset.

In [None]:
# Number of annotation rows per class name, ordered by class name.
class_value_counts = train['class_name'].value_counts().sort_index()
class_value_counts

In [None]:
# Same counts expressed as a fraction of all annotation rows.
class_value_counts_norm = train['class_name'].value_counts(normalize=True).sort_index()
class_value_counts_norm

In [None]:
# Bar chart of the annotation count per class on a logarithmic y axis,
# one colour per class, with the count written above each bar.
fig = px.bar(class_value_counts, 
             color=train['class_name'].value_counts().sort_index().index, 
             opacity=0.9, 
             color_discrete_sequence=LABEL_COLORS, 
             log_y=True, 
             title='Annotations per class',
             text=class_value_counts)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(legend_title=None, xaxis_title="", yaxis_title="count")
fig.show()

In [None]:
# Number of distinct class names per image, shown as a histogram with a logarithmic y axis.
annos_per_img = train.groupby('image_id')["class_name"].unique().apply(lambda x: len(x))
fig = px.histogram(annos_per_img,
                   nbins=max(annos_per_img),
                   labels={'value': 'number of unique abnormalities'}, 
                   title='Annotations per patient', 
                   log_y=True)

fig.update_layout(showlegend=False, 
                  xaxis_title='number of unique abnormalities', 
                  yaxis_title='number of imgs')
fig.show()

Since radiologists are only human, they can come to different conclusions about the same image: radiologist X may evaluate an image differently than radiologist Y. Unfortunately, we do not have any further information about the radiologists, but we have to keep this in mind. Moreover, we have three different opinions per image (three radiologists per image).

In [None]:
# Read one example image from the training folder and look at its shape.
test_id = '9a5094b2563a1ef3ff50dc5c7ff71345'
test_data = read_dicom(os.path.join(TRAIN_DIR, test_id + '.dicom'))
test_data.shape

In [None]:
# Annotations of the example image, including the radiologist ids.
annos = get_annotations(train, '9a5094b2563a1ef3ff50dc5c7ff71345', True)
annos

In [None]:
# Draw the annotations onto the example image and display it.
img = draw_boxes(train, '9a5094b2563a1ef3ff50dc5c7ff71345', id_to_classes, annos)
plot_image(img)

In [None]:
# Annotation list of the first (here: only) image in `annos`.
boxes = list(annos.values())[0]
boxes

In [None]:
#annos = get_annotations(train, list(train['image_id'].unique()))

In [None]:
#new_df = filter_df(train)
#new_df.head()

In [None]:
#annos_ = merge_similar_annotations(train, '9a5094b2563a1ef3ff50dc5c7ff71345')
#annos_

In [None]:
#img = draw_boxes(train, '9a5094b2563a1ef3ff50dc5c7ff71345', id_to_classes, annos_)
#plot_image(img)

# Modeling

I want to build a PyTorch object detection model (Faster R-CNN first; in the long run I will try EfficientDet-D7).

In [None]:
# Build the preprocessed annotation table (filtered annotations plus image size columns).
train_df = create_train_df(os.path.join(DATA_DIR, 'train.csv'))

In [None]:
# Unique image ids with / without findings in the preprocessed table
# (this rebinds `findings` and `no_findings` from the exploration cells).
findings = train_df[train_df['class_id'] != 14]['image_id'].unique()
no_findings = train_df[train_df['class_id'] == 14]['image_id'].unique()
len(findings)

In [None]:
# Number of images whose rows have class_id 14 in the preprocessed table.
len(no_findings)

In [None]:
# Create train/val split dfs (80%/20%)
# The split is done on image level, so all rows of an image end up on the same side.
imgs = train_df['image_id'].unique()
# Seeded generator: the split is identical on every run, which keeps the
# validation images separate from the training images across sessions.
np.random.default_rng(42).shuffle(imgs)
frac = int(0.8*len(imgs))
# NOTE: `train` is rebound here from the annotation DataFrame loaded earlier
# to an array of image ids.
train = imgs[:frac]
val = imgs[frac:]
len(train)

In [None]:
# Annotation rows that belong to the training / validation image ids.
df_train = train_df[train_df['image_id'].isin(train)]
df_val = train_df[train_df['image_id'].isin(val)]
len(df_train)

In [None]:
# Number of validation images.
len(val)

In [None]:
def get_train_transform():
    """Return the augmentation pipeline used for training samples.

    Images are flipped horizontally with probability 0.3, resized to
    1024x1024 and converted to tensors. Boxes are expected in pascal_voc
    format, with their class ids passed in the `labels` field.
    """
    return A.Compose([
        # The probability must be passed by keyword: given positionally, 0.3
        # binds to the first constructor parameter, which is not guaranteed
        # to be `p` across albumentations releases.
        A.HorizontalFlip(p=0.3),
        A.Resize(1024, 1024, interpolation=cv2.INTER_LANCZOS4, p=1.0),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_val_transform():
    """Return the pipeline used for validation samples.

    Images are resized to 1024x1024 and converted to tensors. Boxes are
    expected in pascal_voc format, with their class ids passed in `labels`.
    """
    steps = [
        A.Resize(1024, 1024, interpolation=cv2.INTER_LANCZOS4, p=1.0),
        ToTensorV2(p=1.0),
    ]
    box_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    return A.Compose(steps, bbox_params=box_settings)

In [None]:
class VinBigDataset(Dataset):
    """Object detection dataset over the DICOM images listed in an annotation table.

    Each item is a tuple (image, target, img_id), where `target` is a dict
    with the keys boxes, labels, image_id, area and iscrowd.
    """

    # Columns holding the box corners in (x_min, y_min, x_max, y_max) order.
    BOX_COLUMNS = ['x_min', 'y_min', 'x_max', 'y_max']

    def __init__(self, root: str, annos_df: pd.DataFrame, mode='train', transform=None):
        """
        Args:
            root: Data root folder; images are read from `<root>/<mode>`.
            annos_df: Annotation table with the columns image_id, class_id,
                x_min, y_min, x_max and y_max.
            mode: Name of the image sub-folder below `root`.
            transform: Optional callable invoked as
                `transform(image=..., bboxes=..., labels=...)`.
        """
        self.root = root
        self.imgs_dir = os.path.join(self.root, mode)
        self.annotations = annos_df
        self.img_ids = annos_df['image_id'].unique()
        self.transform = transform

    def __getitem__(self, idx: int):
        """Return (image, target, img_id) for the image at position `idx`."""
        img_id = self.img_ids[idx]
        annos = self.annotations[self.annotations['image_id'] == img_id]
        # Rows without box coordinates describe no object. They are dropped,
        # so such an image yields an empty target instead of NaN boxes.
        annos = annos.dropna(subset=self.BOX_COLUMNS)

        image = read_dicom(os.path.join(self.imgs_dir, img_id + '.dicom'))
        # read_dicom returns a single-channel image, so the conversion has to
        # start from GRAY (BGR2RGB requires a 3-channel input).
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB).astype(np.float32)
        image /= 255.0

        boxes = annos[self.BOX_COLUMNS].values.astype(np.float32).reshape(-1, 4)
        # NOTE(review): torchvision detection models are documented to treat
        # label 0 as background; class ids are passed through unchanged here,
        # confirm that this matches the intended label mapping.
        labels = annos['class_id'].values.astype(np.int64)

        if self.transform:
            sample = self.transform(image=image, bboxes=boxes, labels=labels.tolist())
            image = sample['image']
            # reshape keeps the (N, 4) layout even when there are no boxes
            boxes = np.asarray(sample['bboxes'], dtype=np.float32).reshape(-1, 4)
            labels = np.asarray(sample['labels'], dtype=np.int64)

        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        target = {}
        target['boxes'] = boxes
        target['labels'] = torch.as_tensor(labels, dtype=torch.int64)
        target['image_id'] = torch.tensor([idx])
        # The area is computed after the transform, so it matches the returned boxes.
        target['area'] = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        # suppose all instances are not crowd
        target['iscrowd'] = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        return image, target, img_id

    def __len__(self) -> int:
        """Return the number of distinct images in the annotation table."""
        return self.img_ids.shape[0]

In [None]:
class Averager:
    """Running mean of the values passed to `send`."""

    def __init__(self):
        self.reset()

    def send(self, value):
        """Add `value` to the running total and count one more iteration."""
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        """Mean of all values sent since the last reset, or 0 if there are none."""
        if self.iterations != 0:
            return 1.0 * self.current_total / self.iterations
        return 0

    def reset(self):
        """Forget all values received so far."""
        self.current_total = 0.0
        self.iterations = 0.0

In [None]:
def collate_fn(batch):
    """Regroup a list of samples into one tuple per sample field.

    A batch [(a1, b1), (a2, b2)] becomes ((a1, a2), (b1, b2)); nothing is
    stacked or converted.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [None]:
# Faster R-CNN with a ResNet-50 FPN backbone, requested with pretrained detector and backbone weights.
# NOTE(review): newer torchvision releases select weights through a `weights=` argument — confirm against the installed version.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True, pretrained_backbone=True)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

In [None]:
# Number of distinct class ids present in the training split.
# NOTE(review): this depends on which classes happen to be in the split, and torchvision
# is documented to reserve label 0 (not 14) for background — confirm the label mapping.
num_classes = len(df_train['class_id'].unique()) # here no_findings (14) == background class
num_classes

In [None]:
# Input feature size of the classification layer in the existing box predictor.
in_features = model.roi_heads.box_predictor.cls_score.in_features
in_features

In [None]:
# Replace the pretrained head with a new one that predicts `num_classes` classes.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [None]:
# Training dataset: rows of the training split, with the training augmentations.
train_dataset = VinBigDataset(DATA_DIR, df_train, mode='train', transform=get_train_transform())
len(train_dataset)

In [None]:
# Validation dataset: its images also live in the 'train' folder, hence mode='train'.
val_dataset = VinBigDataset(DATA_DIR, df_val, mode='train', transform=get_val_transform())
len(val_dataset)

In [None]:
# Shuffle the training samples every epoch, so the batches are not presented to SGD in a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=4, collate_fn=collate_fn)

In [None]:
# Validation loader: one image per batch, fixed order.
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=4, collate_fn=collate_fn)

In [None]:
# Fetch a single batch to check that the data pipeline works.
images, targets, img_ids = next(iter(train_dataloader))

In [None]:
# Put the model into training mode and move it to the selected device.
model.train()
model.to(device)

In [None]:
# Optimise only the parameters that require gradients, with SGD + momentum.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.01, momentum=0.9, weight_decay=0.0001)
# The learning rate is halved after every epoch (step_size=1, gamma=0.5).
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)
#lr_scheduler = None

num_epochs = 3

# Mean loss of the current epoch and a global iteration counter for logging.
loss_hist = Averager()
itr = 1

for epoch in range(num_epochs):
    loss_hist.reset()

    for images, targets, image_ids in train_dataloader:

        # Move the images and every entry of each target dict to the device.
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Called with targets, the model returns a dict of losses.
        loss_dict = model(images, targets)

        # The individual losses are summed into the value that is optimised.
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        loss_hist.send(loss_value)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Log the loss of every 100th iteration.
        if itr % 100 == 0:
            print(f"Iteration #{itr} loss: {loss_value}")

        itr += 1

    # update the learning rate
    if lr_scheduler is not None:
        lr_scheduler.step()

    print(f"Epoch #{epoch} loss: {loss_hist.value}")

In [None]:
# Persist the trained weights (state dict only).
# NOTE(review): on Kaggle the /kaggle/input tree is presumably read-only — confirm that this target directory exists and is writable.
torch.save(model.state_dict(), '/kaggle/input/model/fasterrcnn_resnet50_fpn2nd.pth')

In [None]:
# Store the preprocessed annotation table, so the preprocessing does not have to be repeated.
# NOTE(review): on Kaggle the /kaggle/input tree is presumably read-only — confirm that this target directory exists and is writable.
train_df.to_csv('/kaggle/input/my_data/train_df.csv', index=False)

# Training