In [None]:
import os
# Root of the competition data; ORIGINAL_TRAIN (the DICOM directory passed to show_boxes) is derived from it.
ROOT = '../input/vinbigdata-chest-xray-abnormalities-detection/'
# Root of a second input dataset; TRAIN_DIR and TEST_DIR are derived from it.
DATASET_ROOT = '../input/vinbigdata-original-image-dataset/vinbigdata'
# List the contents of ROOT as a quick check that the input is available.
os.listdir(ROOT)

In [None]:
# Directory of DICOM files; show_boxes builds '<dir>/<image_id>.dicom' paths from it.
ORIGINAL_TRAIN = os.path.join(ROOT, 'train')
# Image directory handed to the data loaders (they are called with img_ext='.jpg').
TRAIN_DIR = os.path.join(DATASET_ROOT, 'train')
# Not referenced by any other visible cell.
TEST_DIR = os.path.join(DATASET_ROOT, 'test')

In [None]:
import pandas as pd
import numpy as np
import pydicom
import random
import cv2
import torch
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler

import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2

from pydicom.pixel_data_handlers.util import apply_voi_lut
import warnings
warnings.filterwarnings("ignore")

In [None]:
SEED = 0

def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and all CUDA devices),
    sets ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (a no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN pick kernels by timing, which can differ between
    # runs; it has to be off for the deterministic flag above to be meaningful.
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

## Let's Inspect the Data

In [None]:
# The annotations are read from a separate pre-processed CSV instead of the competition's
# train.csv (judging by the file name, boxes were already fused with WBF — TODO confirm):
#data_df = pd.read_csv(os.path.join(ROOT, 'train.csv'))
data_df = pd.read_csv('../input/effdet-latestvinbigdata-wbf-fused/train_wbf_original.csv', index_col='Unnamed: 0')
data_df.head()

In [None]:
# Remove "no finding" samples: rows with class_id 14 are dropped and the index is renumbered.
data_df = data_df.loc[data_df['class_id'] != 14].reset_index(drop=True)

In [None]:
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Read a DICOM file and return its pixels as an 8-bit grayscale array.

    Args:
        path: Path of the DICOM file.
        voi_lut: When True, pass the pixel data through ``apply_voi_lut`` to
            get the "human-friendly" view; otherwise use the raw pixel array.
        fix_monochrome: When True, invert images whose PhotometricInterpretation
            is ``MONOCHROME1`` so they do not look inverted.

    Returns:
        ``np.uint8`` array stretched to the 0..255 range (all zeros when every
        pixel of the input has the same value).
    """
    # dcmread is the current name of the reader; read_file is a deprecated alias.
    dicom = pydicom.dcmread(path)

    pixels = dicom.pixel_array
    data = apply_voi_lut(pixels, dicom) if voi_lut else pixels

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    # A constant image has peak == 0; skip the division instead of producing NaNs.
    if peak > 0:
        data = data / peak

    return (data * 255).astype(np.uint8)

def plot_img(img, size=(7, 7), is_rgb=True, title="", cmap='gray'):
    """Show a single image in its own figure.

    Args:
        img: Image accepted by ``plt.imshow``.
        size: Figure size passed to ``plt.figure``.
        is_rgb: Accepted but not used by this function.
        title: Figure-level title.
        cmap: Colormap forwarded to ``plt.imshow``.
    """
    figure_kwargs = {'figsize': size}
    plt.figure(**figure_kwargs)
    plt.imshow(img, cmap=cmap)
    plt.suptitle(title)
    plt.show()


def plot_imgs(imgs, cols=4, size=7, is_rgb=True, title="", cmap='gray', img_size=(500,500)):
    """Show a list of images in a grid with ``cols`` columns.

    Args:
        imgs: Sequence of images.
        cols: Number of grid columns.
        size: Size of one grid cell; the figure is ``cols*size`` by ``rows*size``.
        is_rgb: Accepted but not used by this function.
        title: Figure-level title.
        cmap: Colormap forwarded to ``plt.imshow``.
        img_size: Target size for ``cv2.resize``; ``None`` keeps each image as is.
    """
    # Ceiling division: exactly as many rows as needed (the previous
    # `len // cols + 1` added an empty row whenever len was a multiple of cols).
    # At least one row is kept so an empty list still yields a valid figure size.
    rows = max(1, -(-len(imgs) // cols))
    fig = plt.figure(figsize=(cols*size, rows*size))
    for position, img in enumerate(imgs, start=1):
        if img_size is not None:
            img = cv2.resize(img, img_size)
        fig.add_subplot(rows, cols, position)
        plt.imshow(img, cmap=cmap)
    plt.suptitle(title)
    plt.show()

def show_boxes(finding_df, img_dir, n_samples=8):
    """Plot randomly chosen X-rays with their annotated boxes drawn on top.

    Args:
        finding_df: DataFrame with ``image_id``, ``class_id`` and
            ``x_min``/``y_min``/``x_max``/``y_max`` columns.
        img_dir: Directory containing ``<image_id>.dicom`` files.
        n_samples: Number of images to draw (images are sampled with
            replacement from the rows of ``finding_df``).
    """
    box_cols = ['x_min', 'y_min', 'x_max', 'y_max']
    img_ids = finding_df['image_id'].values
    if len(img_ids) == 0:
        # Nothing to sample from.
        return

    # map label_id to specify color (plain ints, which is what OpenCV expects)
    label2color = {
        class_id: [int(np.random.randint(0, 255)) for _ in range(3)]
        for class_id in finding_df['class_id'].unique()
    }
    thickness = 3
    scale = 5  # images and boxes are shrunk by this factor before drawing

    imgs = []
    for _ in range(n_samples):
        img_id = random.choice(img_ids)
        img = read_xray(path=f'{img_dir}/{img_id}.dicom')
        img = cv2.resize(img, None, fx=1/scale, fy=1/scale)
        img = np.stack([img, img, img], axis=-1)

        # Rows without coordinates cannot be drawn; the image is still shown.
        rows = finding_df.loc[finding_df['image_id'] == img_id].dropna(subset=box_cols)
        boxes = rows[box_cols].values / scale
        # Select the column as a Series so the labels stay 1-D even for a single
        # box; the old `.values.squeeze()` produced a 0-d array in that case and
        # the resulting error was swallowed, silently skipping one-box images.
        labels = rows['class_id'].values

        for label_id, box in zip(labels, boxes):
            img = cv2.rectangle(
                img,
                (int(box[0]), int(box[1])),
                (int(box[2]), int(box[3])),
                label2color[label_id], thickness
            )
        imgs.append(cv2.resize(img, (500, 500)))

    plot_imgs(imgs, cmap=None)

In [None]:
# Visual check of the annotations as loaded (before overlapping boxes are removed).
show_boxes(data_df, ORIGINAL_TRAIN)

In [None]:
# Function to calculate IOU and areas of corresponding boxes
def calculate_iou(bbox1, bbox2):
    """Compute the intersection-over-union of two boxes.

    Args:
        bbox1, bbox2: Mappings with ``x_min``, ``y_min``, ``x_max``, ``y_max``.

    Returns:
        Tuple ``(iou, area_bbox1, area_bbox2)``; ``iou`` is 0.0 when the boxes
        do not overlap.

    Raises:
        AssertionError: If a box has ``min >= max`` on either axis.
    """
    # Coordinates must be consistent
    for bbox in (bbox1, bbox2):
        assert(bbox['x_min'] < bbox['x_max'])
        assert(bbox['y_min'] < bbox['y_max'])

    # Areas of the two boxes
    width1, height1 = bbox1['x_max'] - bbox1['x_min'], bbox1['y_max'] - bbox1['y_min']
    area_bbox1 = width1 * height1
    assert area_bbox1 > 0
    width2, height2 = bbox2['x_max'] - bbox2['x_min'], bbox2['y_max'] - bbox2['y_min']
    area_bbox2 = width2 * height2
    assert area_bbox2 > 0

    # Corners of the intersection rectangle
    left = max(bbox1['x_min'], bbox2['x_min'])
    top = max(bbox1['y_min'], bbox2['y_min'])
    right = min(bbox1['x_max'], bbox2['x_max'])
    bottom = min(bbox1['y_max'], bbox2['y_max'])

    # Disjoint boxes: nothing to intersect
    if left > right or top > bottom:
        return 0.0, area_bbox1, area_bbox2

    overlap = (right - left) * (bottom - top)
    assert overlap >= 0

    iou = overlap / (area_bbox1 + area_bbox2 - overlap)
    assert 0.0 <= iou <= 1.0

    return iou, area_bbox1, area_bbox2

In [None]:
# Remove bounding boxes with high IOU and same class
def remove_bboxs(df, threshold=0.5):
    """Drop overlapping boxes of the same class within each image.

    For every pair of boxes of one image whose IoU is at least ``threshold``
    and whose ``class_id`` matches, the box with the larger area is discarded
    (the first one of the pair on a tie).

    Args:
        df: DataFrame with ``image_id``, ``class_id`` and
            ``x_min``/``y_min``/``x_max``/``y_max`` columns.
        threshold: Minimum IoU for two same-class boxes to count as duplicates.

    Returns:
        DataFrame with the surviving rows. As before, the index restarts at 0
        for every image, so it is not unique across the result. An empty input
        yields an empty frame with the same columns.
    """
    if df.empty:
        # pd.concat([]) would raise; an empty frame has nothing to de-duplicate.
        return df.copy()

    new_records = list()

    # sort=False keeps images in order of first appearance.
    for _, records in df.groupby('image_id', sort=False):
        records = records.reset_index(drop=True)
        # Convert the rows to dicts once instead of once per compared pair.
        boxes = records.to_dict('records')
        size = len(boxes)
        to_drop = set()

        for i in range(size - 1):
            if i in to_drop:
                continue
            bbox1 = boxes[i]
            # NOTE(review): boxes already marked for removal still take part as
            # the second box of a pair (unchanged from the original behavior).
            for j in range(i + 1, size):
                bbox2 = boxes[j]
                iou, bb1_area, bb2_area = calculate_iou(bbox1, bbox2)
                if iou >= threshold and bbox1['class_id'] == bbox2['class_id']:
                    if bb1_area >= bb2_area:
                        to_drop.add(i)
                        break
                    else:
                        to_drop.add(j)

        new_records.append(records.loc[~records.index.isin(list(to_drop))])

    if not new_records:
        # Every image_id was missing, so no group was formed.
        return df.iloc[0:0].copy()

    return pd.concat(new_records)

In [None]:
# De-duplicate same-class overlapping boxes with a threshold of 0.1 instead of the
# function's 0.5 default; the shapes printed before and after show how many rows were removed.
print(data_df.shape)
data_df = remove_bboxs(data_df, threshold = 0.1)
print(data_df.shape)

In [None]:
# Same visual check again, now on the de-duplicated boxes.
show_boxes(data_df, ORIGINAL_TRAIN)

In [None]:
# FIXME(review): `meta_df` is not defined in any visible cell, so this line raises NameError
# on a fresh "Restart & Run All". The rescaling cell that follows reads per-image `dim0` and
# `dim1` columns after this merge, so meta_df presumably holds image_id, dim0 and dim1 —
# load it explicitly before this line (source file to be confirmed).
data_df = data_df.merge(meta_df, on='image_id')
data_df.head()

In [None]:
# Transform bounding box coordinates to new scale: x is divided by dim1 and y by dim0,
# then multiplied by the target side length. Column arithmetic replaces the row-wise
# `apply` calls (same values, without a Python call per row).
# NOTE: this cell overwrites the coordinates in place, so re-running it rescales them again.
BOX_TARGET_SIZE = 512

data_df['x_min'] = data_df['x_min'] / data_df['dim1'] * BOX_TARGET_SIZE
data_df['y_min'] = data_df['y_min'] / data_df['dim0'] * BOX_TARGET_SIZE

data_df['x_max'] = data_df['x_max'] / data_df['dim1'] * BOX_TARGET_SIZE
data_df['y_max'] = data_df['y_max'] / data_df['dim0'] * BOX_TARGET_SIZE

In [None]:
# Sanity check: after rescaling, the largest x_max / y_max should not exceed the 512 target size.
data_df['x_max'].max(), data_df['y_max'].max()

In [None]:
# Shift every class id up by one (presumably to reserve 0 for the detector's background
# class — confirm). NOTE: re-running this cell shifts the ids again.
data_df['class_id'] = data_df['class_id'] + 1
print(data_df['class_id'].unique())

## Data Splitting by Applying K-Fold 

In [None]:
from sklearn.model_selection import GroupKFold

def split_data(df, groups_col, n_splits=5, fold=0):
    """Split ``df`` into train/validation parts with a grouped K-fold.

    Rows sharing the same ``groups_col`` value always land in the same fold.
    A ``fold`` column holding each row's fold number is added to ``df`` in place.

    Args:
        df: DataFrame to split.
        groups_col: Name of the column that defines the groups.
        n_splits: Number of folds.
        fold: Index of the fold used for validation (``0 <= fold < n_splits``).

    Returns:
        Tuple ``(df_train, df_val)``.

    Raises:
        ValueError: If ``fold`` is not a valid fold index.
    """
    if not 0 <= fold < n_splits:
        raise ValueError(f'fold must be in [0, {n_splits}), got {fold}')

    group_fold = GroupKFold(n_splits=n_splits)
    df['fold'] = -1
    fold_col = df.columns.get_loc('fold')

    # The loop variable must not be called `fold`: that shadowed the parameter,
    # so the requested fold was ignored and the last fold was always used.
    groups = df[groups_col].tolist()
    for fold_idx, (_, val_idx) in enumerate(group_fold.split(df, groups=groups)):
        # split() yields positions, so assign by position rather than by label.
        df.iloc[val_idx, fold_col] = fold_idx

    is_val = df['fold'] == fold
    df_val = df[is_val]
    df_train = df[~is_val]

    return df_train, df_val

In [None]:
# All annotations are used for the split. The disabled alternative below would
# restrict the data to a single radiologist ('R9'):
# df_split = data_df.loc[data_df['rad_id'] == 'R9'].reset_index(drop=True)
df_split = data_df.copy().reset_index(drop=True)
# Group by image_id so all boxes of one image stay on the same side of the split.
df_train, df_val = split_data(df_split, 'image_id')
df_train.shape, df_val.shape

## Creating a Custom Dataset Class

In [None]:
class ChestDataset(Dataset):
    """Detection dataset that yields one item per unique ``image_id``.

    Training items are drawn as-is, as a mixup of two images, or as a
    four-image cutmix mosaic (probabilities 0.4 / 0.3 / 0.3). Validation items
    are always used as-is. Inference items consist of the image only.

    Args:
        df: DataFrame with an ``image_id`` column and, unless ``is_inference``,
            ``x_min``/``y_min``/``x_max``/``y_max``/``class_id`` columns.
        image_dir: Directory that contains the image files.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` (only
            ``image=...`` in inference mode); it must return a dict.
        is_original: Read ``<image_id>.dicom`` through ``read_xray`` instead of
            ``<image_id><img_ext>`` through OpenCV.
        is_validation: Disable mixup / cutmix.
        is_inference: Return ``(image, image_id)`` without targets.
        padding: Zero-pad images (bottom / right) up to ``img_size``; applies
            to the non-DICOM branch only.
        img_size: Side length images and boxes are resized to.
        img_ext: File extension used when ``is_original`` is False.
    """

    def __init__(self, df, image_dir, transforms=None, is_original=False, is_validation=False, 
                 is_inference=False, padding=False, img_size=512, img_ext='.png'):
        super().__init__()

        self.df = df
        self.image_ids = df['image_id'].unique()
        self.image_dir = image_dir
        self.transforms = transforms
        self.is_original = is_original
        self.is_validation = is_validation
        self.is_inference = is_inference
        self.img_size = img_size
        self.padding = padding
        self.img_ext = img_ext

    def __getitem__(self, index):
        """Return ``(image, target, image_id)``, or ``(image, image_id)`` in inference mode.

        ``target`` is a dict with ``boxes`` (N x 4, float32), ``labels`` (int64),
        ``area`` (float32), ``iscrowd`` (uint8 zeros) and ``image_id`` (the index).
        """
        image_id = self.image_ids[index]

        if self.is_inference:
            # The image has to be loaded here; this branch used to reference
            # `image` and `image_id` before either was assigned.
            image = self.load_image_and_boxes(index)
            if self.transforms:
                image = self.transforms(image=image)['image']
            return image, image_id

        if self.is_validation:
            choice = 'as_is'
        else:
            choice = np.random.choice(['as_is', 'mixup', 'cutmix'], p=[0.4, 0.3, 0.3])

        if choice == 'mixup':
            image, boxes, labels = self.load_mixup_image_and_boxes(index)
        elif choice == 'cutmix':
            image, boxes, labels = self.load_cutmix_image_and_boxes(index, self.img_size)
        else:
            image, boxes, labels = self.load_image_and_boxes(index)

        ## To prevent ValueError: y_max is less than or equal to y_min for bbox from albumentations bbox_utils
        # (np.int64 replaces the `np.int` alias, which newer NumPy no longer provides.)
        boxes = np.asarray(boxes).reshape(-1, 4).astype(np.int64)
        labels = np.asarray(labels, dtype=np.int64).reshape(-1)
        valid = np.logical_and(boxes[:, 2] > boxes[:, 0], boxes[:, 3] > boxes[:, 1])
        boxes = boxes[valid]
        labels = labels[valid].tolist()

        if self.transforms:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = sample['image']
            boxes = sample['bboxes']
            labels = sample['labels']

            if len(boxes) == 0:
                # No box survived the transforms: fall back to a 1x1 placeholder with label 0.
                boxes = np.array([[0.0, 0.0, 1.0, 1.0]])
                labels = [0]

        boxes = torch.as_tensor(np.asarray(boxes, dtype=np.float32).reshape(-1, 4))
        labels = torch.as_tensor(labels, dtype=torch.int64)
        # Areas are derived from the final boxes so they stay in sync with whatever
        # the transforms moved, resized or dropped.
        area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

        target = dict()
        target['area'] = area
        target['labels'] = labels
        target['iscrowd'] = torch.zeros(labels.shape[0], dtype=torch.uint8)
        target['image_id'] = torch.tensor([index])
        target['boxes'] = boxes

        return image, target, image_id

    def load_image_and_boxes(self, index):
        """Load image ``index``; outside inference mode also return its resized boxes and labels.

        Returns:
            ``image`` in inference mode, otherwise ``(image, boxes, labels)``
            with image and boxes resized to ``img_size``. Images are float32
            RGB arrays scaled to [0, 1].

        Raises:
            FileNotFoundError: If the image file cannot be read by OpenCV.
        """
        image_id = self.image_ids[index]

        if self.is_original:
            img_path = os.path.join(self.image_dir, image_id + '.dicom')
            image = read_xray(img_path)
            image = np.stack([image, image, image], axis=-1).astype(np.float32)
            # Same [0, 1] range as the branch below; mixup averages images and
            # cutmix fills with 1, both of which assume that range.
            image /= 255.0
        else:
            img_path = os.path.join(self.image_dir, image_id + self.img_ext)
            image = cv2.imread(img_path, cv2.IMREAD_COLOR)
            if image is None:
                # cv2.imread reports failure by returning None.
                raise FileNotFoundError(f'Could not read image: {img_path}')
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
            image /= 255.0

            if self.padding:
                # Pad height and width independently (an `elif` used to skip the
                # width whenever the height had been padded).
                pad_h = max(self.img_size - image.shape[0], 0)
                pad_w = max(self.img_size - image.shape[1], 0)
                if pad_h or pad_w:
                    image = np.pad(image, ((0, pad_h), (0, pad_w), (0, 0)),
                                   mode='constant', constant_values=(0))

        if self.is_inference:
            return image

        info = self.df.loc[self.df['image_id'] == image_id]
        boxes = info.loc[:, ['x_min', 'y_min', 'x_max', 'y_max']].values
        labels = info['class_id'].values

        return self.resize_img_and_boxes(image, boxes, labels)

    def resize_img_and_boxes(self, image, boxes, labels):
        """Resize ``image`` to ``img_size`` x ``img_size`` and scale the boxes with it.

        Returns:
            ``(image, boxes, labels)`` where ``boxes`` is an (N, 4) float array
            (N may be 0) and ``labels`` is whatever the transform returns.
        """
        resize_transform = A.Compose([A.Resize(height=self.img_size, width=self.img_size, p=1.0)], 
                                      p=1.0, 
                                      bbox_params=A.BboxParams(
                                          format='pascal_voc',
                                          min_area=0.1, 
                                          min_visibility=0.1,
                                          label_fields=['labels'])
                                     )

        resized = resize_transform(image=image, bboxes=boxes, labels=labels)

        # Build the array from a list (np.vstack rejects generators) and force the
        # (N, 4) shape so an image without boxes does not break the callers.
        resized_bboxes = np.array([list(bx) for bx in resized['bboxes']], dtype=np.float64).reshape(-1, 4)

        return resized['image'], resized_bboxes, resized['labels']

    def load_mixup_image_and_boxes(self, index):
        """Average image ``index`` with a random second image and pool their boxes and labels."""
        image, boxes, labels = self.load_image_and_boxes(index)
        r_index = random.randint(0, len(self.image_ids) - 1)
        r_image, r_boxes, r_labels = self.load_image_and_boxes(r_index)

        mixed_image = (image + r_image) / 2
        mixed_boxes = np.vstack((boxes, r_boxes)).astype(np.int32)
        mixed_labels = np.concatenate((labels, r_labels))

        return mixed_image, mixed_boxes, mixed_labels

    def load_cutmix_image_and_boxes(self, index, imsize=512):
        """ 
        This implementation of cutmix author:  https://www.kaggle.com/nvnnghia 
        Refactoring and adaptation: https://www.kaggle.com/shonenkov

        Builds an ``imsize`` x ``imsize`` mosaic from image ``index`` and three
        random images placed around a random centre, shifts each image's boxes
        accordingly, clips them to the mosaic and drops boxes with zero area.
        Expects the loaded images to be ``imsize`` x ``imsize``.
        """
        w, h = imsize, imsize
        s = imsize // 2

        xc, yc = [int(random.uniform(imsize * 0.25, imsize * 0.75)) for _ in range(2)]  # center x, y
        indexes = [index] + [random.randint(0, len(self.image_ids) - 1) for _ in range(3)]

        result_image = np.full((imsize, imsize, 3), 1, dtype=np.float32)
        result_boxes = []
        result_labels = []

        for i, tile_index in enumerate(indexes):
            image, boxes, labels = self.load_image_and_boxes(tile_index)
            if i == 0:  # top left
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
            elif i == 3:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
            result_image[y1a:y2a, x1a:x2a] = image[y1b:y2b, x1b:x2b]

            # Offset between the tile's position in the mosaic and in its source image.
            padw = x1a - x1b
            padh = y1a - y1b

            boxes[:, 0] += padw
            boxes[:, 1] += padh
            boxes[:, 2] += padw
            boxes[:, 3] += padh

            result_boxes.append(boxes)
            result_labels.append(np.asarray(labels, dtype=np.int64).reshape(-1))

        result_boxes = np.concatenate(result_boxes, 0)
        result_labels = np.concatenate(result_labels)
        result_boxes = np.clip(result_boxes, 0, 2 * s).astype(np.int32)

        widths = result_boxes[:, 2] - result_boxes[:, 0]
        heights = result_boxes[:, 3] - result_boxes[:, 1]
        index_to_use = np.where(widths * heights > 0)
        result_boxes = result_boxes[index_to_use]
        result_labels = result_labels[index_to_use]

        return result_image, result_boxes, result_labels

    def __len__(self):
        """Number of unique image ids."""
        return len(self.image_ids)

## Defining Transformations 

In [None]:
def get_train_transform():
    """Build the training augmentation pipeline.

    Applies, in order: random brightness/contrast, shift-scale-rotate, an
    occasional grayscale conversion, horizontal and vertical flips, cutout,
    and conversion to a tensor. Boxes are expected in ``pascal_voc`` format
    with their classes passed through the ``labels`` field.
    """
    augmentations = [
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
        A.ShiftScaleRotate(scale_limit=0.1, rotate_limit=45, p=0.5),
        A.ToGray(p=0.01),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Cutout(num_holes=8, max_h_size=16, max_w_size=16, fill_value=0, p=0.3),
        #A.Normalize(mean=(0, 0, 0), std=(1, 1, 1), max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    box_settings = A.BboxParams(format='pascal_voc', min_area=0, min_visibility=0,
                                label_fields=['labels'])

    return A.Compose(augmentations, p=1.0, bbox_params=box_settings)

def get_valid_transform():
    """Build the validation pipeline: resize to 512 x 512 and convert to a tensor.

    Boxes are expected in ``pascal_voc`` format with their classes passed
    through the ``labels`` field.
    """
    steps = [
        A.Resize(height=512, width=512, p=1.0),
        #A.Normalize(mean=(0, 0, 0), std=(1, 1, 1), max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    box_settings = A.BboxParams(format='pascal_voc', min_area=0, min_visibility=0,
                                label_fields=['labels'])

    return A.Compose(steps, p=1.0, bbox_params=box_settings)

## Defining the Model 

In [None]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

def get_model(n_classes):
    """Create a pretrained Faster R-CNN (ResNet-50 FPN) with a new box predictor.

    Args:
        n_classes: Number of outputs of the replacement ``FastRCNNPredictor``.

    Returns:
        The detection model with its ``roi_heads.box_predictor`` swapped out.
    """
    detector = fasterrcnn_resnet50_fpn(pretrained=True)

    # Reuse the input width of the stock classification head for the new head.
    roi_heads = detector.roi_heads
    head_inputs = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_inputs, n_classes)

    return detector

## Creating the DataLoaders 

In [None]:
def collate_fn(batch):
    """Transpose a list of samples into a tuple of per-field tuples.

    ``[(a1, b1), (a2, b2)]`` becomes ``((a1, a2), (b1, b2))``, so fields of
    different sizes are kept as-is instead of being stacked.
    """
    fields = zip(*batch)
    return tuple(fields)

def collate_fn2(batch):
    """Like ``collate_fn``, but samples that are ``None`` are dropped first.

    Returns:
        Tuple of per-field tuples built from the remaining samples (an empty
        tuple when nothing remains).
    """
    kept = [sample for sample in batch if sample is not None]
    # `tuple`, not `tupe`: the typo made every call fail with NameError.
    return tuple(zip(*kept))

def get_data_loader(df, data_dir, transform, batch_size=8, shuffle=True, is_original=False, 
                    is_validation=False, is_inference=False, padding=False, img_ext='.png',
                    num_workers=4):
    """Wrap ``df`` in a ``ChestDataset`` and return a ``DataLoader`` over it.

    Args:
        df: Annotation DataFrame passed to ``ChestDataset``.
        data_dir: Image directory passed to ``ChestDataset``.
        transform: Transform pipeline passed to ``ChestDataset``.
        batch_size: Samples per batch.
        shuffle: Whether the loader reshuffles the data every epoch.
        is_original, is_validation, is_inference, padding, img_ext: Forwarded
            to ``ChestDataset`` unchanged.
        num_workers: Number of loader worker processes (previously fixed at 4;
            0 loads in the main process).

    Returns:
        A ``DataLoader`` that batches with ``collate_fn``.
    """
    # Keyword arguments keep the call independent of the constructor's parameter order.
    dataset = ChestDataset(df, data_dir, transforms=transform, is_original=is_original,
                           is_validation=is_validation, is_inference=is_inference,
                           padding=padding, img_ext=img_ext)

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        collate_fn=collate_fn
    )

    return loader

## Testing DataLoader 

In [None]:
# Smoke test of the training pipeline: fetch one batch and show its images in a 2-row grid.
# NOTE(review): the axes[row][col] indexing assumes an even batch size of at least 4.
test_loader = get_data_loader(df_train, TRAIN_DIR, get_train_transform(), img_ext='.jpg')
imgs, targets, ids = next(iter(test_loader))

batch_size = len(imgs)
colsize = batch_size // 2
fig, axes = plt.subplots(2, colsize, figsize=(16, 8))

for i in range(batch_size):
    row = i // colsize    
    col = i % colsize
    # transpose(1, 2, 0) moves the first axis last (presumably channels-first -> channels-last for imshow).
    axes[row][col].imshow(imgs[i].numpy().transpose(1, 2, 0))
        
plt.show()

In [None]:
# Same smoke test for the validation pipeline (is_validation=True disables mixup / cutmix).
# NOTE(review): the axes[row][col] indexing assumes an even batch size of at least 4.
test_loader = get_data_loader(df_val, TRAIN_DIR, get_valid_transform(), is_validation=True, img_ext='.jpg')
imgs, targets, ids = next(iter(test_loader))

batch_size = len(imgs)
colsize = batch_size // 2
fig, axes = plt.subplots(2, colsize, figsize=(16, 8))

for i in range(batch_size):
    row = i // colsize    
    col = i % colsize
    # transpose(1, 2, 0) moves the first axis last (presumably channels-first -> channels-last for imshow).
    axes[row][col].imshow(imgs[i].numpy().transpose(1, 2, 0))
        
plt.show()

## Training 

In [None]:
class Averager:
    """Accumulates values and reports their running mean."""

    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Add one observation to the running total."""
        self.iterations += 1
        self.current_total += value

    @property
    def value(self):
        """Mean of the observations sent since the last reset (0 if there are none)."""
        if self.iterations != 0:
            return 1.0 * self.current_total / self.iterations
        return 0

    def reset(self):
        """Forget every observation."""
        self.iterations = 0.0
        self.current_total = 0.0

In [None]:
def train(train_data, valid_data, model, n_epoch=10, 
          learning_rate=1e-3, device='cpu', print_freq=100):
    """Train ``model`` with SGD and report per-epoch train / validation losses.

    After every epoch the weights are saved to ``./model_<epoch>.th`` and the
    mean training and validation loss of that epoch are printed.

    Args:
        train_data: Iterable of ``(images, targets, ids)`` training batches.
        valid_data: Iterable of ``(images, targets, ids)`` validation batches.
        model: Detection model; ``model(images, targets)`` must return a dict of losses.
        n_epoch: Number of epochs.
        learning_rate: SGD learning rate.
        device: Device the model and the batches are moved to.
        print_freq: Print the running training loss every ``print_freq``
            iterations; 0 or ``None`` disables these progress lines.
    """
    print('Device:', device)
    model = model.to(device)
    # The model stays in train mode for the validation pass as well: that loop reads
    # a loss dict from model(images, targets), which torchvision detection models
    # only return in training mode.
    model.train()

    train_loss_hist = Averager()
    valid_loss_hist = Averager()

    # Create the optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=learning_rate, momentum=0.6, weight_decay=0.0005, nesterov=True)
    #lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)

    for e in range(n_epoch):
        # Start each epoch from scratch; without the reset the printed "epoch"
        # losses were averages over every epoch run so far.
        train_loss_hist.reset()
        valid_loss_hist.reset()

        for step, (images, targets, _) in enumerate(train_data, start=1):
            images = [img.to(device) for img in images]
            targets = [{k:v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            train_losses = sum(loss_dict.values())
            train_loss_hist.send(train_losses.item())

            optimizer.zero_grad()
            train_losses.backward()
            optimizer.step()

            if print_freq and step % print_freq == 0:
                print(f"Epoch:{e}, Iteration:{step}, Running train loss:{train_loss_hist.value}")

#         if lr_scheduler is not None:
#              lr_scheduler.step()

        print('Saving model, epoch:', e)
        torch.save(model.state_dict(), f'./model_{e}.th')

        with torch.no_grad():
            for images, targets, _ in valid_data:
                images = [img.to(device) for img in images]
                targets = [{k:v.to(device) for k, v in t.items()} for t in targets]

                loss_dict = model(images, targets)
                val_losses = sum(loss_dict.values())
                valid_loss_hist.send(val_losses.item())

        print(f"Epoch:{e}, Train loss:{train_loss_hist.value}, Validation loss:{valid_loss_hist.value}")

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# 15 outputs: class ids were shifted to start at 1 earlier in the notebook, leaving
# index 0 free (presumably for the detector's background class — confirm).
model = get_model(15)
train_loader = get_data_loader(df_train, TRAIN_DIR, get_train_transform(), batch_size = 8, img_ext='.jpg')
# is_validation=True keeps mixup / cutmix out of the validation data, and shuffle=False
# keeps the evaluation order fixed; both were missing, so validation batches were augmented.
valid_loader = get_data_loader(df_val, TRAIN_DIR, get_valid_transform(), batch_size = 8,
                               shuffle=False, is_validation=True, img_ext='.jpg')

In [None]:
%%time
# Train for 10 epochs; learning_rate=1e-2 overrides train()'s 1e-3 default.
train(train_loader, valid_loader, model, device=device, learning_rate=1e-2, n_epoch = 10)

In [None]:
def show_random_prediction(model_path, df, data, device, threshold=0.5):
    """Plot one sample with its ground-truth boxes (red) and predicted boxes (blue).

    A random batch index is drawn, ``data`` is iterated up to that batch and
    the first sample of the batch is shown.

    Args:
        model_path: Path of a state dict saved with ``torch.save``.
        df: Accepted but not used by this function.
        data: Sized iterable of ``(images, targets, ids)`` batches.
        device: Device used for inference.
        threshold: Only predictions scoring above this value are drawn.
    """
    model = get_model(15)
    # map_location lets weights saved on a GPU load on a CPU-only machine.
    weights = torch.load(model_path, map_location=device)
    model.load_state_dict(weights)
    model.to(device)
    model.eval()

    idx = np.random.randint(len(data))
    for i, sample in enumerate(data):
        if i == idx:
            break

    img, targets = sample[0][0], sample[1][0]

    # The detector takes a list of 3-D image tensors; `img.unsqueeze(1)` instead
    # split the image into one single-channel "image" per colour channel.
    with torch.no_grad():
        outputs = model([img.to(device)])

    # OpenCV draws in place and needs a contiguous channels-last array.
    canvas = np.ascontiguousarray(img.cpu().numpy().transpose(1, 2, 0))

    def _draw(boxes, color):
        # Boxes are (x_min, y_min, x_max, y_max) corners, not (x, y, w, h), and
        # OpenCV needs integer pixel coordinates.
        for x_min, y_min, x_max, y_max in boxes:
            cv2.rectangle(canvas,
                          (int(x_min), int(y_min)),
                          (int(x_max), int(y_max)),
                          color, 1)

    # Colours are given in the 0..1 range of the float image.
    _draw(targets['boxes'].cpu().numpy(), (1.0, 0.0, 0.0))

    pred_boxes = outputs[0]['boxes'].detach().cpu().numpy()
    prob = outputs[0]['scores'].detach().cpu().numpy()
    _draw(pred_boxes[prob > threshold], (0.0, 0.0, 1.0))

    fig, ax = plt.subplots(1, 1, figsize=(16, 8))
    ax.set_axis_off()
    ax.imshow(canvas)

In [None]:
# Show ground truth vs. predictions for the checkpoint saved after the last of the 10 epochs,
# keeping predictions scored above 0.6. The sample comes from valid_loader; the df_train
# argument is not used by show_random_prediction.
show_random_prediction('./model_9.th', df_train, valid_loader, device, 0.6)