In [None]:
### Installing tools necessary for effdet to work
# The first two packages are installed by pinned version; huggingface_hub is
# installed from a wheel file stored in an attached Kaggle dataset.

!pip install antlr4-python3-runtime==4.9.3
!pip install pycocotools==2.0.2
!pip install /kaggle/input/effdet-030-package-dataset/packages/huggingface_hub-0.13.3-py3-none-any.whl
import sys
# Put the dataset's package directory first on the import path so modules found
# there take precedence (presumably this is where effdet itself lives - confirm).
# NOTE(review): this path is relative while the pip line above uses the absolute
# /kaggle/input/... form; it only resolves when the working directory is /kaggle/working.
sys.path.insert(0, "../input/effdet-030-package-dataset/packages/")

In [None]:
### Installing tools to load dicom data (some data is compressed by jpeg)
# The conda package archives are copied from an attached Kaggle dataset into the
# working directory and then installed one by one with `--offline`, i.e. from the
# local .tar.bz2 files named on each line.
# NOTE(review): several archives carry a `py37` build tag, so they presumably
# require a Python 3.7 kernel - confirm against the notebook environment.

!cp /kaggle/input/pydicom-conda-helper/*.bz2 /kaggle/working/
!conda install --offline 'libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2' -y
!conda install --offline 'libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -y
!conda install --offline 'gdcm-2.8.9-py37h500ead1_1.tar.bz2' -y
# NOTE(review): this is the only line that also passes `-c conda-forge`; with
# `--offline` the channel is presumably unused - confirm whether it is needed.
!conda install --offline 'conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
!conda install --offline 'certifi-2020.12.5-py37h89c1867_1.tar.bz2' -y
!conda install --offline 'openssl-1.1.1k-h7f98852_0.tar.bz2' -y

In [None]:
### Import libraries

import os
import ast

# Basic libraries
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# Draw a sample
import matplotlib.pyplot as pp
import matplotlib.patches as patches
%matplotlib inline

# For loading image data
from PIL import Image, ImageDraw
import pydicom
import cv2

# Model and training
import torch
from torch import optim
from torch.utils.data import Dataset, DataLoader, default_collate
import torch.nn.functional as F
from torchvision.transforms.functional import to_pil_image
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from effdet import EfficientDet, DetBenchTrain, get_efficientdet_config, DetBenchPredict

In [None]:
### Make database for training images using train_image_level.csv
#
# Builds `train_image_list` with the columns needed to locate each DICOM file:
#   <train_root_path>/<studyID>/<seriesID>/<imageID>
# plus the raw `boxes` annotation string. Rows without boxes are dropped at the end.

train_image_list = pd.read_csv('/kaggle/input/siim-covid19-detection/train_image_level.csv')
train_image_list = train_image_list.drop(columns=['label'])
train_image_list = train_image_list.rename(columns={'id':'imageID', 'StudyInstanceUID':'studyID'})
train_image_list = train_image_list.assign(seriesID=pd.Series(dtype=str))
train_image_list = train_image_list.reindex(columns=['studyID', 'seriesID', 'imageID', 'boxes'])

train_root_path = '/kaggle/input/siim-covid19-detection/train'

# The series directory is not listed in the CSV, so it is found by probing every
# sub-directory of the study directory for the expected file name.
# Results are collected in lists and written back in one step: assigning through
# `train_image_list.loc[i].imageID = ...` sets the value on a temporary row object
# and may leave the DataFrame itself unchanged.
file_names = []
series_dirs = []
for study_id, image_id in tqdm(
    zip(train_image_list['studyID'], train_image_list['imageID']),
    total=len(train_image_list)
):
    top_dir = os.path.join(train_root_path, study_id)
    # The part of the id before the first '_' is used as the file stem.
    file_name = image_id.split('_')[0] + '.dcm'
    middle_dir = ''  # stays empty when no sub-directory contains the file
    for d in os.listdir(top_dir):
        if os.path.isfile(os.path.join(top_dir, d, file_name)):
            middle_dir = d
            break
    file_names.append(file_name)
    series_dirs.append(middle_dir)

train_image_list = train_image_list.assign(imageID=file_names, seriesID=series_dirs)

# Keep only images that have bounding boxes. reset_index() renumbers the rows
# from 0 (later cells look rows up by position-like labels) and keeps the old
# row labels in an 'index' column.
train_image_list = train_image_list.dropna(subset=['boxes']).reset_index()

In [None]:
### Make database for classes of training images using train_study_level.csv

train_class_db = pd.read_csv('/kaggle/input/siim-covid19-detection/train_study_level.csv').rename(
    columns={
        'id': 'studyID',
        'Negative for Pneumonia': 'negative',
        'Typical Appearance': 'typical',
        'Indeterminate Appearance': 'indeterminate',
        'Atypical Appearance': 'atypical',
    }
)

# Label assigned for each study-level flag column. The columns are checked in
# this order and the first one equal to 1 decides the label:
#   negative: -1, typical: 1, indeterminate: 2, atypical: 3
label_by_column = (
    ('negative', -1),
    ('typical', 1),
    ('indeterminate', 2),
    ('atypical', 3),
)

# New column starts out empty; rows whose study has none of the flags set keep it empty.
train_image_list = train_image_list.assign(classID=pd.Series(dtype=int))
for row_idx in range(len(train_image_list)):
    # Study ids in the study-level table carry a '_study' suffix.
    study_key = train_image_list.loc[row_idx, 'studyID'] + '_study'
    # .item() requires exactly one matching study row and raises otherwise.
    match_idx = train_class_db.index[train_class_db['studyID'] == study_key].item()
    study_row = train_class_db.loc[match_idx]
    for column, label in label_by_column:
        if study_row[column] == 1:
            train_image_list.loc[row_idx, 'classID'] = label
            break

In [None]:
### Dataset definition

class CTDataset(Dataset):
    """Detection dataset that reads DICOM images and their bounding boxes.

    Each item is a pair ``(x, y)``:
      * ``x`` - the image after resizing to ``image_size`` x ``image_size`` and
        conversion with ``ToTensorV2``.
      * ``y`` - a dict with ``'bbox'`` (float tensor, one row per box, in
        ``[y0, x0, y1, x1]`` order) and ``'cls'`` (float tensor of class ids).
        An item without boxes gets a single all-zero box with class ``0``.

    Args:
        train_root_path: Directory that contains the study directories.
        train_image_list: DataFrame with columns ``studyID``, ``seriesID``,
            ``imageID``, ``boxes`` and ``classID``; rows are looked up with
            ``.loc[idx]``, so its index must match the integer positions
            the DataLoader asks for.
        image_size: Side length of the square output image.
    """

    def __init__(self, train_root_path, train_image_list, image_size=512):
        self.train_image_path = train_root_path
        self.train_image_list = train_image_list
        self.image_size = image_size
        # in case data has no bounding boxes
        self.albu_no_label = A.Compose([
            A.Resize(width=self.image_size, height=self.image_size, p=1),
            ToTensorV2()
        ])
        # normal process for data: boxes are resized together with the image
        self.albu = A.Compose([
            A.Resize(width=self.image_size, height=self.image_size, p=1),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

    def __len__(self):
        """Return the number of rows in the image list."""
        return len(self.train_image_list)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(image_tensor, {'bbox': ..., 'cls': ...})``."""
        # Everything is read from the DataFrame given to this instance, not from
        # a notebook-level variable, so the dataset works with any image list.
        row = self.train_image_list.loc[idx]

        # Image
        dcm_path = os.path.join(self.train_image_path, row.studyID, row.seriesID, row.imageID)
        dcm = pydicom.dcmread(dcm_path)
        image = dcm.pixel_array.astype("float32")
        if image.ndim == 2:
            # Single-channel image: replicate it into three channels.
            # COLOR_BGR2RGB only accepts 3- or 4-channel input.
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        else:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # bounding boxes & classes
        bboxes, labels = self._get_bounding_boxes(row.boxes, row.classID)

        if bboxes.shape[0] == 0:
            result = self.albu_no_label(image=image)
            x = result['image']
            # Placeholder target; class 0 marks "no box" for the collate function.
            bboxes = torch.zeros([1, 4], dtype=torch.float32)
            labels = torch.FloatTensor(np.array([0]))
        else:
            result = self.albu(image=image, bboxes=bboxes, labels=labels)
            x = result['image']
            box = np.array(result['bboxes'])[:,[1,0,3,2]] # from xyxy to yxyx
            bboxes = torch.FloatTensor(box)
            labels = torch.FloatTensor(np.array(result['labels']))

        y = {
            'bbox': bboxes,
            'cls': labels
        }

        return x, y

    def _get_bounding_boxes(self, boxes_str, class_id):
        """Parse the annotation string into box and label arrays.

        Args:
            boxes_str: String holding a Python literal list of dicts with keys
                ``'x'``, ``'y'``, ``'width'`` and ``'height'``. Any non-string
                value (e.g. NaN) means "no boxes".
            class_id: Class id assigned to every box of this image.

        Returns:
            ``(boxes, labels)`` as NumPy arrays: ``boxes`` has one
            ``[x0, y0, x1, y1]`` row per box (rounded to integers, clamped at 0
            from below), ``labels`` has one ``[class_id]`` row per box. Both are
            empty arrays when there are no boxes.
        """
        boxes = []
        labels = []

        if not isinstance(boxes_str, str):
            return np.array(boxes), np.array(labels)

        # literal_eval only evaluates Python literals, never arbitrary code.
        bounding_boxes = ast.literal_eval(boxes_str)

        for bounding_box in bounding_boxes:
            x0 = max(0, int(round(bounding_box['x'])))
            y0 = max(0, int(round(bounding_box['y'])))
            x1 = max(0, x0 + int(round(bounding_box['width'])))
            y1 = max(0, y0 + int(round(bounding_box['height'])))
            boxes.append([x0, y0, x1, y1])
            labels.append(np.array([class_id]).astype(int))

        return np.array(boxes), np.array(labels)

In [None]:
### Padding processes

def pad_collate_fn(batch):
    """Collate ``(image, target)`` pairs whose targets hold different numbers of boxes.

    Every kept target is zero-padded along the box dimension up to the largest
    box count found in ``batch`` so the default collate can stack them. Items
    whose ``'cls'`` tensor contains a 0 are left out of the result; they still
    count when the padding length is determined.

    Args:
        batch: Sequence of ``(x, y)`` pairs where ``y`` is a dict with a
            ``'bbox'`` tensor of shape (n, 4) and a ``'cls'`` tensor with n entries.

    Returns:
        Whatever ``default_collate`` produces for the padded pairs.
    """
    target_rows = max(target['bbox'].shape[0] for _, target in batch)

    kept = []
    for image, target in batch:
        cls = target['cls']
        if bool((cls == 0).any()):
            # class 0 marks an item without usable boxes
            continue

        missing_rows = target_rows - target['bbox'].shape[0]
        # F.pad takes (left, right) pairs starting from the last dimension:
        # nothing on the last dimension, `missing_rows` appended to the one before it.
        row_padding = [0, 0, 0, missing_rows]
        cls_column = cls.reshape((cls.shape[0], 1))
        kept.append((
            image,
            {
                'bbox': F.pad(target['bbox'], row_padding, mode='constant', value=0),
                'cls': F.pad(cls_column, row_padding, mode='constant', value=0),
            },
        ))

    return default_collate(kept)

In [None]:
### Training
#
# Trains an EfficientDet model on `train_image_list`, saving a checkpoint every
# 10 epochs and once more after the last epoch.

checkpoint_dir = 'weights/b0'


def save_checkpoint(model, epoch, file_name):
    """Write the epoch number and `model`'s state dict to `checkpoint_dir/file_name`.

    Creates the directory if needed and returns the path that was written.
    """
    os.makedirs(checkpoint_dir, exist_ok=True)
    path = os.path.join(checkpoint_dir, file_name)
    torch.save({'epoch': epoch, 'state_dict': model.state_dict()}, path)
    return path


# you can change batch_size and num_workers here
dataset = CTDataset(train_root_path, train_image_list)
loader = DataLoader(
    dataset, batch_size=4, num_workers=2, collate_fn=pad_collate_fn
)

# you can change the number of epochs here
n_epochs = 1

# you can choose which effdet model you use here
cfg = get_efficientdet_config('tf_efficientdet_d0')
# you can choose the number of classes here
cfg.num_classes = 3
model = EfficientDet(cfg)
# DetBenchTrain wraps the model so that a forward pass returns a dict of losses.
bench = DetBenchTrain(model)
bench.train()

optimizer = optim.Adam(model.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2, verbose=True)

print('Starting training')
for epoch in range(1, n_epochs+1):
    header = f'[{epoch}/{n_epochs}]'

    lr = optimizer.param_groups[0]['lr']
    print(f'{header}Starting lr={lr:7f}')

    metrics = {
        'loss': []
    }
    t = tqdm(loader, leave=False)

    for inputs, targets in t:
        optimizer.zero_grad()
        losses = bench(inputs, targets)
        loss = losses['loss']
        loss.backward()
        optimizer.step()
        iter_metrics = {
            'loss': float(loss.item())
        }
        # Per-batch loss is shown on the progress bar.
        message = ' '.join([f'{k}:{v:4f}' for k, v in iter_metrics.items()])
        t.set_description(f'{header}{message}')
        t.refresh()
        for k, v in iter_metrics.items():
            metrics[k].append(v)

    # Epoch summary: computed once after all batches rather than on every
    # iteration, so the output is one line per epoch and `train_metrics`
    # exists even if the loader yielded no batch (the mean is then NaN).
    train_metrics = {k:np.mean(v) for k, v in metrics.items()}
    train_message = ' '.join([f'{k}:{v:4f}' for k, v in train_metrics.items()])
    print(f'{header}Train: {train_message}')

    if epoch % 10 == 0:
        checkpoint_path = save_checkpoint(model, epoch, f'{epoch}.pth')
        print(f'{header}Saved "{checkpoint_path}"')

    # The scheduler lowers the learning rate when the mean epoch loss stops improving.
    scheduler.step(train_metrics['loss'])
    print()


# Final checkpoint after the last epoch.
checkpoint_path = save_checkpoint(model, epoch, f'end_{epoch}.pth')
print(f'{header}Saved "{checkpoint_path}"')

In [None]:
### Draw the prediction for data#0
#
# Runs the trained model on one dataset item and draws every detection whose
# score exceeds `score_threshold` on top of the image.

num = 0
score_threshold = 0.35

image, targets = dataset[num]
image = image.unsqueeze(0)  # add the batch dimension

bench = DetBenchPredict(model)
# Switch to evaluation mode: the model was left in training mode by the training
# cell, which would make layers such as batch normalisation use the statistics
# of this single image instead of the learned ones.
bench.eval()
with torch.no_grad():
    output = bench(image)

fig, ax = pp.subplots()
ax.imshow(image[0,0,:,:])

# Columns 0-3 are read as x_min, y_min, x_max, y_max and column 4 as the score.
for detection in output[0]:
    if detection[4] > score_threshold:
        x1 = int(detection[0])
        y1 = int(detection[1])
        width = int(detection[2] - detection[0])
        height = int(detection[3] - detection[1])
        rect = patches.Rectangle((x1, y1), width, height, edgecolor='r', facecolor='none')
        ax.add_patch(rect)

pp.show()