In [None]:
# Install GDCM from conda-forge (presumably needed to decode compressed DICOM
# pixel data - TODO confirm).
!conda install gdcm -c conda-forge -y
# Install the Python packages with pip. No versions are pinned, so the
# resolved versions can differ between runs.
!pip install pycocotools numpy opencv-python tqdm tensorboard tensorboardX pyyaml webcolors matplotlib

In [None]:
!git clone https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch

import os
os.chdir("Yet-Another-EfficientDet-Pytorch")

In [None]:

! mkdir weights
! wget https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/releases/download/1.0/efficientdet-d0.pth -O weights/efficientdet-d0.pth

In [None]:
# Project description for the EfficientDet training script.
# The original cell wrote a blank file, which leaves the trainer without any
# project settings. The keys below follow the format of the repository's other
# project files:
#   * project_name / train_set / val_set match the folders and the
#     annotations/instances_<set>.json files created later in this notebook;
#   * obj_list follows the category order used when the annotations are built
#     (category_id 1, 2, 3).
# NOTE(review): num_gpus, mean/std and the anchor settings are the repository
# defaults - confirm they are what you want for this dataset.
siim_yml = '''project_name: siim
train_set: train
val_set: val
num_gpus: 1

# mean and std in RGB order
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]

anchors_scales: '[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]'
anchors_ratios: '[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]'

# order must match the category ids of the annotations (1-based)
obj_list: ['typical', 'indeterminate', 'atypical']
'''
with open('projects/siim.yml', 'w') as f:
    f.write(siim_yml)

In [None]:
import os
from PIL import Image
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

import torch

def read_xray(path, voi_lut=False, fix_monochrome=True):
    """Read a DICOM file and return its pixel data rescaled to 8 bits.

    Args:
        path: Location of the DICOM file.
        voi_lut: When true, pass the pixel data through ``apply_voi_lut``
            before rescaling.
        fix_monochrome: When true, invert images whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with values stretched to 0..255.
        An image whose pixels are all equal comes back as all zeros.
    """
    # dcmread is the supported reader; read_file is only an alias of it in
    # older pydicom releases and is absent from newer ones.
    dicom = pydicom.dcmread(path)
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    # A constant image has peak == 0 after the shift; dividing would produce
    # NaNs that cast to arbitrary uint8 values, so leave the zeros as they are.
    if peak > 0:
        data = data / peak

    return (data * 255).astype(np.uint8)


def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Turn a pixel array into a PIL image scaled to ``size``.

    Args:
        array: Pixel data accepted by ``Image.fromarray``.
        size: Target edge length in pixels.
        keep_ratio: When true, shrink in place with ``thumbnail`` so the image
            fits inside ``size`` x ``size``; otherwise resize to exactly
            ``size`` x ``size``.
        resample: Resampling filter handed to PIL.

    Returns:
        The resized ``PIL.Image.Image``.
    """
    im = Image.fromarray(array)
    target = (size, size)

    if not keep_ratio:
        return im.resize(target, resample)

    # thumbnail() modifies the image in place and returns nothing
    im.thumbnail(target, resample)
    return im

In [None]:
from glob import glob
INPUT_PATH = "/kaggle/input/siim-covid19-detection/"

# Convert the files of both splits into 256x256 JPEGs under datasets/siim/<split>/.
for split in ("test", "train"):
    save_dir = f"datasets/siim/{split}/"
    os.makedirs(save_dir, exist_ok=True)

    dicom_paths = glob(INPUT_PATH + split + '/*/*/*')
    for path in tqdm(dicom_paths):
        # swap the three-character extension of the source file name for 'jpg'
        jpg_name = path.split('/')[-1][:-3] + 'jpg'
        im = resize(read_xray(path), size=256)
        im.save(os.path.join(save_dir, jpg_name))

In [None]:
import os
import pandas as pd
from glob import glob
import pydicom

In [None]:
# Load the study-level and image-level label tables from INPUT_PATH.
train_study = pd.read_csv(os.path.join(INPUT_PATH, 'train_study_level.csv'))
train_image = pd.read_csv(os.path.join(INPUT_PATH, 'train_image_level.csv'))

In [None]:
# Preview the first rows of the study-level table as loaded from the CSV.
train_study.head()

In [None]:
# Shorten the class column names and replace 'id' with a StudyInstanceUID key.
train_study = (
    train_study
    .rename(columns={
        'Negative for Pneumonia': 'Negative',
        'Typical Appearance': 'Typical',
        'Indeterminate Appearance': 'Indeterminate',
        'Atypical Appearance': 'Atypical',
    })
    # the study UID is 'id' without its last six characters
    .assign(StudyInstanceUID=lambda frame: frame['id'].str[:-6])
    .drop(columns=['id'])
)
train_study.head()

In [None]:
# Preview the first rows of the image-level table as loaded from the CSV.
train_image.head()

In [None]:
# Attach the study-level class columns to every image row, then trim the
# last six characters off the image 'id'.
train_image = (
    train_image
    .merge(train_study, on='StudyInstanceUID')
    .assign(id=lambda frame: frame['id'].str[:-6])
)
train_image.head()

In [None]:
# Map every training file to its id (file name minus the last four characters)
# and join the resulting path column onto the image table.
dicom_paths = glob('/kaggle/input/siim-covid19-detection/train/*/*/*')
id_path = pd.DataFrame(
    [(dicom_path, dicom_path.split('/')[-1][:-4]) for dicom_path in dicom_paths],
    columns=['path', 'id'],
)
train_image = train_image.merge(id_path, on='id')
train_image.head()

In [None]:
# Show the raw 'boxes' value of the first row.
train_image.iloc[0]['boxes']

In [None]:
# Show the raw 'label' value of the first row.
train_image.iloc[0]['label']

In [None]:
# Pixel-array shape of the first training file. dcmread is used because
# read_file is only a legacy alias that newer pydicom releases no longer provide.
pydicom.dcmread(train_image.iloc[0]['path']).pixel_array.shape

In [None]:
# Record the original resolution of every training file so the boxes can be
# rescaled later. Only the header is read (stop_before_pixels=True): Rows and
# Columns are the first two dimensions of the pixel array, so decoding the
# full image just to ask for its shape is unnecessary.
# Column naming is kept as-is: 'xcell' holds Rows, 'ycell' holds Columns.
xy = []
for dicom_path in tqdm(train_image['path']):
    header = pydicom.dcmread(dicom_path, stop_before_pixels=True)
    xy.append((int(header.Rows), int(header.Columns)))
train_image[['xcell','ycell']] = xy
train_image.to_csv('datasets/train_image.csv', index=False)
train_image.head()

In [None]:
import random
import os
import shutil
import pandas as pd
import json

SRC_PATH = 'datasets/siim/train/'
TRG_PATH = 'datasets/siim/'
VAL_PATH = TRG_PATH + 'val/'

# os.listdir returns entries in arbitrary order, so the listing is sorted
# before the seeded shuffle; otherwise the same seed can yield different splits.
train_list = sorted(os.listdir(SRC_PATH))
random.seed(481)
random.shuffle(train_list)

os.makedirs(VAL_PATH, exist_ok=True)
# Split only once: if val already holds files, re-running this cell would
# move a further 20% of what is left in train.
if not os.listdir(VAL_PATH):
    for path in train_list[int(len(train_list)*0.8):]:
        shutil.move(SRC_PATH + path, VAL_PATH + path)

In [None]:
def anno(sets='train', image_size=256):
    """Write a COCO-style ``instances_<sets>.json`` for one dataset split.

    The split is defined by the image files found in ``TRG_PATH + sets``. Only
    rows of the global ``train_image`` frame whose ``id`` matches one of those
    files contribute to the output, for both the ``images`` and the
    ``annotations`` sections.

    Args:
        sets: Sub-directory of ``TRG_PATH`` that holds the split's images; also
            used in the name of the JSON file.
        image_size: Edge length in pixels of the (square) resized images. Boxes
            are rescaled from the original resolution to this size.

    Side effects:
        Creates ``TRG_PATH + 'annotations'`` if needed and writes
        ``instances_<sets>.json`` into it.
    """
    # File names without their extension are the image ids of this split.
    split_ids = {os.path.splitext(name)[0] for name in os.listdir(TRG_PATH + sets)}
    split_rows = train_image[train_image['id'].isin(split_ids)]

    annotation = {
        'type': 'instances',
        'categories': [
            {'supercategory': 'none', 'id': 1, 'name': 'typical'},
            {'supercategory': 'none', 'id': 2, 'name': 'indeterminate'},
            {'supercategory': 'none', 'id': 3, 'name': 'atypical'},
        ],
        'images': [],
        'annotations': [],
    }

    # Class flag column -> category id, in the same order as 'categories'.
    category_columns = (('Typical', 1), ('Indeterminate', 2), ('Atypical', 3))

    next_annotation_id = 1
    for row_label, row in split_rows.iterrows():
        # Image ids are the frame's index label shifted to start at 1.
        image_id = int(row_label) + 1
        annotation['images'].append({
            'file_name': row['id'] + '.jpg',
            'height': image_size,
            'width': image_size,
            'id': image_id,
        })

        # A missing 'boxes' value is NaN (a float), i.e. nothing to annotate.
        if not isinstance(row['boxes'], str):
            continue

        category = next(
            (cat_id for column, cat_id in category_columns if row[column] == 1),
            None,
        )
        if category is None:
            # No class flag set: skip instead of reusing the previous row's category.
            continue

        # 'xcell' stores the first pixel-array dimension (rows) and 'ycell'
        # the second (columns), so they map to height and width respectively.
        src_height, src_width = row['xcell'], row['ycell']

        # The box list is stored with single quotes; make it valid JSON first.
        for box in json.loads(row['boxes'].replace('\'', '\"')):
            # Floor division keeps the whole-pixel coordinates used so far;
            # float() guarantees a JSON-serialisable value.
            x = float((box['x'] * image_size) // src_width)
            y = float((box['y'] * image_size) // src_height)
            w = float((box['width'] * image_size) // src_width)
            h = float((box['height'] * image_size) // src_height)
            annotation['annotations'].append({
                'area': w * h,  # area of the rescaled box
                'iscrowd': 0,
                'image_id': image_id,
                'bbox': [x, y, w, h],
                'category_id': category,
                'id': next_annotation_id,
                'ignore': 0,
                'segmentation': [],
            })
            next_annotation_id += 1

    # save annotation json files
    os.makedirs(TRG_PATH + 'annotations', exist_ok=True)
    with open(f'{TRG_PATH}annotations/instances_{sets}.json', 'w') as f:
        json.dump(annotation, f)


In [None]:
# Make sure the output folder exists, then export one annotation file per split.
os.makedirs(f'{TRG_PATH}annotations', exist_ok=True)
for split_name in ('train', 'val'):
    anno(split_name)