In [None]:
# !pip install effdet
# !pip install timm
!pip install python-gdcm

# Library

In [None]:
import numpy as np
import pandas as pd
import pydicom as dicom
from glob import glob
import random
from numba import jit

import ast
import cv2
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# from torch.utils.data import DataLoader, Dataset, random_split
# from torch.cuda.amp import GradScaler, autocast

import torchvision

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GroupKFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import average_precision_score
# from sklearn.metrics import roc_auc_score

import matplotlib.pyplot as plt
import matplotlib

import os
import warnings
from datetime import datetime, timedelta
import time
import gc
import re
from tqdm.notebook import tqdm
from fastprogress.fastprogress import master_bar, progress_bar
# from torchinfo import summary

# Notebook-wide output settings: hide every warning and cap how much of a
# DataFrame is rendered (25 columns, 20 rows).
warnings.filterwarnings('ignore')
pd.options.display.max_columns = 25
pd.options.display.max_rows = 20
# %load_ext line_profiler

# Configuration

In [None]:
# Run-wide settings read by the cells below (seed, fold count, image size, ...).
config = dict(
    batch_size=4,
    seed=46,
    num_classes=4,
    fold=5,
    image_size=512,
    num_workers=4,
)

# Seed

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and every CUDA device),
    exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all visible GPUs, not only the current one; this is a no-op
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run, which
    # defeats `deterministic = True`; it has to be off for reproducibility.
    torch.backends.cudnn.benchmark = False
    
seed_everything(config['seed'])

# Path

In [None]:
# Input locations, grouped by the dataset directory they live in.
_COMPETITION_DIR = '../input/siim-covid19-detection'
_TRAIN_DF_DIR = '../input/siim-train-df'

# Files and folders inside the competition dataset.
SAMP_SUB = f'{_COMPETITION_DIR}/sample_submission.csv'
TRAIN_IMAGE_LEVEL = f'{_COMPETITION_DIR}/train_image_level.csv'
TRAIN_STUDY_LEVEL = f'{_COMPETITION_DIR}/train_study_level.csv'
TRAIN_PATH = f'{_COMPETITION_DIR}/train'
TEST_PATH = f'{_COMPETITION_DIR}/test'

# CSV tables from the separate train-df dataset.
TDF_PATH = f'{_TRAIN_DF_DIR}/train_df.csv'
TRAIN_DFV2 = f'{_TRAIN_DF_DIR}/train_df_v2.csv'

# Folder of training images exported as PNG.
TRAIN_PNG = '../input/siim-covid19-resized-to-512px-png/train'

# Setup DataFrame

In [None]:
train_df = pd.read_csv(TDF_PATH)
study_lvl = pd.read_csv(TRAIN_STUDY_LEVEL)
image_lvl = pd.read_csv(TRAIN_IMAGE_LEVEL)

# Collapse columns 4..7 into one class index per row.
# NOTE(review): assumes those four columns are the one-hot class flags of
# train_df.csv — confirm against the file.
train_df['integer_label'] = np.argmax(train_df.iloc[:, 4:8].values, axis=1)
# Keep the part of `id` that precedes the first underscore.
train_df['ImageInstanceUID'] = train_df['id'].str.split('_').str[0]

# glob() returns files in arbitrary order, so assigning its result directly
# to a column pairs rows with unrelated images. Look every image up by its
# file stem instead.
# NOTE(review): assumes each PNG is named '<ImageInstanceUID>.<ext>' — confirm.
png_by_uid = {
    os.path.splitext(os.path.basename(png_path))[0]: png_path
    for png_path in glob(f'{TRAIN_PNG}/*')
}
train_df['train_png'] = train_df['ImageInstanceUID'].map(png_by_uid)
missing_png = int(train_df['train_png'].isna().sum())
if missing_png:
    raise ValueError(
        f'{missing_png} rows of train_df have no matching file in {TRAIN_PNG}'
    )

MAX_BOXES_PER_IMAGE = 10   # at most this many boxes are kept per image
MIN_BOX_COLUMNS = 8        # later cells read the columns box_0 .. box_7

bboxes_list = []
for raw_boxes in train_df['boxes']:
    # `boxes` holds a stringified list of dicts; anything that is not a
    # parseable string (e.g. NaN) is treated as "no boxes".
    try:
        parsed_boxes = ast.literal_eval(raw_boxes) if isinstance(raw_boxes, str) else []
    except (ValueError, SyntaxError):
        parsed_boxes = []

    # Boxes are stored as [x, y, width, height]. If [x_min, y_min, x_max,
    # y_max] is wanted instead, use x + width and y + height for the last two.
    image_boxes = [
        [box['x'], box['y'], box['width'], box['height']]
        for box in parsed_boxes[:MAX_BOXES_PER_IMAGE]
    ]

    # Images without any box get one small placeholder box.
    if not image_boxes:
        image_boxes = [[0.0, 0.0, 1.0, 1.0]]

    bboxes_list.append(image_boxes)

# Pad every row to a common width so the frame can be built regardless of how
# many boxes the busiest image has; box_0 .. box_7 always exist.
n_box_columns = max([MIN_BOX_COLUMNS] + [len(image_boxes) for image_boxes in bboxes_list])
bboxes_df = pd.DataFrame(
    [image_boxes + [None] * (n_box_columns - len(image_boxes)) for image_boxes in bboxes_list],
    columns=[f'box_{i}' for i in range(n_box_columns)],
    index=train_df.index,
)
train_df = pd.concat([train_df, bboxes_df], axis=1)

# Drop inconsistent rows: the label is not class 0, yet no boxes were recorded.
not_zeros_df = train_df[train_df['integer_label'] != 0]
drop_row = not_zeros_df[not_zeros_df['boxes'].isnull()].index.values
train_df = train_df.drop(drop_row)
train_df = train_df.reset_index(drop=True)
train_df.head()

# Split

In [None]:
# Stratified K-fold split on the integer class label.
skf = StratifiedKFold(config['fold'], shuffle=True, random_state=config['seed'])
folds_df = train_df.copy()

# Start from an integer column so no float -> int cast is needed afterwards
# (the previous cast used `np.int`, which NumPy 1.24 removed).
folds_df['fold'] = -1
for f, (train_idx, valid_idx) in enumerate(skf.split(X=folds_df.index.values, y=folds_df.integer_label.values)):
    # Each row appears in exactly one validation split; that split's number
    # becomes its fold id.
    folds_df.loc[folds_df.index[valid_idx], 'fold'] = f

folds_df['fold'] = folds_df['fold'].astype(int)
# Sanity check: class counts per fold.
folds_df.groupby(['fold', folds_df.integer_label]).size()

In [None]:
def dicom2array(path, voi_lut=True, fix_monochrome=True): # dicom
    """Read a DICOM file and return its pixels as a ``uint8`` array in [0, 255].

    Args:
        path: Location of the DICOM file, passed to ``pydicom.dcmread``.
        voi_lut: When True, run the pixel data through ``apply_voi_lut``.
        fix_monochrome: When True, invert images whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` with the pixel values min-max scaled
        to 0..255. An image whose pixels are all equal comes back as zeros.
    """
    # Named `ds` so the module alias `dicom` (import pydicom as dicom) is not
    # shadowed inside the function.
    ds = pydicom.dcmread(path)
    # VOI LUT (if available by DICOM device) is used to
    # transform raw DICOM data to "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(ds.pixel_array, ds)
    else:
        data = ds.pixel_array
    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and ds.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    data = data - np.min(data)
    peak = np.max(data)
    # A constant image has peak == 0 after the shift; dividing would yield
    # NaN (0/0) and an undefined uint8 cast, so leave the zeros untouched.
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    return data

def normalize_image(image):
    """Min-max scale ``image`` to the range [0, 1].

    Args:
        image: Array-like object exposing ``.max()`` / ``.min()`` whose results
            have ``.item()`` (e.g. a NumPy array or a torch tensor).

    Returns:
        ``(image - min) / (max - min)``. A constant image (max == min) returns
        all zeros instead of dividing by zero.
    """
    image_max = image.max().item()
    image_min = image.min().item()
    value_range = image_max - image_min
    # Constant image: every shifted pixel is already 0, so divide by 1 and
    # avoid the 0/0 that would fill the result with NaN.
    if value_range == 0:
        value_range = 1
    image_norm = (image - image_min) / value_range
    return image_norm

In [None]:
# Scratch check: build the bbox/label pairs for the first row only (the loop
# stops after one iteration).
for i, row in train_df[[f'box_{i}' for i in range(8)] + ['integer_label']].iterrows():
    present = row.dropna()
    # Everything but the last remaining entry is a box; the last is the label.
    # `.iloc` is used because positional `[]` on a label-indexed Series is
    # deprecated in recent pandas.
    bboxes = present.iloc[:-1].values
    labels = present.iloc[-1]
    labels = np.full(len(bboxes), labels)
#     print(labels)
    for bbox, label in zip(bboxes, labels):
        test_dict = dict(bboxes=bbox,
                         labels=label)
    break

In [None]:
def create_anno_file(df, image_size=None):
    """Build one annotation record per row of ``df``.

    Each record has the form::

        {'filename': ..., 'width': ..., 'height': ...,
         'ann': {'bboxes': float32 array (n, 4), 'labels': int64 array (n,)}}

    Args:
        df: DataFrame with the columns ``box_0`` .. ``box_7`` (each cell a
            4-element box or a missing value), ``integer_label`` and an image
            path column (``path_png``, or ``train_png`` when the former is
            absent).
        image_size: Value stored as both width and height of every image.
            Defaults to ``config['image_size']``.

    Returns:
        List of records, in the row order of ``df``.
    """
    if image_size is None:
        image_size = config['image_size']

    box_columns = [f'box_{i}' for i in range(8)]
    # The frame built earlier in this notebook stores paths under
    # 'train_png'; accept that name when 'path_png' is absent.
    path_column = 'path_png' if 'path_png' in df.columns else 'train_png'

    data_infos = []
    # One pass over the rows: each image only receives its own boxes.
    for _, row in df.iterrows():
        # Missing cells (None / NaN) are not sequences and are skipped.
        boxes = [
            cell for cell in row[box_columns].tolist()
            if isinstance(cell, (list, tuple, np.ndarray))
        ]
        annotation = dict(
            # reshape keeps the (n, 4) contract even when n == 0
            bboxes=np.array(boxes, dtype=np.float32).reshape(-1, 4),
            # every box of an image carries that image's class label
            labels=np.full(len(boxes), row['integer_label'], dtype=np.int64),
        )
        data_infos.append(dict(filename=row[path_column],
                               width=image_size,
                               height=image_size,
                               ann=annotation))
    return data_infos

In [None]:
import mmcv
from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset
from mmcv import Config

# force=True lets this cell be re-run without a "already registered" error.
@DATASETS.register_module(force=True)
class SiimDataset(CustomDataset):
    """MMDetection dataset that reads annotations from the training table.

    Registered as ``'SiimDataset'`` so it matches ``cfg.dataset_type``.
    """

    # NOTE(review): class names are an assumption (four classes, index 0 being
    # the negative class) — confirm the order against the label columns.
    CLASSES = ('negative', 'typical', 'indeterminate', 'atypical')

    def load_annotations(self, ann_file):
        """Build the per-image annotation records used by ``CustomDataset``.

        Args:
            ann_file: Either a DataFrame or the path of a CSV file holding the
                columns ``box_0`` .. ``box_7``, ``integer_label`` and an image
                path column (``path_png``, or ``train_png`` when absent). Box
                cells may be 4-element sequences or their string form (as
                they appear after a CSV round trip).

        Returns:
            List of dicts with ``filename``, ``width``, ``height`` and ``ann``
            (``bboxes``: float32 (n, 4), ``labels``: int64 (n,)).
        """
        df = ann_file if isinstance(ann_file, pd.DataFrame) else pd.read_csv(ann_file)

        def as_box(cell):
            """Return the cell as a box sequence, or None for missing cells."""
            if isinstance(cell, str):
                return ast.literal_eval(cell)
            if isinstance(cell, (list, tuple, np.ndarray)):
                return cell
            return None

        box_columns = [f'box_{i}' for i in range(8)]
        path_column = 'path_png' if 'path_png' in df.columns else 'train_png'
        height = config['image_size']
        width = config['image_size']

        data_infos = []
        # One record per row: each image only receives its own boxes.
        for _, row in df.iterrows():
            boxes = [as_box(cell) for cell in row[box_columns].tolist()]
            boxes = [box for box in boxes if box is not None]

            data_info = dict(filename=row[path_column],
                             width=width,
                             height=height,
                             ann=dict(
                                 bboxes=np.array(boxes, dtype=np.float32).reshape(-1, 4),
                                 labels=np.full(len(boxes), row['integer_label'], dtype=np.int64)))
            data_infos.append(data_info)
        return data_infos

    # Keeps the previous (misspelled) method name usable.
    load_annotions = load_annotations


# Keeps the previous (misspelled) class name usable.
SiimDatset = SiimDataset

In [None]:
# Annotation format expected by MMDetection's CustomDataset (one dict per image):
# [
#     {
#         'filename': 'a.jpg',
#         'width': 1280,
#         'height': 720,
#         'ann': {
#             'bboxes': <np.ndarray> (n, 4),
#             'labels': <np.ndarray> (n, ),
#             'bboxes_ignore': <np.ndarray> (k, 4), (optional field)
#             'labels_ignore': <np.ndarray> (k, ) (optional field)
#         }
#     },
#     ...
# ]

In [None]:
# Config.fromfile() needs the path of a base config file; calling it without
# one raises a TypeError.
# NOTE(review): the path below is an assumption (the overrides in the next
# cell touch `model.roi_head`, i.e. a two-stage detector) — point it at the
# config file that is actually available in this environment.
BASE_CONFIG_FILE = './configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py'
cfg = Config.fromfile(BASE_CONFIG_FILE)

In [None]:
from mmdet.apis import set_random_seed

cfg.dataset_type = 'SiimDataset'
# cfg.dataroot = ''

# Point every split at the custom dataset. The bare attribute reads that were
# here before assigned nothing.
for split in ('test', 'train', 'val'):
    split_cfg = cfg.data[split]
    split_cfg.type = cfg.dataset_type
    # NOTE(review): assumes the image paths in the annotation table already
    # contain their directory, so no root/prefix is prepended — confirm.
    split_cfg.data_root = ''
    split_cfg.img_prefix = ''
    # TODO: set split_cfg.ann_file to the annotation table of this split; the
    # value inherited from the base config does not refer to this dataset.

# `num_classes` and `work_dir` were misspelled, so the overrides never reached
# the keys that are actually read.
cfg.model.roi_head.bbox_head.num_classes = config['num_classes']
# cfg.load_from = ''
cfg.work_dir = './'

# Base learning rate 0.02 divided by 8.
cfg.optimizer.lr = 0.02 / 8
cfg.lr_config.warmup = None
cfg.log_config.interval = 5

cfg.evaluation.metric = 'mAP'
cfg.evaluation.interval = 5
cfg.checkpoint_config.interval = 5

cfg.seed = config['seed']

# Imported by name above: `mmdet` itself is never imported as a module in
# this notebook, so `mmdet.apis.set_random_seed` raised a NameError.
set_random_seed(config['seed'], deterministic=False)
cfg.gpu_ids = range(1)

print(f'Config:\n{cfg.pretty_text}')

In [None]:
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector

# Named `datasets` (plural): that is the name used below and by the training
# cell.
datasets = [build_dataset(cfg.data.train)]

model = build_detector(cfg.model,
                       train_cfg=cfg.get('train_cfg'),
                       test_cfg=cfg.get('test_cfg'))
# Give the detector the class names of the training dataset.
model.CLASSES = datasets[0].CLASSES

# Make sure the output directory is configured and exists before training.
if cfg.get('work_dir') is None:
    cfg.work_dir = './'
mmcv.mkdir_or_exist(os.path.abspath(cfg.work_dir))

In [None]:
# Start training: non-distributed, with validation switched on.
train_detector(
    model,
    datasets,
    cfg,
    distributed=False,
    validate=True,
)

In [None]:
from mmdet.apis import inference_detector, show_result_pyplot

# The former path ('kitti_tiny/...') does not belong to any input folder
# defined in this notebook; use the first training PNG as a smoke-test image.
sample_paths = sorted(glob(f'{TRAIN_PNG}/*'))
if not sample_paths:
    raise FileNotFoundError(f'no images found in {TRAIN_PNG}')
img = mmcv.imread(sample_paths[0])

model.cfg = cfg
result = inference_detector(model, img)
show_result_pyplot(model, img, result)