In [None]:
# Offline install of GDCM and supporting conda packages from local .tar.bz2 archives
# shipped in the attached `pydicom-conda-helper` dataset.
# NOTE(review): presumably GDCM is needed so pydicom can decode compressed DICOM pixel data — confirm.
!conda install '/kaggle/input/pydicom-conda-helper/libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/openssl-1.1.1k-h7f98852_0.tar.bz2' -c conda-forge -y

# **Installation Detectron2**

In [None]:
# Offline install of detectron2 0.4 (cu110 / cp37 wheel) together with the packages
# installed ahead of it (omegaconf, iopath, fvcore, pycocotools), followed by
# ensemble_boxes. Everything comes from local files under /kaggle/input.
# NOTE(review): the order looks like dependencies-first — confirm before reordering.
!pip install /kaggle/input/detectron2/omegaconf-2.0.6-py3-none-any.whl

!pip install /kaggle/input/detectron2/iopath-0.1.8-py3-none-any.whl

!pip install /kaggle/input/detectron2/fvcore-0.1.3.post20210317/fvcore-0.1.3.post20210317/

!pip install /kaggle/input/detectron2/pycocotools-2.0.2/dist/pycocotools-2.0.2.tar

!pip install /kaggle/input/detectron2/detectron2-0.4cu110-cp37-cp37m-linux_x86_64.whl

!pip install /kaggle/input/ensembling-boxes-lala/ensemble_boxes-1.0.6-py3-none-any.whl

# **Import libs**

In [None]:
import os
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master/')
from PIL import Image
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
import time
import cv2
import PIL.Image
import random
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import albumentations as A
from albumentations import *
from tqdm import tqdm
from pylab import rcParams
import timm
from albumentations.pytorch import ToTensorV2
import torch.nn.functional as F
from pathlib import Path
from tqdm.notebook import tqdm
from math import ceil
from typing import Any, Dict, List
import detectron2
from numpy import ndarray
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.structures import BoxMode
from detectron2.utils.logger import setup_logger
from detectron2.utils.visualizer import ColorMode, Visualizer
from tqdm import tqdm
import pickle

import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

from ensemble_boxes import *
from collections import Counter

from warnings import filterwarnings
filterwarnings("ignore")
pd.set_option('max_columns', 50)

# **Convert .dcm to .png helper functions**

### Helper Functions

In [None]:
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Load a DICOM file and return its pixels as an 8-bit array.

    Args:
        path: Location of the DICOM file.
        voi_lut: When True, the raw pixel array is passed through
            ``apply_voi_lut`` before scaling; otherwise the raw pixel
            array is used as-is.
        fix_monochrome: When True, pixel values are inverted for files whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        ``np.uint8`` array, min-max scaled to the range [0, 255]. A constant
        image (max == min) yields an all-zero array instead of dividing by
        zero.
    """
    # dcmread is the current name of the reader; read_file is its deprecated alias.
    dicom = pydicom.dcmread(path)
    data = dicom.pixel_array
    if voi_lut:
        data = apply_voi_lut(data, dicom)
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    data = data - np.min(data)
    peak = np.max(data)
    # Guard against 0/0 on constant images, which would produce NaNs and an
    # undefined float -> uint8 cast.
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    return data

def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Convert an array to a PIL image and fit it to a ``size`` x ``size`` box.

    Args:
        array: Image data accepted by ``Image.fromarray``.
        size: Edge length of the target square, in pixels.
        keep_ratio: If True, shrink in place with ``Image.thumbnail`` (aspect
            ratio preserved); otherwise resize to exactly ``(size, size)``.
        resample: PIL resampling filter.

    Returns:
        The resulting ``PIL.Image.Image``.
    """
    target = (size, size)
    im = Image.fromarray(array)
    if not keep_ratio:
        return im.resize(target, resample)
    # thumbnail() modifies the image in place and returns None.
    im.thumbnail(target, resample)
    return im

# **Image Generation**

# Create 1024x1024 .jpg images for the Object Detection task and collect meta-info about the DICOM files

In [None]:
split = 'test'
save_dir = f'/kaggle/tmp/{split}/image/'
# makedirs also creates the intermediate /kaggle/tmp/{split}/ directory.
os.makedirs(save_dir, exist_ok=True)

dicoms = []
image_study_dict = {}

# Walk the test split, write every file as a 1024x1024 .jpg and record the
# original pixel dimensions plus the owning study for each image.
for dirname, _, filenames in tqdm(os.walk(f'../input/siim-covid19-detection/{split}')):
    for file in filenames:
        # pass keep_ratio=True to resize() to keep the original aspect ratio
        xray = read_xray(os.path.join(dirname, file))
        dim0, dim1 = xray.shape[0], xray.shape[1]
        resize(xray, size=1024).save(os.path.join(save_dir, file.replace('.dcm', '.jpg')))

        dicoms.append(file)
        # The study name is taken from the second-to-last component of the directory path.
        image_study_dict[(file + '_image').replace('.dcm', '')] = {
            'dim0': dim0,
            'dim1': dim1,
            'study': dirname.split('/')[-2] + '_study',
            'id': file.replace('.dcm', ''),
        }

# Create meta-data

In [None]:
# One row per image; the dict keys ("<image>_image") become the image_id column.
meta = (
    pd.DataFrame.from_dict(image_study_dict, orient='index')
    .reset_index()
    .rename(columns={'index': 'image_id'})
)
print("meta.shape = ", meta.shape)
meta.head()

# Create **IMAGE <-> STUDY** mapper dataframe

In [None]:
# Two-column lookup table: bare image id -> study id.
study_image_info = meta[['id', 'study']].rename(
    columns={'id': 'image_id', 'study': 'study_id'}
)
print("study_image_info.shape = ", study_image_info.shape)
study_image_info.head()

# Create meta_df with original sizes of .dicoms

In [None]:
# Column order (id, dim0, dim1) matters: the detection helpers unpack rows positionally.
meta_df = meta.loc[:, ['id', 'dim0', 'dim1']]
print("meta_df.shape = ", meta_df.shape)
meta_df.head()

# Create images 512x512 .png for Image Classification Task

In [None]:
split = 'test_512'
save_dir_512 = f'/kaggle/tmp/{split}/image/'
# makedirs also creates the intermediate /kaggle/tmp/{split}/ directory.
os.makedirs(save_dir_512, exist_ok=True)

# This cell only writes 512x512 .png files for the classification models.
# It intentionally leaves `dicoms` and `image_study_dict` untouched: resetting
# them here would wipe the metadata collected while exporting the detection
# images and make the metadata cells return empty frames when re-run.
for dirname, _, filenames in tqdm(os.walk('../input/siim-covid19-detection/test')):
    for file in filenames:
        # pass keep_ratio=True to resize() to keep the original aspect ratio
        xray = read_xray(os.path.join(dirname, file))
        im = resize(xray, size=512)
        im.save(os.path.join(save_dir_512, file.replace('.dcm', '.png')))

# Create images 384x384 .png for Image Classification Task

In [None]:
split = 'test_384'
save_dir_384 = f'/kaggle/tmp/{split}/image/'
# makedirs also creates the intermediate /kaggle/tmp/{split}/ directory.
os.makedirs(save_dir_384, exist_ok=True)

# This cell only writes 384x384 .png files for the classification models.
# It intentionally leaves `dicoms` and `image_study_dict` untouched: resetting
# them here would wipe the metadata collected while exporting the detection
# images and make the metadata cells return empty frames when re-run.
for dirname, _, filenames in tqdm(os.walk('../input/siim-covid19-detection/test')):
    for file in filenames:
        # pass keep_ratio=True to resize() to keep the original aspect ratio
        xray = read_xray(os.path.join(dirname, file))
        im = resize(xray, size=384)
        im.save(os.path.join(save_dir_384, file.replace('.dcm', '.png')))

# **Study Level - Image Classification Task**

In [None]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Models - EfficientNetV2 and EfficientNet-B4 custom

In [None]:
class CovidGeneralModel(nn.Module):
    """Thin wrapper around a timm model with a 4-class head and 3 input channels."""

    def __init__(self, model_name, pretrained=False):
        super().__init__()
        self.model = timm.create_model(
            model_name,
            pretrained=pretrained,
            num_classes=4,
            in_chans=3,
        )

    def forward(self, x):
        """Return the wrapped model's output for ``x`` unchanged."""
        return self.model(x)
    
class CovidEffnetModel(nn.Module):
    """timm backbone followed by a custom pool -> dropout -> linear head (4 outputs).

    The backbone's own ``global_pool`` and ``classifier`` are replaced with
    ``nn.Identity`` so that pooling and classification are done by this
    module's layers instead.
    """

    def __init__(self, model_name, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained, num_classes=4, in_chans=3)
        # Read the feature width before the original classifier is discarded.
        n_features = backbone.classifier.in_features
        backbone.global_pool = nn.Identity()
        backbone.classifier = nn.Identity()
        self.model = backbone
        self.dropout_layer = nn.Dropout(0.5)
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, 4)

    def forward(self, x):
        """Pool the backbone output to one vector per sample and classify it."""
        feature_map = self.model(x)
        pooled = self.pooling(feature_map).view(x.size(0), -1)
        # Dropout is applied to the pooled vector, not to the feature map.
        return self.fc(self.dropout_layer(pooled))

# Image Torch Dataset

In [None]:
class CovidDataset(Dataset):
    """Dataset yielding ``(image, image_id)`` pairs from a DataFrame of file paths.

    Args:
        df: DataFrame with at least the columns ``image_id`` and ``file_path``.
            Its index is reset so rows can be addressed by position.
        transform: Optional callable invoked as ``transform(image=img)`` and
            expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Read one image and return it as a float32 tensor with its id.

        Raises:
            FileNotFoundError: If OpenCV cannot read ``file_path``.
        """
        row = self.df.loc[index]
        image_id = row.image_id
        img = cv2.imread(row.file_path)
        if img is None:
            # cv2.imread signals failure by returning None; fail with the path
            # instead of an opaque error from cvtColor.
            raise FileNotFoundError(f"Could not read image: {row.file_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            img = self.transform(image=img)['image']

        # as_tensor accepts both the numpy array (no transform) and the tensor
        # produced by the transform, and avoids copy-constructing a tensor
        # from a tensor.
        return torch.as_tensor(img, dtype=torch.float32), image_id

# Validation transforms for EffNetV2 and EffNet-B4

In [None]:
def _build_valid_transform(side):
    """Return the inference pipeline: resize to ``side`` x ``side``, normalize, to tensor."""
    steps = [
        A.Resize(side, side, p=1.0),
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        ToTensorV2(),
    ]
    return A.Compose(steps, p=1.0)


# Same pipeline for both branches; only the input resolution differs.
valid_transform_effnetV2 = _build_valid_transform(384)
valid_transform_effnetB4 = _build_valid_transform(512)

# TTA Inference Function

In [None]:
def tta_inference_func(model, test_loader, device, img_size):
    """Run horizontal-flip test-time augmentation and return per-image scores.

    Each batch is evaluated twice (as-is and flipped along the last axis); the
    two sets of logits are averaged per image and passed through a sigmoid.
    Works for any batch size.

    Args:
        model: Module returning one row of logits per input image; the first
            four columns are read as negative / typical / indeterminate /
            atypical.
        test_loader: Iterable of ``(images, image_ids)`` batches where
            ``images`` has shape ``(B, 3, img_size, img_size)``.
        device: Device the images are moved to before the forward pass.
        img_size: Height and width of the input images.

    Returns:
        DataFrame with columns ``image_id``, ``negative``, ``typical``,
        ``indeterminate`` and ``atypical``; empty if the loader yields nothing.
    """
    columns = ['image_id', 'negative', 'typical', 'indeterminate', 'atypical']
    model.eval()
    probs = []
    image_ids = []

    with torch.no_grad():
        for images, batch_ids in tqdm(test_loader):
            x = images.to(device)
            # Shape (2, B, 3, H, W): index 0 = originals, index 1 = h-flipped copies.
            x = torch.stack([x, x.flip(-1)], 0)
            x = x.view(-1, 3, img_size, img_size)
            logits = model(x)
            # Regroup as (2, B, n_classes) and average the two views of each
            # image. The previous view(1, 2, -1) was only valid for B == 1.
            logits = logits.view(2, -1, logits.shape[-1]).mean(0)
            probs.append(logits.sigmoid().cpu())
            image_ids.extend(batch_ids)

    if not probs:
        return pd.DataFrame(columns=columns)

    preds = torch.cat(probs).numpy()
    result = {'image_id': image_ids}
    for col_idx, name in enumerate(columns[1:]):
        result[name] = preds[:, col_idx]
    return pd.DataFrame(result)

# Pytorch Data Loaders for EfficientNetV2 and EfficientNet-B4 models

In [None]:
def _frame_with_paths(image_dir):
    """Copy ``study_image_info`` and add ``image_name`` / ``file_path`` columns for ``image_dir``."""
    frame = study_image_info.copy()
    frame['image_name'] = frame['image_id'] + '.png'
    frame['file_path'] = [os.path.join(image_dir, name) for name in frame['image_name']]
    return frame


df_test_384 = _frame_with_paths(save_dir_384)
df_test_512 = _frame_with_paths(save_dir_512)

# Both loaders share the same settings; only dataset and transform differ.
_loader_kwargs = dict(batch_size=1, shuffle=False, num_workers=24, pin_memory=True)

dataset_effnetV2 = CovidDataset(df_test_384, transform=valid_transform_effnetV2)
test_loader_effnetV2 = torch.utils.data.DataLoader(dataset_effnetV2, **_loader_kwargs)

dataset_effnetB4 = CovidDataset(df_test_512, transform=valid_transform_effnetB4)
test_loader_effnetB4 = torch.utils.data.DataLoader(dataset_effnetB4, **_loader_kwargs)

# Model list for the 384px branch (variables are named `EffNetV2`, but the weights loaded are `vit_base_r50_s16_384`)

In [None]:
MODELS_EffNetV2_PATHS = [
    '../input/weights-vit-base-r50-s16-384-from-last-week/vit_base_r50_s16_384_fold0_best_AP.pth',
    '../input/weights-vit-base-r50-s16-384-from-last-week/vit_base_r50_s16_384_fold1_best_AP.pth',
    '../input/weights-vit-base-r50-s16-384-from-last-week/vit_base_r50_s16_384_fold2_best_AP.pth',
    '../input/weights-vit-base-r50-s16-384-from-last-week/vit_base_r50_s16_384_fold3_best_AP.pth',
    '../input/weights-vit-base-r50-s16-384-from-last-week/vit_base_r50_s16_384_fold4_best_AP.pth',
]

# One model per fold checkpoint: weights are loaded on the CPU first, then the
# model is switched to eval mode and moved to the inference device.
modelsEffNetV2 = []
for path in MODELS_EffNetV2_PATHS:
    model = CovidGeneralModel('vit_base_r50_s16_384', pretrained=False)
    model.load_state_dict(torch.load(path, map_location=torch.device('cpu')))
    modelsEffNetV2.append(model.eval().to(device))

# EfficientNet-B4 model's list

In [None]:
MODELS_EffNetB4_PATHS = [
    '../input/efficientnet-b4-weights-other-skf/tf_efficientnet_b4_ns_fold0_best_AP.pth',
    '../input/efficientnet-b4-weights-other-skf/tf_efficientnet_b4_ns_fold1_best_AP.pth',
    '../input/efficientnet-b4-weights-other-skf/tf_efficientnet_b4_ns_fold2_best_AP.pth',
    '../input/efficientnet-b4-weights-other-skf/tf_efficientnet_b4_ns_fold3_best_AP.pth',
    '../input/efficientnet-b4-weights-other-skf/tf_efficientnet_b4_ns_fold4_best_AP.pth',
]

# One model per fold checkpoint: weights are loaded on the CPU first, then the
# model is switched to eval mode and moved to the inference device.
modelsEffNetB4 = []
for path in MODELS_EffNetB4_PATHS:
    model = CovidEffnetModel('tf_efficientnet_b4_ns', pretrained=False)
    model.load_state_dict(torch.load(path, map_location=torch.device('cpu')))
    modelsEffNetB4.append(model.eval().to(device))

# EfficientNetV2 Predictions

In [None]:
# One prediction frame per fold model of the 384px branch.
df_list_effnetV2 = [
    tta_inference_func(model, test_loader_effnetV2, device, 384)
    for model in modelsEffNetV2
]

# EfficientNet-B4 Predictions

In [None]:
# One prediction frame per fold model of the 512px branch.
df_list_effnetB4 = [
    tta_inference_func(model, test_loader_effnetB4, device, 512)
    for model in modelsEffNetB4
]

# Combining the predictions of two architectures

In [None]:
# All per-fold frames of both branches, 384px models first.
df_list = [*df_list_effnetV2, *df_list_effnetB4]

# Creating a dataframe containing the probabilities of the absence of opacity for detection

In [None]:
# Average every model's scores per image, then turn the mean "negative" score
# into a "none <score> 0 0 1 1" string keyed by "<image_id>_image".
_ensemble_mean = pd.concat(df_list).groupby('image_id').mean().reset_index()
df_none = pd.DataFrame({
    'id': _ensemble_mean['image_id'] + '_image',
    'none': 'none ' + _ensemble_mean['negative'].astype('str') + ' 0 0 1 1',
})
print("df_none.shape = ", df_none.shape)
df_none.head()

# Create result of classification task

### Helper Func

In [None]:
def prepare_data(df):
    """Build the study-level ``PredictionString`` column.

    For every row the four class scores are rendered as
    ``"<class> <score> 0 0 1 1"`` and joined with single spaces, in the order
    negative, typical, indeterminate, atypical.

    Args:
        df: DataFrame with the columns ``study_id``, ``negative``, ``typical``,
            ``indeterminate`` and ``atypical``. It is not modified.

    Returns:
        New DataFrame with the columns ``study_id`` and ``PredictionString``,
        indexed like ``df``.
    """
    class_names = ['negative', 'typical', 'indeterminate', 'atypical']
    # Work on derived Series only, so the caller's score columns stay numeric.
    parts = [name + ' ' + df[name].astype(str) + ' 0 0 1 1' for name in class_names]
    prediction = parts[0]
    for part in parts[1:]:
        prediction = prediction + ' ' + part
    return df[['study_id']].assign(PredictionString=prediction)

In [None]:
# Average the scores of all models per image.
df_result = pd.concat(df_list).groupby('image_id').mean()
df_result = df_result.reset_index()

# Use a dedicated name for the image -> study mapping. Rebinding `meta` here
# would replace the DICOM metadata frame and break the earlier cells that read
# its id / dim0 / dim1 / study columns when they are re-run.
image_to_study = df_test_512[['image_id', 'study_id']]
df_result = image_to_study.merge(df_result, on='image_id')

# A study takes, for each class, the highest score among its images.
for class_name in ['negative', 'typical', 'indeterminate', 'atypical']:
    df_result[class_name] = df_result.groupby(['study_id'])[class_name].transform(lambda x: np.max(x))

df_result = df_result[['study_id', 'negative', 'typical', 'indeterminate', 'atypical']]
df_result = df_result.drop_duplicates()
print("df_result.shape = ", df_result.shape)
df_result.head()

In [None]:
# Format the study-level scores and expose the study id under the submission's `id` column.
df_result = (
    prepare_data(df_result)
    .reset_index(drop=True)
    .rename(columns={'study_id': 'id'})
)
print("df_result.shape = ", df_result.shape)
df_result.head()

### Prepare results to submit

In [None]:
# The competition's sample submission provides the full list of ids to fill in.
sample_submission = pd.read_csv(
    filepath_or_buffer='../input/siim-covid19-detection/sample_submission.csv'
)
print("sample_submission.shape = ", sample_submission.shape)
sample_submission.head()

In [None]:
# Overwrite the sample submission's PredictionString for every study id we predicted.
df_sample_submit = sample_submission.set_index('id')
df_result = df_result.set_index('id')

df_submit = df_sample_submit.copy()
df_submit.loc[df_result.index, 'PredictionString'] = df_result['PredictionString'].to_numpy()
df_submit = df_submit.reset_index()
print("df_submit.shape = ", df_submit.shape)
df_submit.head()

# **Object Detection Task**

### Helper functions

In [None]:
def get_COVID19_data_dicts_test(
        root_imgdir: str,
        test_meta: pd.DataFrame,
        use_cache: bool = False,
        debug: bool = False,
):
    """Build the list of per-image record dicts for the test images.

    Every record holds ``file_name``, ``image_id``, ``height`` and ``width``.
    Height and width are read once from the first image and reused for all
    records, i.e. all images are assumed to share the same resized shape.

    Args:
        root_imgdir: Directory prefix (including the trailing separator) of
            the ``<image_id>.jpg`` files.
        test_meta: DataFrame whose first column holds the image ids.
        use_cache: If True and a cache file exists, load the records from it
            instead of rebuilding them.
        debug: If True, only the first 100 rows are used. Debug and full runs
            use separate cache files.

    Returns:
        List of record dicts, one per row of ``test_meta``.

    Raises:
        FileNotFoundError: If the first image cannot be read.
    """
    debug_str = f"_debug{int(debug)}"
    # The debug flag is part of the file name so that a debug run never serves
    # its truncated cache to a full run (and vice versa).
    cache_path = Path(".") / f"dataset_dicts_cache_test{debug_str}.pkl"

    if use_cache and cache_path.exists():
        # The cache is a pickle written by this function; do not point this at
        # files from untrusted sources.
        with open(cache_path, mode="rb") as f:
            return pickle.load(f)

    print("Creating data...")
    if debug:
        test_meta = test_meta.iloc[:100]  # For debug....

    dataset_dicts = []
    if len(test_meta) > 0:
        first_path = root_imgdir + f'{test_meta.iloc[0, 0]}.jpg'
        image = cv2.imread(first_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {first_path}")
        resized_height, resized_width = image.shape[:2]

        for image_id in tqdm(test_meta.iloc[:, 0], total=len(test_meta)):
            dataset_dicts.append({
                "file_name": root_imgdir + f'{image_id}.jpg',
                "image_id": image_id,
                "height": resized_height,
                "width": resized_width,
            })

    with open(cache_path, mode="wb") as f:
        pickle.dump(dataset_dicts, f)
    return dataset_dicts


def format_pred(labels: ndarray, boxes: ndarray, scores: ndarray) -> str:
    """Render detections as a space-separated submission string.

    Each detection becomes ``"<label> <score> <xmin> <ymin> <xmax> <ymax>"``.
    Label 0 is written as ``opacity`` and label 1 as ``none``; box coordinates
    are truncated to integers, and ``none`` entries always use the box
    ``0 0 1 1``.

    Args:
        labels: Class id per detection.
        boxes: One ``(xmin, ymin, xmax, ymax)`` row per detection.
        scores: Confidence per detection.

    Returns:
        All detections joined by single spaces (empty string for no detections).
    """
    chunks = []
    for label, score, bbox in zip(labels, scores, boxes):
        is_opacity = bool(label == 0)
        is_none = (not is_opacity) and bool(label == 1)
        name = 'opacity' if is_opacity else ('none' if is_none else label)

        xmin, ymin, xmax, ymax = bbox.astype(np.int64)
        if is_none:
            xmin, ymin, xmax, ymax = 0, 0, 1, 1

        chunks.append(f"{name} {score} {xmin} {ymin} {xmax} {ymax}")
    return " ".join(chunks)


def get_pred(image_id: str, labels: ndarray, boxes: ndarray, scores: ndarray) -> list:
    """Convert one image's detections into a list of flat record dicts.

    Args:
        image_id: Id stored in every record.
        labels: Class id per detection.
        boxes: One ``(xmin, ymin, xmax, ymax)`` row per detection.
        scores: Confidence per detection.

    Returns:
        One dict per detection with the keys ``image_id``, ``x_min``,
        ``y_min``, ``x_max``, ``y_max``, ``score`` and ``label``. Coordinates
        are converted to float64.
    """
    preds = []
    for label, score, bbox in zip(labels, scores, boxes):
        # After astype(np.float64) the coordinates can never be None, so the
        # former `xmin is None` check was unreachable and has been removed.
        xmin, ymin, xmax, ymax = bbox.astype(np.float64)
        preds.append({
            'image_id': image_id,
            'x_min': xmin,
            'y_min': ymin,
            'x_max': xmax,
            'y_max': ymax,
            'score': score,
            'label': label
        })
    return preds



def predict_batch(predictor: DefaultPredictor, im_list: List[ndarray]) -> List:
    """Run the predictor's model on a list of images in a single forward call.

    The images are fed at their current size; no resizing is applied here.

    Args:
        predictor: Object exposing ``input_format`` and a callable ``model``.
        im_list: Images as ``(H, W, C)`` arrays.

    Returns:
        Whatever ``predictor.model`` returns for the batch.
    """

    def _as_model_input(original_image):
        # Reverse the channel axis when the model expects RGB input.
        if predictor.input_format == "RGB":
            original_image = original_image[:, :, ::-1]
        height, width = original_image.shape[:2]
        # HWC -> CHW float32 tensor.
        tensor = torch.as_tensor(original_image.astype("float32").transpose(2, 0, 1))
        return {"image": tensor, "height": height, "width": width}

    with torch.no_grad():  # https://github.com/sphinx-doc/sphinx/issues/4258
        return predictor.model([_as_model_input(im) for im in im_list])

    
def get_model_predictions(dataset_dicts, predictor, meta_df, fold_num):
    """Run one detector over all images and collect its boxes in original pixel units.

    Images are processed in batches of 4. Predicted boxes are rescaled from the
    resized image to the original size stored in ``meta_df``. An image without
    any detection contributes a single placeholder row (label 1, score 1.0,
    box 0 0 1 1).

    Args:
        dataset_dicts: Records with a ``file_name`` entry, in the same order as
            the rows of ``meta_df``.
        predictor: Predictor handed to ``predict_batch``.
        meta_df: DataFrame whose rows unpack positionally to
            ``(image_id, dim0, dim1)``.
        fold_num: Value written to the ``weight`` column of the result.

    Returns:
        DataFrame with one row per box and the extra ``weight`` column.
    """
    batch_size = 4
    n_items = len(dataset_dicts)
    records = []
    position = 0  # row of meta_df matching the image currently processed

    for start in tqdm(range(0, n_items, batch_size)):
        batch = dataset_dicts[start:start + batch_size]
        images = [cv2.imread(entry["file_name"]) for entry in batch]
        outputs_list = predict_batch(predictor, images)

        for im, outputs in zip(images, outputs_list):
            resized_height, resized_width, _ = im.shape
            image_id, dim0, dim1 = meta_df.iloc[position].values
            instances = outputs["instances"]

            if len(instances) == 0:
                # Placeholder row for an image without detections.
                records.append({
                    'image_id': image_id,
                    'x_min': 0,
                    'y_min': 0,
                    'x_max': 1,
                    'y_max': 1,
                    'score': 1.0,
                    'label': 1
                })
            else:
                fields: Dict[str, Any] = instances.get_fields()
                # shape (n_boxes, 4), columns (xmin, ymin, xmax, ymax)
                pred_boxes = fields["pred_boxes"].tensor

                # Scale from the resized image back to the original size (in place).
                pred_boxes[:, [0, 2]] *= dim1 / resized_width
                pred_boxes[:, [1, 3]] *= dim0 / resized_height

                records.extend(get_pred(
                    image_id,
                    fields["pred_classes"].cpu().numpy(),
                    pred_boxes.cpu().numpy(),
                    fields["scores"].cpu().numpy(),
                ))
            position += 1

    result_df = pd.DataFrame(records)
    result_df['weight'] = fold_num
    return result_df

## Configuration

In [None]:
# Detection classes and their integer ids.
thing_classes = ["opacity"]
category_name_to_id = dict(zip(thing_classes, range(len(thing_classes))))

debug = False
root_imgdir = "/kaggle/tmp/test/image/"
outdir = "results/detection"

# Create Object Detection Model

In [None]:
# Fold-0 detector: start from detectron2's default config, merge the model-zoo
# RetinaNet R101-FPN 3x config, then point it at the fine-tuned fold checkpoint.
cfg = get_cfg()
original_output_dir = cfg.OUTPUT_DIR
cfg.OUTPUT_DIR = str(outdir)
print(f"cfg.OUTPUT_DIR {original_output_dir} -> {cfg.OUTPUT_DIR}")

cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_101_FPN_3x.yaml"))
cfg.DATALOADER.NUM_WORKERS = 4
# NOTE(review): the base config is a RetinaNet, but the class count and score
# threshold below are set under MODEL.ROI_HEADS. Presumably RetinaNet reads
# MODEL.RETINANET.NUM_CLASSES / MODEL.RETINANET.SCORE_THRESH_TEST instead, in
# which case these two lines have no effect — confirm against the training
# config before changing them, since the checkpoint's head size must match.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(thing_classes)
cfg.MODEL.WEIGHTS = str("../input/weights-retinanet-4000-jpg/F0_RetinaNet_4000_jpg.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.0
predictor_0 = DefaultPredictor(cfg)

In [None]:
# Fold-1 detector: start from detectron2's default config, merge the model-zoo
# RetinaNet R101-FPN 3x config, then point it at the fine-tuned fold checkpoint.
cfg = get_cfg()
original_output_dir = cfg.OUTPUT_DIR
cfg.OUTPUT_DIR = str(outdir)
print(f"cfg.OUTPUT_DIR {original_output_dir} -> {cfg.OUTPUT_DIR}")

cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_101_FPN_3x.yaml"))
cfg.DATALOADER.NUM_WORKERS = 4
# NOTE(review): these MODEL.ROI_HEADS overrides presumably do not apply to a
# RetinaNet config (which has its own MODEL.RETINANET section) — confirm
# against the training config before changing them.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(thing_classes)
cfg.MODEL.WEIGHTS = str("../input/weights-retinanet-4000-jpg/F1_RetinaNet_4000_jpg_2_train_iter.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.0
predictor_1 = DefaultPredictor(cfg)

In [None]:
# Fold-2 detector: start from detectron2's default config, merge the model-zoo
# RetinaNet R101-FPN 3x config, then point it at the fine-tuned fold checkpoint.
cfg = get_cfg()
original_output_dir = cfg.OUTPUT_DIR
cfg.OUTPUT_DIR = str(outdir)
print(f"cfg.OUTPUT_DIR {original_output_dir} -> {cfg.OUTPUT_DIR}")

cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_101_FPN_3x.yaml"))
cfg.DATALOADER.NUM_WORKERS = 4
# NOTE(review): these MODEL.ROI_HEADS overrides presumably do not apply to a
# RetinaNet config (which has its own MODEL.RETINANET section) — confirm
# against the training config before changing them.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(thing_classes)
cfg.MODEL.WEIGHTS = str("../input/weights-retinanet-4000-jpg/F2_RetinaNet_4000_jpg.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.0
predictor_2 = DefaultPredictor(cfg)

In [None]:
# Fold-3 detector: start from detectron2's default config, merge the model-zoo
# RetinaNet R101-FPN 3x config, then point it at the fine-tuned fold checkpoint.
cfg = get_cfg()
original_output_dir = cfg.OUTPUT_DIR
cfg.OUTPUT_DIR = str(outdir)
print(f"cfg.OUTPUT_DIR {original_output_dir} -> {cfg.OUTPUT_DIR}")

cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_101_FPN_3x.yaml"))
cfg.DATALOADER.NUM_WORKERS = 4
# NOTE(review): these MODEL.ROI_HEADS overrides presumably do not apply to a
# RetinaNet config (which has its own MODEL.RETINANET section) — confirm
# against the training config before changing them.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(thing_classes)
cfg.MODEL.WEIGHTS = str("../input/weights-retinanet-4000-jpg/F3_RetinaNet_4000_jpg.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.0
predictor_3 = DefaultPredictor(cfg)

In [None]:
# Fold-4 detector: start from detectron2's default config, merge the model-zoo
# RetinaNet R101-FPN 3x config, then point it at the fine-tuned fold checkpoint.
cfg = get_cfg()
original_output_dir = cfg.OUTPUT_DIR
cfg.OUTPUT_DIR = str(outdir)
print(f"cfg.OUTPUT_DIR {original_output_dir} -> {cfg.OUTPUT_DIR}")

cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_101_FPN_3x.yaml"))
cfg.DATALOADER.NUM_WORKERS = 4
# NOTE(review): these MODEL.ROI_HEADS overrides presumably do not apply to a
# RetinaNet config (which has its own MODEL.RETINANET section) — confirm
# against the training config before changing them.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(thing_classes)
cfg.MODEL.WEIGHTS = str("../input/weights-retinanet-4000-jpg/F4_RetinaNet_4000_jpg.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.0
predictor_4 = DefaultPredictor(cfg)

## Register COCO dicts

In [None]:
# DatasetCatalog.register refuses to register the same name twice, so guard the
# call to keep this cell re-runnable within one kernel session.
if "COVID19_data_test" not in DatasetCatalog.list():
    DatasetCatalog.register(
        "COVID19_data_test", lambda: get_COVID19_data_dicts_test(
            root_imgdir=root_imgdir,
            test_meta=meta_df,
            use_cache=False,
            debug=debug
        )
    )
MetadataCatalog.get("COVID19_data_test").set(thing_classes=thing_classes)
metadata = MetadataCatalog.get("COVID19_data_test")

# The records are also built eagerly here because the prediction loop consumes
# them directly rather than going through the catalog.
dataset_dicts = get_COVID19_data_dicts_test(
    root_imgdir=root_imgdir,
    test_meta=meta_df,
    use_cache=False,
    debug=debug
)

## Make Object Detection Predictions

In [None]:
# One prediction frame per fold detector; the fold index is stored in the `weight` column.
fold_predictors = [predictor_0, predictor_1, predictor_2, predictor_3, predictor_4]
results_dfs = [
    get_model_predictions(dataset_dicts, predictor, meta_df, fold_id)
    for fold_id, predictor in enumerate(fold_predictors)
]

In [None]:
# Stack the per-fold frames row-wise (the original per-frame index is kept).
result_df = pd.concat(results_dfs, axis=0)
print("result_df.shape = ", result_df.shape)
result_df.head()

# Fuse the per-fold boxes with non-maximum weighted (NMW)

In [None]:
iou_thr = 0.55
skip_box_thr = 0.0000
sigma = 0.1  # not used by non_maximum_weighted
viz_images = []
wbf_result_dicts = []

box_columns = ['x_min', 'y_min', 'x_max', 'y_max']
fold_ids = [0, 1, 2, 3, 4]

# Fuse the five folds' boxes image by image. Grouping once avoids rescanning
# the full prediction frame for every (image, fold) pair; sort=False keeps the
# images in order of first appearance. The fusion only needs box coordinates,
# so no image file is read here.
for img_id, img_preds in tqdm(result_df.groupby('image_id', sort=False)):
    _, dim0, dim1 = meta_df[meta_df.id == img_id].iloc[0].values
    # ensemble_boxes works on coordinates normalized by the original width (dim1) / height (dim0).
    scale = (dim1, dim0, dim1, dim0)

    boxes_viz = []
    labels_viz = []
    scores_viz = []
    for weight in fold_ids:
        img_annotations = img_preds[img_preds.weight == weight]
        boxes_viz.append((img_annotations[box_columns].to_numpy() / scale).tolist())
        labels_viz.append(img_annotations['label'].to_numpy().tolist())
        scores_viz.append(img_annotations['score'].to_numpy().tolist())

    # nms(...) and weighted_boxes_fusion(...) from ensemble_boxes take the same
    # list-of-lists inputs if another fusion method is to be tried.
    boxes, scores, box_labels = non_maximum_weighted(
        boxes_viz, scores_viz, labels_viz, weights=None,
        iou_thr=iou_thr, skip_box_thr=skip_box_thr,
    )

    # Back to original pixel coordinates.
    boxes = (boxes * scale).tolist()
    box_labels = box_labels.astype(int).tolist()
    box_scores = scores.tolist()

    for box, box_score, box_label in zip(boxes, box_scores, box_labels):
        wbf_result_dicts.append({
            'image_id': img_id,
            'x_min': box[0],
            'y_min': box[1],
            'x_max': box[2],
            'y_max': box[3],
            'score': box_score,
            'label': box_label
        })

In [None]:
# One row per fused box.
wbf_result = pd.DataFrame(data=wbf_result_dicts)
print("wbf_result.shape = ", wbf_result.shape)
wbf_result.head()

In [None]:
# Build one PredictionString per image from its fused boxes; sort=False keeps
# the images in order of first appearance.
results_list = [
    {
        "image_id": im_id,
        "PredictionString": format_pred(
            annotations[['label']].to_numpy(),
            annotations[['x_min', 'y_min', 'x_max', 'y_max']].to_numpy(),
            annotations['score'].to_numpy(),
        ),
    }
    for im_id, annotations in wbf_result.groupby('image_id', sort=False)
]
submission_det = pd.DataFrame(results_list, columns=['image_id', 'PredictionString'])

In [None]:
# Switch to "<image_id>_image" ids and append the classifier-derived "none"
# string after each image's box predictions.
submission_det = (
    submission_det
    .rename(columns={'image_id': 'id'})
    .assign(id=lambda frame: frame['id'] + '_image')
    .merge(df_none, on='id')
)
submission_det['PredictionString'] = submission_det['PredictionString'] + ' ' + submission_det['none']
submission_det = submission_det[['id', 'PredictionString']]
print("submission_det.shape = ", submission_det.shape)
submission_det.head()

# Prepare detection results

In [None]:
# Start from the submission that already holds the study-level rows and fill
# in the image-level rows.
sample_submission = df_submit.copy()

df_sample_submit = sample_submission.set_index('id')
df_result = submission_det.set_index('id')

df_submit_result = df_sample_submit.copy()
df_submit_result.loc[df_result.index, 'PredictionString'] = df_result['PredictionString'].to_numpy()

df_submit_result = df_submit_result.reset_index()
print("df_submit_result.shape= ", df_submit_result.shape)
df_submit_result.head()

In [None]:
# Sanity check: show the last rows of the combined submission.
df_submit_result.tail(5)

In [None]:
# Write the final submission with exactly the two required columns.
df_submit_result.loc[:, ['id', 'PredictionString']].to_csv('submission.csv', index=False)