In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import os
import matplotlib.pyplot as plt
import cv2

In [None]:
# Install the package shipped in the attached `weighted-box-fusion` input dataset (local path, not PyPI).
# NOTE(review): presumably this provides the `ensemble_boxes` module imported in the Ensemble section — confirm.
!pip install /kaggle/input/weighted-box-fusion

In [None]:
# Copy the GDCM archive from the input dataset, extract it, and install the bundled conda package offline.
# NOTE(review): GDCM is presumably needed by pydicom to decode compressed DICOM pixel data — confirm.
!cp /kaggle/input/gdcm-conda-install/gdcm.tar .
!tar -xvzf gdcm.tar
!conda install --offline ./gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2

In [None]:
# Copy the inference model files into ./models so later cells can refer to them by relative path.
!cp -r /kaggle/input/siim-covid-inference-models ./models

In [None]:
# Copy the YOLOv5 and YOLOv4 folders into the working directory (detect.py and the Makefile are used from there).
!cp -r /kaggle/input/yolov5 ./
!cp -r /kaggle/input/yolov4 ./

# Prepare test data

In [None]:
# Root of the competition data; the sample submission and the test images are read from here.
data_path = "/kaggle/input/siim-covid19-detection/"

In [None]:
# Load the sample submission. Later cells overwrite its "PredictionString" column for the
# "<image id>_image" and "<study id>_study" rows.
sub_df = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))

In [None]:
# Recursively collect the full path of every file below the test directory.
test_path = os.path.join(data_path, "test")
test_filenames = [
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk(test_path)
    for file_name in file_names
]

In [None]:
# Map every image id (file name minus its last four characters, i.e. the extension)
# to the name of the top-level study directory that contains it.
# Layout walked here: <test_path>/<study_dir>/<sub_dir>/<image file>.
study_mapping = {}

for study_dir in os.listdir(test_path):
    study_root = os.path.join(test_path, study_dir)
    for sub_dir in os.listdir(study_root):
        series_root = os.path.join(study_root, sub_dir)
        for image_name in os.listdir(series_root):
            study_mapping[image_name[:-4]] = study_dir

In [None]:
# Output folder for the PNG versions of the test images (no error if it already exists).
os.makedirs("dataset/test", exist_ok=True)

In [None]:
import os

from PIL import Image
import pandas as pd
from tqdm.auto import tqdm

In [None]:
# image id -> (width, height) of the original pixel array; filled in by save_dcm_as_png.
orig_shapes = {}

In [None]:
import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Read a DICOM file and return its pixel data as an 8-bit grayscale array.

    Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Args:
        path: Path of the DICOM file to read.
        voi_lut: When true, pass the raw pixel data through ``apply_voi_lut``
            (VOI LUT, if provided by the DICOM device, gives the
            "human-friendly" view).
        fix_monochrome: When true, invert images whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        ``np.uint8`` array min-max scaled to the range 0..255. A constant
        image (max equal to min) is returned as all zeros.
    """
    # `dcmread` replaces the deprecated `read_file` alias.
    dicom = pydicom.dcmread(path)

    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # Depending on this value, the X-ray may look inverted - fix that.
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    # Guard against 0/0 on constant images, which would produce NaNs before the uint8 cast.
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

In [None]:
def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Convert an array to a PIL image and scale it to fit a ``size`` x ``size`` box.

    Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image

    Args:
        array: Image data accepted by ``Image.fromarray``.
        size: Edge length of the square target box, in pixels.
        keep_ratio: If true, use ``Image.thumbnail`` with the (size, size) box;
            otherwise use ``Image.resize`` to exactly (size, size).
        resample: Resampling filter passed to PIL.

    Returns:
        The resized PIL image.
    """
    picture = Image.fromarray(array)
    target = (size, size)

    if not keep_ratio:
        return picture.resize(target, resample)

    # thumbnail() works in place and returns None, so hand back the same object.
    picture.thumbnail(target, resample)
    return picture

In [None]:
def save_dcm_as_png(source, dest, size = 832):
    """Convert one DICOM file to a square image file and remember its original size.

    Args:
        source: Path of the DICOM file; the last path component minus its final
            four characters is used as the image id.
        dest: Path the resized image is saved to.
        size: Edge length passed to ``resize``.

    Side effects:
        Stores ``(width, height)`` of the original pixel array in the
        module-level ``orig_shapes`` dict under the image id.
    """
    pixels = read_xray(source)

    height, width = pixels.shape[0], pixels.shape[1]
    file_name = source.split("/")[-1]
    orig_shapes[file_name[:-4]] = (width, height)

    resize(pixels, size).save(dest)

In [None]:
# Convert every test file to dataset/test/<same base name>.png.
for dcm_path in test_filenames:
    png_name = dcm_path.split("/")[-1][:-3] + "png"
    save_dcm_as_png(dcm_path, os.path.join("dataset/test", png_name))

# Image-Level Inference

## YOLOv5

In [None]:
# Run YOLOv5 detection (weights: ./models/yolov5x_v2.pt) on the converted PNGs in ./dataset/test.
# The label files written by this run are parsed by the following cells
# (presumably enabled by --save-txt/--save-conf — confirm against detect.py).
# NOTE(review): the PNGs were saved at 832 px while --img is 608 — confirm this is intentional.
!python ./yolov5/detect.py --augment --weights ./models/yolov5x_v2.pt \
                                      --source ./dataset/test \
                                      --img 608 \
                                      --conf 0.005 \
                                      --iou-thres 0.5 \
                                      --save-txt \
                                      --save-conf \
                                      --nosave

In [None]:
# Folder holding the YOLOv5 label files, and the list of files in it.
# NOTE(review): assumes the detect run above wrote to "exp" (presumably the first run in this session) — confirm.
preds_path = 'runs/detect/exp/labels'
prediction_files = os.listdir(preds_path)

In [None]:
# File names of the converted test PNGs; reused below to key the per-model result dicts
# and as the image list of the Faster R-CNN loop.
images_list = os.listdir("dataset/test")

In [None]:
# One independent dict per result kind, each mapping image id -> empty list,
# so images without any YOLOv5 detection still have an entry.
yolov5_boxes, yolov5_scores, yolov5_labels = (
    {image_name[:-4]: [] for image_name in images_list} for _ in range(3)
)

In [None]:
# Parse the YOLOv5 label files into the yolov5_* dicts.
# Each non-empty line is read as: <class> <x_center> <y_center> <width> <height> ... <confidence>,
# where the confidence is the last field. Coordinates are stored as read (no scaling is applied
# here); the ensemble cell multiplies the fused boxes by the original image size.
for pred_file in prediction_files:
    image_id = pred_file[:-4]  # label file name without its 4-character extension

    pred_file_path = os.path.join(preds_path, pred_file)

    with open(pred_file_path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines instead of failing on float("").
                continue

            # `np.float` was only an alias of the builtin float and has been removed from NumPy.
            values = np.array(fields, dtype=np.float64)
            x_c, y_c, w_b, h_b = values[1:5]

            # Centre/size -> corner representation.
            xmin = x_c - (w_b / 2)
            xmax = x_c + (w_b / 2)
            ymin = y_c - (h_b / 2)
            ymax = y_c + (h_b / 2)

            yolov5_boxes[image_id].append([xmin, ymin, xmax, ymax])
            yolov5_scores[image_id].append(values[-1])
            yolov5_labels[image_id].append(0)  # single class

## YOLOv4

In [None]:
# Enter the yolov4 folder and switch the Makefile flags OPENCV, GPU, CUDNN and CUDNN_HALF from 0 to 1.
%cd yolov4
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile
!sed -i 's/GPU=0/GPU=1/' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile
!sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1/' Makefile

In [None]:
# Verify CUDA: print the version of the nvcc compiler found under /usr/local/cuda.
!/usr/local/cuda/bin/nvcc --version

In [None]:
# Go to the system library directory and delete the libcuda.so found there;
# a replacement is copied in from the input dataset by the following cell.
# NOTE(review): presumably needed so that the darknet build links correctly — confirm.
%cd /usr/lib/gcc/x86_64-linux-gnu/7/../../../x86_64-linux-gnu/
!rm libcuda.so

In [None]:
# Copy libcuda.so from the input dataset into the current directory
# (the system library directory entered by the preceding %cd).
!cp /kaggle/input/libcuda/libcuda.so .

In [None]:
# Return to the yolov4 folder and build it with the Makefile flags edited earlier.
%cd /kaggle/working/yolov4
!make

In [None]:
# Folder for the YOLOv4 inputs and outputs; the path is relative to the yolov4 directory.
os.makedirs("../runs/v4", exist_ok=True)

In [None]:
%%writefile ../models/obj.data
classes = 1
train = ./data/train.txt
valid = ./data/valid.txt
names = ../models/obj.names
backup = /content/drive/My Drive/siim_covid/models/yolov4

In [None]:
# Copy the converted test PNGs into the YOLOv4 run folder (becomes ../runs/v4/test).
!cp -r ../dataset/test ../runs/v4 

In [None]:
import os

# Build the list of PNG paths (relative to the yolov4 folder) and write it, one path per line,
# to test.txt; that file is fed to darknet through stdin in the next cell.
image_files = [
    "../runs/v4/test/" + filename
    for filename in os.listdir(os.path.join("../runs/v4", "test"))
    if filename.endswith(".png")
]

# The `with` block closes the file; no explicit close() is needed.
with open("../runs/v4/test.txt", "w") as outfile:
    outfile.writelines(image + "\n" for image in image_files)


In [None]:
# Run the darknet detector on the image paths listed in test.txt (fed through stdin)
# and write the detections to ../runs/v4/result.json.
# The 0.005 threshold is the same value used for the YOLOv5 run.
!./darknet detector test ../models/obj.data ../models/yolov4-obj.cfg \
../models/yolov4.weights -ext_output -dont_show -thresh 0.005 \
-out ../runs/v4/result.json < ../runs/v4/test.txt

In [None]:
import json
# Load the darknet detections; the following cells read the "filename" and "objects"
# entries of every element.
with open ("../runs/v4/result.json", "r") as f:
    results = json.load(f)

In [None]:
# One independent dict per result kind, keyed by image id
# (last path component of "filename" minus its 4-character extension).
yolov4_boxes, yolov4_scores, yolov4_labels = (
    {result["filename"].split("/")[-1][:-4]: [] for result in results} for _ in range(3)
)

In [None]:
# Convert every darknet detection from relative centre/size form to
# [x1, y1, x2, y2] corners and collect it per image.
for result in results:
    image_id = result["filename"].split("/")[-1][:-4]

    for det in result["objects"]:
        rel = det["relative_coordinates"]
        center_x = float(rel["center_x"])
        center_y = float(rel["center_y"])
        half_w = float(rel["width"]) / 2
        half_h = float(rel["height"]) / 2

        corners = [center_x - half_w, center_y - half_h,
                   center_x + half_w, center_y + half_h]

        yolov4_boxes[image_id].append(corners)
        yolov4_scores[image_id].append(det["confidence"])
        yolov4_labels[image_id].append(0)  # single class

## Faster R-CNN (Detectron2)

In [None]:
import torch, torchvision
# Print the PyTorch version and whether CUDA is available.
print(torch.__version__, torch.cuda.is_available())

In [None]:
# Leave the yolov4 folder (back to /kaggle/working, given the earlier `%cd /kaggle/working/yolov4`).
%cd ..

In [None]:
# Install Detectron2 and the packages listed before it from local files in the `detectron2` input dataset.
!pip install /kaggle/input/detectron2/omegaconf-2.0.6-py3-none-any.whl
!pip install /kaggle/input/detectron2/iopath-0.1.8-py3-none-any.whl
!pip install /kaggle/input/detectron2/fvcore-0.1.3.post20210317/fvcore-0.1.3.post20210317/
!pip install /kaggle/input/detectron2/pycocotools-2.0.2/dist/pycocotools-2.0.2.tar
!pip install /kaggle/input/detectron2/detectron2-0.4cu110-cp37-cp37m-linux_x86_64.whl

In [None]:
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.utils.visualizer import ColorMode

In [None]:
# Start from the model-zoo config "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
# and override the settings needed for inference with the local checkpoint.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = "./models/model_0004199.pth"  # checkpoint used for inference (from the copied ./models folder)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.005  # custom testing threshold; same value as both YOLO runs
# NOTE(review): the anchor ratios and ROI-head values below presumably have to match the training config — confirm.
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.383, 0.61, 1.0, 1.64, 2.61]]
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # single detection class

In [None]:
# One independent dict per result kind, each mapping image id -> empty list,
# so images without any Faster R-CNN detection still have an entry.
frcnn_boxes, frcnn_scores, frcnn_labels = (
    {image_name[:-4]: [] for image_name in images_list} for _ in range(3)
)

In [None]:
# Build the Detectron2 predictor from the configuration defined above.
predictor = DefaultPredictor(cfg)

In [None]:
# Run the Faster R-CNN predictor on every converted PNG and store the boxes
# normalised by the image width and height, matching the relative coordinates kept for the YOLO models.
for image_name in images_list:
    image_path = os.path.join("./dataset/test", image_name)
    image = cv2.imread(image_path)  # BGR
    if image is None:
        # cv2.imread returns None instead of raising on unreadable files.
        raise FileNotFoundError("Image Not Found " + image_path)

    image_id = image_name[:-4]

    dets = predictor(image)["instances"]
    fields = dets.get_fields()

    pred_scores = np.array(fields["scores"].cpu())  # shape (n_boxes,)
    pred_boxes = np.array(fields["pred_boxes"].tensor.cpu())  # shape (n_boxes, 4): (xmin, ymin, xmax, ymax)

    # Normalise by the size of the image actually fed to the predictor rather than a
    # hard-coded 832, so the cell keeps working if the resize size changes.
    image_height, image_width = image.shape[:2]
    scale = np.array([image_width, image_height, image_width, image_height], dtype=np.float32)
    pred_boxes = pred_boxes / scale

    for box, score in zip(pred_boxes, pred_scores):
        frcnn_boxes[image_id].append(box.tolist())
        frcnn_scores[image_id].append(float(score))
        frcnn_labels[image_id].append(0)  # single class

## Ensemble

In [None]:
from ensemble_boxes import *

In [None]:
# One weight per entry of the six-element lists passed to weighted_boxes_fusion below,
# in the order yolov4, yolov5, frcnn, yolov4, yolov5, frcnn.
weights = [2, 2, 3, 2, 2, 3]

In [None]:
# Fuse the three detectors per image with Weighted Boxes Fusion and write the
# image-level prediction strings into the submission frame.
for image_id in list(yolov4_boxes.keys()):
    # Each detector is listed twice so the lists line up with the six entries of `weights`.
    boxes_list = [yolov4_boxes[image_id], yolov5_boxes[image_id], frcnn_boxes[image_id]] * 2
    scores_list = [yolov4_scores[image_id], yolov5_scores[image_id], frcnn_scores[image_id]] * 2
    labels_list = [yolov4_labels[image_id], yolov5_labels[image_id], frcnn_labels[image_id]] * 2

    boxes, scores, labels = weighted_boxes_fusion(
        boxes_list,
        scores_list,
        labels_list,
        weights=weights,
        iou_thr=0.5)

    image_width, image_height = orig_shapes[image_id]

    # Fixed off-by-one: the original `len(boxes) > 1` skipped images with exactly one fused box.
    # Images with no box keep the value already present in the sample submission.
    if len(boxes) == 0:
        continue

    # Relative coordinates -> pixels of the original image.
    scale = np.array([image_width, image_height, image_width, image_height])

    pred_str = ""
    for box, score in zip(boxes, scores):
        # `np.int` was only an alias of the builtin int and has been removed from NumPy.
        xmin, ymin, xmax, ymax = np.array(box * scale, dtype=int)
        pred_str += "opacity {} {} {} {} {} ".format(score, xmin, ymin, xmax, ymax)
    sub_df.loc[sub_df["id"] == image_id + "_image", "PredictionString"] = pred_str

# Study-Level Inference

In [None]:
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras import layers
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard

In [None]:
# Paths of all converted test PNGs, in os.listdir order.
x_test = [
    os.path.join("./dataset/test", file_name)
    for file_name in os.listdir("./dataset/test")
]

In [None]:
# Index within a study prediction vector -> class label written to the submission string.
class_mapping = { 
    0 : "negative",
    1: "typical",
    2: "indeterminate",
    3: "atypical"}

In [None]:
class Generator(Sequence) :
    """Keras ``Sequence`` that yields batches of images (no labels) for prediction.

    Args:
        image_filenames: List of image paths.
        batch_size: Number of images per batch.
        img_size: Edge length the images are resized to (square).
    """

    def __init__(self, image_filenames, batch_size, img_size):
        super().__init__()
        self.image_filenames = image_filenames
        self.batch_size = batch_size
        self.img_size = img_size

    def __len__(self) :
        """Return the number of batches (the last one may be smaller)."""
        # `np.int` has been removed from NumPy; return a plain Python int instead.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx) :
        """Return batch ``idx`` as one array of images."""
        batch_x = self.image_filenames[idx * self.batch_size : (idx+1) * self.batch_size]
        return np.array([self.get_image(file_name) for file_name in batch_x])

    def get_image(self, path):
        """Load one image as a float32 RGB array scaled to [0, 1] and resized to ``img_size``."""
        img0 = cv2.imread(path)  # BGR
        assert img0 is not None, 'Image Not Found ' + path
        # cv2.resize returns a new array, so no defensive copy of img0 is needed.
        img = cv2.resize(img0, (self.img_size, self.img_size))
        # Convert
        img = np.float32(img) / 255.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return img

In [None]:
# Generator feeding the classifiers: batches of 8 images, each resized to 416 x 416.
batch_size = 8
test_gen = Generator(x_test, batch_size, 416)

In [None]:
#resnet_model = keras.models.load_model('./models/resnet50.h5')

In [None]:
def get_lr_metric(optimizer):
    """Build a metric-style function that reports the optimizer's learning rate.

    Args:
        optimizer: Object exposing an ``lr`` attribute.

    Returns:
        A function named ``lr`` taking ``(y_true, y_pred)`` and returning
        ``optimizer.lr`` as read at call time.
    """
    def lr(y_true, y_pred):
        # Both arguments are ignored; only the optimizer state is reported.
        current_rate = optimizer.lr
        return current_rate

    return lr

# Create the "lr" function that is passed through `custom_objects` when the saved models are loaded.
# NOTE(review): the optimizer presumably only exists to satisfy deserialisation (no training happens here) — confirm.
optimizer = keras.optimizers.Adam(0.001)
lr_metric = get_lr_metric(optimizer)

In [None]:
# Load the four saved classifiers ./models/model_0.h5 .. model_3.h5.
# "lr" must be supplied as a custom object so the saved models can be deserialised.
models = []
for fold in range(4):
    checkpoint = "./models/model_{}.h5".format(fold)
    models.append(tf.keras.models.load_model(checkpoint,
                                             custom_objects={"lr":lr_metric}))

In [None]:
# Predictions of each of the four models on the whole test generator, in model order.
all_preds = [models[idx].predict(test_gen) for idx in range(4)]

In [None]:
# Disabled experiment kept as a string literal (not executed): normalised per-model weights.
"""weights = [0.8, 0.73, 0.82, 0.92]
s = np.sum(weights)
for i in range(4):
    weights[i] /= s
np.sum(weights)"""

In [None]:
# Disabled experiment kept as a string literal (not executed): weighted combination of
# the positive and negative outputs of the four models.
"""predictions = np.zeros(shape=(all_preds[0].shape[0], 4))

for i in range(all_preds[0].shape[0]):
    pos_pred = []
    neg_pred = []
    c_pred = []
  
    for j in range(4):
        pos_pred.append(all_preds[j][i][-1])
        neg_pred.append(all_preds[j][i][0])
    
    for j in range(4):
        p = pos_pred[j] * np.sum(neg_pred[:j] + neg_pred[j+1:])
        #print(np.sum(neg_pred[:j] + neg_pred[j+1:]))
        c_pred.append(p)

    final_p = []
    for j in range(4):
        #final_p.append(c_pred[j] / np.sum(c_pred))
        final_p.append(weights[j] * c_pred[j] / np.sum(c_pred * np.array(weights)))

    predictions[i] = final_p"""

In [None]:
# For every image, take the last output of each of the four models and
# normalise the four values so they sum to one; row i holds the result for image i.
# NOTE(review): presumably model j is a one-vs-rest classifier for class j — confirm.
n_images = all_preds[0].shape[0]
predictions = np.zeros(shape=(n_images, 4))

for row in range(n_images):
    positives = [all_preds[model_idx][row][-1] for model_idx in range(4)]
    total = np.sum(positives)
    predictions[row] = [value / total for value in positives]

In [None]:
#predictions = resnet_model.predict(test_gen)

In [None]:
# image id -> row of `predictions` for that image; filled by the next cell.
preds_dict = {}

In [None]:
# Row k of `predictions` belongs to x_test[k]; key it by the file name without extension.
for index, class_scores in enumerate(predictions):
    file_name = x_test[index].split("/")[-1]
    preds_dict[file_name[:-4]] = class_scores

In [None]:
# study id -> list that will collect the prediction vectors of the study's images.
preds_study_dict = {
    study_id: []
    for study_id in set(study_mapping.values())
}

In [None]:
# Attach every image's prediction vector to the study it belongs to.
for image_id, study_id in study_mapping.items():
    preds_study_dict[study_id].append(preds_dict[image_id])

In [None]:
# Average the image-level vectors of each study (a single image is used as is)
# and write "<class> <score> 0 0 1 1 " for every class into the study row.
for study_id, image_preds in preds_study_dict.items():
    pred = image_preds[0] if len(image_preds) <= 1 else np.mean(image_preds, axis = 0)

    pred_str = "".join(
        "{} {} 0 0 1 1 ".format(class_mapping[class_idx], class_score)
        for class_idx, class_score in enumerate(pred)
    )
    sub_df.loc[sub_df["id"] == study_id + "_study", "PredictionString"] = pred_str

In [None]:
# Remove the copied inputs and intermediate outputs from the working directory.
# NOTE(review): presumably so that only submission.csv remains as notebook output — confirm.
!rm -r gdcm
!rm -r models
!rm -r yolov5
!rm -r yolov4
!rm -r dataset
!rm -r runs
!rm gdcm.tar

In [None]:
# Write the final submission file (without the index column) and display the table.
sub_df.to_csv('submission.csv', index=False)
sub_df