In [None]:
#Credits: https://www.kaggle.com/pestipeti/competition-metric-map-0-4

# Competition metric calculator

> The challenge uses the standard [PASCAL VOC 2010 mean Average Precision (mAP)](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/devkit_doc_08-May-2010.pdf) at IoU > 0.5.

In [None]:
!pip install pycocotools -q

In [None]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import shutil, os
from tqdm.auto import tqdm
from glob import glob
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Image-level labels joined with the width/height columns of the resized-dataset
# metadata (matched on "<id_image>_image").
image_level_df = pd.read_csv("/kaggle/input/siim-covid19-detection/train_image_level.csv")
dim_df = pd.read_csv("/kaggle/input/siim-covid19-detection-512/train.csv")
dim_df["id"] = dim_df["id_image"] + '_image'
image_level_df = pd.merge(image_level_df, dim_df[['id', 'width', 'height']], on='id', how='left')

# 0 for the "no finding" sentinel label, 1 for every other label.
image_level_df["none"] = (image_level_df["label"] != 'none 1 0 0 1 1').astype(int)

# Debug subset: only the first N rows are evaluated. Set to None to use every image.
N_DEBUG_IMAGES = 50
# reset_index returns an independent frame with a 0..n-1 index, which the
# position-based `.loc[i, ...]` loops in later cells rely on.
image_level_df = image_level_df.iloc[:N_DEBUG_IMAGES].reset_index(drop=True)

In [None]:
def convert_label_to_class_ids(label):
    """Rewrite a label string so every 6-token box group starts with a numeric class id.

    The "no finding" sentinel ``'none 1 0 0 1 1'`` becomes ``'0 1 0 0 1 1'``;
    in any other label the first token of each complete 6-token group is
    replaced by ``'1'`` and the remaining five tokens are kept. Trailing tokens
    that do not fill a whole group are dropped.

    The conversion is idempotent: an already converted label is returned
    unchanged, so re-running the cell does not turn class-0 rows into class 1.
    """
    if label in ('none 1 0 0 1 1', '0 1 0 0 1 1'):
        return '0 1 0 0 1 1'

    tokens = label.split()
    converted = []
    for start in range(0, 6 * (len(tokens) // 6), 6):
        converted.append('1')
        converted.extend(tokens[start + 1:start + 6])
    return ' '.join(converted)


image_level_df['label'] = image_level_df['label'].map(convert_label_to_class_ids)

In [None]:
from sklearn.model_selection import GroupKFold

# Images are grouped by StudyInstanceUID so that a study never ends up on both
# sides of a train/validation split.
gkf  = GroupKFold(n_splits = 5)

# `split` yields *positional* indices, so the fold ids are collected in a plain
# array and attached afterwards; this stays correct even when the frame's index
# is not the default 0..n-1 range.
fold_ids = np.full(len(image_level_df), -1)
for fold, (train_idx, val_idx) in enumerate(gkf.split(image_level_df,
                groups = image_level_df.StudyInstanceUID.tolist())):
    fold_ids[val_idx] = fold
image_level_df['fold'] = fold_ids

In [None]:
# Absolute path of the PNG belonging to each image id.
png_dir = '/kaggle/input/siimcovid19-512-img-png-600-study-png/image/'
image_level_df['image_path'] = png_dir + image_level_df['id'] + '.png'
image_level_df.head()

In [None]:
# Stage the images of every fold in their own directory so that detection can
# be run on one fold at a time.
for fold in tqdm(range(5)):
    test_dir = f'/kaggle/tmp/image_fold_{fold}'
    os.makedirs(test_dir, exist_ok=True)
    fold_mask = image_level_df['fold'] == fold
    for path in image_level_df[fold_mask]['image_path'].values:
        destination = os.path.join(test_dir, os.path.basename(path))
        shutil.copyfile(path, destination)

# **2 Class**

In [None]:
# Switch for the 2-class classifier cells below: they only run when this is True.
# NOTE(review): `two_class_df` is only created inside the `if PUBLIC:` cell, but the
# later merge cell reads it unconditionally, so setting this to False will fail there.
PUBLIC=True

In [None]:
if PUBLIC:
    !pip install /kaggle/input/kerasapplications/keras-team-keras-applications-3b180cb -q
    !pip install /kaggle/input/efficientnet-keras-source-code/ -q --no-deps

    import os

    import efficientnet.tfkeras as efn
    import numpy as np
    import pandas as pd
    import tensorflow as tf

    def auto_select_accelerator():
        """Pick a tf.distribute strategy.

        Tries to resolve, connect to and initialise a TPU; if any of those steps
        raises ValueError the current default strategy is used instead. The
        number of replicas of the chosen strategy is printed.

        Returns:
            The selected distribution strategy.
        """
        try:
            resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
            tf.config.experimental_connect_to_cluster(resolver)
            tf.tpu.experimental.initialize_tpu_system(resolver)
            selected = tf.distribute.experimental.TPUStrategy(resolver)
            print("Running on TPU:", resolver.master())
        except ValueError:
            selected = tf.distribute.get_strategy()

        replica_count = selected.num_replicas_in_sync
        print(f"Running on {replica_count} replicas")
        return selected


    def build_decoder(with_labels=True, target_size=(300, 300), ext='jpg'):
        """Create the function that turns an image path into a model input.

        Arguments:
            with_labels: when True the returned function takes ``(path, label)``
                         and passes the label through; otherwise it takes ``path``.
            target_size: size handed to ``tf.image.resize``.
            ext:         'png', 'jpg' or 'jpeg'; selects the TensorFlow decoder.

        The decoded image is cast to float32, divided by 255 and resized.
        An unsupported `ext` raises ValueError when the decode function runs.
        """
        def decode(path):
            raw = tf.io.read_file(path)
            if ext == 'png':
                pixels = tf.image.decode_png(raw, channels=3)
            elif ext in ['jpg', 'jpeg']:
                pixels = tf.image.decode_jpeg(raw, channels=3)
            else:
                raise ValueError("Image extension not supported")

            scaled = tf.cast(pixels, tf.float32) / 255.0
            return tf.image.resize(scaled, target_size)

        def decode_with_labels(path, label):
            return decode(path), label

        if with_labels:
            return decode_with_labels
        return decode


    def build_augmenter(with_labels=True):
        """Create the augmentation function: random left/right and up/down flips.

        With `with_labels` the returned function takes ``(img, label)`` and
        returns the label untouched; otherwise it takes and returns the image only.
        """
        def augment(img):
            flipped = tf.image.random_flip_left_right(img)
            return tf.image.random_flip_up_down(flipped)

        def augment_with_labels(img, label):
            return augment(img), label

        if with_labels:
            return augment_with_labels
        return augment


    def build_dataset(paths, labels=None, bsize=32, cache=True,
                      decode_fn=None, augment_fn=None,
                      augment=True, repeat=True, shuffle=1024,
                      cache_dir=""):
        """Assemble a batched, prefetched tf.data pipeline over image paths.

        Arguments:
            paths:      image paths to read.
            labels:     optional labels sliced together with `paths`.
            bsize:      batch size.
            cache:      cache the decoded images (in `cache_dir` when given).
            decode_fn:  path -> image function; built by `build_decoder` if None.
            augment_fn: augmentation function; built by `build_augmenter` if None.
            augment:    apply `augment_fn` after decoding/caching.
            repeat:     repeat the dataset indefinitely.
            shuffle:    shuffle buffer size; a falsy value disables shuffling.
            cache_dir:  cache location; created on demand when caching is on.

        Stage order: decode -> cache -> augment -> repeat -> shuffle -> batch -> prefetch.
        """
        has_labels = labels is not None

        if cache is True and cache_dir != "":
            os.makedirs(cache_dir, exist_ok=True)

        if decode_fn is None:
            decode_fn = build_decoder(has_labels)
        if augment_fn is None:
            augment_fn = build_augmenter(has_labels)

        AUTO = tf.data.experimental.AUTOTUNE
        source = (paths, labels) if has_labels else paths

        pipeline = tf.data.Dataset.from_tensor_slices(source)
        pipeline = pipeline.map(decode_fn, num_parallel_calls=AUTO)
        if cache:
            pipeline = pipeline.cache(cache_dir)
        if augment:
            pipeline = pipeline.map(augment_fn, num_parallel_calls=AUTO)
        if repeat:
            pipeline = pipeline.repeat()
        if shuffle:
            pipeline = pipeline.shuffle(shuffle)

        return pipeline.batch(bsize).prefetch(AUTO)
    
    strategy = auto_select_accelerator()
    BATCH_SIZE = strategy.num_replicas_in_sync * 16

    # Work on a copy so the classifier scores do not overwrite the ground-truth
    # `none` column of image_level_df.
    sub_df_2 = image_level_df.copy()
    sub_df_2['none'] = 0
    test_paths = [sub_df_2[sub_df_2["fold"]==fold]["image_path"].values for fold in range(5)]

    # Candidate input resolutions; the last entry (512) is the one used here.
    IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600, 512)

    test_decoder = build_decoder(with_labels=False, target_size=(IMSIZE[8], IMSIZE[8]), ext='png')

    # One saved model per fold (model0.h5 ... model4.h5), loaded under the strategy scope.
    with strategy.scope():
        models = [
            tf.keras.models.load_model(
                f'/kaggle/input/siim-covid19-efnb7-train-fold0-5-2class/model{fold}.h5'
            )
            for fold in range(5)
        ]
    # Individual names are kept because the clean-up cell deletes them explicitly.
    models0, models1, models2, models3, models4 = models

    two_class_df=[]

    for fold in range(5):
        dtest = build_dataset(
            test_paths[fold], bsize=BATCH_SIZE, repeat=False,
            shuffle=False, augment=False, cache=False,
            decode_fn=test_decoder
        )
        # Explicit copy: the score column is written to an independent frame,
        # not to a filtered slice of sub_df_2 (chained assignment).
        df = sub_df_2[sub_df_2["fold"]==fold].copy()
        # Each fold is scored with the model of the same index.
        df['none'] = models[fold].predict(dtest, verbose=1)
        two_class_df.append(df)

In [None]:
if PUBLIC:
    # Drop every Python reference to the loaded classifier models (the list and
    # the five individual names) before the detector is run.
    del models
    del models0, models1, models2, models3, models4
    # NOTE(review): presumably this select/close/select sequence is meant to release
    # the GPU memory still held by TensorFlow so YOLOv5 can use it -- confirm.
    from numba import cuda
    cuda.select_device(0)
    cuda.close()
    cuda.select_device(0)

# **Yolo-V5**

In [None]:
weights_dir = ['/kaggle/input/siim-cov19-yolov5-train/yolov5/runs/train/exp/weights/best.pt',
               '/kaggle/input/siim-cov19-yolov5-train/yolov5/runs/train/exp/weights/best.pt',
               '/kaggle/input/siim-cov19-yolov5-train/yolov5/runs/train/exp/weights/best.pt',
               '/kaggle/input/siim-cov19-yolov5-train/yolov5/runs/train/exp/weights/best.pt',
               '/kaggle/input/siim-cov19-yolov5-train/yolov5/runs/train/exp/weights/best.pt']

shutil.copytree('/kaggle/input/yolov5-official-v31-dataset/yolov5', '/kaggle/working/yolov5')
os.chdir('/kaggle/working/yolov5') # install dependencies
#!pip install -r requirements.txt -q

import torch
#from IPython.display import Image, clear_output  # to display images

#clear_output()
#print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

#fold 0
wts = weights_dir[0]
!python detect.py --weights $wts\
--img 512\
--conf 0.001\
--iou 0.5\
--source /kaggle/tmp/image_fold_0/ \
--project /kaggle/working/yolov5_fold0/ --name test_iou_0.5_0.001\
--save-txt --save-conf --exist-ok

#fold 1
wts = weights_dir[1]
!python detect.py --weights $wts\
--img 512\
--conf 0.001\
--iou 0.5\
--source /kaggle/tmp/image_fold_1/ \
--project /kaggle/working/yolov5_fold1/ --name test_iou_0.5_0.001\
--save-txt --save-conf --exist-ok

#fold 2
wts = weights_dir[2]
!python detect.py --weights $wts\
--img 512\
--conf 0.001\
--iou 0.5\
--source /kaggle/tmp/image_fold_2/ \
--project /kaggle/working/yolov5_fold2/ --name test_iou_0.5_0.001\
--save-txt --save-conf --exist-ok

#fold 3
wts = weights_dir[3]
!python detect.py --weights $wts\
--img 512\
--conf 0.001\
--iou 0.5\
--source /kaggle/tmp/image_fold_3/ \
--project /kaggle/working/yolov5_fold3/ --name test_iou_0.5_0.001\
--save-txt --save-conf --exist-ok

#fold 4
wts = weights_dir[4]
!python detect.py --weights $wts\
--img 512\
--conf 0.001\
--iou 0.5\
--source /kaggle/tmp/image_fold_4/ \
--project /kaggle/working/yolov5_fold4/ --name test_iou_0.5_0.001\
--save-txt --save-conf --exist-ok

def yolo2voc(image_height, image_width, bboxes):
    """Convert normalized YOLO boxes to absolute corner boxes.

    yolo => [xmid, ymid, w, h] (normalized to the image size)
    voc  => [x1, y1, x2, y2] (same units as image_width / image_height)

    The input array is not modified; a float copy is returned.
    """
    # Work in float so that scaling does not truncate when the input is integer typed.
    voc = bboxes.copy().astype(float)

    # Scale x-centre and width by the image width, y-centre and height by the image height.
    voc[..., 0:3:2] *= image_width
    voc[..., 1:4:2] *= image_height

    # Centre -> top-left corner, then top-left + size -> bottom-right corner.
    voc[..., 0:2] -= voc[..., 2:4] / 2
    voc[..., 2:4] += voc[..., 0:2]

    return voc

# One dataframe per fold with columns `id` and `PredictionString`.
preds_df_all = []

for fold in range(5):

    image_ids = []
    PredictionStrings = []

    label_files = glob('/kaggle/working/yolov5_fold{}/test_iou_0.5_0.001/labels/*.txt'.format(fold))
    for file_path in tqdm(label_files):
        # The label file is named after the image id.
        image_id = file_path.split('/')[-1].split('.')[0]
        w, h = image_level_df.loc[image_level_df.id==image_id,['width', 'height']].values[0]

        # Read through a context manager so the file handle is always closed.
        with open(file_path, 'r') as f:
            tokens = f.read().split()
        if not tokens:
            # Nothing detected: leave the image to the "none" fill below.
            continue

        data = np.array(tokens).astype(np.float32).reshape(-1, 6)
        # Move the last column to position 1 so the four box values sit in columns 2..5.
        # NOTE(review): assumes each row is "class x y w h conf" as written by --save-conf.
        data = data[:, [0, 5, 1, 2, 3, 4]]
        corner_boxes = np.round(yolo2voc(h, w, data[:, 2:]))
        flat = np.round(np.concatenate((data[:, :2], corner_boxes), axis =1).reshape(-1), 12).astype(str)
        # Every field except the second of each 6-tuple is written as an integer.
        bboxes = [
            value if idx % 6 == 1 else str(int(float(value)))
            for idx, value in enumerate(flat)
        ]
        image_ids.append(image_id)
        PredictionStrings.append(' '.join(bboxes))


    # Images without a label file get the "none" sentinel prediction.
    full_df = image_level_df[image_level_df['fold']==fold].copy()["id"]
    preds_df = pd.DataFrame({'id':image_ids,'PredictionString':PredictionStrings})
    preds_df_full = pd.merge(full_df, preds_df, on = 'id', how = 'left').fillna("none 1 0 0 1 1")
    preds_df_all.append(preds_df_full)


In [None]:
# Attach the classifier score to every fold's detections and rewrite the
# prediction strings with numeric class ids.
for fold in range(5):

    preds_df_all[fold] = pd.merge(preds_df_all[fold], two_class_df[fold][['id', 'none']], on='id', how='left')
    fold_preds = preds_df_all[fold]

    for i in tqdm(range(fold_preds.shape[0])):
        raw_prediction = fold_preds.loc[i, 'PredictionString']

        # No detections for this image: a single class-0 box with confidence 1.
        if raw_prediction == "none 1 0 0 1 1":
            fold_preds.loc[i, 'PredictionString'] = '0 1 0 0 1 1'
            continue

        # Every complete 6-token detection becomes class 1, keeping its other five tokens.
        tokens = raw_prediction.split()
        relabelled = []
        for start in range(0, 6 * (len(tokens) // 6), 6):
            relabelled.append('1')
            relabelled.extend(tokens[start + 1:start + 6])

        # Append one class-0 box whose confidence is the value of the `none` column.
        none_score = str(fold_preds.loc[i, 'none'])
        fold_preds.loc[i, 'PredictionString'] = ' '.join(relabelled) + ' 0 ' + none_score + ' 0 0 1 1'

In [None]:
class CovidDataEval:
    """Helper class for calculating the competition metric.

    You should remove the duplicated annotations from the `true_df` dataframe
    before using this script. Otherwise it may give incorrect results.

        >>> covideval = CovidDataEval(valid_df)
        >>> cocoEvalResults = covideval.evaluate(pred_df)

    Arguments:
        true_df: pd.DataFrame Clean (no duplication) Training/Validating dataframe.
                 Needs an `id` column and a `label` column holding space-separated
                 6-tuples ``class_id score x_min y_min x_max y_max``.
        study:   bool When True the categories are taken from the `class_id`
                 (and optional `class_name`) columns of `true_df`; otherwise they
                 are derived from the class ids found in the `label` strings.

    Authors:
        Peter (https://kaggle.com/pestipeti)

    See:
        https://www.kaggle.com/pestipeti/competition-metric-map-0-4

    """
    def __init__(self, true_df, study=False):

        self.true_df = true_df
        self.study = study

        self.image_ids = true_df["id"].unique()
        self.annotations = {
            "type": "instances",
            "images": self.__gen_images(self.image_ids),
            "categories": self.__gen_categories(self.true_df),
            "annotations": self.__gen_annotations(self.true_df, self.image_ids)
        }

        # Predictions share images and categories with the ground truth; the
        # annotations are filled in by `evaluate`.
        self.predictions = {
            "images": self.annotations["images"].copy(),
            "categories": self.annotations["categories"].copy(),
            "annotations": None
        }


    def __gen_images(self, image_ids):
        """Return one COCO image record per id; the COCO id is the position in `image_ids`."""
        print("Generating image data...")
        return [{"id": idx} for idx in range(len(image_ids))]


    def __gen_categories(self, df):
        """Return the COCO category records for the ground-truth dataframe."""
        print("Generating category data...")

        if self.study:
            # Build the (name, id) pairs on a local frame so the caller's
            # dataframe is not modified when `class_name` is missing.
            if "class_name" in df.columns:
                cats = df[["class_name", "class_id"]]
            else:
                cats = df[["class_id"]].assign(class_name=df["class_id"])[["class_name", "class_id"]]
            cats = cats.drop_duplicates().sort_values(by='class_id').values

            return [
                {"id": cat[1], "name": cat[0], "supercategory": "none"}
                for cat in cats
            ]

        # Category ids must be the same integers the annotations use as
        # `category_id`, otherwise COCOeval matches nothing. (Iterating over the
        # dataframe itself yields column names, not rows.)
        class_ids = sorted({
            int(box[0])
            for label in df["label"]
            for box in self.__decode_prediction_string(label)
        })
        return [
            {"id": class_id, "name": str(class_id), "supercategory": " "}
            for class_id in class_ids
        ]

    def __decode_prediction_string(self, pred_str):
        """Split a prediction/label string into an (n, 6) array of string tokens.

        Raises ValueError (from reshape) when the token count is not a multiple of 6.
        """
        data = np.array(pred_str.split())
        return data.reshape(-1, 6)

    def __build_records(self, df, column, image_ids):
        """Turn the 6-tuples stored in `column` into COCO annotation records.

        Boxes are converted from ``x_min y_min x_max y_max`` to COCO
        ``[x, y, width, height]``.

        Raises:
            KeyError: when a row's `id` is not one of `image_ids`.
        """
        # Single id -> position lookup instead of scanning image_ids per box.
        index_of = {image_id: idx for idx, image_id in enumerate(image_ids)}
        results = []

        for _, row in df.iterrows():
            image_id = row["id"]
            if image_id not in index_of:
                raise KeyError(f"image id {image_id!r} is not part of the ground-truth dataframe")
            image_idx = index_of[image_id]

            for pred in self.__decode_prediction_string(row[column]):
                x_min, y_min, x_max, y_max = (float(value) for value in pred[2:6])
                width = x_max - x_min
                height = y_max - y_min

                results.append({
                    "id": len(results),
                    "image_id": image_idx,
                    "category_id": int(pred[0]),
                    "bbox": np.array([x_min, y_min, width, height]),
                    "segmentation": [],
                    "ignore": 0,
                    "area": width * height,
                    "iscrowd": 0,
                    "score": float(pred[1])
                })

        return results

    def __gen_annotations(self, df, image_ids):
        """Build the ground-truth annotation records from the `label` column."""
        print("Generating annotation data...")
        return self.__build_records(df, "label", image_ids)


    def __gen_predictions(self, df, image_ids):
        """Build the prediction records from the `PredictionString` column."""
        print("Generating prediction data...")
        return self.__build_records(df, "PredictionString", image_ids)

    def evaluate(self, pred_df, n_imgs = -1):
        """Evaluating your results

        Arguments:
            pred_df: pd.DataFrame your predicted results in the
                     competition output format. May be None to re-use the
                     predictions of a previous call.

            n_imgs:  int Number of images use for calculating the
                     result. All of the images if `n_imgs` <= 0. Images are
                     sampled without replacement, capped at the number available.

        Returns:
            COCOEval object

        Raises:
            ValueError: when no predictions have been supplied yet.
        """

        if pred_df is not None:
            self.predictions["annotations"] = self.__gen_predictions(pred_df, self.image_ids)

        if self.predictions["annotations"] is None:
            raise ValueError("No predictions available: pass a prediction dataframe to evaluate().")

        coco_ds = COCO()
        coco_ds.dataset = self.annotations
        coco_ds.createIndex()

        coco_dt = COCO()
        coco_dt.dataset = self.predictions
        coco_dt.createIndex()

        imgIds=sorted(coco_ds.getImgIds())

        if n_imgs > 0:
            # Without replacement, so exactly min(n_imgs, available) distinct images are used.
            sample_size = min(n_imgs, len(imgIds))
            imgIds = sorted(np.random.choice(imgIds, sample_size, replace=False).tolist())

        cocoEval = COCOeval(coco_ds, coco_dt, 'bbox')
        cocoEval.params.imgIds  = imgIds
        cocoEval.params.useCats = True
        cocoEval.params.iouType = "bbox"
        # Single IoU threshold, as stated for the competition metric.
        cocoEval.params.iouThrs = np.array([0.5])

        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

        return cocoEval

# Usage

In [None]:
# Per-fold study-level scores (one entry per fold, in fold order). They are
# hard-coded constants: nothing in this notebook computes them.
# NOTE(review): presumably taken from a separate study-level evaluation and
# already weighted for the overall metric -- confirm the source before relying on the sum below.
study_scores=[
0.39388390121767813,
0.39234564782357945,
0.3847761244542205,
0.37818329809109086,
0.38300730435642993]

In [None]:
# Image-level score per fold: stats[1] of the COCO evaluation, scaled by one third.
fold_mAP = []
for fold in range(5):
    fold_truth = image_level_df[image_level_df["fold"] == fold]
    covideval = CovidDataEval(fold_truth)
    cocoEvalRes = covideval.evaluate(preds_df_all[fold])
    fold_mAP.append(cocoEvalRes.stats[1] / 3)

In [None]:
# Per-fold report followed by the means over all folds and their sum.
for fold in range(5):
    fold_no = fold + 1
    print(f"\nStudy Level mAP Score fold {fold_no}: {study_scores[fold]}\nImage Level mAP Score fold {fold_no}: {fold_mAP[fold]}")

study_mean = np.array(study_scores).mean()
image_mean = np.array(fold_mAP).mean()
print(f"\nStudy Level mAP Score: {study_mean}\nImage Level mAP Score: {image_mean}\n\nOverall mAP: {study_mean + image_mean}")