In [None]:
#Credits: https://www.kaggle.com/pestipeti/competition-metric-map-0-4

# Competition metric calculator

> The challenge uses the standard [PASCAL VOC 2010 mean Average Precision (mAP)](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/devkit_doc_08-May-2010.pdf) at IoU > 0.5.

In [None]:
!pip install pycocotools

In [None]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

In [None]:
# Study-level training labels shipped with the competition dataset.
STUDY_LEVEL_CSV = "/kaggle/input/siim-covid19-detection/train_study_level.csv"
study_level_df = pd.read_csv(STUDY_LEVEL_CSV)

In [None]:
# Peek at the first rows of the raw study-level labels.
study_level_df.head(n=5)

In [None]:
# Convert the one-hot study labels into an integer class id and a readable name.
names = np.array(['negative', 'typical', 'indeterminate', 'atypical'])

# Read only the label columns (positions 1..len(names)) so this cell stays
# re-runnable after later cells append more columns to the frame.
# NOTE(review): assumes the CSV stores the one-hot labels right after `id`,
# in the same order as `names` -- confirm against the file header.
label_matrix = study_level_df.iloc[:, 1:1 + len(names)].to_numpy()
assert (label_matrix.sum(axis=1) == 1).all(), "each study must have exactly one label"

study_level_df['class_id'] = label_matrix.argmax(axis=1)
study_level_df['class_name'] = names[study_level_df['class_id'].to_numpy()]

In [None]:
# Give every study-level label a box that spans the whole unit square.
_full_image_box = {'x_min': 0., 'y_min': 0., 'x_max': 1., 'y_max': 1.}
for _column, _value in _full_image_box.items():
    study_level_df[_column] = _value

In [None]:
# The evaluator looks rows up by `image_id`; reuse the study id for that key.
study_level_df = study_level_df.assign(image_id=study_level_df['id'])

In [None]:
# Check the columns derived in the cells above.
study_level_df.head(n=5)

In [None]:
class VinBigDataEval:
    """Helper class for calculating the competition metric (mAP at IoU 0.5).

    The ground truth is converted once, at construction time, into a COCO
    "instances" dictionary. Predictions are converted on every call to
    `evaluate` and scored with pycocotools' `COCOeval`.

    You should remove the duplicated annotations from the `true_df` dataframe
    before using this script. Otherwise it may give incorrect results.

        >>> vineval = VinBigDataEval(valid_df)
        >>> cocoEvalResults = vineval.evaluate(pred_df)

    Arguments:
        true_df: pd.DataFrame Clean (no duplication) Training/Validating
                 dataframe with the columns `image_id`, `class_id`, `x_min`,
                 `y_min`, `x_max`, `y_max` and, optionally, `class_name`.
                 The dataframe is not modified.

    Authors:
        Peter (https://kaggle.com/pestipeti)

    See:
        https://www.kaggle.com/pestipeti/competition-metric-map-0-4

    Returns: None

    """
    def __init__(self, true_df):

        self.true_df = true_df

        # Position of an id in this array is the integer image id used in
        # the COCO dictionaries.
        self.image_ids = true_df["image_id"].unique()
        self.annotations = {
            "type": "instances",
            "images": self.__gen_images(self.image_ids),
            "categories": self.__gen_categories(self.true_df),
            "annotations": self.__gen_annotations(self.true_df, self.image_ids)
        }

        # Filled in by `evaluate`; images and categories are shared with the
        # ground truth so both sides use the same integer ids.
        self.predictions = {
            "images": self.annotations["images"].copy(),
            "categories": self.annotations["categories"].copy(),
            "annotations": None
        }

    @staticmethod
    def __index_image_ids(image_ids):
        """Map every original image id to its integer position in `image_ids`."""
        return {image_id: idx for idx, image_id in enumerate(image_ids)}

    def __gen_images(self, image_ids):
        """Build the COCO `images` list: one entry per unique image id."""
        print("Generating image data...")
        return [{"id": idx} for idx in range(len(image_ids))]

    def __gen_categories(self, df):
        """Build the COCO `categories` list from the distinct classes in `df`.

        When `df` has no `class_name` column the class id doubles as the
        name. The caller's dataframe is left untouched.
        """
        print("Generating category data...")

        if "class_name" in df.columns:
            cats = df[["class_name", "class_id"]]
        else:
            # Work on a derived frame instead of adding a column to `df`.
            cats = df[["class_id"]].assign(class_name=df["class_id"])
            cats = cats[["class_name", "class_id"]]

        cats = cats.drop_duplicates().sort_values(by='class_id')

        return [
            {
                "id": int(class_id),
                "name": class_name,
                "supercategory": "none",
            }
            for class_name, class_id in cats.itertuples(index=False, name=None)
        ]

    def __gen_annotations(self, df, image_ids):
        """Build the COCO ground-truth `annotations` list.

        Boxes are read as corner coordinates and stored in COCO's
        [x, y, width, height] layout. Annotation ids start at 1 because
        COCOeval stores the matched id in its match tables and interprets
        0 as "no match".
        """
        print("Generating annotation data...")
        index_of = self.__index_image_ids(image_ids)
        box_columns = ["class_id", "x_min", "y_min", "x_max", "y_max"]
        results = []

        # One pass over the frame, grouped in order of first appearance,
        # instead of filtering the whole frame once per image.
        for image_id, rows in df.groupby("image_id", sort=False):
            idx = index_of[image_id]

            for class_id, x_min, y_min, x_max, y_max in rows[box_columns].itertuples(index=False, name=None):
                width = x_max - x_min
                height = y_max - y_min

                results.append({
                    "id": len(results) + 1,
                    "image_id": idx,
                    "category_id": int(class_id),
                    "bbox": np.array([x_min, y_min, width, height], dtype=float),
                    "segmentation": [],
                    "ignore": 0,
                    "area": width * height,
                    "iscrowd": 0,
                })

        return results

    def __decode_prediction_string(self, pred_str):
        """Parse a prediction string into an (n, 6) float array.

        Each row is `class score x_min y_min x_max y_max`. Tokens may be
        separated by any amount of whitespace. An empty, blank or
        non-string value (e.g. NaN) yields an array with zero rows.

        Raises:
            ValueError: if a token is not a number or the number of tokens
                        is not a multiple of 6.
        """
        if not isinstance(pred_str, str) or not pred_str.strip():
            return np.empty((0, 6))

        data = np.array([float(token) for token in pred_str.split()])

        if data.size % 6 != 0:
            raise ValueError(
                "PredictionString must contain 6 values per box, got "
                f"{data.size} values: {pred_str!r}"
            )

        return data.reshape(-1, 6)

    def __gen_predictions(self, df, image_ids):
        """Build the COCO detection `annotations` list from `df`.

        `df` needs the columns `image_id` and `PredictionString`. Boxes are
        converted from corner coordinates to [x, y, width, height]; ids
        start at 1 (see `__gen_annotations`).

        Raises:
            ValueError: if a row refers to an image id that is not part of
                        the ground truth.
        """
        print("Generating prediction data...")
        index_of = self.__index_image_ids(image_ids)
        results = []

        for image_id, pred_str in zip(df["image_id"], df["PredictionString"]):

            if image_id not in index_of:
                raise ValueError(f"Unknown image_id in predictions: {image_id!r}")

            idx = index_of[image_id]

            for class_id, score, x_min, y_min, x_max, y_max in self.__decode_prediction_string(pred_str):
                width = x_max - x_min
                height = y_max - y_min

                results.append({
                    "id": len(results) + 1,
                    "image_id": idx,
                    "category_id": int(class_id),
                    "bbox": np.array([x_min, y_min, width, height]),
                    "segmentation": [],
                    "ignore": 0,
                    "area": width * height,
                    "iscrowd": 0,
                    "score": float(score)
                })

        return results

    def evaluate(self, pred_df, n_imgs = -1):
        """Evaluating your results

        Arguments:
            pred_df: pd.DataFrame your predicted results in the
                     competition output format. Pass `None` to reuse the
                     predictions of the previous call.

            n_imgs:  int Number of images use for calculating the
                     result. The images are drawn at random without
                     replacement (at most all of them). All of the images
                     if `n_imgs` <= 0

        Returns:
            COCOEval object

        Raises:
            ValueError: if `pred_df` is None and no predictions have been
                        supplied by an earlier call.
        """

        if pred_df is not None:
            self.predictions["annotations"] = self.__gen_predictions(pred_df, self.image_ids)
        elif self.predictions["annotations"] is None:
            raise ValueError(
                "No predictions available: call evaluate() with a prediction "
                "dataframe before calling it with pred_df=None."
            )

        coco_ds = COCO()
        coco_ds.dataset = self.annotations
        coco_ds.createIndex()

        coco_dt = COCO()
        coco_dt.dataset = self.predictions
        coco_dt.createIndex()

        imgIds = sorted(coco_ds.getImgIds())

        if n_imgs > 0:
            # Sample without replacement so that exactly `sample_size`
            # distinct images are scored.
            sample_size = min(n_imgs, len(imgIds))
            imgIds = sorted(np.random.choice(imgIds, sample_size, replace=False).tolist())

        cocoEval = COCOeval(coco_ds, coco_dt, 'bbox')
        cocoEval.params.imgIds  = imgIds
        cocoEval.params.useCats = True
        cocoEval.params.iouType = "bbox"
        # Single IoU threshold: the competition metric is mAP at IoU 0.5.
        cocoEval.params.iouThrs = np.array([0.5])

        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

        return cocoEval

# Usage

In [None]:
# df = pd.read_csv("../input/vinbigdata-chest-xray-abnormalities-detection/train.csv")
# df.fillna(0, inplace=True)
# df.loc[df["class_id"] == 14, ['x_max', 'y_max']] = 1.0

# df.head()

In [None]:
# # Removing duplications! DO NOT USE THIS in your training!!!
# df = df.groupby(by=['image_id', 'class_id']).first().reset_index()

In [None]:
# Build the evaluator a single time; it is reused by every evaluation below.
vineval = VinBigDataEval(true_df=study_level_df)

### Predict single class for each study

In [None]:
# Predicting with 1 class
# {0: 'negative', 1: 'typical', 2: 'indeterminate', 3: 'atypical'}
class_id = 0

# One full-image box with confidence 1.0 per study. Built as a single chain
# so no column is assigned onto a filtered frame and nothing is mutated in place.
pred_df = (
    study_level_df[["image_id"]]
    .drop_duplicates()
    .assign(PredictionString=f"{class_id} 1.0 0 0 1 1")
    .reset_index(drop=True)
)

pred_df.head()

In [None]:
# Score the current predictions (in training, repeat this every n epochs).
cocoEvalRes = vineval.evaluate(pred_df=pred_df)

We get the same result as LB probing for the negative class, which is 0.050. The study-level mAP has to be multiplied by 4/6 to get the contribution of the 4 study-level classes (out of 6 classes in total) to the final LB score. See this [discussion](https://www.kaggle.com/c/siim-covid19-detection/discussion/244066) for LB probing of study-level predictions.

In [None]:
# Scale the study-level score by 2/3 (= 4/6) to get its share of the leaderboard score.
# NOTE(review): stats[1] is presumably COCOeval's AP at IoU=0.50 entry -- confirm.
cocoEvalRes.stats[1]*2/3 

### Predict all classes for each study

In [None]:
# Relative frequency of every class, indexed by class id.
class_probas = study_level_df['class_id'].value_counts(normalize=True)
class_probas

The confidence values do not matter when predicting all classes: every box has IoU = 1.0 with the ground truth, so for each study exactly 1 box is a TP and the remaining 3 are always FPs.

In [None]:
# Predicting with all classes
# The list of studies is the same for every class, so compute it once.
study_ids_df = study_level_df[["image_id"]].drop_duplicates().reset_index(drop=True)

# One full-image box per (study, class), scored with the class frequency.
# `assign` returns a new frame, so nothing is written onto a filtered frame.
dfs = [
    study_ids_df.assign(PredictionString=f"{class_id} {class_probas[class_id]} 0 0 1 1")
    for class_id in range(4)  # the 4 study-level classes
]

# ignore_index gives the stacked frame a clean 0..n-1 index.
pred_df = pd.concat(dfs, ignore_index=True)
pred_df.head()

In [None]:
# Score the current predictions (in training, repeat this every n epochs).
cocoEvalRes = vineval.evaluate(pred_df=pred_df)

In [None]:
# Scale the study-level score by 2/3 (= 4/6) to get its share of the leaderboard score.
# NOTE(review): stats[1] is presumably COCOeval's AP at IoU=0.50 entry -- confirm.
cocoEvalRes.stats[1]*2/3 

#### Recalculating with random samples

In [None]:
%%capture
N_ROUNDS = 100           # number of random re-evaluations
N_SAMPLED_IMAGES = 300   # images drawn for each re-evaluation

# `evaluate` draws its image subset with np.random, so fix the seed to make
# the scores below reproducible across runs.
np.random.seed(42)

stats = []

# Recalculate the validation score using randomly selected images
for _ in range(N_ROUNDS):
    cocoEvalRes = vineval.evaluate(pred_df=None, n_imgs=N_SAMPLED_IMAGES)
    stats.append(cocoEvalRes.stats[0])

avg = np.array(stats).mean()

In [None]:
# Score of every random-subset evaluation, with the mean and the public
# baseline drawn as horizontal reference lines.
PUBLIC_BASELINE = 0.052
n_rounds = len(stats)  # span the reference lines over however many rounds were run

fig = go.Figure()
fig.add_trace(go.Scatter(x=list(range(n_rounds)), y=stats, mode="markers", name="Stats"))
fig.add_trace(go.Scatter(x=[0, n_rounds], y=[avg, avg], mode="lines", name="Mean"))
fig.add_trace(go.Scatter(x=[0, n_rounds], y=[PUBLIC_BASELINE, PUBLIC_BASELINE], mode="lines", name="Public Baseline"))

fig.update_yaxes(
    range=[0.03, 0.07]
)

# The evaluator scores at an IoU threshold of 0.5, so label the plot accordingly.
fig.update_layout(title='Results of mAP@0.5 (randomly selected 300 images)',
                  yaxis_title='Score',
                  xaxis_title='')

fig.show()