# Competiton metric calculator

> The challenge uses the standard [PASCAL VOC 2010 mean Average Precision (mAP)](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/devkit_doc_08-May-2010.pdf) at IoU > 0.4.

In [1]:
!pip install --upgrade pip

Collecting pip
  Downloading pip-23.1.2-py3-none-any.whl (2.1 MB)
[K     |████████████████████████████████| 2.1 MB 8.1 MB/s eta 0:00:01
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 20.3.1
    Uninstalling pip-20.3.1:
      Successfully uninstalled pip-20.3.1
Successfully installed pip-23.1.2


In [2]:
!pip install pycocotools

Collecting pycocotools
  Downloading pycocotools-2.0.6.tar.gz (24 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (pyproject.toml) ... [?25ldone
[?25h  Created wheel for pycocotools: filename=pycocotools-2.0.6-cp37-cp37m-linux_x86_64.whl size=275287 sha256=cc01bdf4304e7cc4eb2da88134efccc85ee0dbafbf495baef7f86bd748de14ec
  Stored in directory: /root/.cache/pip/wheels/06/f6/f9/9cc49c6de8e3cf27dfddd91bf46595a057141d4583a2adaf03
Successfully built pycocotools
Installing collected packages: pycocotools
Successfully installed pycocotools-2.0.6
[0m

In [3]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

In [12]:
class VinBigDataEval:
    """Helper class for calculating the competition metric.
    
    You should remove the duplicated annoatations from the `true_df` dataframe
    before using this script. Otherwise it may give incorrect results.

        >>> vineval = VinBigDataEval(valid_df)
        >>> cocoEvalResults = vineval.evaluate(pred_df)

    Arguments:
        true_df: pd.DataFrame Clean (no duplication) Training/Validating dataframe.

    Authors:
        Peter (https://kaggle.com/pestipeti)

    See:
        https://www.kaggle.com/pestipeti/competition-metric-map-0-4

    Returns: None
    
    """
    def __init__(self, true_df):
        
        self.true_df = true_df

        self.image_ids = true_df["image_id"].unique()
        self.annotations = {
            "type": "instances",
            "images": self.__gen_images(self.image_ids),
            "categories": self.__gen_categories(self.true_df),
            "annotations": self.__gen_annotations(self.true_df, self.image_ids)
        }
        
        self.predictions = {
            "images": self.annotations["images"].copy(),
            "categories": self.annotations["categories"].copy(),
            "annotations": None
        }
        
    def __gen_categories(self, df):
        print("Generating category data...")
        
        if "class_name" not in df.columns:
            df["class_name"] = df["class_id"]
        
        cats = df[["class_name", "class_id"]]
        cats = cats.drop_duplicates().sort_values(by='class_id').values
        
        results = []
        
        for cat in cats:
            results.append({
                "id": cat[1],
                "name": cat[0],
                "supercategory": "none",
            })
            
        return results

    def __gen_images(self, image_ids):
        print("Generating image data...")
        results = []

        for idx, image_id in enumerate(image_ids):

            # Add image identification.
            results.append({
                "id": idx,
            })
            
        return results
    
    def __gen_annotations(self, df, image_ids):
        print("Generating annotation data...")
        k = 0
        results = []
        
        for idx, image_id in enumerate(image_ids):

            # Add image annotations
            for i, row in df[df["image_id"] == image_id].iterrows():

                results.append({
                    "id": k,
                    "image_id": idx,
                    "category_id": row["class_id"],
                    "bbox": np.array([
                        row["x_min"],
                        row["y_min"],
                        row["x_max"],
                        row["y_max"]]
                    ),
                    "segmentation": [],
                    "ignore": 0,
                    "area":(row["x_max"] - row["x_min"]) * (row["y_max"] - row["y_min"]),
                    "iscrowd": 0,
                })

                k += 1
                
        return results

    def __decode_prediction_string(self, pred_str):
        data = list(map(float, pred_str.split(" ")))
        data = np.array(data)

        return data.reshape(-1, 6)    
    
    def __gen_predictions(self, df, image_ids):
        print("Generating prediction data...")
        k = 0
        results = []
        
        for i, row in df.iterrows():
            
            image_id = row["image_id"]
            preds = self.__decode_prediction_string(row["PredictionString"])

            for j, pred in enumerate(preds):

                results.append({
                    "id": k,
                    "image_id": int(np.where(image_ids == image_id)[0]),
                    "category_id": int(pred[0]),
                    "bbox": np.array([
                        pred[2], pred[3], pred[4], pred[5]
                    ]),
                    "segmentation": [],
                    "ignore": 0,
                    "area": (pred[4] - pred[2]) * (pred[5] - pred[3]),
                    "iscrowd": 0,
                    "score": pred[1]
                })

                k += 1
                
        return results
                
    def evaluate(self, pred_df, n_imgs = -1):
        """Evaluating your results
        
        Arguments:
            pred_df: pd.DataFrame your predicted results in the
                     competition output format.

            n_imgs:  int Number of images use for calculating the
                     result.All of the images if `n_imgs` <= 0
                     
        Returns:
            COCOEval object
        """
        
        if pred_df is not None:
            self.predictions["annotations"] = self.__gen_predictions(pred_df, self.image_ids)

        coco_ds = COCO()
        coco_ds.dataset = self.annotations
        coco_ds.createIndex()
        
        coco_dt = COCO()
        coco_dt.dataset = self.predictions
        coco_dt.createIndex()
        
        imgIds=sorted(coco_ds.getImgIds())
        
        if n_imgs > 0:
            imgIds = np.random.choice(imgIds, n_imgs)

        cocoEval = COCOeval(coco_ds, coco_dt, 'bbox')
        cocoEval.params.imgIds  = imgIds
        cocoEval.params.useCats = True
        cocoEval.params.iouType = "bbox"
        cocoEval.params.iouThrs = np.array([0.4])

        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        
        return cocoEval

# Usage

In [13]:
test_df = pd.read_csv("/kaggle/input/splitdata/test.csv")

In [14]:
IMG_SIZE = 512
test_df['x_min'] = (test_df['x_min']/test_df['width'])*IMG_SIZE
test_df['y_min'] = (test_df['y_min']/test_df['height'])*IMG_SIZE
test_df['x_max'] = (test_df['x_max']/test_df['width'])*IMG_SIZE
test_df['y_max'] = (test_df['y_max']/test_df['height'])*IMG_SIZE

In [15]:
test_df

Unnamed: 0,image_id,class_name,class_id,rad_id,x_min,y_min,x_max,y_max,width,height
0,18a61a07e6f5f13ebfee57fa36cd8b6f,Pulmonary fibrosis,13,R9,87.333333,50.311111,182.666667,114.311111,2304,2880
1,d7aea6f37597df566725da73655edcc2,Infiltration,6,R8,348.965305,171.470987,463.050116,304.636230,2594,3395
2,0cfae3f740f87ca3b96dec765d06fad0,Aortic enlargement,0,R9,253.777778,131.200000,322.444444,198.933333,2304,2880
3,b42e930c16c0166dbeae813b47bb8b07,Pleural thickening,11,R9,328.666667,66.833333,375.000000,95.666667,3072,3072
4,d07557904cbe57fc1bdebac1e8aeefa1,Nodule/Mass,8,R9,84.000000,159.822222,89.555556,165.155556,2304,2880
...,...,...,...,...,...,...,...,...,...,...
3523,7db70125d7739e6cd0c442e7b7592d4c,Other lesion,9,R10,15.908139,164.624397,64.670044,512.000000,2961,3107
3524,970dcfd176e4fdaaf9bcb511a82e3b70,Infiltration,6,R9,331.777778,130.311111,379.333333,160.355556,2304,2880
3525,6ed469a35361b49e04971347281bc312,Aortic enlargement,0,R10,247.333333,154.133333,293.333333,198.400000,2304,2880
3526,611077472d2fc78135ad2ad32059888e,Cardiomegaly,3,R9,168.000000,234.488889,371.777778,289.066667,2304,2880


In [16]:
# You only need to run this once.
vineval = VinBigDataEval(test_df)

Generating image data...
Generating category data...
Generating annotation data...


# pred nor

In [17]:
pred_df = pd.read_csv('/kaggle/input/submisstion/submission.csv')

In [18]:
pred_df.loc[pred_df['image_id']=='0061cf6d35e253b6e7f03940592cc35e']

Unnamed: 0,image_id,PredictionString
311,0061cf6d35e253b6e7f03940592cc35e,9 0.7911 236 124 285 180 13 0.6194 358 281 398...


In [19]:
# You should evaluate after every n epochs.
cocoEvalRes = vineval.evaluate(pred_df)

Generating prediction data...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.85s).
Accumulating evaluation results...
DONE (t=0.18s).
 Average Precision  (AP) @[ IoU=0.40:0.40 | area=   all | maxDets=100 ] = 0.136
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.40:0.40 | area= small | maxDets=100 ] = 0.072
 Average Precision  (AP) @[ IoU=0.40:0.40 | area=medium | maxDets=100 ] = 0.141
 Average Precision  (AP) @[ IoU=0.40:0.40 | area= large | maxDets=100 ] = 0.229
 Average Recall     (AR) @[ IoU=0.40:0.40 | area=   all | maxDets=  1 ] = 0.121
 Average Recall     (AR) @[ IoU=0.40:0.40 | area=   all | maxDets= 10 ] = 0.175
 Average Recall     (AR) @[ IoU=0.40:0.40 | area=   all | maxDets=100 ] = 0.175
 Average Recall     (AR) @[ IoU=0.40:0.40 | area= s

In [20]:
cocoEvalRes.stats

array([ 0.1355009 , -1.        , -1.        ,  0.07160473,  0.14106882,
        0.22862415,  0.12112488,  0.17453993,  0.17453993,  0.07798001,
        0.17373596,  0.28529163])

In [41]:
recall = cocoEvalRes.eval['recall']
avg_recall = np.mean(recall)

In [42]:
avg_recall

0.1025322385837882

# pred pr 1

In [14]:
pred_df_pr1 = pd.read_csv('/kaggle/input/submisstion/submission_pr1.csv')

In [15]:
# You should evaluate after every n epochs.
cocoEvalRes = vineval.evaluate(pred_df_pr1)

Generating prediction data...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.65s).
Accumulating evaluation results...
DONE (t=0.14s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.077
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.101
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.036
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.082
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.125
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.074
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.096
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.096
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= sm

In [16]:
cocoEvalRes.stats

array([ 0.07662092,  0.10140584, -1.        ,  0.03550723,  0.08217186,
        0.12477337,  0.07374557,  0.09631998,  0.09631998,  0.03996677,
        0.10248708,  0.14713558])

# pred pr2

In [9]:
pred_df_pr2 = pd.read_csv('/kaggle/input/submisstion/submission_pr2.csv')

In [10]:
# You should evaluate after every n epochs.
cocoEvalRes = vineval.evaluate(pred_df_pr2)

Generating prediction data...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.71s).
Accumulating evaluation results...
DONE (t=0.17s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.047
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.064
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.018
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.061
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.071
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.045
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.053
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.053
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= sm

# pred pr3

In [13]:
pred_df_pr3 = pd.read_csv('/kaggle/input/submisstion/submission_pr3.csv')

In [14]:
# You should evaluate after every n epochs.
cocoEvalRes = vineval.evaluate(pred_df_pr3)

Generating prediction data...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.77s).
Accumulating evaluation results...
DONE (t=0.18s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.052
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.069
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.023
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.063
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.073
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.049
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.061
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.061
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= sm

# pred pr4

In [10]:
pred_df_pr4 = pd.read_csv('/kaggle/input/submisstion/submission_pr4.csv')

In [11]:
cocoEvalRes = vineval.evaluate(pred_df_pr4)

Generating prediction data...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.71s).
Accumulating evaluation results...
DONE (t=0.17s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.010
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.006
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.021
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.027
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.013
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.016
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.016
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= sm

# Pred pr5

In [13]:
pred_df_pr5 = pd.read_csv('/kaggle/input/submisstion/submission_pr5.csv')

In [15]:
cocoEvalRes = vineval.evaluate(pred_df_pr5)

Generating prediction data...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.52s).
Accumulating evaluation results...
DONE (t=0.11s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.073
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.096
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.035
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.085
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.135
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.065
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.087
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.087
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= sm

#### Recalculating with random samples

In [None]:
%%capture
stats = []

# Recalculate the validation score using randomly selected images
for i in range(100):
    cocoEvalRes = vineval.evaluate(pred_df = None, n_imgs = 300)
    stats.append(cocoEvalRes.stats[0])
    
avg = np.array(stats).mean()

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=[x for x in range(len(stats))], y=stats, mode="markers", name="Stats"))
fig.add_trace(go.Scatter(x=[0, 100], y=[avg, avg], mode="lines", name="Mean"))
fig.add_trace(go.Scatter(x=[0, 100], y=[0.052, 0.052], mode="lines", name="Public Baseline"))

fig.update_yaxes(
    range=[0.03, 0.07]
)

fig.update_layout(title='Results of mAP@0.4 (randomly selected 300 images)',
                  yaxis_title='Score',
                  xaxis_title='')

fig.show()