# Imports

In [1]:
import os

import numpy as np
import pandas as pd

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Usage

## Prepare evaluation inputs

In [2]:
# Execute coco_wrapper.py inside the notebook namespace so that the names it
# defines become available to the cells below.
# NOTE(review): presumably this is where `VinBigDataEval` (used in the "Run"
# section) comes from -- confirm against coco_wrapper.py.
%run ./coco_wrapper.py

In [3]:
# Execute the VinBig dataset module inside the notebook namespace.
# NOTE(review): the cells below use `DATASET_DIR`, which this notebook never
# defines itself; presumably it is provided by this script -- confirm.
%run ../../datasets/vinbig.py

### Prepare GT

In [4]:
# Class id carried by the "No finding" rows (see the `class_name` column in
# the preview below).
NO_FINDING_CLASS_ID = 14

# Load the raw annotations. Missing values are replaced with 0 in a single
# expression (no in-place mutation), so re-running the cell always starts from
# the file contents.
fpath = os.path.join(DATASET_DIR, '..', 'train.csv')
df = pd.read_csv(fpath).fillna(0)

# "No finding" rows have no box in the CSV; after the fill their corners are
# (0, 0, 0, 0). Give them the placeholder box (0, 0, 1, 1) instead.
df.loc[df["class_id"] == NO_FINDING_CLASS_ID, ['x_max', 'y_max']] = 1.0

df.head()

Unnamed: 0,image_id,class_name,class_id,rad_id,x_min,y_min,x_max,y_max
0,50a418190bc3fb1ef1633bf9678929b3,No finding,14,R11,0.0,0.0,1.0,1.0
1,21a10246a5ec7af151081d0cd6d65dc9,No finding,14,R7,0.0,0.0,1.0,1.0
2,9a5094b2563a1ef3ff50dc5c7ff71345,Cardiomegaly,3,R10,691.0,1375.0,1653.0,1831.0
3,051132a778e61a86eb147c7c6f564dfe,Aortic enlargement,0,R10,1264.0,743.0,1611.0,1019.0
4,063319de25ce7edb9b1c6b8881290140,No finding,14,R10,0.0,0.0,1.0,1.0


In [5]:
# Number of annotation rows before de-duplication.
df.shape[0]

67914

In [6]:
# Collapse the annotations to one row per (image_id, class_id) pair.
# `first()` keeps the first non-null value of every remaining column within
# each group, so any further boxes of the same class on the same image are
# discarded.
# NOTE(review): confirm that dropping those extra boxes is intended.
df = (
    df
    .groupby(by=['image_id', 'class_id'])
    .first()
    .reset_index()
)
print(df.shape[0])
df.head()

25971


Unnamed: 0,image_id,class_id,class_name,rad_id,x_min,y_min,x_max,y_max
0,000434271f63a053c4128a0ba6352c7f,14,No finding,R6,0.0,0.0,1.0,1.0
1,00053190460d56c53cc3e57321387478,14,No finding,R11,0.0,0.0,1.0,1.0
2,0005e8e3701dfb1dd93d53e2ff537b6e,4,Consolidation,R8,932.0,567.0,1197.0,896.0
3,0005e8e3701dfb1dd93d53e2ff537b6e,6,Infiltration,R10,900.0,587.0,1205.0,888.0
4,0005e8e3701dfb1dd93d53e2ff537b6e,7,Lung Opacity,R10,900.0,587.0,1205.0,888.0


In [14]:
# Write the current ground-truth frame to DATASET_DIR/true_df.csv, leaving the
# index column out of the file.
true_df_path = os.path.join(DATASET_DIR, 'true_df.csv')
df.to_csv(true_df_path, index=False)

### Prepare preds

In [8]:
# Dummy predictions: one row per distinct image, each carrying the same
# prediction string. The string uses class 14 and the box (0, 0, 1, 1), i.e.
# the same placeholder the ground truth uses for "No finding".
# NOTE(review): the fields are presumably
# "<class_id> <confidence> <x_min> <y_min> <x_max> <y_max>" -- confirm against
# the parser in coco_wrapper.py.
#
# Built as a single chain with `.assign` so the new column is added to a fresh
# frame rather than to a slice of `df` (avoids SettingWithCopyWarning) and
# nothing is mutated in place.
pred_df = (
    df[["image_id"]]
    .drop_duplicates()
    .assign(PredictionString="14 1.0 0 0 1 1")
    .reset_index(drop=True)
)

pred_df.head()

Unnamed: 0,image_id,PredictionString
0,000434271f63a053c4128a0ba6352c7f,14 1.0 0 0 1 1
1,00053190460d56c53cc3e57321387478,14 1.0 0 0 1 1
2,0005e8e3701dfb1dd93d53e2ff537b6e,14 1.0 0 0 1 1
3,0006e0a85696f6bb578e84fafa9a5607,14 1.0 0 0 1 1
4,0007d316f756b3fa0baea2ff514ce945,14 1.0 0 0 1 1


## Run

In [7]:
# Re-executes coco_wrapper.py, which the "Prepare" section already ran once.
# NOTE(review): the execution count shows this cell was run before the dummy
# predictions cell; it looks like a development-time reload and is probably
# redundant on a fresh Restart & Run All -- confirm before removing.
%run ./coco_wrapper.py

In [9]:
%%time

# Build the evaluator from the ground-truth frame. The resulting object exposes
# `annotations` (inspected further down) and `evaluate()`.
# Slow step: the recorded run took roughly 28 s.
vineval = VinBigDataEval(df)

CPU times: user 27.8 s, sys: 11.9 ms, total: 27.8 s
Wall time: 27.8 s


In [10]:
%%time

# Score the dummy predictions with the evaluator built above. The returned
# object exposes a `stats` array, which the following cells read.
cocoEvalRes = vineval.evaluate(pred_df)

creating index...
index created!
creating index...
index created!
CPU times: user 15 s, sys: 253 ms, total: 15.2 s
Wall time: 15.1 s


In [11]:
# First entry of the summary statistics.
# NOTE(review): with pycocotools' default summary this slot is the mean average
# precision over the configured IoU thresholds; the thresholds themselves are
# set inside VinBigDataEval and are not visible here -- confirm.
cocoEvalRes.stats[0]

0.04687975565642615

In [26]:
# Sanity check on the evaluator's ground truth: total number of annotations
# versus the number of distinct images they refer to.
annotation_image_ids = [
    annotation['image_id'] for annotation in vineval.annotations['annotations']
]
len(annotation_image_ids), len(set(annotation_image_ids))

(25971, 15000)

In [15]:
# Full summary vector (12 entries in the recorded output).
# NOTE(review): in pycocotools a value of -1 means the metric could not be
# computed for the configured settings -- presumably the case for the -1
# entries here; confirm against the evaluation parameters in coco_wrapper.py.
cocoEvalRes.stats

array([ 0.04687976, -1.        , -1.        ,  0.11719939,  0.        ,
        0.        ,  0.06666038,  0.06666038,  0.06666038,  0.16665095,
        0.        ,  0.        ])

In [23]:
# Same expression as the earlier `cocoEvalRes.stats[0]` cell, shown again.
# NOTE(review): duplicate of that cell -- probably safe to delete one of them.
cocoEvalRes.stats[0]

0.04687975565642615