In [2]:
import csv
import copy
import pandas as pd
import numpy as np
from tqdm import tqdm, tqdm_notebook
from util import box3d, evatwobox

In [3]:
### paths to the ground-truth csv and the prediction csv that get compared below
### (the name `groud_truth_file` is misspelled, but the reader cell refers to it
### under this name, so it is kept as is)
### NOTE(review): these are absolute paths on one machine -- point them at your own files
groud_truth_file = '/home/ys3152/val_gt.csv'
prediction_file = '/home/ys3152/unet_ensemble_180_val_pred.csv'

In [4]:
### read both csv files completely and drop the header row of each.
### The rows are loaded into lists (instead of keeping two live csv.reader
### objects) so that the files are closed right away and the evaluation cell
### can be run again without re-opening them.
with open(groud_truth_file, newline='') as gt_fh:
    gt_header, *trainreader = csv.reader(gt_fh)
with open(prediction_file, newline='') as pred_fh:
    pred_header, *testreader = csv.reader(pred_fh)

### show the prediction header as a quick sanity check of the column layout
pred_header

['Id', 'PredictionString']

In [5]:
def get_ap(recalls, precisions):
    """Calculate average precision as the area under the precision envelope.

    The (recall, precision) points may be passed in any order: they are sorted
    by recall before integrating, so points collected in some other order
    (for example one point per threshold) do not produce negative recall steps.

    Args:
      recalls: 1-D sequence (list or array) of recall values.
      precisions: 1-D sequence of precision values, same length as ``recalls``.
    Returns:
      (float): average precision.
    Raises:
      ValueError: if ``recalls`` and ``precisions`` differ in shape.
    """
    recalls = np.asarray(recalls, dtype=float)
    precisions = np.asarray(precisions, dtype=float)
    if recalls.shape != precisions.shape:
        raise ValueError(
            "recalls and precisions must have the same shape, got {} and {}".format(
                recalls.shape, precisions.shape))

    # the summation below walks the recall axis from left to right, so order
    # the points by recall first (stable: already-sorted input is unchanged)
    order = np.argsort(recalls, kind="stable")
    recalls = recalls[order]
    precisions = precisions[order]

    # append sentinel values at both ends
    recalls = np.concatenate(([0.0], recalls, [1.0]))
    precisions = np.concatenate(([0.0], precisions, [0.0]))

    # precision envelope: every entry becomes the maximum of itself and all
    # entries to its right (running maximum taken from the right-hand end)
    precisions = np.maximum.accumulate(precisions[::-1])[::-1]

    # to calculate area under PR curve, look for points where X axis (recall) changes value
    i = np.where(recalls[1:] != recalls[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((recalls[i + 1] - recalls[i]) * precisions[i + 1])
    return ap



def get_envelope(precisions):
    """Turn a precision array into its envelope, in place.

    Each entry is replaced by the largest value found at or after its own
    position; the array that was passed in is modified and handed back.
    Args:
      precisions: 1-D numpy array of precision values (overwritten).
    Returns:
      The same array object, holding the envelope.
    """
    if precisions.size > 1:
        # running maximum computed from the last element towards the first,
        # then written back into the caller's array
        precisions[:] = np.maximum.accumulate(precisions[::-1])[::-1]
    return precisions

In [6]:
### nrows is used as early stop, for the use of debugging:
### -1 evaluates every row, any other value N evaluates only the first N rows
nrows = -1


progress_bar = tqdm_notebook(zip(trainreader, testreader))
image_score = []
ap = []
for idx, (row1, row2) in enumerate(progress_bar):
    ## stop before scoring row number `nrows`, so exactly `nrows` rows are used
    if nrows != -1 and idx >= nrows:
        break

    ## convert the raw string data into lists of boxes: 9 values per prediction,
    ## 8 values per ground truth box; trailing incomplete groups are dropped.
    ## This is done once per row because nothing below modifies the parsed boxes.
    listpred = row2[1].split(" ")
    predicts = [listpred[k * 9: k * 9 + 9] for k in range(len(listpred) // 9)]
    listgt = row1[1].split(" ")
    gt = [listgt[k * 8: k * 8 + 8] for k in range(len(listgt) // 8)]

    thd_score = []
    precision = []
    recall = []
    for thd in np.arange(0.5, 1.0, 0.05):
        tp = 0
        fp = 0
        ## indices of ground truth boxes already claimed at this threshold:
        ## a single ground truth box cannot be used by multiple predictions
        claimed = set()

        ## every prediction is counted exactly once: a true positive if it
        ## claims a free ground truth box with an equal label (prediction
        ## field 8 vs ground truth field 7 -- presumably the class name) that
        ## evatwobox accepts at this threshold, otherwise a false positive.
        ## NOTE(review): predictions are matched in file order, not by confidence.
        for pred in predicts:
            for j, gt_box in enumerate(gt):
                if j in claimed or pred[8] != gt_box[7]:
                    continue
                if evatwobox(pred, gt_box, thd):
                    claimed.add(j)
                    tp += 1
                    break
            else:
                ## no free ground truth box with the same label was hit
                fp += 1

        ## ground truth boxes that no prediction claimed
        fn = len(gt) - tp

        ### score is true positive / (true positive + false positive + false negative)
        ### the 0.001 is used to prevent dividing by 0 from happening
        score = tp / (tp + fp + fn + 0.001)
        precision.append(tp / (tp + fp + 0.001))
        recall.append(tp / (tp + fn + 0.001))
        thd_score.append(score)

    ## NOTE(review): these PR points are in threshold order, not recall order
    ap.append(get_ap(recall, precision))
    image_score.append(np.mean(thd_score))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [None]:
# overall score: average of the per-row scores collected in image_score
print("The score is : {}".format(np.asarray(image_score).mean()))

In [None]:
# mAP: average of the per-row get_ap values collected in ap
print("The mAP is : {}".format(np.asarray(ap).mean()))