In [1]:
import argparse
import glob
from hydra import compose, initialize
import hydra
import numpy as np
import pandas as pd
import torch
import yaml
from omegaconf import DictConfig, OmegaConf
from dl4cv.utils.utils import set_seed
from dl4cv.lightning_classes.plmodel import LitODModel
from dl4cv.datasets import build_taco 
from dl4cv.datasets.taco_data import taco_val_test_collate_fn
from dl4cv.utils.object_detect_utils import get_iou, fix_orientation
from tqdm import tqdm
import os


In [2]:
hydra.core.global_hydra.GlobalHydra.instance().clear()
initialize(config_path="./configs")
inference_cfg = compose(config_name="config_taco_training")
inference_cfg["inference"]["run_name"] = 'Resnet50_Whale_bs1'
inference_cfg["inference"]["device"] = 'cuda:1'
path = f"outputs/{inference_cfg.inference.run_name}/.hydra/config.yaml"
with open(path) as cfg:
    cfg_yaml = yaml.safe_load(cfg)
cfg_yaml["inference"] = inference_cfg["inference"]
cfg_yaml["datamodule"]["test"]["params"]["num_to_return"] = 100
cfg = OmegaConf.create(cfg_yaml)


In [3]:
def main(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model inference
    Args:
        cfg: hydra config
    Returns:
        None
    """
    set_seed(cfg.training.seed)

    device = torch.device(cfg.inference.device)

    model_names = glob.glob(f"outputs/{cfg.inference.run_name}/saved_models/*.ckpt")
    _, _, test_set,_ = build_taco(cfg)
    test_set.num_to_return = 100
    # Dirty trick to get the ground truth boxes
    loader = torch.utils.data.DataLoader(
        test_set,
        collate_fn=taco_val_test_collate_fn,
        batch_size=1,
        num_workers=1,
        shuffle=False,
    )
    lit_model = LitODModel.load_from_checkpoint(checkpoint_path=model_names[0], cfg=cfg)
    lit_model.to(device)
    predictions = []
    for batch in tqdm(loader):
        # move batch elements to device
        batch = tuple(b.to(device) for b in batch)
        predictions.append(
            lit_model.nms_on_image(
                batch,
            )
        )

    return predictions




In [4]:
predictions = main(cfg)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 250/250 [02:52<00:00,  1.45it/s]


In [5]:
predictions[0].keys()

dict_keys(['kept_preds', 'P', 'gt_regions', 'gt_labels'])

In [42]:
# find an index in predictions where the kept_preds are not empty
valids = []
for i in range(len(predictions)):
    if len(predictions[i]['kept_preds']) > 0:
        valids.append(i)

In [9]:
predictions[3]
predictions[4]
predictions[6]

{'kept_preds': [{'bbox': {'x1': 188, 'y1': 97, 'x2': 536, 'y2': 367},
   'conf': 0.9986655712127686,
   'pred_class': 9,
   'true_class': 9,
   'image_id': 217},
  {'bbox': {'x1': 723, 'y1': 153, 'x2': 958, 'y2': 695},
   'conf': 0.9917469024658203,
   'pred_class': 19,
   'true_class': 19,
   'image_id': 217}],
 'P': [{'bbox': {'x1': 723, 'y1': 158, 'x2': 958, 'y2': 695},
   'conf': 0.974744439125061,
   'pred_class': 19,
   'true_class': 19,
   'image_id': 217},
  {'bbox': {'x1': 188, 'y1': 99, 'x2': 548, 'y2': 395},
   'conf': 0.9873728156089783,
   'pred_class': 9,
   'true_class': 9,
   'image_id': 217},
  {'bbox': {'x1': 188, 'y1': 88, 'x2': 549, 'y2': 397},
   'conf': 0.7924853563308716,
   'pred_class': 9,
   'true_class': 9,
   'image_id': 217},
  {'bbox': {'x1': 189, 'y1': 99, 'x2': 521, 'y2': 368},
   'conf': 0.9373885989189148,
   'pred_class': 9,
   'true_class': 9,
   'image_id': 217},
  {'bbox': {'x1': 187, 'y1': 101, 'x2': 536, 'y2': 396},
   'conf': 0.939198911190033,


In [10]:
pred = []
truth = []

for i in range(len(predictions)):
    if len(predictions[i]['kept_preds']) > 0:
        pred.append(predictions[i]['kept_preds'])
        _gt_boxes = predictions[i]['gt_regions']
        _gt_classes = predictions[i]['gt_labels']
        _truth = []
        for j in range(len(_gt_boxes)):
            _box = {'x1':_gt_boxes[j][0].item(), 'x2':_gt_boxes[j][1].item(), 'y1':_gt_boxes[j][2].item(), 'y2':_gt_boxes[j][3].item()}
            _class = _gt_classes[j].item()
            _truth.append({'bbox':_box, 'pred_class':_class})
        truth.append(_truth)

In [16]:
pred[198]

[{'bbox': {'x1': 805, 'y1': 541, 'x2': 859, 'y2': 548},
  'conf': 0.8125687837600708,
  'pred_class': 20,
  'true_class': 28,
  'image_id': 1338},
 {'bbox': {'x1': 421, 'y1': 166, 'x2': 608, 'y2': 316},
  'conf': 0.7461764812469482,
  'pred_class': 8,
  'true_class': 8,
  'image_id': 1338}]

In [101]:
def split_by_class(preds,truths):
    pred_by_class = {i:[] for i in range(29)}
    truth_by_class = {i:[] for i in range(29)}
    for pred in preds:
        if pred['true_class'] == 28:
            continue
        pred_by_class[pred['pred_class']].append(pred)
    
    for truth in truths:
        truth_by_class[truth['pred_class']].append(truth)

    valids_preds = []
    for i in range(len(pred_by_class)):
        if len(pred_by_class[i]) > 0:
            valids_preds.append(i)
    
    valids_truths = []
    for i in range(len(truth_by_class)):
        if len(truth_by_class[i]) > 0:
            valids_truths.append(i)

    preds_by_class = {i: pred_by_class[i] for i in valids_preds}
    truth_by_class ={i: truth_by_class[i] for i in valids_truths}
    
    return preds_by_class,truth_by_class


In [102]:
print(pred[4])
print(truth[4])

split_by_class(pred[4],truth[4])

[{'bbox': {'x1': 171, 'y1': 197, 'x2': 534, 'y2': 366}, 'conf': 0.9998736381530762, 'pred_class': 9, 'true_class': 9, 'image_id': 1105}, {'bbox': {'x1': 608, 'y1': 383, 'x2': 640, 'y2': 417}, 'conf': 0.995933473110199, 'pred_class': 27, 'true_class': 28, 'image_id': 1105}, {'bbox': {'x1': 592, 'y1': 381, 'x2': 888, 'y2': 497}, 'conf': 0.975390613079071, 'pred_class': 8, 'true_class': 8, 'image_id': 1105}]
[{'bbox': {'x1': 607, 'x2': 885, 'y1': 381, 'y2': 492}, 'pred_class': 8}, {'bbox': {'x1': 178, 'x2': 533, 'y1': 199, 'y2': 357}, 'pred_class': 9}]


({8: [{'bbox': {'x1': 592, 'y1': 381, 'x2': 888, 'y2': 497},
    'conf': 0.975390613079071,
    'pred_class': 8,
    'true_class': 8,
    'image_id': 1105}],
  9: [{'bbox': {'x1': 171, 'y1': 197, 'x2': 534, 'y2': 366},
    'conf': 0.9998736381530762,
    'pred_class': 9,
    'true_class': 9,
    'image_id': 1105}]},
 {8: [{'bbox': {'x1': 607, 'x2': 885, 'y1': 381, 'y2': 492}, 'pred_class': 8}],
  9: [{'bbox': {'x1': 178, 'x2': 533, 'y1': 199, 'y2': 357},
    'pred_class': 9}]})

In [103]:
from sklearn.metrics import auc

def calc_mAP_class(pred,truth):
    # sort predictions by confidence
    detections = sorted(pred, key=lambda k: k['conf'], reverse=True)

    total_truth = len(truth) # TP + FN
    
    true_pos = 0
    false_pos = 0

    recalls = []
    precisions = []

    removed_truths = [False for t in truth]

    # compute precision and recall for each detection
    for d_idx, d in enumerate(detections):
        ious = {}
        for t_idx, t in enumerate(truth):
            if removed_truths[t_idx]:
                continue
            
            iou = get_iou(d['bbox'], t['bbox'])

            if iou > 0.5:
                ious[t_idx] = iou
        
        if len(ious) == 0:
            false_pos += 1
        elif len(ious) == 1:
            true_pos += 1
            removed_truths[list(ious.keys())[0]] = True
        elif len(ious) > 1:
            true_pos += 1
            # find index with hishest iou
            max_iou = 0
            max_idx = 0
            for idx, iou in ious.items():
                if iou > max_iou:
                    max_iou = iou
                    max_idx = idx
            removed_truths[max_idx] = True
        else:
            raise Exception('Something went wrong')
        
        precision = true_pos / (true_pos + false_pos)
        recall = true_pos / total_truth

        recalls.append(recall)
        precisions.append(precision)

    if len(precisions) == 1 and  precisions[0]==1.0:
        return 1
    elif len(precisions) == 1 and  precisions[0]==0.0:
        return 0
    else:
        return auc(recalls, precisions)
    



In [104]:
def calc_mAPs_image(pred,truth):
    preds_by_class, truths_by_class = split_by_class(pred,truth)
    mAP = {}
    for c in set(list(preds_by_class.keys()) + list(truths_by_class.keys())):
        if c not in preds_by_class.keys() or c not in truths_by_class.keys():
            mAP[c] = 0
        else:
            mAP[c] = calc_mAP_class(preds_by_class[c],truths_by_class[c])
    return mAP

def calc_total_mAP(pred, truth):
    image_mAPs = {}
    for i in range(len(pred)):
        image_mAPs[i] = calc_mAPs_image(pred[i],truth[i])

    print(f'Image mAPs: {image_mAPs}')
    # mean to get class average
    class_mAPs = {i:None for i in range(29)}
    for i in range(29):
        class_mAP = [image_mAPs[j][i] for j in range(len(image_mAPs)) if i in image_mAPs[j].keys()]
        class_mAPs[i] = np.mean(class_mAP)
    
    # remove nan values
    class_mAPs = {k:v for k,v in class_mAPs.items() if not(np.isnan(v))}
    print(f'Class mAPs: {class_mAPs}')
    return  np.mean(list(class_mAPs.values()))


In [105]:
calc_total_mAP(pred,truth) 

Image mAPs: {0: {0: 0, 25: 0, 2: 0, 26: 0}, 1: {17: 0, 27: 1, 4: 0}, 2: {24: 0, 9: 1, 20: 1, 4: 0}, 3: {0: 0, 2: 1}, 4: {8: 1, 9: 1}, 5: {9: 1, 19: 1}, 6: {8: 1, 4: 0}, 7: {21: 1}, 8: {8: 1, 4: 0}, 9: {9: 0, 27: 0, 17: 1}, 10: {8: 0, 2: 0}, 11: {9: 0, 11: 0}, 12: {17: 1}, 13: {2: 0, 26: 0, 20: 0}, 14: {8: 1, 4: 1}, 15: {26: 0, 27: 0}, 16: {9: 1, 2: 0}, 17: {0: 0, 1: 0, 2: 0, 4: 0, 9: 0, 17: 0}, 18: {0: 0, 9: 1, 11: 1}, 19: {8: 1}, 20: {0: 0, 12: 0, 21: 1}, 21: {2: 1}, 22: {8: 0.3333333333333333, 9: 1, 4: 0.6000000000000001}, 23: {27: 0, 4: 0}, 24: {8: 1, 19: 0, 4: 1, 20: 1}, 25: {2: 0, 12: 0}, 26: {12: 1}, 27: {2: 0, 19: 0, 12: 0}, 28: {2: 0, 27: 0}, 29: {20: 0.5}, 30: {9: 1}, 31: {2: 1}, 32: {9: 1}, 33: {8: 0.5, 0: 0}, 34: {0: 0, 9: 0, 23: 1}, 35: {0: 0, 9: 0, 27: 0, 20: 0}, 36: {0: 0, 4: 1}, 37: {2: 1}, 38: {2: 0, 6: 0, 9: 0, 23: 1, 27: 1}, 39: {0: 0, 8: 0, 17: 0, 20: 0, 21: 0, 23: 0, 27: 0}, 40: {21: 0.5}, 41: {0: 0, 2: 0, 4: 0, 8: 0, 20: 1, 24: 0, 27: 0}, 42: {9: 0, 2: 0, 20: 0, 23

0.21762237107997648

In [115]:
def mean_average_precision(pred, truth, iou_threshold = 0.5, num_classes = 29, per_class = False):
    # compute mAP https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/object_detection/metrics/mean_avg_precision.py
    # `pred` is given in [[{'bbox':{'x1', 'x2', 'y1', 'y2'}, 'class'(int), 'conf'}, ...], ...]
    # `truth` is given in [[{'x1', 'x2', 'y1', 'y2', 'class'(int)}, more boxes...], ...]
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(1, num_classes): # class '0' is background

        TP = 0
        FP = 0
        total_true_bboxes = 0

        # list detected(predicted) objects of class 'c'
        detections = []

        for idx, prs in enumerate(pred):
          for pr in prs:
            if pr['pred_class'] == c:
                detections.append((pr['conf'], idx, pr['bbox']))

        # make checkbox for checking whether gt object was detected
        total_true_bboxes = 0
        is_detected = []
        for gts in pred:
          is_detected.append([False for _ in gts])
          total_true_bboxes += sum([gt['pred_class']==c for gt in gts])

        detections.sort(reverse=True)

        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))

        if total_true_bboxes == 0:
            continue

        for detection_idx, detection in enumerate(detections):
            # Only take out the ground_truths that have the same
            # training idx as detection
            num_gts = len(truth[detection[1]])

            # find most closest g.t box to pred as best_gt_idx
            best_iou = 0
            for idx, gt in enumerate(truth[detection[1]]):
                #print(gt, detection[2])
                
                iou = get_iou(gt['bbox'], detection[2])

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            # if considered found
            #try:
            #  print(best_iou, truth[detection[1]][best_gt_idx], detection[2])
            #except:
            #  pass
            if best_iou > iou_threshold:
                # only detect ground truth detection once
                print(len(is_detected))
                print(detection[1], best_gt_idx,best_gt_idx)
                if is_detected[detection[1]][best_gt_idx] == False:
                    # true positive and add this bounding box to seen
                    TP[detection_idx] = 1
                    is_detected[detection[1]][best_gt_idx] = True
                else: # duplicate is FP
                    FP[detection_idx] = 1
            # if IOU is lower then the detection is a false positive
            else:
                FP[detection_idx] = 1
        
        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        #if len(TP)>0 and len(FP)>0:
        #  print(TP_cumsum[-1], FP_cumsum[-1])
        #print(total_true_bboxes)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)  # ratio of detected objects!
        precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)         # ratio of predictions that are true objects!

        precisions = torch.cat((torch.tensor([1]), precisions))
        recalls = torch.cat((torch.tensor([0]), recalls))
        # torch.trapz for numerical integration
        #print(precisions, recalls, torch.trapz(precisions, recalls))
        average_precisions.append(torch.trapz(precisions, recalls))
        #print('----------')
    if per_class: 
        return average_precisions
    else:
        return sum(average_precisions) / len(average_precisions)

In [116]:
mean_average_precision(pred,truth, per_class=False)

236
74 0
236
148 0
236
173 0
236
79 7
236
83 0
236
137 0
236
165 0
236
47 0
236
42 0
236
120 0
236
3 0
236
204 0
236
144 1
236
31 0
236
108 0
236
135 0
236
72 1
236
100 6


IndexError: list index out of range