# Evaluation script
Use a ground-truth file and a detection file, both in COCO format, to obtain AP (average precision) and AR (average recall).

In [3]:
import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import matplotlib.pyplot as plt
import numpy as np

# Load the ground truth (COCO-format annotation file).
# gt_file is also re-read as plain JSON by the manual TP/FP/FN cell further down.
gt_file = "filtered_output.json"
coco_gt = COCO(gt_file)

# Load the detection results and attach them to the ground-truth index.
# dt_file is likewise re-read as plain JSON by the manual TP/FP/FN cell.
dt_file = "output/detection_results.json"
coco_dt = coco_gt.loadRes(dt_file)

# Initialize the COCO evaluation tool for bounding-box evaluation
coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')

# Define custom size categories for the evaluation; this assignment replaces
# whatever area ranges COCOeval was constructed with. Values are areas in pixels^2.
coco_eval.params.areaRng = [
    [0, 1e10],  # all objects: effectively unbounded
    [0, 200**2],     # small objects: between 0 and 200x200 pixels
    [200**2, 400**2],   # medium objects: between 200x200 and 400x400 pixels
    [400**2, 1e10]      # large objects: larger than 400x400 pixels
]
# Labels for the ranges above, listed in the same order
coco_eval.params.areaRngLbl = ['all','small', 'medium', 'large']

# Run the evaluation: per-image matching, then accumulation, then the printed summary
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.07s).
Accumulating evaluation results...
DONE (t=0.05s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.467
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.535
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.532
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.289
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.484
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.644
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.589
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.651
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets

In [52]:
import json
import numpy as np

# Read the detection list and the ground-truth document back in as plain JSON
with open(dt_file) as dt_handle:
    detection_results = json.load(dt_handle)

with open(gt_file) as gt_handle:
    gt_annotations = json.load(gt_handle)

# A full COCO document keeps its boxes under 'annotations'; anything else is used as-is
if 'annotations' in gt_annotations:
    gt_annotations = gt_annotations['annotations']

# Group the ground-truth boxes by image id for quick lookup; every entry carries
# a 'matched' flag that the matching step flips once a detection claims the box
gt_dict = {}
for gt_entry in gt_annotations:
    gt_dict.setdefault(gt_entry['image_id'], []).append({
        'category_id': gt_entry['category_id'],
        'bbox': gt_entry['bbox'],
        'matched': False,
    })

# Compute IoU
def compute_iou(box1, box2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Parameters
    ----------
    box1, box2 : sequence of four numbers
        Boxes given as ``[x, y, width, height]``, where ``(x, y)`` is the
        corner with the smallest coordinates.

    Returns
    -------
    float
        Intersection area divided by union area. Returns ``0.0`` when the
        union has no area (e.g. both boxes are degenerate), instead of
        raising ``ZeroDivisionError``.
    """
    x1, y1, w1, h1 = box1
    x2, y2, w2, h2 = box2

    # Corners of the overlap rectangle (may be inverted if the boxes are disjoint)
    xi1 = max(x1, x2)
    yi1 = max(y1, y2)
    xi2 = min(x1 + w1, x2 + w2)
    yi2 = min(y1 + h1, y2 + h2)

    # Clamp each side at zero so disjoint boxes yield an empty intersection
    inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)

    union_area = w1 * h1 + w2 * h2 - inter_area

    # Zero-area boxes have no meaningful overlap ratio; treat them as non-overlapping
    if union_area <= 0:
        return 0.0

    return inter_area / union_area

# IoU at or above which a detection may claim a ground-truth box
iou_threshold = 0.5

# Initialize counters
true_positives = 0
false_positives = 0
false_negatives = 0

# Clear any match flags left over from a previous run so this code is idempotent
for annotations in gt_dict.values():
    for gt in annotations:
        gt['matched'] = False

# Visit detections from most to least confident so that, when several detections
# compete for one ground-truth box, the strongest one wins. The sort is stable:
# detections with equal (or missing) scores keep their original file order.
ranked_detections = sorted(
    detection_results,
    key=lambda det: det.get('score', 0.0),
    reverse=True,
)

# Assign each detection to the best-overlapping, still-unmatched ground-truth box
# of the same category in the same image. Picking the best IoU (rather than the
# first box above the threshold) avoids stealing a box that another detection needs
# while a better-fitting box is left unmatched.
for det in ranked_detections:
    image_id = det['image_id']
    category_id = det['category_id']
    det_bbox = det['bbox']

    best_gt = None
    best_iou = 0.0
    for gt in gt_dict.get(image_id, []):
        if gt['matched'] or gt['category_id'] != category_id:
            continue
        iou = compute_iou(det_bbox, gt['bbox'])
        # Strict '>' keeps the earliest box on ties
        if iou >= iou_threshold and (best_gt is None or iou > best_iou):
            best_gt = gt
            best_iou = iou

    if best_gt is None:
        false_positives += 1
    else:
        best_gt['matched'] = True  # Each ground-truth box can be claimed only once
        true_positives += 1

# Every ground-truth box that no detection claimed is a false negative
false_negatives = sum(
    1
    for annotations in gt_dict.values()
    for gt in annotations
    if not gt['matched']
)

# True negatives are not applicable in this context
true_negatives = "Na"

# Calculate total number of objects based on ground truth
total_objects = len(gt_annotations)

print(f'Number of true positives: {true_positives}')
print(f'Number of false positives: {false_positives}')
print(f'Number of false negatives: {false_negatives}')
print(f'Number of true negatives: {true_negatives}')
print(f'Total number of objects (ground truth): {total_objects}')

# Sanity check: every ground-truth box is either matched (TP) or missed (FN)
assert true_positives + false_negatives == total_objects, "TP + FN should equal the total number of objects"

Number of true positives: 43
Number of false positives: 25
Number of false negatives: 18
Number of true negatives: Not applicable
Total number of objects (ground truth): 61
