In [1]:
from torchvision.ops import box_iou
import os
import pandas as pd
import json
import torch
import numpy as np
from collections import defaultdict

  from .autonotebook import tqdm as notebook_tqdm


In [59]:
BASE_DIR = "/mnt/Enterprise/safal/AI_assisted_microscopy_system/"
sample_types = ["smartphone_sample", "smartphone_reference", "brightfield_sample", "brightfield_reference"]
model_type = "retinanet"

In [23]:
def calculate_precision_recall_f1(pred_annotations_file, gt_annotations_file):
    # Precision x Recall is obtained individually by each class
    # Loop through each class and calculate the precision and recall

    # Precision = TP / (TP + FP)
    # Recall = TP / (TP + FN)
    gt_annotations = json.load(open(gt_annotations_file))
    pred_annotations = json.load(open(pred_annotations_file))

    gt_annotations_df = pd.DataFrame(gt_annotations["annotations"])
    pred_annotations_df = pd.DataFrame(pred_annotations)

    # change bbox width and height to x2, y2
    pred_annotations_df["bbox"] = pred_annotations_df["bbox"].apply(
        lambda x: [x[0], x[1], x[0] + x[2], x[1] + x[3]]
    )
    gt_annotations_df["bbox"] = gt_annotations_df["bbox"].apply(
        lambda x: [x[0], x[1], x[0] + x[2], x[1] + x[3]]
    )

    categories = sorted(gt_annotations_df.category_id.unique())

    # dataframe to store the precision, recall and f1 score for each class
    metrics_df = pd.DataFrame(
        columns=["category", "precision", "recall", "f1_score", "TP", "FP"]
    )

    for category in categories:
        # get the ground truth annotations for the current class
        gt_annotations_df_class = gt_annotations_df[
            gt_annotations_df.category_id == category
        ]
        # get the predicted annotations for the current class
        pred_annotations_df_class = pred_annotations_df[
            pred_annotations_df.category_id == category
        ]

        # sort the predicted annotations by score
        pred_annotations_df_class = pred_annotations_df_class.sort_values(
            by="score", ascending=False
        )

        # filter predictions with score > 0.3
        pred_annotations_df_class = pred_annotations_df_class[
            pred_annotations_df_class.score > 0.1
        ]

        true_positives_class = 0
        false_positives_class = 0

        # get image ids for the current class from both ground truth and predicted annotations
        image_ids = pred_annotations_df_class["image_id"].unique()
        images_len = len(image_ids)

        for image in image_ids:
            # get the ground truth annotations for the current image
            gt_annotations_df_image = gt_annotations_df_class[
                gt_annotations_df_class.image_id == image
            ]
            # get the predicted annotations for the current image
            pred_annotations_df_image = pred_annotations_df_class[
                pred_annotations_df_class.image_id == image
            ]

            # get the ground truth bounding boxes
            gt_bboxes = list(gt_annotations_df_image.bbox.values)
            gt_bboxes = torch.tensor(gt_bboxes)

            # get the predicted bounding boxes
            pred_bboxes = list(pred_annotations_df_image.bbox.values)
            pred_bboxes = torch.tensor(pred_bboxes)

            if len(gt_bboxes) == 0:
                false_positives_class += len(pred_bboxes)
                continue

            # get the intersection over union for each predicted bounding box
            ious = box_iou(pred_bboxes, gt_bboxes)

            # get the maximum iou for each ground truth bounding box
            max_ious, _ = torch.max(ious, dim=0)

            # get the indices of the predicted bounding boxes with iou > 0.5
            tp_indices = torch.where(max_ious >= 0.5)[0]
            # print(ious)

            # get the indices of the predicted bounding boxes with iou < 0.5
            fp_indices = torch.where(max_ious < 0.5)[0]

            # update the true positives and false positives
            true_positives_class += len(tp_indices)
            false_positives_class += len(fp_indices)

        # calculate the precision and recall
        precision = true_positives_class / (
            true_positives_class + false_positives_class
        )
        recall = true_positives_class / gt_annotations_df_class.shape[0]

        category_name = gt_annotations["categories"][category]["name"]
        f1_score = 2 * (precision * recall) / (precision + recall)

        category_metrics_df = pd.DataFrame(
            {
                "category": category_name,
                "precision": precision,
                "recall": recall,
                "f1_score": f1_score,
                "TP": true_positives_class,
                "FP": false_positives_class,
            },
            index=[0],
        )

        # concatenate the metrics for the current class to the metrics dataframe
        metrics_df = pd.concat([metrics_df, category_metrics_df], axis=0)

    return metrics_df


## Combine metrics from all folds into a single csv

In [60]:
# combine metrics from all folds into single csv with mean and variance
for sample_type in sample_types:
    metrics_df_all = None
    for fold in range(1, 6):
        gt_annotation_file = os.path.join(
            BASE_DIR,
            f"cysts_dataset_all/{sample_type}/fold_{fold}/{sample_type}_coco_annos_val.json",
        )

        pred_annotation_file = os.path.join(
            BASE_DIR,
            f"outputs/{sample_type}/{model_type}/fold_{fold}/results.bbox.json"
        )
        if not os.path.exists(pred_annotation_file):
            continue

        metrics_file = os.path.join(
            BASE_DIR,
            f"outputs/{sample_type}/{model_type}/fold_{fold}/metrics_pr.csv"
        )
        metrics_df = calculate_precision_recall_f1(pred_annotation_file, gt_annotation_file)
        
        if fold == 1:
            metrics_df_all = metrics_df
        else:
            metrics_df_all = pd.concat([metrics_df_all, metrics_df], ignore_index=True)
    
    # calculate class wise mean and standard deviation
    metrics_df_all = metrics_df_all.groupby("category").agg(
        {
            "precision": ["mean", "std"],
            "recall": ["mean", "std"],
            "f1_score": ["mean", "std"],
        }
    )
    metrics_df_all.columns = ["_".join(x) for x in metrics_df_all.columns]
    metrics_df_all = metrics_df_all.reset_index()
    metrics_df_all.to_csv(
        os.path.join(BASE_DIR, f"outputs/{sample_type}/{model_type}/metrics_pr.csv"),
        index=False,
        float_format="%.3f",
    )
    print("sample_type: ", sample_type)
    print(metrics_df_all)




sample_type:  smartphone_sample
  category  precision_mean  precision_std  recall_mean  recall_std  \
0   Crypto        0.422358       0.054276     0.822667    0.024773   
1  Giardia        0.291082       0.030711     0.862016    0.057513   

   f1_score_mean  f1_score_std  
0       0.556436      0.047688  
1       0.433837      0.033559  
sample_type:  smartphone_reference
  category  precision_mean  precision_std  recall_mean  recall_std  \
0   Crypto        0.504157       0.051878     0.784562    0.045485   
1  Giardia        0.641107       0.030574     0.915260    0.016448   

   f1_score_mean  f1_score_std  
0       0.613215      0.050537  
1       0.753749      0.023947  
sample_type:  brightfield_sample
  category  precision_mean  precision_std  recall_mean  recall_std  \
0   Crypto        0.479775       0.045590     0.945138    0.028571   
1  Giardia        0.499527       0.032596     0.946294    0.020586   

   f1_score_mean  f1_score_std  
0       0.635760      0.044433  
1  

In [None]:
metrics_df_all

Unnamed: 0,category,precision_mean,precision_std,recall_mean,recall_std,f1_score_mean,f1_score_std
0,Crypto,0.600674,0.121902,0.966356,0.017384,0.734561,0.088251
1,Giardia,0.905362,0.020524,0.984545,0.004398,0.943187,0.010864
