# Codes for Evaluating the Faster R-CNN Model performance on the test data
The evaluations in this notebook were run on Jarret's M1 Max MacBook Pro. 

## Initial imports and constants

In this notebook, the imports required to run each cell are defined within that cell, if they are not already defined.

In [1]:
import torch
import os
from torchvision.ops import boxes as box_ops

# Define constants
iou_threshold = 0.5
num_classes = 2  # Change to the number of classes in your dataset
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


## Load in the test data and instantiate the DataLoader

In [2]:
from torch.utils.data import DataLoader
from src.PolypDataset import PolypDataset
from src.config import *
from src.utils import Averager, collate_fn, get_transform

# Define the test data directory
test_data_directory = os.path.join(os.getcwd(), 'data', 'test')

# Load the test dataset
test_dataset = PolypDataset(
    dir_path = TEST_DIR,
    width = RESIZE_TO,
    height = RESIZE_TO,
    classes = CLASSES,
    transforms = get_transform(train=False)
)

test_dataloader = DataLoader(
    dataset=test_dataset, 
    batch_size=1, 
    shuffle=False, 
    num_workers=4 if torch.cuda.is_available() else 2,
    pin_memory=True,
    drop_last=False,
    collate_fn=collate_fn
)

## Instantiate the model and load the weights

In [3]:
from src.model import create_model

model_weights = os.path.join('weights', 'model.pth')
model = create_model(num_classes=NUM_CLASSES)
model.load_state_dict(torch.load(model_weights, map_location=device))
model.to(device)
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

## Predicting on the test set

### Function to save predictions and labels as pickle files

In [5]:
import os
import pickle

def save_outputs(predictions, labels, folder_path):
    """
    Pickles the predictions and labels objects and saves them to the given folder path.
    
    Args:
    predictions (list): list of tensors containing model predictions.
    labels (list): list of dictionaries containing corresponding labels.
    folder_path (str): path to the folder where the pickled files will be saved.
    """
    # Create folder if it doesn't exist
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    
    # Pickle objects and save to files
    with open(os.path.join(folder_path, "predictions.pkl"), "wb") as f:
        pickle.dump(predictions, f)
    with open(os.path.join(folder_path, "labels.pkl"), "wb") as f:
        pickle.dump(labels, f)

### Run through the test data and get predictions for each image.

In [4]:
from tqdm import tqdm_notebook as tqdm

all_predictions = []
all_labels = []

with tqdm(test_dataloader) as pbar:
    for image, label in pbar:
        image = list(im.to(device) for im in image)
        label = [{k: v.to(device) for k, v in t.items()} for t in label]
        with torch.no_grad():
            prediction = model(image)
        all_predictions.append(prediction)
        all_labels.append(label)


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  with tqdm(test_dataloader) as pbar:


  0%|          | 0/4872 [00:00<?, ?it/s]

### Save the predictions and labels to a pickle file

In [6]:
predictions_dir = os.path.join(os.getcwd(), 'predictions')
save_outputs(all_predictions, all_labels, predictions_dir)

## Calculating the metrics from the predictions 

### Calculate the intersection over union (IoU) of two sets of boxes.

It might be more efficient to push the labels and predictions into numpy arrays in order to speed up the calculations. (look at `src/model_evaluation.py` for an example of how to calculate over two sets of boxes)

The bbox format needs to be examined to make this work. 

In [8]:
from torchvision.ops import boxes as box_ops

def bbox_iou(box1, box2):
    """Calculate IoU between two bounding boxes"""
    box1_area = box_ops.box_area(box1)
    box2_area = box_ops.box_area(box2)
    inter_area = box_ops.box_iou(box1, box2)
    iou = inter_area / (box1_area + box2_area - inter_area)
    return iou

In [9]:
all_ious = []
with tqdm(zip(all_predictions, all_labels), total=len(all_predictions)) as pbar:
    for predictions, labels in pbar:
        for prediction, label in zip(predictions, labels):
            predicted_boxes = prediction['boxes']
            predicted_scores = prediction['scores']
            predicted_labels = prediction['labels']
            true_boxes = label['boxes']
            true_labels = label['labels']
            iou_for_image = []
            for box_idx, true_box in enumerate(true_boxes):
                true_label = true_labels[box_idx]
                if true_label == 0:  # Skip the background class
                    continue
                max_iou = 0
                for pred_box_idx, predicted_box in enumerate(predicted_boxes):
                    predicted_label = predicted_labels[pred_box_idx]
                    if predicted_label == true_label:
                        iou = bbox_iou(true_box, predicted_box)
                        if iou > max_iou:
                            max_iou = iou
                iou_for_image.append(max_iou)
            all_ious.append(iou_for_image)


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  with tqdm(zip(all_predictions, all_labels), total=len(all_predictions)) as pbar:


  0%|          | 0/4872 [00:00<?, ?it/s]

IndexError: too many indices for tensor of dimension 1

### Calcluate the Recall, Precision, F1 Score for a given IoU threshold

In [None]:
def calculate_metrics(iou, threshold):
    """Calculate recall, precision, and f1 score for a given IoU threshold"""
    tp = (iou >= threshold).sum().item()
    fp = (iou < threshold).sum().item()
    fn = (iou <= threshold).sum().item()
    recall = tp / (tp + fn)
    precision = tp / (tp + fp)
    f1 = 2 * (precision * recall) / (precision + recall)
    return recall, precision, f1

average_recalls = []
average_precisions = []
average_f1s = []

for threshold in [0.5, 0.6, 0.7, 0.8, 0.9]:
    total_recall = 0
    total_precision = 0
    total_f1 = 0
    for ious in all_ious:
        if len(ious) > 0:
            iou = torch.Tensor(ious).to(device)
            recall, precision, f1 = calculate_metrics(iou, threshold)
            total_recall += recall
            total_precision += precision
            total_f1 += f1
    num_images_with_boxes = len([ious for ious in all_
