### Evaluation of a single model and Average Domain Accuracy 2021 Calculation

- This notebook evaluates a given single model (not an ensemble) on the GWHD 2021 test dataset and calculates the Average Domain Accuracy 2021 (AA2021)
- You can reach the data via: https://www.kaggle.com/datasets/cetinkayaevren/gwdh-2021 
- You need to put the data in the `data(2021)` folder
- You need to put your best model (`best.pt`) in the `output_models` folder

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import seaborn as sns
from PIL import Image, ImageDraw
import os
import shutil

# Model
import ultralytics
from ultralytics import YOLO
#from ultralytics import YOLOv10
ultralytics.checks()
import torch
import csv

#Test Result Saving
from comet_ml import ExistingExperiment


Ultralytics YOLOv8.2.52  Python-3.10.14 torch-2.3.0 CUDA:0 (NVIDIA GeForce GTX 1050, 4096MiB)
Setup complete  (12 CPUs, 15.9 GB RAM, 233.3/237.9 GB disk)


In [2]:
# Weights of the trained model that this notebook evaluates
best_weights_path = "output_models\\best.pt"
model_test = YOLO(best_weights_path)

In [3]:
# Read the test CSV file (the preview below shows the columns image_name, BoxesString, domain).
# The path is assembled with os.path.join instead of a literal containing "\g"
# and "\c": those are invalid escape sequences that newer Python versions warn
# about, and joining the parts also works on non-Windows systems.
test_csv_path = os.path.join("data(2021)", "gwhd_2021", "competition_test.csv")
test_df = pd.read_csv(test_csv_path)
test_df.head()

Unnamed: 0,image_name,BoxesString,domain
0,255b6ca9fea63f44125e5174bc932470b604c760430715...,481 820 604 922;655 957 732 1024;930 926 1013 ...,UQ_7
1,7f5eb37cab658de6fd0d688bf27f16e423794fed6184d8...,896 911 977 955;800 898 821 941;770 867 804 90...,UQ_7
2,7bcfff43b356f4a94948367782aa704a37ff4579baf45d...,892 993 922 1024;844 943 871 965;758 926 801 9...,UQ_7
3,e535384eda9d0f9c6ac57dd9397d5d614e4cad48c144d8...,648 886 722 1024;392 950 507 1024;876 677 981 ...,UQ_7
4,66e9fa7379fd7b7fd64024ac1b03b8e56f9ad020c10635...,559 939 623 1009;775 875 829 919;853 883 888 9...,UQ_7


In [4]:
def make_predictions(image_path, trained_model):
    """Predict the bounding boxes of a single image.

    Args:
        image_path: Path of the image handed to ``trained_model.predict``.
        trained_model: Object exposing ``predict(path, stream=True)``; in this
            notebook it is the YOLO model loaded from the output folder.

    Returns:
        The ``boxes.xyxy`` attribute of the first prediction result, moved to
        the CPU and converted with ``.numpy()``.
    """
    # With stream=True the results are consumed lazily; only the first one is needed
    result_stream = trained_model.predict(image_path, stream=True)
    first_result = next(result_stream)

    xyxy_boxes = first_result.boxes.xyxy
    return xyxy_boxes.cpu().numpy()

In [5]:
# Absolute folder of the test images, resolved from the current working directory
project_path = os.getcwd()
test_data_path = 'data(2021)\\gwhd_2021\\test\\images'
test_total_path = os.path.join(project_path, test_data_path)

# Map every test image name to the bounding boxes predicted for that image
predictions_dict = {
    image_name: make_predictions(os.path.join(test_total_path, image_name), model_test)
    for image_name in test_df["image_name"]
}




  return F.conv2d(input, weight, bias, self.stride,


image 1/1 c:\Users\evren\Desktop\application_project\project_code\data(2021)\gwhd_2021\test\images\255b6ca9fea63f44125e5174bc932470b604c76043071522ba0ef63abb1a544b.png: 1024x1024 24 wheat_heads, 38.6ms

image 1/1 c:\Users\evren\Desktop\application_project\project_code\data(2021)\gwhd_2021\test\images\7f5eb37cab658de6fd0d688bf27f16e423794fed6184d8b82a42612e20ff01ce.png: 1024x1024 47 wheat_heads, 36.4ms

image 1/1 c:\Users\evren\Desktop\application_project\project_code\data(2021)\gwhd_2021\test\images\7bcfff43b356f4a94948367782aa704a37ff4579baf45d6f7b58f54681ea73eb.png: 1024x1024 59 wheat_heads, 36.7ms

image 1/1 c:\Users\evren\Desktop\application_project\project_code\data(2021)\gwhd_2021\test\images\e535384eda9d0f9c6ac57dd9397d5d614e4cad48c144d861dc83b039ac327a18.png: 1024x1024 33 wheat_heads, 32.9ms

image 1/1 c:\Users\evren\Desktop\application_project\project_code\data(2021)\gwhd_2021\test\images\66e9fa7379fd7b7fd64024ac1b03b8e56f9ad020c106354f00d7329b44b24615.png: 1024x1024 45 wheat_

### Collecting Ground Truths in a dictionary

In [6]:
# Map every image name to its ground-truth boxes.
# "BoxesString" separates boxes with ";" and the values of one box with " ",
# so each box ends up as a list of strings.
gt_dictionary = {
    image_name: [box.split(" ") for box in boxes_string.split(";")]
    for image_name, boxes_string in zip(test_df["image_name"], test_df["BoxesString"])
}

### Calculating IoU between box pairs (a 0.5 threshold is applied per image)

In [7]:
import numpy as np

# Function to calculate IoU (Intersection over Union)
def calculate_iou(box1, box2):
    """Return the Intersection over Union (IoU) of two boxes.

    Args:
        box1: Sequence indexed as [x1, y1, x2, y2].
        box2: Sequence indexed as [x1, y1, x2, y2].

    Returns:
        Intersection area divided by union area. Returns 0.0 when the union
        area is not positive (e.g. both boxes are degenerate), instead of
        dividing by zero.
    """
    # Corners of the overlapping rectangle (if any)
    inter_x1 = max(box1[0], box2[0])
    inter_y1 = max(box1[1], box2[1])
    inter_x2 = min(box1[2], box2[2])
    inter_y2 = min(box1[3], box2[3])

    # Clamp at 0 so that disjoint boxes give an empty intersection
    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)

    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

    union_area = box1_area + box2_area - inter_area

    # Zero-area boxes would otherwise raise ZeroDivisionError (or yield NaN for numpy scalars)
    if union_area <= 0:
        return 0.0

    return inter_area / union_area


### Getting TP, FP, FN values for each Image

In [8]:
def evaluate(predictions, ground_truths, iou_threshold=0.5):
    """Count TP, FP and FN for one image at the given IoU threshold.

    Each prediction is compared against the ground-truth boxes in order and is
    matched to the first still-unmatched ground truth whose IoU reaches
    ``iou_threshold``. A ground truth can be matched at most once.

    Args:
        predictions: Iterable of predicted boxes, each indexed as [x1, y1, x2, y2].
        ground_truths: Iterable of ground-truth boxes, each a sequence of four
            values convertible with ``int``. "no_box" placeholders (either the
            string itself or a one-element sequence holding it) are ignored,
            so an image without ground truth counts every prediction as FP.
        iou_threshold: Minimum IoU for a prediction to count as a match.

    Returns:
        Tuple ``(TP, FP, FN)``.
    """
    def _is_no_box(gt_box):
        # The placeholder may arrive as "no_box" or as the split form ["no_box"]
        return gt_box == "no_box" or (len(gt_box) == 1 and gt_box[0] == "no_box")

    # Parse the ground truths once instead of once per prediction
    gt_boxes = [
        list(map(int, gt_box))
        for gt_box in ground_truths
        if not _is_no_box(gt_box)
    ]

    TP, FP = 0, 0
    matched_gt_indices = set()
    for pred_box in predictions:
        for i, gt_box in enumerate(gt_boxes):
            if i in matched_gt_indices:
                continue
            if calculate_iou(pred_box, gt_box) >= iou_threshold:
                TP += 1
                matched_gt_indices.add(i)
                break
        else:
            # No unmatched ground truth reached the threshold for this prediction
            FP += 1

    # Every ground truth that was never matched is a miss
    FN = len(gt_boxes) - len(matched_gt_indices)
    return TP, FP, FN

### Calculation Based on Domain-Specific Approach

In [9]:
# Per-domain accumulators: summed image accuracy and number of images, both starting at 0
keys = test_df["domain"].unique()
default_val = 0
domain_total_acc = {domain: default_val for domain in keys}
domain_sample_count = {domain: default_val for domain in keys}

In [10]:
# Score every test image and accumulate the result under the image's domain.
# Image accuracy is TP / (TP + FP + FN); an image whose ground truth is "no_box"
# scores 1 when nothing was predicted and 0 otherwise.
for image_name, domain in zip(test_df["image_name"], test_df["domain"]):
    gt_labels = gt_dictionary[image_name]
    predicted_labels = predictions_dict[image_name]
    if gt_labels[0][0] != "no_box":
        TP, FP, FN = evaluate(predicted_labels, gt_labels)
        accuracy_img = TP/(TP + FP + FN)
    elif len(predicted_labels)>0:
        accuracy_img = 0.
    else:
        accuracy_img = 1.
    domain_total_acc[domain] += accuracy_img
    domain_sample_count[domain] += 1

In [11]:
# Mean image accuracy of each domain: accumulated accuracy divided by the image count
avg_domains = {}
for domain, total_acc in domain_total_acc.items():
    avg_domains[domain] = total_acc / domain_sample_count[domain]
print(avg_domains)

{'UQ_7': 0.5036104411187201, 'UQ_8': 0.4041909537289468, 'UQ_9': 0.38241221262235453, 'UQ_10': 0.3999058451085659, 'UQ_11': 0.29603601660444484, 'Terraref_1': 0.07989467546476127, 'Terraref_2': 0.028734578624693044, 'KSU_1': 0.3989513268342884, 'KSU_2': 0.35935510054703257, 'KSU_3': 0.3438826684614344, 'KSU_4': 0.34285348178045616, 'CIMMYT_1': 0.274067042586207, 'CIMMYT_2': 0.3718345668845338, 'CIMMYT_3': 0.2970612259842221, 'Ukyoto_1': 0.29943398419218437, 'NAU_2': 0.49106413057614984, 'NAU_3': 0.5801940243432128, 'ARC_1': 0.3579158649895922}


In [12]:
# Average Domain Accuracy: unweighted mean of the per-domain averages
domain_means = list(avg_domains.values())
average_domain_accuracy = sum(domain_means) / len(domain_means)
print("AVERAGE DOMAIN ACCURACY FOR GWHC 2021: ", average_domain_accuracy)

AVERAGE DOMAIN ACCURACY FOR GWHC 2021:  0.3450776744695444


In [13]:
# OPTIONAL: Storing the experiment's Average Domain Accuracy results to comet.ml
#
# Disabled by default. To enable it, set LOG_TO_COMET = True and define the
# environment variables COMET_API_KEY and COMET_EXPERIMENT_ID before starting
# the notebook. Credentials are read from the environment so that no secret is
# stored in the notebook.
# NOTE(review): an API key was previously hard-coded in this cell; it should be
# revoked/rotated in Comet since it has been committed.
LOG_TO_COMET = False

if LOG_TO_COMET:
    test_metrics = {
       "metrics/test/avg_domain_acc": average_domain_accuracy
    }

    exp = ExistingExperiment(
        api_key=os.environ["COMET_API_KEY"],
        previous_experiment=os.environ["COMET_EXPERIMENT_ID"],  # ID of the experiment to append to
    )
    exp.log_metrics(test_metrics)
    exp.end()

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/cetinkayaevren/wheat-head-detection/33700029cb9a49b88e54be057f298863

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml ExistingExperiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : inclined_sap_7489
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/cetinkayaevren/wheat-head-detection/33700029cb9a49b88e54be057f298863
[1;38;5;39mCOMET INFO:[0m   Metrics:
[1;38;5;39mCOMET INFO:[0m     metrics/test/avg_domain_acc : 0.4806479385592161
[1;38;5;39mCOMET INFO:[0m 
[1;38;5;39mCOMET INFO:[0m Please wait for metadata to finish uploading (timeout is 360