# Home Exercise

In [1]:
import os
from glob import glob
import itertools
import json

import pandas as pd

## Introduction

### Objective

Compare the effect on detector performance of training a seedling detector on your own annotated dataset versus the full dataset (all annotations merged).

## Methods

### Annotations

Two sets of annotations were used for model training: *"My annotations"* and *"All annotations"*.

- *"My annotations"*
  - Annotations made by me.
- *"All annotations"*
  - Annotations from all students merged.

In [49]:
def number_of_images(anno_data_path):
    """Count the ``.tif`` images in the train and val splits of a dataset.

    Parameters
    ----------
    anno_data_path : str
        Directory containing ``train/images`` and ``val/images`` folders.

    Returns
    -------
    dict
        Image counts under the keys ``"train"``, ``"val"`` and ``"all"``
        (in that order), where ``"all"`` is the sum of the two splits.
    """
    counts = {}
    for split in ("train", "val"):
        pattern = os.path.join(anno_data_path, split, "images", "*.tif")
        counts[split] = len(glob(pattern))
    # Inserted last so the key order stays train, val, all.
    counts["all"] = counts["train"] + counts["val"]

    return counts


def number_of_boxes(anno_data_path):
    """Count the annotated boxes in the train and val splits of a dataset.

    Every ``*.txt`` file under ``<split>/labels`` is passed to
    ``count_boxes`` and the per-file counts are summed.

    Parameters
    ----------
    anno_data_path : str
        Directory containing ``train/labels`` and ``val/labels`` folders.

    Returns
    -------
    dict
        Box counts under the keys ``"train"``, ``"val"`` and ``"all"``
        (in that order), where ``"all"`` is the sum of the two splits.
    """

    def boxes_in_split(split):
        # Total boxes over all label files of one split.
        label_files = glob(os.path.join(anno_data_path, split, "labels", "*.txt"))
        return sum(count_boxes(label_file) for label_file in label_files)

    train_total = boxes_in_split("train")
    val_total = boxes_in_split("val")

    return {"train": train_total, "val": val_total, "all": train_total + val_total}


def count_boxes(label):
    """Count the boxes listed in a single label file.

    Each non-blank line of the file is counted as one box. Blank or
    whitespace-only lines (e.g. stray empty lines at the end of the file)
    are ignored so they do not inflate the count.

    Parameters
    ----------
    label : str
        Path to a text label file with one box per line.

    Returns
    -------
    int
        Number of non-blank lines in the file.
    """
    with open(label, "r") as f:
        n_boxes = sum(1 for line in f if line.strip())

    return n_boxes


def anno_summary(anno_data_path):
    """Build a one-column summary table for an annotation set.

    The column is named after the last component of ``anno_data_path``
    (underscores replaced by spaces, capitalized). Rows are indexed by
    (Count, Split) and hold the image counts followed by the box counts.

    Parameters
    ----------
    anno_data_path : str
        Directory of one annotation set, as accepted by
        ``number_of_images`` and ``number_of_boxes``.

    Returns
    -------
    pandas.DataFrame
        Six rows (images/trees x Train/Val/Sum) and a single column.
    """
    set_name = os.path.basename(anno_data_path).replace("_", " ").capitalize()
    image_counts = number_of_images(anno_data_path)
    box_counts = number_of_boxes(anno_data_path)
    # Relies on both dicts being ordered train, val, all to line up with the index.
    counts = [*image_counts.values(), *box_counts.values()]
    row_index = pd.MultiIndex.from_product(
        [["Number of images", "Number of trees"], ["Train", "Val", "Sum"]],
        names=["Count", "Split"],
    )

    return pd.DataFrame({set_name: counts}, index=row_index)



In [50]:
# glob returns matches in arbitrary order; sorting keeps the column order of
# the summary table stable between runs and machines.
annotations = sorted(glob(os.path.join("data", "annotated_data", "train", "*")))
summaries = pd.concat(map(anno_summary, annotations), axis=1)
summaries

Unnamed: 0_level_0,Unnamed: 1_level_0,All annotations,My annotations
Count,Split,Unnamed: 2_level_1,Unnamed: 3_level_1
Number of images,Train,271,34
Number of images,Val,116,14
Number of images,Sum,387,48
Number of trees,Train,3492,201
Number of trees,Val,1570,79
Number of trees,Sum,5062,280


### Model training

YOLOv8 models were trained on the *"My annotations"* and *"All annotations"* data sets. A grid search was performed over the model sizes "Nano", "Medium", and "Extra large" and the image sizes 256, 640, and 1024. The best model for each data set was selected using mAP@.5.

In [61]:
# Every combination of pretrained weights (outer loop) and image size (inner loop).
grid_search = pd.DataFrame(
    [
        (weights, image_size)
        for weights in ("yolov8n.pt", "yolov8m.pt", "yolov8x.pt")
        for image_size in (256, 640, 1024)
    ],
    columns=["Model", "Image size"],
)
grid_search

Unnamed: 0,Model,Image size
0,yolov8n.pt,256
1,yolov8n.pt,640
2,yolov8n.pt,1024
3,yolov8m.pt,256
4,yolov8m.pt,640
5,yolov8m.pt,1024
6,yolov8x.pt,256
7,yolov8x.pt,640
8,yolov8x.pt,1024


### Model evaluation

Selected models from the model training were evaluated using machine-learning metrics and domain metrics.

#### ML metrics

#### Domain metrics

RMSE, RMSE%, MD and MD% were calculated for each site and for all sites combined.

## Results & Discussion

### Model training

Detailed view of trained models can be found in [this Comet project](<https://www.comet.com/juliwold/home-exercise-sapling-detector/view/uxR2erf0uJlERPXPjybwdN2yE/panels>).

#### My annotations

![](<figures/metrics_mAP50_my.jpeg>)

*Figure 1. My annotations - mAP@.5*

The best-performing model for "My annotations" was a YOLOn model with an
image size of 256. This image size seems far too low for detecting
saplings, so the next-best model (YOLOn, image size = 640) was
also selected.

#### All annotations

![](<figures/metrics_mAP50_all.jpeg>)

*Figure 2. All annotations - mAP@.5*



### Effect of hyperparameters

#### Detectors performance

- Larger models improved faster in the beginning of training.
  - YOLOm and YOLOx models improved faster than YOLOn.
- Larger models seem more prone to overfitting.


#### Inference speed

- Inference speed decreases with increasing model size and image size.
- The effect of image size increased with model size.

![](<figures/model_speed.jpeg>)

*Figure 3. Model speed.*

### Evaluation of models

In [None]:
# Directories of the models selected for evaluation. Paths are built with
# os.path.join so that os.path.basename / os.path.join in the collectors
# below behave the same on Windows and POSIX systems.
models = [
    os.path.join("models", "all_annotations", "all_annotations_yolov8m.pt_640"),
    os.path.join("models", "my_annotations", "my_annotations_yolov8n.pt_256"),
    os.path.join("models", "my_annotations", "my_annotations_yolov8n.pt_640"),
]

#### ML metrics

In [21]:
def collect_ml_metrics(model_path):
    """Load the test metrics of one model as a single-row table.

    Reads ``<model_path>/val/test_results.json`` and returns its content as
    a DataFrame whose only row is labelled with the last component of
    ``model_path``.

    Parameters
    ----------
    model_path : str
        Directory of a trained model.

    Returns
    -------
    pandas.DataFrame
        One row per model, one column per key in the JSON file.
    """
    results_file = os.path.join(model_path, "val", "test_results.json")
    with open(results_file, "r") as handle:
        scores = json.load(handle)
    run_name = os.path.basename(model_path)

    return pd.DataFrame(scores, index=[run_name])

In [32]:
# Stack the single-row metric tables of all selected models and round for display.
ml_metrics = pd.concat(
    [collect_ml_metrics(model_path) for model_path in models]
).round(decimals=2)
ml_metrics

Unnamed: 0,metrics/precision(B),metrics/recall(B),metrics/mAP50(B),metrics/mAP50-95(B),fitness
all_annotations_yolov8m.pt_640,0.56,0.45,0.38,0.12,0.14
my_annotations_yolov8n.pt_256,0.48,0.33,0.28,0.08,0.1
my_annotations_yolov8n.pt_640,0.49,0.42,0.36,0.11,0.14


#### Domain metrics

In [53]:
def collect_domain_metrics(model_path):
    """Load the domain metrics of one model and tag them with the model name.

    Reads ``<model_path>/predictions_processed/metrics.csv`` and appends a
    ``"Model"`` column holding the last component of ``model_path``.

    Parameters
    ----------
    model_path : str
        Directory of a trained model.

    Returns
    -------
    pandas.DataFrame
        The CSV content with the extra ``"Model"`` column as last column.
    """
    csv_file = os.path.join(model_path, "predictions_processed", "metrics.csv")
    model_name = os.path.basename(model_path)

    return pd.read_csv(csv_file).assign(Model=model_name)

In [81]:
# One long table holding the per-site domain metrics of every selected model.
dm_all = pd.concat([collect_domain_metrics(model_path) for model_path in models])

In [80]:
# Index the rows by (model, site) and keep only the density-based metrics.
# The index is built directly from the "Model" and "aoi_name" columns, so the
# cell no longer depends on the variable `mi`, which is not defined anywhere
# in the notebook and made this cell fail on a fresh kernel. Because
# set_index moves those two columns into the index, they no longer need to be
# dropped afterwards.
domain_metrics = (
    dm_all
    .set_index(["Model", "aoi_name"])
    .round()
    .drop(columns=["rmse_n", "rmse_n (%)", "bias_n", "bias_n (%)"])
    .rename(
        columns={
            "rmse_dens": "RMSE",
            "rmse_dens (%)": "RMSE (%)",
            "bias_dens": "MD",
            "bias_dens (%)": "MD (%)",
        }
    )
)
domain_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,RMSE,RMSE (%),MD,MD (%)
Model,aoi_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
all_annotations_yolov8m.pt_640,braatan,981.0,67.0,891.0,61.0
all_annotations_yolov8m.pt_640,galbyveien,692.0,34.0,662.0,33.0
all_annotations_yolov8m.pt_640,hobol,99.0,9.0,90.0,8.0
all_annotations_yolov8m.pt_640,krakstad,386.0,29.0,353.0,27.0
all_annotations_yolov8m.pt_640,all,632.0,43.0,499.0,34.0
my_annotations_yolov8n.pt_256,braatan,1363.0,93.0,1307.0,89.0
my_annotations_yolov8n.pt_256,galbyveien,1360.0,67.0,1331.0,66.0
my_annotations_yolov8n.pt_256,hobol,471.0,43.0,465.0,43.0
my_annotations_yolov8n.pt_256,krakstad,691.0,52.0,684.0,52.0
my_annotations_yolov8n.pt_256,all,1049.0,71.0,947.0,64.0


### Effect of Training data

*Is the model trained on more data better than the one trained on your own dataset?*

### Examples of poor performance