# 3. YOLO RarePlanes Model Scoring

This last notebook converts the ground truth annotations to the bounding box format whcih YOLO creates in order to compare them with the predictions. Then, precision, recall, and f1 are calculated by class and as a whole. 

In [1]:
from shapely.geometry import box
import argparse
import geopandas as gpd
import pandas as pd
import os
import glob
from tqdm import tqdm
import numpy as np
from solaris.eval.iou import calculate_iou

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Convert Ground Truth to Bounding Boxes

The following scripts take the geojson ground truths for the test set and convert them to bounding boxes. Then, in the two cells, the same custom classes are created in the ground truth. 

In [6]:
input_folder = "/home/ubuntu/src/yolo_planes/wdata/test/geojson_aircraft"
output_folder = "/home/ubuntu/src/yolo_planes/wdata/test/geojson_aircraft_bbox"
os.makedirs(output_folder,exist_ok=True)
os.chdir(input_folder)
geojsons = glob.glob("*.geojson")
for geojson in tqdm(geojsons):
    gdf = gpd.read_file(geojson)
    box_geoms= []
    for _, row in gdf.iterrows():
        x = row.geometry.bounds
        bbox = box(x[0], x[1], x[2], x[3])
        box_geoms.append(bbox)
    gdf['box_geom'] = box_geoms
    gdf = gdf.drop('geometry',axis=1)
    gdf = gpd.GeoDataFrame(gdf, geometry='box_geom')
    out_name=os.path.join(output_folder, geojson)
    gdf.to_file(out_name, driver='GeoJSON', encoding='utf-8')

100%|██████████| 66/66 [00:03<00:00, 16.67it/s]


In [14]:
def count_unique_index(df, by):
    return df.groupby(by).size().reset_index().rename(columns={0: 'count'})

def create_custom_classes(all_annotations_geojson, geojson_dir, output_path, category_attributes):
    """ parse the geojson files and create custom classes based upon
    unique variatons of the RarePlanes attributes.
        -all_annotations_geojson (str): The path to the
        `RarePlanes_Public_All_Annotations.geojson` file.
        - geojson_dir (str): directory containing the geojson files
        for individual images or tiles
        - output_path (str): directory to output the customized geojsons. Need to provide the absolute path.
        - category_attributes (list): A list of attributes to combine
        to create a custom class.  Choose any combintaion of the following:
        ['role','num_engines', 'propulsion', 'canards', 'num_tail_fins',
       'wing_position', 'wing_type', 'faa_wingspan_class']
    :returns
        - new geojsons with a custom_id for each combination of unique
        attributes.
        -A lookup table for each classes custom_id.
    """
    os.makedirs(output_path, exist_ok=True)
    gdf = gpd.read_file(all_annotations_geojson)
    lookup_gdf = count_unique_index(gdf, category_attributes)
    lookup_gdf['custom_id'] = list(range(0, len(lookup_gdf)))
    lookup_gdf.drop(columns=['count'], inplace=True)
    lookup_gdf.to_csv(os.path.join(output_path, "custom_class_lookup.csv"))
    os.chdir(geojson_dir)
    geojsons = glob.glob("*.geojson")
    for geojson in tqdm(geojsons):
        gdf = gpd.read_file(geojson)
        gdf = pd.merge(gdf, lookup_gdf, on=category_attributes, how='left')
        gdf["custom_id"] = pd.to_numeric(gdf["custom_id"], downcast='float')
        gdf.to_file(os.path.join(output_path, geojson), driver="GeoJSON", encoding='utf-8')

In [15]:
all_annotations_geojson = '/home/ubuntu/src/yolo_planes/wdata/RarePlanes_Public_All_Annotations.geojson'
geojson_dir_test = '/home/ubuntu/src/yolo_planes/wdata/test/geojson_aircraft_bbox'

output_path_test_one = '/home/ubuntu/src/yolo_planes/wdata/test/yolo_class_one_truth_bbox'

class_one = ['num_engines', 'propulsion']

create_custom_classes(all_annotations_geojson, geojson_dir_test, output_path_test_one, class_one)

100%|██████████| 66/66 [00:03<00:00, 18.73it/s]


In [44]:
def average_score_by_class(ious, threshold=0.5):
    """ for a list of object ious by class, test if they are a counted as a
    positive or a negative.
    Arguments
    ---------
        ious : list of lists
            A list containing individual lists of ious for eachobject class.
        threshold : float
            A value between 0.0 and 1.0 that determines the threshold for a true positve.
    Returns
    ---------
        average_by_class : list
            A list containing the ratio of true positives for each class
    """
    binary_scoring_lists = []
    for x in ious:
        items = []
        for i in x:
            if i >= threshold:
                items.append(1)
            else:
                items.append(0)
        binary_scoring_lists.append(items)
    average_by_class = []
    for l in binary_scoring_lists:
        average_by_class.append(np.nanmean(l))
    return average_by_class

def get_all_objects(proposal_polygons_dir, gt_polygons_dir,
                    prediction_cat_attrib="class", gt_cat_attrib='make',
                    file_format="geojson"):
    """ Using the proposal and ground truth polygons, calculate the total.
    Filenames of predictions and ground-truth must be identical.
    unique classes present in each
    Arguments
    ---------
        proposal_polygons_dir : str
            The path that contains any model proposal polygons
        gt_polygons_dir : str
            The path that contains the ground truth polygons
        prediction_cat_attrib : str
            The column or attribute within the predictions that specifies
            unique classes
        gt_cat_attrib : str
            The column or attribute within the ground truth that
            specifies unique classes
        file_format : str
            The extension or file format for predictions
    Returns
    ---------
            prop_objs : list
                All unique objects that exist in the proposals
            gt_obj : list
                All unique objects that exist in the ground truth
            all_objs : list
                A union of the prop_objs and gt_objs lists
    """
    objs = []
    os.chdir(proposal_polygons_dir)
    search = "*" + file_format
    proposal_geojsons = glob.glob(search)
    for geojson in (proposal_geojsons):
        ground_truth_poly = os.path.join(gt_polygons_dir, geojson)
        if os.path.exists(ground_truth_poly):
            ground_truth_gdf = gpd.read_file(ground_truth_poly)
            proposal_gdf = gpd.read_file(geojson)
            for index, row in (proposal_gdf.iterrows()):
                objs.append(row[str(prediction_cat_attrib)])
    prop_objs = list(set(objs))
    os.chdir(gt_polygons_dir)
    search = "*" + file_format
    objs = []
    gt_geojsons = glob.glob(search)
    for geojson in (gt_geojsons):
        proposal_poly = os.path.join(proposal_polygons_dir, geojson)
        if os.path.exists(proposal_poly):
            proposal_gdf = gpd.read_file(proposal_poly)
            ground_truth_gdf = gpd.read_file(geojson)
            for index, row in (ground_truth_gdf.iterrows()):
                objs.append(row[gt_cat_attrib])
    gt_objs = list(set(objs))
    all_objs = gt_objs + prop_objs
    all_objs = list(set(all_objs))
    return prop_objs, gt_objs, all_objs


def precision_calc(proposal_polygons_dir, gt_polygons_dir,
                   prediction_cat_attrib="class", gt_cat_attrib='make',
                   object_subset=[], threshold=0.5, file_format="geojson"):
    """ Using the proposal and ground truth polygons, calculate precision metrics.
    Filenames of predictions and ground-truth must be identical.  Will only
    calculate metric for classes that exist in the ground truth.
    Arguments
    ---------
        proposal_polygons_dir : str
            The path that contains any model proposal polygons
        gt_polygons_dir : str
            The path that contains the ground truth polygons
        prediction_cat_attrib : str
            The column or attribute within the predictions that specifies
            unique classes
        gt_cat_attrib : str
            The column or attribute within the ground truth that
            specifies unique classes
        object_subset : list
            A list or subset of the unique objects that are contained within the
            ground truth polygons. If empty, this will be
            auto-created using all classes that appear ground truth polygons.
        threshold : float
            A value between 0.0 and 1.0 that determines the IOU threshold for a
            true positve.
        file_format : str
            The extension or file format for predictions
    Returns
    ---------
        iou_holder : list of lists
            An iou score for each object per class (precision specific)
        precision_by_class : list
            A list containing the precision score for each class
        mPrecision : float
            The mean precision score of precision_by_class
    """
    ious = []
    os.chdir(proposal_polygons_dir)
    search = "*" + file_format
    proposal_geojsons = glob.glob(search)
    iou_holder = []
    if len(object_subset) == 0:
        prop_objs, object_subset, all_objs = get_all_objects(
            proposal_polygons_dir, gt_polygons_dir,
            prediction_cat_attrib=prediction_cat_attrib,
            gt_cat_attrib=gt_cat_attrib, file_format=file_format)
    for i in range(len(object_subset)):
        iou_holder.append([])

    for geojson in (proposal_geojsons):
        ground_truth_poly = os.path.join(gt_polygons_dir, geojson)
        if os.path.exists(ground_truth_poly):
            ground_truth_gdf = gpd.read_file(ground_truth_poly)
            proposal_gdf = gpd.read_file(geojson)
            i = 0
            for obj in object_subset:
                proposal_gdf2 = proposal_gdf[proposal_gdf[prediction_cat_attrib] == obj]
                for index, row in (proposal_gdf2.iterrows()):
                    iou_GDF = calculate_iou(row.geometry, ground_truth_gdf)
                    if 'iou_score' in iou_GDF.columns:
                        iou = iou_GDF.iou_score.max()
                        max_iou_row = iou_GDF.loc[iou_GDF['iou_score'].idxmax(axis=0, skipna=True)]
                        id_1 = row[prediction_cat_attrib]
                        id_2 = ground_truth_gdf.loc[max_iou_row.name][gt_cat_attrib]
                        if id_1 == id_2:
                            ious.append(iou)
                            ground_truth_gdf.drop(max_iou_row.name, axis=0, inplace=True)
                        else:
                            iou = 0
                            ious.append(iou)
                    else:
                        iou = 0
                        ious.append(iou)
                for item in ious:
                    iou_holder[i].append(item)
                ious = []
                i += 1
        else:
            print("Warning- No ground truth for:", geojson)
            proposal_gdf = gpd.read_file(geojson)
            i = 0

            for obj in object_subset:
                proposal_gdf2 = proposal_gdf[proposal_gdf[gt_cat_attrib] == obj]
                for z in range(len(proposal_gdf2)):
                    ious.append(0)
                for item in ious:
                    iou_holder[i].append(item)
                i += 1
                ious = []
    precision_by_class = average_score_by_class(iou_holder, threshold=0.5)
    precision_by_class = list(np.nan_to_num(precision_by_class))
    mPrecision = np.nanmean(precision_by_class)
    print("mPrecision:", mPrecision)
    return iou_holder, precision_by_class, mPrecision


def recall_calc(proposal_polygons_dir, gt_polygons_dir,
                prediction_cat_attrib="class", gt_cat_attrib='make',
                object_subset=[], threshold=0.5, file_format="geojson"):
    """ Using the proposal and ground truth polygons, calculate recall metrics.
    Filenames of predictions and ground-truth must be identical. Will only
    calculate metric for classes that exist in the ground truth.
    Arguments
    ---------
        proposal_polygons_dir : str
            The path that contains any model proposal polygons
        gt_polygons_dir : str
            The path that contains the ground truth polygons
        prediction_cat_attrib : str
            The column or attribute within the predictions that specifies
            unique classes
        gt_cat_attrib : str
            The column or attribute within the ground truth that
            specifies unique classes
        object_subset : list
            A list or subset of the unique objects that are contained within the
            ground truth polygons. If empty, this will be
            auto-created using all classes that appear ground truth polygons.
        threshold : float
            A value between 0.0 and 1.0 that determines the IOU threshold for a
            true positve.
        file_format : str
            The extension or file format for predictions
    Returns
    ---------
        iou_holder : list of lists
            An iou score for each object per class (recall specific)
        recall_by_class : list
            A list containing the recall score for each class
        mRecall : float
            The mean recall score of recall_by_class
    """
    ious = []
    os.chdir(gt_polygons_dir)
    search = "*" + file_format
    gt_geojsons = glob.glob(search)
    iou_holder = []
    if len(object_subset) == 0:
        prop_objs, object_subset, all_objs = get_all_objects(
            proposal_polygons_dir, gt_polygons_dir,
            prediction_cat_attrib=prediction_cat_attrib,
            gt_cat_attrib=gt_cat_attrib, file_format=file_format)
    for i in range(len(object_subset)):
        iou_holder.append([])
    for geojson in (gt_geojsons):
        proposal_poly = os.path.join(proposal_polygons_dir, geojson)
        if os.path.exists(proposal_poly):
            proposal_gdf = gpd.read_file(proposal_poly)
            ground_truth_gdf = gpd.read_file(geojson)
            i = 0
            for obj in object_subset:
                ground_truth_gdf2 = ground_truth_gdf[ground_truth_gdf[gt_cat_attrib] == obj]
                for index, row in (ground_truth_gdf2.iterrows()):
                    iou_GDF = calculate_iou(row.geometry, proposal_gdf)
                    if 'iou_score' in iou_GDF.columns:
                        iou = iou_GDF.iou_score.max()
                        max_iou_row = iou_GDF.loc[iou_GDF['iou_score'].idxmax(axis=0, skipna=True)]
                        id_1 = row[gt_cat_attrib]
                        id_2 = proposal_gdf.loc[max_iou_row.name][prediction_cat_attrib]
                        if id_1 == id_2:
                            ious.append(iou)
                            proposal_gdf.drop(max_iou_row.name, axis=0, inplace=True)
                        else:
                            iou = 0
                            ious.append(iou)
                    else:
                        iou = 0
                        ious.append(iou)
                for item in ious:
                    iou_holder[i].append(item)
                i += 1
                ious = []
        else:
            ground_truth_gdf = gpd.read_file(geojson)
            i = 0
            for obj in object_subset:
                ground_truth_gdf2 = ground_truth_gdf[ground_truth_gdf[gt_cat_attrib] == obj]
                for z in range(len(ground_truth_gdf2)):
                    ious.append(0)
                for item in ious:
                    iou_holder[i].append(item)
                i += 1
                ious = []

    recall_by_class = average_score_by_class(iou_holder, threshold=0.5)
    recall_by_class = list(np.nan_to_num(recall_by_class))
    mRecall = np.nanmean(recall_by_class)
    print("mRecall:", mRecall)
    return iou_holder, recall_by_class, mRecall


def mF1(proposal_polygons_dir, gt_polygons_dir, prediction_cat_attrib="class",
        gt_cat_attrib='make', object_subset=[], threshold=0.5,
        file_format="geojson", all_outputs=False):
    """ Using the proposal and ground truth polygons, calculate F1 and mF1
    metrics. Filenames of predictions and ground-truth must be identical.  Will
    only calculate metric for classes that exist in the ground truth.
    Arguments
    ---------
        proposal_polygons_dir : str
            The path that contains any model proposal polygons
        gt_polygons_dir : str
            The path that contains the ground truth polygons
        prediction_cat_attrib : str
            The column or attribute within the predictions that specifies
            unique classes
        gt_cat_attrib : str
            The column or attribute within the ground truth that
            specifies unique classes
        object_subset : list
            A list or subset of the unique objects that are contained within the
            proposal and ground truth polygons. If empty, this will be
            auto-created using all classes that appear in the proposal and
            ground truth polygons.
        threshold : float
            A value between 0.0 and 1.0 that determines the IOU threshold for a
            true positve.
        file_format : str
            The extension or file format for predictions
        all_outputs : bool
            `True` or `False`.  If `True` returns an expanded output.
    Returns
    ---------
        if all_outputs is `True`:
            mF1 : float
                The mean F1 score of f1s_by_class
            f1s_by_class : list
                A list containing the f1 score for each class
            precision_iou_by_obj : list of lists
                An iou score for each object per class (precision specific)
            precision_by_class : list
                A list containing the precision score for each class
            mPrecision : float
                The mean precision score of precision_by_class
            recall_iou_by_obj : list of lists
                An iou score for each object per class (recall specific)
            recall_by_class : list
                A list containing the recall score for each class
            mRecall : float
                The mean recall score of recall_by_class
            object_subset : list
                All unique objects that exist in the ground truth polygons
            prop_objs : list
                All unique objects that exist in the proposal polygons
            all_objs : list
                All unique objects that exist in both the proposal and ground
                truth polygons
        if all_outputs is `False`:
            mF1_score : float
                The mean F1 score of f1s_by_class (only calculated for ground
                ground truth classes)
            f1s_by_class : list
                A list containing the f1 score for each class
    """
    if len(object_subset) == 0:
        print("getting unique objects...")
        prop_objs, object_subset, all_objs = get_all_objects(
            proposal_polygons_dir, gt_polygons_dir,
            prediction_cat_attrib=prediction_cat_attrib,
            gt_cat_attrib=gt_cat_attrib, file_format=file_format)
    print("calculating recall...")
    recall_iou_by_obj, recall_by_class, mRecall = recall_calc(
        proposal_polygons_dir, gt_polygons_dir,
        prediction_cat_attrib=prediction_cat_attrib,
        gt_cat_attrib=gt_cat_attrib, object_subset=object_subset,
        threshold=threshold, file_format=file_format)
    print("calculating precision...")
    precision_iou_by_obj, precision_by_class, mPrecision = precision_calc(
        proposal_polygons_dir, gt_polygons_dir,
        prediction_cat_attrib=prediction_cat_attrib,
        gt_cat_attrib=gt_cat_attrib, object_subset=object_subset,
        threshold=threshold, file_format=file_format)
    print("")
    print("calculating F1 scores...")
    f1s_by_class = []
    for recall, precision in zip(recall_by_class, precision_by_class):
        f1 = 2 * precision * recall / (precision + recall)
        f1 = np.nan_to_num(f1)
        f1s_by_class.append(f1)
    mF1_score = np.nanmean(f1s_by_class)
    print("mF1:", mF1_score)
    if all_outputs is True:
        return mF1_score, f1s_by_class, precision_iou_by_obj, precision_by_class, mPrecision, recall_iou_by_obj, recall_by_class, mRecall, object_subset, prop_objs, all_objs
    else:
        return mF1_score, f1s_by_class

### Results

The following cells execute the scoring of the predicitons vs the ground truth. The first cell outputs total recall, precision, and F1, while the second cell outputs the lookup table and now includes the class by class recall, precision, and F1. 

In [45]:
proposal_polygons_dir ="/home/ubuntu/src/yolo_planes/yolov5/inference/class_one_out/bounding_boxes"
gt_polygons_dir = "/home/ubuntu/src/yolo_planes/wdata/test/yolo_class_one_truth_bbox"
mF1, f1s_by_class, precision_iou_by_obj, precision_by_class, mPrecision, recall_iou_by_obj, recall_by_class, mRecall, object_subset, prop_objs, all_objs = mF1(proposal_polygons_dir, gt_polygons_dir, prediction_cat_attrib="class_id", gt_cat_attrib='custom_id', object_subset=[], all_outputs=True)

getting unique objects...
calculating recall...
mRecall: 0.6511768568429169
calculating precision...
mPrecision: 0.7047816872010533

calculating F1 scores...
mF1: 0.6732098637317354




In [46]:
lookup_table = "/home/ubuntu/src/yolo_planes/geojsons_test/yolo_class_one/custom_class_lookup.csv"
lookup_table = pd.read_csv(lookup_table)
lookup_table = lookup_table.drop('custom_id',1)
lookup_table.drop(lookup_table.columns[lookup_table.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
lookup_table['recall_by_class'] = recall_by_class
lookup_table.set_index('recall_by_class')
lookup_table['precision_by_class'] = recall_by_class
lookup_table.set_index('precision_by_class')
lookup_table['f1s_by_class'] = recall_by_class
lookup_table.set_index('f1s_by_class')
print(lookup_table)

   num_engines propulsion  recall_by_class  precision_by_class  f1s_by_class
0            0  unpowered         0.729730            0.729730      0.729730
1            1        jet         0.250000            0.250000      0.250000
2            1  propeller         0.933941            0.933941      0.933941
3            2        jet         0.978015            0.978015      0.978015
4            2  propeller         0.885305            0.885305      0.885305
5            3        jet         0.000000            0.000000      0.000000
6            4        jet         0.690000            0.690000      0.690000
7            4  propeller         0.742424            0.742424      0.742424


Now, using these fused geojsons, you can visulaize the predictions by downloading the `/home/ubuntu/src/yolo_planes/yolov5/inference/class_one_out/bounding_boxes` folder to your local machine and using a geographic infromation system like QGIS and overlaying the bounding boxes onto the real image.