In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tqdm

# Load and clean prediction data

In [None]:
# Read both annotation tables (';'-separated, first CSV column becomes the index).
df_ground_truth = pd.read_csv(
    '../annotations/ground_truth_boat_frames_coco.csv', sep=';', index_col=0
)
df_yolo_predictions = pd.read_csv(
    '../annotations/yolov8x_no_train_labels.csv', sep=';', index_col=0
)

# Parse timestamps and derive a per-day column.
# Ground truth keeps plain `datetime.date` objects here, while the predictions get
# datetime64 values so that they can be compared with date strings below.
df_ground_truth['datetime'] = pd.to_datetime(df_ground_truth['datetime'])
df_ground_truth['date'] = df_ground_truth['datetime'].dt.date
df_yolo_predictions['datetime'] = pd.to_datetime(df_yolo_predictions['datetime'])
df_yolo_predictions['date'] = pd.to_datetime(
    df_yolo_predictions['datetime'].dt.date, format='%Y-%m-%d'
)

# Overview of what was loaded: number of rows per (date, camera).
print('shape of loaded data', df_ground_truth.shape, df_yolo_predictions.shape)
for table_title, table in (
    ('Ground truth data:', df_ground_truth),
    ('YOLO predictions data:', df_yolo_predictions),
):
    print(table_title)
    print(table.groupby(['date', 'camera_id']).datetime.count())

# Keep predictions only for the listed (date, camera) combinations.
prediction_date = df_yolo_predictions['date']
prediction_camera = df_yolo_predictions['camera_id']
selected_recordings = (
    ((prediction_date == '2023-06-09') & prediction_camera.isin([1, 2]))
    | ((prediction_date == '2023-06-10') & (prediction_camera == 2))
    | ((prediction_date == '2023-07-07') & (prediction_camera == 2))
    | ((prediction_date == '2023-07-08') & (prediction_camera == 1))
)
df_yolo_predictions = df_yolo_predictions[selected_recordings].copy()

# Crop bounding boxes from the right side of the camera 2 field of view (x > 1800),
# then re-index both tables by file name. The rows are dropped by index label, i.e.
# every row sharing a label with a matching row is removed as well.
for table in (df_yolo_predictions, df_ground_truth):
    right_edge_labels = table[(table.camera_id == 2) & (table.x > 1800)].index
    table.drop(index=right_edge_labels, inplace=True)
    table.set_index('filename', inplace=True)

df_ground_truth.shape, df_yolo_predictions.shape

In [None]:
# Filter out predictions lying on the bank in camera 1: the bank edge is approximated
# by the straight line through two points of the camera 1 field of view.
point1_cam01_bank = (489, 591)
point2_cam01_bank = (1499, 875)
slope_cam01_bank = (point2_cam01_bank[1] - point1_cam01_bank[1]) / (point2_cam01_bank[0] - point1_cam01_bank[0])
intercept_cam01_bank = point1_cam01_bank[1] - slope_cam01_bank * point1_cam01_bank[0]

def under_the_bank_apply(row):
    """
        Tell whether a camera 1 detection lies below the bank line.

        Args:
            row: object exposing `camera_id`, `x` and `y` (e.g. a DataFrame row)
        Returns:
            True only for camera 1 rows whose x is within the x-range of the two bank
            points and whose y is greater than the line value at that x; False otherwise.
    """
    if row.camera_id != 1:
        return False
    if not (point1_cam01_bank[0] <= row.x <= point2_cam01_bank[0]):
        return False
    bank_y_at_x = slope_cam01_bank * row.x + intercept_cam01_bank
    return bool(row.y > bank_y_at_x)
        
# Flag every prediction row with the bank test, report the counts and keep only
# the rows that were not flagged.
bank_flags = df_yolo_predictions.apply(under_the_bank_apply, axis=1)
df_yolo_predictions['under_the_bank'] = bank_flags
print('How many prediction were under the bank?')
print(df_yolo_predictions['under_the_bank'].value_counts())
df_yolo_predictions = df_yolo_predictions.loc[~df_yolo_predictions['under_the_bank']].copy()
print('YOLO predictions filtered data:')
print(df_yolo_predictions.groupby(['date', 'camera_id'])['datetime'].count())

# Evaluation: precision and recall of detected frame_ids

In [None]:
def calculate_iou(ground_truth:tuple, prediction:tuple):
    """
        Calculate intersection over union (IoU) for two bounding boxes.

        Both boxes are given in center format: (x, y) is the box center and (w, h)
        are its width and height.

        Args:
            ground_truth: tuple of (x, y, w, h)
            prediction: tuple of (x, y, w, h)
        Returns:
            intersection area divided by union area; 0.0 when the union has no area
            (both boxes are degenerate) instead of dividing by zero.
    """
    gt_x, gt_y, gt_w, gt_h = (ground_truth[i] for i in range(4))
    pr_x, pr_y, pr_w, pr_h = (prediction[i] for i in range(4))

    # overlap along each axis: smaller of the far edges minus larger of the near edges,
    # clamped at 0 for boxes that do not overlap on that axis
    overlap_w = max(0, min(gt_x + gt_w / 2, pr_x + pr_w / 2) - max(gt_x - gt_w / 2, pr_x - pr_w / 2))
    overlap_h = max(0, min(gt_y + gt_h / 2, pr_y + pr_h / 2) - max(gt_y - gt_h / 2, pr_y - pr_h / 2))
    intersection_area = overlap_w * overlap_h

    union_area = gt_w * gt_h + pr_w * pr_h - intersection_area
    if union_area <= 0:
        # two zero-area boxes: there is nothing to overlap
        return 0.0
    return intersection_area / union_area

def evaluate_model(df_ground_truth, df_predictions):
    """
        Frame-level evaluation of predictions against ground truth, file by file.

        Both tables must be indexed by file name and contain the columns `frame_id`,
        `x`, `y`, `w`, `h`; `df_predictions` additionally needs `confidence`.

        For every file name found in either table the following is stored:
            true_positive: number of frame_ids present in both tables
            false_positive: number of frame_ids present only in the predictions
            false_negative: number of frame_ids present only in the ground truth
            frames_iou: dict frame_id -> IoU between the first ground truth box of the
                frame and the prediction of that frame with the highest (confidence, w);
                an empty dict when no frame matches
            iou: mean of the frames_iou values (0 when no frame matches)

        Args:
            df_ground_truth: ground truth boxes
            df_predictions: predicted boxes
        Returns:
            df_evaluation: one row per file with the values above plus f1, recall, precision
            total_evaluation_dict: f1, recall and precision computed from the summed counts
                and the true-positive-weighted mean iou
    """
    box_columns = ['x', 'y', 'w', 'h']

    def _rows_for_file(df, file_id):
        # A list indexer always returns a DataFrame; df.loc[file_id] would collapse a
        # file with a single row into a Series and break the lookups below.
        if file_id in df.index:
            return df.loc[[file_id]]
        return df.iloc[0:0]

    evaluation_dict = dict()
    for file_id in tqdm.tqdm(set(df_ground_truth.index) | set(df_predictions.index)):
        gt_rows = _rows_for_file(df_ground_truth, file_id)
        pred_rows = _rows_for_file(df_predictions, file_id)

        ground_truth_frame_ids = set(gt_rows['frame_id'])
        yolo_frame_ids = set(pred_rows['frame_id'])
        corresponding_frames = ground_truth_frame_ids & yolo_frame_ids

        file_evaluation = {
            'true_positive': len(corresponding_frames),
            'false_positive': len(yolo_frame_ids - ground_truth_frame_ids),
            'false_negative': len(ground_truth_frame_ids - yolo_frame_ids),
        }

        frames_iou = {}
        if corresponding_frames:
            # one lookup table per file instead of re-filtering the whole table per frame:
            # first ground truth row of each frame ...
            gt_by_frame = gt_rows.drop_duplicates('frame_id').set_index('frame_id')
            # ... and the prediction with the highest (confidence, w) of each frame
            best_prediction_by_frame = (
                pred_rows
                .sort_values(['confidence', 'w'], ascending=False)
                .drop_duplicates('frame_id')
                .set_index('frame_id')
            )
            for frame_id in corresponding_frames:
                frames_iou[frame_id] = calculate_iou(
                    gt_by_frame.loc[frame_id, box_columns].values,
                    best_prediction_by_frame.loc[frame_id, box_columns].values,
                )
            file_evaluation['iou'] = sum(frames_iou.values()) / len(frames_iou)
        else:
            file_evaluation['iou'] = 0
        # always a dict, so that callers can rely on .keys() / .values()
        file_evaluation['frames_iou'] = frames_iou
        evaluation_dict[file_id] = file_evaluation

    df_evaluation = pd.DataFrame.from_dict(evaluation_dict, orient='index')
    df_evaluation['f1'] = 2 * df_evaluation['true_positive'] / (2 * df_evaluation['true_positive'] + df_evaluation['false_positive'] + df_evaluation['false_negative'])
    df_evaluation['recall'] = df_evaluation['true_positive'] / (df_evaluation['true_positive'] + df_evaluation['false_negative'])
    df_evaluation['precision'] = df_evaluation['true_positive'] / (df_evaluation['true_positive'] + df_evaluation['false_positive'])

    # totals are computed from the summed counts, not by averaging the per-file scores
    total_eval = df_evaluation[['true_positive','false_positive','false_negative']].sum(axis=0)
    total_evaluation_dict = {
        'f1': 2*total_eval['true_positive'] / (2*total_eval['true_positive'] + total_eval['false_positive'] + total_eval['false_negative']),
        'recall': total_eval['true_positive'] / (total_eval['true_positive'] + total_eval['false_negative']),
        'precision': total_eval['true_positive'] / (total_eval['true_positive'] + total_eval['false_positive']),
        # per-file mean IoU weighted by the number of matched frames of the file
        'iou': (df_evaluation['true_positive']*df_evaluation['iou']).sum() / total_eval['true_positive']
    }
    return df_evaluation, total_evaluation_dict

In [None]:
# Run the frame-level evaluation on the current prediction table; the last line
# displays the aggregated scores as the cell output.
df_evaluation, total_evaluation_dict = evaluate_model(df_ground_truth, df_yolo_predictions)
total_evaluation_dict

In [None]:
# Persist the per-file evaluation table as a ';'-separated CSV in the working directory.
df_evaluation.to_csv('data_evaluation.csv', sep=';')

# Confidence values evaluation

In [None]:
# Repeat the evaluation while keeping only predictions at or above each confidence
# threshold; the aggregated scores are collected per threshold.
# Note: `df_evaluation` and `total_evaluation_dict` are overwritten on every pass and
# hold the result of the last threshold once the loop has finished.
confidence_threshold_evaluation_dict = {}
for confidence_threshold in (0.25, 0.5, 0.75, 0.9):
    print('Confidence threshold', confidence_threshold)
    confident_predictions = df_yolo_predictions[df_yolo_predictions['confidence'] >= confidence_threshold]
    df_evaluation, total_evaluation_dict = evaluate_model(df_ground_truth, confident_predictions)
    print(total_evaluation_dict)
    confidence_threshold_evaluation_dict[confidence_threshold] = total_evaluation_dict

In [None]:
# Recall as a function of the confidence threshold.
thresholds = list(confidence_threshold_evaluation_dict.keys())
recall_values = [scores['recall'] for scores in confidence_threshold_evaluation_dict.values()]
plt.plot(thresholds, recall_values, 'x-')
plt.xlabel('Confidence threshold')
plt.ylabel('Recall')

In [None]:
# F1 score as a function of the confidence threshold.
thresholds = list(confidence_threshold_evaluation_dict.keys())
f1_values = [scores['f1'] for scores in confidence_threshold_evaluation_dict.values()]
plt.plot(thresholds, f1_values, 'x-')
plt.xlabel('Confidence threshold')
plt.ylabel('F1')

In [None]:
# Mean IoU as a function of the confidence threshold.
thresholds = list(confidence_threshold_evaluation_dict.keys())
iou_values = [scores['iou'] for scores in confidence_threshold_evaluation_dict.values()]
plt.plot(thresholds, iou_values, 'x-')
plt.xlabel('Confidence threshold')
plt.ylabel('IOU')

# mAP evaluation
- Use https://github.com/bes-dev/mean_average_precision.git for evaluation

In [None]:
# Transform center-format boxes (x, y, w, h) into integer corner coordinates
# (xmin, ymin, xmax, ymax) on both tables; astype(int) truncates towards zero.
for boxes in (df_ground_truth, df_yolo_predictions):
    half_width = boxes['w']/2
    half_height = boxes['h']/2
    boxes['xmin'] = (boxes['x'] - half_width).astype(int)
    boxes['ymin'] = (boxes['y'] - half_height).astype(int)
    boxes['xmax'] = (boxes['x'] + half_width).astype(int)
    boxes['ymax'] = (boxes['y'] + half_height).astype(int)

## First evaluation
- Evaluation only for videos (10 minutes) with known ground truths and predictions
- A frame missing from the ground truth or from the predictions is treated as a wrong prediction


In [None]:
# NOTE(review): `MetricBuilder` is not imported in the cells visible here - presumably
# `from mean_average_precision import MetricBuilder` (the package referenced in the
# "mAP evaluation" section); confirm and add it to the import cell.
metric_fn = MetricBuilder.build_evaluation_metric("map_2d", async_mode=True, num_classes=1)

gt_box_columns = ['xmin', 'ymin', 'xmax', 'ymax']
pred_box_columns = ['xmin', 'ymin', 'xmax', 'ymax', 'confidence']

# only files that have both ground truth and predictions are evaluated
for file_id in tqdm.tqdm(set(df_ground_truth.index) & set(df_yolo_predictions.index)):
    # .loc[[file_id]] always returns a DataFrame; .loc[file_id] would collapse a file
    # with a single row into a Series, which has no set_index
    df_ground_truth_filtered = df_ground_truth.loc[[file_id]].set_index('frame_id')
    df_yolo_predictions_filtered = df_yolo_predictions.loc[[file_id]].set_index('frame_id')

    # Every frame is added to the metric exactly once: a frame with several boxes
    # appears several times in the index and would otherwise be counted repeatedly.
    frame_ids = df_ground_truth_filtered.index.union(df_yolo_predictions_filtered.index).unique()
    for frame_id in frame_ids:
        if frame_id in df_ground_truth_filtered.index:
            gt = df_ground_truth_filtered.loc[[frame_id], gt_box_columns].to_numpy(dtype=float)
        else:
            # no ground truth for this frame: zero-area placeholder box
            gt = np.zeros((1, 4))
        if frame_id in df_yolo_predictions_filtered.index:
            preds = df_yolo_predictions_filtered.loc[[frame_id], pred_box_columns].to_numpy(dtype=float)
        else:
            # no prediction for this frame: zero-area placeholder box with confidence 0
            preds = np.zeros((1, 5))

        # gt [xmin, ymin, xmax, ymax, class_id, difficult, crowd]
        gt = np.concatenate((gt, np.zeros((gt.shape[0], 3))), axis=1)
        # preds [xmin, ymin, xmax, ymax, class_id, confidence]: insert class_id 0 before confidence
        preds = np.insert(preds, 4, 0, axis=1)

        metric_fn.add(preds, gt)

# compute PASCAL VOC metric
print(f"VOC PASCAL mAP: {metric_fn.value(iou_thresholds=0.5, recall_thresholds=np.arange(0., 1.1, 0.1))['mAP']}")
# compute PASCAL VOC metric at the all points
print(f"VOC PASCAL mAP in all points: {metric_fn.value(iou_thresholds=0.5)['mAP']}")
# compute metric COCO metric
print(f"COCO mAP: {metric_fn.value(iou_thresholds=np.arange(0.5, 1.0, 0.05), recall_thresholds=np.arange(0., 1.01, 0.01), mpolicy='soft')['mAP']}")

## Evaluate only for ground truth frames - similar to finetuning procedure

In [None]:
# NOTE(review): `MetricBuilder` is not imported in the cells visible here - presumably
# `from mean_average_precision import MetricBuilder` (the package referenced in the
# "mAP evaluation" section); confirm and add it to the import cell.
metric_fn = MetricBuilder.build_evaluation_metric("map_2d", async_mode=True, num_classes=1)

gt_box_columns = ['xmin', 'ymin', 'xmax', 'ymax']
pred_box_columns = ['xmin', 'ymin', 'xmax', 'ymax', 'confidence']

# only files that have both ground truth and predictions are evaluated
for file_id in tqdm.tqdm(set(df_ground_truth.index) & set(df_yolo_predictions.index)):
    # .loc[[file_id]] always returns a DataFrame; .loc[file_id] would collapse a file
    # with a single row into a Series, which has no set_index
    df_ground_truth_filtered = df_ground_truth.loc[[file_id]].set_index('frame_id')
    df_yolo_predictions_filtered = df_yolo_predictions.loc[[file_id]].set_index('frame_id')

    # Only frames with ground truth are evaluated, each of them exactly once: a frame
    # with several boxes appears several times in the index.
    for frame_id in df_ground_truth_filtered.index.unique():
        gt = df_ground_truth_filtered.loc[[frame_id], gt_box_columns].to_numpy(dtype=float)
        if frame_id in df_yolo_predictions_filtered.index:
            preds = df_yolo_predictions_filtered.loc[[frame_id], pred_box_columns].to_numpy(dtype=float)
        else:
            # no prediction for this frame: zero-area placeholder box with confidence 0
            preds = np.zeros((1, 5))

        # gt [xmin, ymin, xmax, ymax, class_id, difficult, crowd]
        gt = np.concatenate((gt, np.zeros((gt.shape[0], 3))), axis=1)
        # preds [xmin, ymin, xmax, ymax, class_id, confidence]: insert class_id 0 before confidence
        preds = np.insert(preds, 4, 0, axis=1)

        metric_fn.add(preds, gt)

# compute PASCAL VOC metric
print(f"VOC PASCAL mAP: {metric_fn.value(iou_thresholds=0.5, recall_thresholds=np.arange(0., 1.1, 0.1))['mAP']}")
# compute PASCAL VOC metric at the all points
print(f"VOC PASCAL mAP in all points: {metric_fn.value(iou_thresholds=0.5)['mAP']}")
# compute metric COCO metric
print(f"COCO mAP: {metric_fn.value(iou_thresholds=np.arange(0.5, 1.0, 0.05), recall_thresholds=np.arange(0., 1.01, 0.01), mpolicy='soft')['mAP']}")

## Evaluate only for ground truth frames - use only validation set

In [None]:
# Store the ground truth `date` as datetime64 so it can be compared with date strings,
# then show the size of the whole table next to the size of the two validation days.
df_ground_truth['date'] = pd.to_datetime(df_ground_truth['datetime'].dt.date, format='%Y-%m-%d')
is_validation_day = (df_ground_truth['date'] == '2023-07-07') | (df_ground_truth['date'] == '2023-07-08')
df_ground_truth.shape, df_ground_truth[is_validation_day].shape

In [None]:
# NOTE(review): `MetricBuilder` is not imported in the cells visible here - presumably
# `from mean_average_precision import MetricBuilder` (the package referenced in the
# "mAP evaluation" section); confirm and add it to the import cell.
metric_fn = MetricBuilder.build_evaluation_metric("map_2d", async_mode=True, num_classes=1)

gt_box_columns = ['xmin', 'ymin', 'xmax', 'ymax']
pred_box_columns = ['xmin', 'ymin', 'xmax', 'ymax', 'confidence']

# validation set: ground truth recorded on 2023-07-07 or 2023-07-08
# (the string comparison needs `date` to be datetime64)
df_ground_truth_validation = df_ground_truth[(df_ground_truth.date == '2023-07-07') | (df_ground_truth.date == '2023-07-08')]

# only validation files that also have predictions are evaluated
for file_id in tqdm.tqdm(set(df_ground_truth_validation.index) & set(df_yolo_predictions.index)):
    # .loc[[file_id]] always returns a DataFrame; .loc[file_id] would collapse a file
    # with a single row into a Series, which has no set_index
    df_ground_truth_filtered = df_ground_truth.loc[[file_id]].set_index('frame_id')
    df_yolo_predictions_filtered = df_yolo_predictions.loc[[file_id]].set_index('frame_id')

    # Only frames with ground truth are evaluated, each of them exactly once: a frame
    # with several boxes appears several times in the index.
    for frame_id in df_ground_truth_filtered.index.unique():
        gt = df_ground_truth_filtered.loc[[frame_id], gt_box_columns].to_numpy(dtype=float)
        if frame_id in df_yolo_predictions_filtered.index:
            preds = df_yolo_predictions_filtered.loc[[frame_id], pred_box_columns].to_numpy(dtype=float)
        else:
            # no prediction for this frame: zero-area placeholder box with confidence 0
            preds = np.zeros((1, 5))

        # gt [xmin, ymin, xmax, ymax, class_id, difficult, crowd]
        gt = np.concatenate((gt, np.zeros((gt.shape[0], 3))), axis=1)
        # preds [xmin, ymin, xmax, ymax, class_id, confidence]: insert class_id 0 before confidence
        preds = np.insert(preds, 4, 0, axis=1)

        metric_fn.add(preds, gt)

# compute PASCAL VOC metric
print(f"VOC PASCAL mAP: {metric_fn.value(iou_thresholds=0.5, recall_thresholds=np.arange(0., 1.1, 0.1))['mAP']}")
# compute PASCAL VOC metric at the all points
print(f"VOC PASCAL mAP in all points: {metric_fn.value(iou_thresholds=0.5)['mAP']}")
# compute metric COCO metric
print(f"COCO mAP: {metric_fn.value(iou_thresholds=np.arange(0.5, 1.0, 0.05), recall_thresholds=np.arange(0., 1.01, 0.01), mpolicy='soft')['mAP']}")

# Helpers for debugging

In [None]:
## cfg_raw_cam_02_fhd_h265_20230609T173000 # there is a car identified as a boat outside of the field of view

In [None]:
def iqr(x):
    """Interquartile range of `x`: 75th percentile minus 25th percentile."""
    upper_quartile, lower_quartile = np.percentile(x, [75, 25])
    return upper_quartile - lower_quartile
# Exploration: find camera 2 files whose predicted boxes barely move ("static" boxes).
# Criterion used here: more than 100 prediction rows in the file and an interquartile
# range below 100 for both x and y. The follow-up idea (frame-to-frame x shift below 10)
# is only inspected, not applied.
# NOTE: `df_tmp` is reused for several different intermediate tables in this cell.
df_tmp = df_yolo_predictions[df_yolo_predictions.camera_id == 2].reset_index()
# number of prediction rows per file
df_tmp = df_tmp.groupby('filename').datetime.count()
df_tmp = df_tmp[df_tmp > 100]
# spread (IQR) of x and y per file, for the files selected above
df_tmp = df_yolo_predictions.loc[df_tmp.index].reset_index().groupby('filename').agg({'x': iqr, 'y': iqr})
df_static_indexes = df_tmp[(df_tmp.x < 100) & (df_tmp.y < 100)].index

# Inspect the x shift between consecutive frames of one static file: the slice [5:]
# together with the `break` below means only the sixth static file (if any) is printed.
for filename in list(df_static_indexes)[5:]:
    print(filename)
    # largest x per frame, then its absolute change from the previous frame
    df_tmp = df_yolo_predictions.loc[filename].groupby('frame_id').x.max().reset_index()
    # df_tmp = 
    df_tmp['x_shift'] = df_tmp['x'].diff().abs()
    print(df_tmp)
    # unfinished filtering step, kept disabled:
    # df_tmp = df_tmp[df_tmp['x_shift'] > 10]
    # print(df_tmp)
    # df_static_indexes = df_static_indexes.drop(filename)
    # df_static_indexes = df_static_indexes.append(df_tmp.index)

    break



# .agg({'x': lambda x: np.percentile(x, 75) - np.percentile(x, 25)})

In [None]:
# Values for camera 2 bank identification: two points of a straight line across the image;
# the area above the line should be the water. The camera has a good horizontal angle, so
# the check is that the bottom coordinate of a bounding box lies in the upper area of the
# image, i.e. its y is less than the line.
# Points are (x, y) in pixels; the relative image coordinates are kept in the comments.
point1_cam02_bank = (2, 378)      # relative: x = 0.001042, y = 0.350000
point2_cam02_bank = (1918, 445)   # relative: x = 0.998958, y = 0.412037

In [None]:
# Write the current `df_tmp` to debug.csv, indexed by its `frame_id` column.
df_tmp.set_index('frame_id').to_csv('debug.csv')

In [None]:
# Look up one recording in `df_tmp` by file name.
# NOTE(review): only works while `df_tmp` is indexed by file name, which depends on the
# cell that last assigned `df_tmp` - confirm before relying on this cell.
df_tmp.loc['cfg_raw_cam_02_fhd_h265_20230707T202002.mkv']

In [None]:
# Change of the largest predicted x between consecutive frame_ids of one recording,
# smoothed with a rolling mean over 10 rows.
max_x_per_frame = df_yolo_predictions.loc['cfg_raw_cam_02_fhd_h265_20230707T202002.mkv'].groupby('frame_id').x.max()
(max_x_per_frame - max_x_per_frame.shift()).rolling(10).mean().plot()

In [None]:
# Scratch arithmetic.
# NOTE(review): presumably a frame id converted to minutes at 4 frames per second - confirm.
1597  /4/60

In [None]:
# First 30 predictions (ordered by frame_id) after frame 2250 of one recording.
recording_predictions = df_yolo_predictions.loc['cfg_raw_cam_02_fhd_h265_20230707T202002.mkv']
recording_predictions[recording_predictions.frame_id > 2250].sort_values('frame_id').head(30)

In [None]:
# Plot the largest predicted x per frame_id of one recording, ordered by frame_id.
df_yolo_predictions.loc['cfg_raw_cam_02_fhd_h265_20230707T202002.mkv'].groupby('frame_id').x.max().sort_index().plot()

In [None]:
# Summary statistics of the predicted y values of one recording.
df_yolo_predictions.loc['cfg_raw_cam_02_fhd_h265_20230707T174000.mkv'].y.describe()


In [None]:
# How often each predicted x value occurs in one recording.
df_yolo_predictions.loc['cfg_raw_cam_02_fhd_h265_20230707T174000.mkv'].x.value_counts()

In [None]:
# Scatter the per-frame IoU against frame_id for the first row of `df_evaluation`.
# Requires `frames_iou` of that row to be a dict; `df_evaluation` is whatever the most
# recent evaluate_model call produced.
plt.scatter(df_evaluation.iloc[0].frames_iou.keys(), df_evaluation.iloc[0].frames_iou.values())

In [None]:
# Ground truth rows of one recording, ordered by frame_id.
df_ground_truth.loc['cfg_raw_cam_01_fhd_h265_20230609T050002.mkv'].sort_values('frame_id')

In [None]:
# Plot the sorted ground truth frame_ids of one recording.
df_ground_truth.loc['cfg_raw_cam_02_fhd_h265_20230707T124001.mkv'].sort_values('frame_id').frame_id.plot()

In [None]:
# Divide a frame id by 4 and express the result in units of 60:
# (total as a fraction, whole part, remainder).
# NOTE(review): presumably 4 frames per second, i.e. (minutes, whole minutes, seconds) - confirm.
convert_frame_id = 1087
frame_id_by_four = convert_frame_id / 4
frame_id_by_four / 60, frame_id_by_four // 60, frame_id_by_four % 60

In [None]:
# Predictions of one recording with x < 1800: plot their sorted frame_ids.
# Note: this overwrites `df_tmp` with the prediction rows of that recording.
df_tmp = df_yolo_predictions.loc['cfg_raw_cam_02_fhd_h265_20230707T124001.mkv']
df_tmp[df_tmp.x < 1800].sort_values('frame_id').frame_id.plot()