# Testowanie dokładności na pełnym zbiorze dla różnych thresholds

## Funkcje przetwarzające dane

In [1]:
from validating_all_images import unpickle_data
import numpy as np

In [2]:
def reduce_by_threshold(image_row, **kwargs):
    """Keep only the detections whose column 3 exceeds ``threshold``.

    Args:
        image_row: Indexable record. Element 2 holds the detections;
            elements 0 and 1 are passed through unchanged.
        **kwargs: Must contain ``threshold``, the exclusive lower bound
            compared against column 3 of the detections (presumably the
            detection score -- confirm against the producer of the data).

    Returns:
        A 3-tuple ``(image_row[0], image_row[1], detections)``. When the
        detections form a 2-D array, only the rows passing the threshold
        are kept and they are cast to integers. A 1-D array is returned
        as-is.

    Raises:
        KeyError: If ``threshold`` is not supplied.
    """
    lower_bound = kwargs['threshold']
    boxes = np.array(image_row[2])

    # A 1-D array (e.g. an empty detection list) has no column 3 to filter
    # on, so only multi-dimensional input goes through the mask.
    if boxes.ndim != 1:
        keep_mask = boxes[:, 3] > lower_bound
        boxes = boxes[keep_mask].astype('int')

    return (image_row[0], image_row[1], boxes)

In [3]:
from haar_students_new import non_max_suppression

def bounding_boxes_reduction(image_row):
    """Pass one record's detections through ``non_max_suppression``.

    Args:
        image_row: Indexable record. Element 2 holds the detections;
            elements 0 and 1 are passed through unchanged.

    Returns:
        A 3-tuple ``(image_row[0], image_row[1], reduced)`` where
        ``reduced`` is whatever ``non_max_suppression`` returns for the
        detections.
    """
    # ``treshold`` (sic) is the keyword spelling used for the helper call.
    surviving = non_max_suppression(image_row[2], treshold=0.1)
    return (image_row[0], image_row[1], surviving)

In [4]:
from haar_students_new import iou

def calculate_metrics(image_row):
    """Count detection hits and misses for a single record.

    A ground-truth box and a predicted box are considered matched when
    ``iou`` of the pair is greater than 0.5.

    Args:
        image_row: Indexable record. Element 1 holds the ground-truth
            boxes; element 2 holds the predictions as 4-item entries
            ``(k, j, w, _)``.

    Returns:
        A tuple ``(tp, fp, fn, p)``:
            tp -- predictions matched by at least one ground-truth box,
            fp -- predictions matched by none,
            fn -- ground-truth boxes matched by no prediction,
            p  -- number of ground-truth boxes.
    """
    raw_predictions = image_row[2]
    ground_truth = image_row[1]

    # Each (k, j, w, _) entry becomes the box [j, k, j + w, k + w]; the
    # fourth item is not used here.
    boxes = [[j, k, j + w, k + w] for (k, j, w, _) in raw_predictions]

    def is_match(real, pred):
        return iou(real, pred) > 0.5

    p = len(ground_truth)

    # False negatives: ground-truth boxes that no prediction matches.
    fn = sum(
        1
        for real in ground_truth
        if not any(is_match(real, pred) for pred in boxes)
    )

    # True positives: predictions matching some ground-truth box; every
    # remaining prediction is a false positive.
    tp = sum(
        1
        for pred in boxes
        if any(is_match(real, pred) for real in ground_truth)
    )
    fp = len(boxes) - tp

    return tp, fp, fn, p

## Przeprowadzenie badań na całym zbiorze

In [5]:
import tqdm 

# Sweep the detection threshold over the full dataset and collect, for each
# threshold, the sensitivity, the false positives per image and the raw
# counts.
coords = unpickle_data("combined_coords_greater_neg3.pickle")
coords = np.array(coords, dtype=object)

summary = {}

# A wider sweep was tried earlier: np.arange(-3, 6, 0.5).
for threshold in tqdm.tqdm(np.arange(0, 6.5, 0.5), desc=" thresholds"):
    # Plain comprehensions instead of np.apply_along_axis: that helper wraps
    # every callback result in an ndarray, and the ragged
    # (row[0], row[1], detections) tuples returned here trigger a
    # VisibleDeprecationWarning on older NumPy and a ValueError on
    # NumPy >= 1.24.
    coords_filtered = [
        reduce_by_threshold(image_row, threshold=threshold)
        for image_row in coords
    ]
    coords_suppresion = [
        bounding_boxes_reduction(image_row) for image_row in coords_filtered
    ]
    # One (tp, fp, fn, p) row per image.
    metrics = np.array(
        [calculate_metrics(image_row) for image_row in coords_suppresion]
    )
    num_of_images = metrics.shape[0]
    metrics_summary = metrics.sum(axis=0)
    tp, fp, fn, p = metrics_summary
    # Share of ground-truth boxes that were not missed.
    sensitivity = 1 - fn / p
    # False positives per image.
    far = fp / num_of_images
    summary[threshold] = (sensitivity, far, tp, fp, fn, p)

  return array(a, dtype, copy=False, order=order, subok=True)
 thresholds: 100%|██████████████████████████████████████████████| 13/13 [04:20<00:00, 20.05s/it]


In [8]:
import pandas as pd
# One row per threshold (the keys of `summary`), one column per metric;
# the table is also written to disk as CSV.
df = pd.DataFrame.from_dict(
    summary,
    orient='index',
    columns=['sensitivity', 'far', 'tp', 'fp', 'fn', 'p'],
)
df.to_csv('metrics_summary.csv')

In [9]:
df

Unnamed: 0,sensitivity,far,tp,fp,fn,p
0.0,0.55425,9.872759,2594,28088,2087,4682
0.5,0.552328,7.563445,2585,21518,2096,4682
1.0,0.545707,5.455536,2554,15521,2127,4682
1.5,0.539513,3.608084,2525,10265,2156,4682
2.0,0.520718,2.181019,2437,6205,2244,4682
2.5,0.496796,1.136028,2325,3232,2356,4682
3.0,0.444041,0.548682,2078,1561,2603,4682
3.5,0.385733,0.229877,1806,654,2876,4682
4.0,0.306707,0.087873,1436,250,3246,4682
4.5,0.226399,0.029525,1060,84,3622,4682


## Przeprowadzenie badań na zbiorze testowym (okno 32)

In [11]:
import tqdm 

# Sweep the detection threshold over the 10th-fold data and collect, for
# each threshold, the sensitivity, the false positives per image and the raw
# counts.
coords = unpickle_data("combined_coords_10th_fold_32.pickle")
coords = np.array(coords, dtype=object)

summary = {}

# A wider sweep was tried earlier: np.arange(-3, 6, 0.5).
for threshold in tqdm.tqdm(np.arange(0, 6.5, 0.5), desc=" thresholds"):
    # Plain comprehensions instead of np.apply_along_axis: that helper wraps
    # every callback result in an ndarray, and the ragged
    # (row[0], row[1], detections) tuples returned here trigger a
    # VisibleDeprecationWarning on older NumPy and a ValueError on
    # NumPy >= 1.24.
    coords_filtered = [
        reduce_by_threshold(image_row, threshold=threshold)
        for image_row in coords
    ]
    coords_suppresion = [
        bounding_boxes_reduction(image_row) for image_row in coords_filtered
    ]
    # One (tp, fp, fn, p) row per image.
    metrics = np.array(
        [calculate_metrics(image_row) for image_row in coords_suppresion]
    )
    num_of_images = metrics.shape[0]
    metrics_summary = metrics.sum(axis=0)
    tp, fp, fn, p = metrics_summary
    # Share of ground-truth boxes that were not missed.
    sensitivity = 1 - fn / p
    # False positives per image.
    far = fp / num_of_images
    summary[threshold] = (sensitivity, far, tp, fp, fn, p)

  return array(a, dtype, copy=False, order=order, subok=True)
 thresholds: 100%|██████████████████████████████████████████████| 13/13 [01:12<00:00,  5.55s/it]


In [12]:
import pandas as pd
# One row per threshold (the keys of `summary`), one column per metric;
# the table is also written to disk as CSV.
df = pd.DataFrame.from_dict(
    summary,
    orient='index',
    columns=['sensitivity', 'far', 'tp', 'fp', 'fn', 'p'],
)
df.to_csv('metrics_summary_10th.csv')

In [13]:
df

Unnamed: 0,sensitivity,far,tp,fp,fn,p
0.0,0.104478,33.132143,49,9277,420,469
0.5,0.10661,24.025,50,6727,419,469
1.0,0.102345,15.946429,48,4465,421,469
1.5,0.095949,9.607143,45,2690,424,469
2.0,0.091684,5.071429,43,1420,426,469
2.5,0.085288,2.382143,40,667,429,469
3.0,0.055437,0.989286,26,277,443,469
3.5,0.049041,0.357143,23,100,446,469
4.0,0.029851,0.178571,14,50,455,469
4.5,0.027719,0.05,13,14,456,469
