In [None]:
from fiftyone import ViewField as F
import fiftyone as fo
import numpy as np
from tqdm import tqdm

# Name of the existing FiftyOne dataset that every cell below operates on.
DATASET_NAME = "mcity_fisheye_3_months"
dataset = fo.load_dataset(DATASET_NAME)

In [None]:
# Best-effort removal of the sample-level area fields, if they are present.
# Each field is handled on its own so that a failure on the first one (e.g.
# because it does not exist) no longer prevents the second from being removed.
for stale_field in ("bbox_area_abs", "bbox_area_rel"):
    try:
        dataset.delete_sample_field(stale_field)
    except Exception as exc:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still propagate; report what was
        # skipped instead of failing silently.
        print(f"Skipping deletion of '{stale_field}': {exc}")

In [None]:
# Box areas of all detections, pooled over every prediction field.
bbox_areas = []

# Prediction fields are recognised by the "pred_" marker in their field name.
dataset_schema = dataset.get_field_schema()
pred_fields = [name for name in dataset_schema if "pred_" in name]

# Per prediction field: select the samples that have at least one detection,
# attach the box area to each detection, then write the detections back.
for field in pred_fields:
    print(field)
    detections_path = field + ".detections"
    view = dataset.exists(field).match(F(detections_path) != [])

    # One list of detections per sample in the view.
    samples_detections = view.values(detections_path)
    for detections in tqdm(samples_detections):
        for detection in detections:
            # Product of box entries 2 and 3 (width and height, assuming
            # FiftyOne's relative [x, y, width, height] box format).
            box = detection["bounding_box"]
            bbox_area = float(box[2] * box[3])

            # Report any area that falls outside the [0, 1] range.
            if not (0 <= bbox_area <= 1):
                print(bbox_area)

            detection["bbox_area"] = bbox_area
            bbox_areas.append(bbox_area)

    # Persist the detections, now carrying their "bbox_area" attribute.
    view.set_values(detections_path, samples_detections)

print(len(bbox_areas))

In [None]:
# Spot check: print the stored box area of every detection in the first
# prediction field.
first_pred_field = pred_fields[0]
for sample in tqdm(dataset):
    label = sample[first_pred_field]
    if label is None:
        # Samples without predictions for this field have no detections to
        # inspect; skipping them avoids an AttributeError on `.detections`.
        continue
    for detection in label.detections:
        print(detection.bbox_area)


In [None]:
# Re-read the stored box areas, this time for the first prediction field only.
# NOTE(review): this replaces the `bbox_areas` list that the earlier cell
# pooled over all prediction fields, so anything derived from `bbox_areas`
# afterwards reflects this single field -- confirm that is intended.
bbox_areas = []

field = pred_fields[0]
print(field)

# Samples that have the field set and hold at least one detection.
detections_path = field + ".detections"
view = dataset.exists(field).match(F(detections_path) != [])

# One list of detections per sample; flatten their areas into `bbox_areas`.
samples_detections = view.values(detections_path)
bbox_areas.extend(
    detection["bbox_area"]
    for detections in tqdm(samples_detections)
    for detection in detections
)

# Print every area that lies outside the [0, 1] range.
for area in bbox_areas:
    if area > 1 or area < 0:
        print(area)

print(len(bbox_areas))

In [None]:
# Derive equal-population (quantile) bins from the collected box areas.
num_bins = 10
quantiles = np.linspace(start=0, stop=1, num=num_bins + 1)
quantile_bin_edges = np.quantile(bbox_areas, q=quantiles)

# Upper limit of each bin: the interior quantile edges, with the final bin
# capped at 1 instead of the observed maximum.
upper_bin_limits = [*quantile_bin_edges[1:-1], np.float64(1)]
print(upper_bin_limits)

In [None]:
# Quantile bins over the collected box areas.
# NOTE(review): this cell repeats the bin computation of the preceding cell
# and yields the same values; it looks redundant -- consider removing it.
num_bins = 10
quantiles = np.linspace(0, 1, num_bins + 1)
quantile_bin_edges = np.quantile(bbox_areas, quantiles)

# Keep the interior edges as upper limits and close the last bin at 1.
upper_bin_limits = list(quantile_bin_edges[1:num_bins])
upper_bin_limits.append(np.float64(1))
print(upper_bin_limits)

In [None]:
# Assign bin label tags to detections
for field in pred_fields:
    print(field)
    view = (
        dataset
        .exists(field)
        .match(F(f"{field}.detections") != [])
    )
    samples_detections = view.values(field + ".detections") # list of lists, one list of detections per sample
    for detections in tqdm(samples_detections):
        for detection in detections:
            bin_index = np.digitize(
                    detection["bbox_area"], upper_bin_limits, right=True
                )
            bin_upper_limit = "{:.4f}".format(upper_bin_limits[bin_index])
            bin_name = "bbox_size_bin_" + str(bin_index) + "_" + bin_upper_limit
            detection.tags.append(bin_name)

    # Save the assigned bin tag
    view.set_values(field + ".detections", samples_detections)    

In [None]:
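# NOTE: disabled alternative to the tagging cell above. It iterates sample by sample and stores the bin name in a "bbox_area_bin" attribute instead of a tag.
#for sample in dataset.iter_samples(progress=True, autosave=True, batch_size=1000):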
#    for field in sample.iter_fields():
#        if "pred_" in field[0]:
#            for detection in field[1]["detections"]:
#                bin_index = np.digitize(
#                    detection["bbox_area"], upper_bin_limits, right=True
#                )
#                bin_upper_limit = "{:.4f}".format(upper_bin_limits[bin_index])
#                bin_name = "bin_" + str(bin_index) + "_" + bin_upper_limit
#                detection["bbox_area_bin"] = bin_name