This script takes the Mcity **midadvrb_2000** dataset and creates new splits:

- The train split contains only frames without pedestrians.
- The val split contains only frames with at least one pedestrian.

With the new **midadvrb_ano_ped** dataset, a model can learn what "normal" looks like from pedestrian-free frames and treat pedestrians as outliers (anomalies) during validation. This way, we might be able to detect new images that contain pedestrians.

In [None]:
%load_ext autoreload
%autoreload 2

import sys
import os.path

sys.path.append("..")

from config import SELECTED_DATASET
from data_loader.data_loader import *

import fiftyone as fo

from utils.selector import select_by_class

In [None]:
# Look up the configuration entry of the selected dataset in the datasets YAML.
dataset_info = load_dataset_info(
    SELECTED_DATASET, config_path="/home/dbogdoll/mcity_data_engine/datasets/datasets.yaml"
)

# The entry names its loader under "loader_fct"; the loader is resolved by name
# from this notebook's globals (presumably provided by the data_loader star import).
loader_function = dataset_info.get("loader_fct")
_loader_callable = globals()[loader_function]
dataset = _loader_callable(dataset_info)

# Mark the loaded dataset as non-persistent.
dataset.persistent = False

In [None]:
# Build training dataset (no pedestrians): nothing is required, "pedestrian" is excluded.
view_train = select_by_class(dataset, classes_in=[], classes_out=["pedestrian"])

# Build validation dataset (1-n pedestrians in each frame): "pedestrian" is required,
# nothing is excluded.
view_val = select_by_class(dataset, classes_in=["pedestrian"], classes_out=[])

In [None]:
# https://github.com/voxel51/fiftyone/issues/1952

export_dir = "/home/dbogdoll/mcity_data_engine/datasets/midadvrb_ano_ped"
label_field = "ground_truth"

# Sorted list of all observed labels in the label field. The path is derived from
# `label_field` so the class list and the exported field cannot drift apart, and it
# is computed on the full dataset so both splits share the same class list.
classes = dataset.distinct(f"{label_field}.detections.label")

# Export both views into the same YOLOv5 directory, one split each
# (train first, then val — same order as before).
for split_name, split_view in (("train", view_train), ("val", view_val)):
    split_view.export(
        export_dir=export_dir,
        dataset_type=fo.types.YOLOv5Dataset,
        label_field=label_field,
        split=split_name,
        classes=classes,
    )

In [None]:
# Open the FiftyOne App on the dataset, showing the pedestrian-free training view.
session = fo.launch_app(dataset=dataset, view=view_train)

# Block this cell so the App stays available; a negative wait is expected to block
# until interrupted (NOTE(review): confirm against the FiftyOne Session.wait docs).
session.wait(-1)