# Dataset Labeling

This is a **manual workflow**. Once a dataset has been populated with the results from the **Ensemble Exploration** workflow, this workflow can be used to manually select the final images to be labeled. The selected samples are exported and are then ready to be labeled in the second part of the notebook, where CVAT is used for that purpose.


In [None]:
import sys

sys.path.append("..")

import os
import logging
from tqdm import tqdm

import fiftyone as fo
import fiftyone.utils.cvat as fouc

from main import configure_logging
from utils.selector import (
    generate_view_embedding_selection,
    generate_view_anomaly_detection_selection,
    generate_view_ensemble_selection,
)

configure_logging()

In [None]:
# Prepare connection to CVAT
# Add your credentials to the .secret file. Follow these instructions: https://docs.voxel51.com/integrations/cvat.html#authentication

# Only these keys are copied from the .secret file into the environment
CVAT_CREDENTIAL_KEYS = (
    "FIFTYONE_CVAT_USERNAME",
    "FIFTYONE_CVAT_PASSWORD",
    "FIFTYONE_CVAT_EMAIL",
)


def parse_secret_line(line):
    """Split one ``KEY=VALUE`` line of the .secret file.

    Args:
        line: Raw line as read from the file.

    Returns:
        A ``(key, value)`` tuple, or ``None`` for blank lines, ``#`` comment
        lines, and lines without a ``=`` separator.
    """
    entry = line.strip()
    if not entry or entry.startswith("#"):
        return None
    # Split on the first "=" only, so values (e.g. passwords) may contain "="
    key, separator, value = entry.partition("=")
    if not separator:
        return None
    return key, value


try:
    with open("../.secret", "r", encoding="utf-8") as file:
        for line in file:
            parsed = parse_secret_line(line)
            if parsed is None:
                # Skip lines that are not entries instead of aborting the whole file
                continue
            key, value = parsed
            if key in CVAT_CREDENTIAL_KEYS:
                # Log only the key name, never the secret value
                logging.info(f"Key {key} found")
                os.environ[key] = value
except (OSError, ValueError) as e:
    # Best effort: a missing or unreadable file is reported, the notebook keeps running
    logging.error(f"An error occured: {e}")

In [None]:
# Load the dataset by name
# The resulting `dataset` is the one the following cells filter, tag, and annotate
dataset_name = "fisheye8k"  # Dataset to load
dataset = fo.load_dataset(dataset_name)

In [None]:
# Field and tag names used in Ensemble Selection
ensemble_selection_field = "n_unique_ensemble_selection"
ensemble_selection_tag = "detections_overlap"

In [None]:
# Shows only labels that belong to the unique detections
# Each entry configures one workflow whose output can be used to narrow down the samples
# Key 'apply_filter': Whether to utilize the workflow output for the selection
# Key 'function': Function to filter and return a view
# The whole entry is passed to 'function', so the remaining keys are its settings

configuration = {
    "embedding_selection": {
        "apply_filter": True,
        "function": generate_view_embedding_selection,
        "min_selection_count": 1,  # How often a sample was selected
    },
    "anomaly_detection": {
        "apply_filter": True,
        "function": generate_view_anomaly_detection_selection,
        "model": "Padim",  # Model used for mask generation
        "min_anomaly_score": 0.5,  # Anomaly score of the model
    },
    "ensemble_selection": {
        "apply_filter": True,
        "function": generate_view_ensemble_selection,
        "min_n_unique_selection": 5,  # Number of instances per frame
    },
}

# Currently supported workflows to filter data
# The filters are chained: each enabled workflow receives the view left by the previous one
view = dataset.view()
for workflow_name, workflow_config in tqdm(
    configuration.items(), desc="Filtering samples"
):
    if workflow_config.get("apply_filter", False) is not True:
        continue

    samples_in = len(view)
    view = workflow_config["function"](view, workflow_config)
    samples_out = len(view)
    logging.info(
        f"Reduced {samples_in} samples to {samples_out} with workflow {workflow_name}"
    )
    # Reuse the count from above instead of querying the view a third time
    if samples_out == 0:
        logging.error("Filter settings were too strict. No samples left.")
        # Nothing is left to filter, so the remaining workflows are skipped
        break

In [None]:
# Open the filtered view in the FiftyOne App and tag the samples you want to have labeled
# Use the tag stored in tag_for_labeling; the next cell selects the samples by this tag
tag_for_labeling = "todo_labeling"
fo.launch_app(view=view)

In [None]:
# Inspect your selection of samples to be labeled
# The selection is taken from the whole dataset: every sample carrying the labeling tag
view_selection = dataset.match_tags(tag_for_labeling)
if len(view_selection) == 0:
    # An empty selection is only reported; the view is still printed below
    logging.error(
        f"No samples selected for labeling. Please tag samples with tag '{tag_for_labeling}'"
    )
print(view_selection)

In [None]:
# Export the samples you selected for labeling
# The YOLOv5Dataset format is used in this example; it exports the labels as well
# https://docs.voxel51.com/user_guide/export_datasets.html#yolov5dataset

splits = ["train", "val"]  # Select all the splits your dataset has
export_dir = "/media/dbogdoll/Datasets/vru_labeling"  # Directory to export the files to
label_field = "pred_google_owlv2_large_patch14_finetuned"  # Include a prediction field from one of the zero-shot models

# Get all the classes from the prediction field
classes = view_selection.distinct(f"{label_field}.detections.label")

# Options that are identical for every split
export_options = {
    "export_dir": export_dir,
    "dataset_type": fo.types.YOLOv5Dataset,
    "label_field": label_field,
    "classes": classes,
}

# Export the splits: samples are assigned to a split by their split tag
for split in splits:
    split_view = view_selection.match_tags(split)
    split_view.export(split=split, **export_options)

In [None]:
# In-house labeling: this example uses CVAT, Voxel51 offers further integrations as well
# All you need is a CVAT account and the environment variables set at the top of the notebook
# https://docs.voxel51.com/integrations/cvat.html

# Classes offered to the annotators
# Classes from Mcity Fisheye dataset:
# ["car", "truck", "bus", "trailer", "motorbike/cycler", "pedestrian", "van", "pickup"]
classes_for_labeling = ["motorbike/cycler", "pedestrian"]

# One new label field, annotated as polylines with the classes above
label_schema = {
    "new_ground_truth": dict(type="polylines", classes=classes_for_labeling),
}

# Key under which the annotation run is stored
anno_key = "cvat"
# if view_selection.has_annotation_runs:
#    view_selection.delete_annotation_run(anno_key)

In [None]:
# Start the annotation run for the selected samples with the CVAT backend
# The run is registered under anno_key, which the following cells use to refer to it
# NOTE(review): launch_editor=True presumably opens the CVAT editor right away - confirm in the FiftyOne docs
view_selection.annotate(
    anno_key,
    backend="cvat",
    label_schema=label_schema,
    launch_editor=True,
)

In [None]:
# Show the available annotation runs and the details of the current one
# Both results are printed explicitly: a notebook cell only displays its last expression
print(view_selection.list_annotation_runs())
info = dataset.get_annotation_info(anno_key)
print(info)

In [None]:
# Once you have annotated the dataset, you can import the labels back into FiftyOne
dataset.load_annotations(anno_key)
# Open the view that belongs to the annotation run in the App to review the imported labels
view_annotations = dataset.load_annotation_view(anno_key)
fo.launch_app(view=view_annotations)

In [None]:
# Import a labeled dataset from CVAT if the annotation_key was deleted

cvat_project_name = "FiftyOne_mcity_fisheye_3_months"

# Import into a new, empty dataset so the existing `dataset` stays untouched
# The media is downloaded from CVAT into data_path along with the annotations
dataset_labeled = fo.Dataset()
fouc.import_annotations(
    dataset_labeled,
    project_name=cvat_project_name,
    data_path="/tmp/cvat_import",
    download_media=True,
)

# Show the dataset that was just imported
session = fo.launch_app(dataset_labeled)

In [None]:
# If dataset got extended, merge into existing dataset
# Add tags prior to merging
# for sample in dataset_labeled:
#    sample.tags = ["addition_2025_01_13","train"]
#    sample.save()
# dataset_existing.merge_samples(dataset_labeled)

In [None]:
# Cleanup

# Delete tasks from CVAT
# results = dataset.load_annotation_results(anno_key)
# results.cleanup()

# Delete run record (not the labels) from FiftyOne
# dataset.delete_annotation_run(anno_key)