1) Loads annotation and active-learning metadata.
2) Splits annotations into train/val.
3) Carves a prediction subset from the active-learning pool (size based on a percentage of the annotation count).
4) Prints split details and copies frames/JSONs to their target folders.

# Libraries

In [None]:
from src import (
    load_metadata,
    perform_split,
    print_details,
    copy_frames
)

import os

# Main

In [None]:
# Metadata filenames
annotations_metadata_filename = "annotations_metadata.json"
aL_metadata_filename = "active_learning.json"

# Input and output locations (empty strings by default)
source_dir = ""
output_dir = ""

# Active-learning folder, nested under the output directory
al_dir = os.path.join(output_dir, "active_learning")

In [None]:
# Annotation metadata is read from the active-learning folder (al_dir).
annotations_metadata = load_metadata(
    metadata_filename=annotations_metadata_filename,
    source_dir=al_dir,
)

# Active-learning pool metadata is read from the output folder (output_dir).
al_metadata = load_metadata(
    metadata_filename=aL_metadata_filename,
    source_dir=output_dir,
)

#### Split #1: train/val from annotations

In [None]:
# Ratio handed to perform_split; the second returned subset is used as the
# validation set, the first as the training set.
split_ratio_annotation = 0.15  # 15% → validation

train_metadata, val_metadata = perform_split(
    split_ratio=split_ratio_annotation,
    metadata=annotations_metadata,
)

In [None]:
# Each split descriptor is a 3-tuple of pairs:
#   (split name, JSON filename), (metadata subset, copy-images flag), (dir, dir)
# NOTE(review): the last pair looks like (source, destination) directories —
# confirm against copy_frames.
split_data_1 = [
    ((name, json_name), (subset, False), (source_dir, al_dir))  # False: do not copy images here
    for name, json_name, subset in (
        ("train", "train.json", train_metadata),
        ("val", "val.json", val_metadata),
    )
]
split_data_train, split_data_val = split_data_1

# Report the train/val split against the full annotation set.
print_details(split_data=split_data_1, metadata=annotations_metadata)

#### Split #2: prediction subset from active-learning pool

In [None]:
prediction_percentage = 30  # target: predict set ≈ 30% of the annotation count
n_of_annotations = len(annotations_metadata)
n_of_al = len(al_metadata)

# Desired predict-set size as an item count, derived from the annotation count
# (not from the pool size). An empty annotation set naturally yields 0.
desired_predict = int(prediction_percentage * n_of_annotations / 100)

# Fraction of the active-learning pool needed to reach that size, capped at 1.0
# when the pool is smaller than the target. The ratio is intentionally left
# unrounded: rounding to 3 decimals collapsed any ratio below 0.0005 to 0.0
# (an empty predict set on large pools) and skewed the resulting size.
if n_of_al == 0:
    split_ratio_predict = 0.0  # empty pool: nothing to carve out
else:
    split_ratio_predict = min(1.0, desired_predict / n_of_al)

In [None]:
# Split the active-learning pool: the first subset is kept as the updated
# pool, the second becomes the prediction set.
updated_al_metadata, predict_metadata = perform_split(
    split_ratio=split_ratio_predict,
    metadata=al_metadata,
)

In [None]:
# Descriptors for the second split, in the same 3-tuple layout as the first:
#   (split name, JSON filename), (metadata subset, copy-images flag), (dir, dir)
# NOTE(review): the pool is loaded from "active_learning.json" but this split
# is named "activeLearning.json" — confirm the differing filename is intended.
split_data_2 = [
    (
        ("activeLearning", "activeLearning.json"),
        (updated_al_metadata, False),
        (source_dir, output_dir),
    ),
    (
        ("predict", "predict.json"),
        (predict_metadata, True),  # copy predict images into al_dir/predict
        (source_dir, al_dir),
    ),
]
split_data_aL, split_data_predict = split_data_2

# Report the pool/predict split against the original active-learning pool.
print_details(split_data=split_data_2, metadata=al_metadata)

In [None]:
# Gather the descriptors of both splits and hand each one to copy_frames.
split_data = [*split_data_1, *split_data_2]

for split in split_data:
    copy_frames(split=split)