In [2]:
import json
import random
from datetime import datetime
from pathlib import Path

from constants import SEED, SYNTHETIC_DATA_DIR, SYNTHETIC_DATA_FILE, TRAIN_TEST_SPLIT_RATIO
from type import SyntheticSamplesAnnotated

# Input location: <parent of the current working directory>/data/synthetic_samples.
DATA_FOLDER = Path.cwd().parent / "data"
SAMPLES_DIR = DATA_FOLDER / "synthetic_samples"

# Prefix for the output file names. With the empty prefix the names are the same
# on every run (and start with "_"); swap in the timestamp expression to get a
# distinct set of files per run.
now = ""  # datetime.now().strftime("%Y%m%d_%H%M%S")
OUTPUT_SAMPLES_FILE = SYNTHETIC_DATA_DIR / f"{now}_annotated_synthetic_samples.json"
OUTPUT_SAMPLES_TRAIN_FILE = SYNTHETIC_DATA_DIR / f"{now}_annotated_synthetic_samples_train.json"
OUTPUT_SAMPLES_TEST_FILE = SYNTHETIC_DATA_DIR / f"{now}_annotated_synthetic_samples_test.json"


# Decode explicitly as UTF-8 instead of relying on the platform default: the
# outputs of this notebook are written as UTF-8 with ensure_ascii=False, and the
# input must be read the same way for non-ASCII text to survive the round trip.
with (SAMPLES_DIR / "annotated_synthetic_samples.json").open("r", encoding="utf-8") as fp:
    fc = json.load(fp)


# Use a dedicated generator seeded from SEED so that re-running the notebook
# produces the same sample order and the same train/test split every time,
# without touching the global `random` state used by other cells.
rng = random.Random(SEED)
rng.shuffle(fc["samples"])

annotated_synthetic_samples = SyntheticSamplesAnnotated(**fc)
annotated_samples = annotated_synthetic_samples.samples

# The split is done per category, not per sample: every category ends up
# entirely in train or entirely in test.
# Sort before shuffling so the seeded shuffle starts from a stable order; the
# iteration order of a set is not guaranteed to be the same between interpreter
# runs (e.g. for strings under hash randomisation). `key=str` avoids requiring
# the category type itself to be orderable.
categories = sorted({sample.category for sample in annotated_samples}, key=str)
rng.shuffle(categories)
split_index = int(TRAIN_TEST_SPLIT_RATIO * len(categories))
train_categories = categories[:split_index]
test_categories = categories[split_index:]
train_samples = [sample for sample in annotated_samples if sample.category in train_categories]
test_samples = [sample for sample in annotated_samples if sample.category in test_categories]

# Every sample must land in exactly one of the two subsets.
assert len(train_samples) + len(test_samples) == len(annotated_samples)

def _write_samples_json(path: Path, samples_model) -> None:
    """Write ``samples_model`` to ``path`` as indented UTF-8 JSON.

    The model is dumped and encoded to a string *before* the file is touched, so
    an error while serialising cannot leave an empty or truncated file behind.
    The parent directory of ``path`` is created if it does not exist yet.

    Args:
        path: Destination file.
        samples_model: Object exposing ``model_dump()`` (here a
            ``SyntheticSamplesAnnotated`` instance).
    """
    text = json.dumps(samples_model.model_dump(), indent=4, ensure_ascii=False)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(text, encoding="utf-8")


# Save the full shuffled set, then the train and test subsets.
_write_samples_json(OUTPUT_SAMPLES_FILE, annotated_synthetic_samples)
_write_samples_json(OUTPUT_SAMPLES_TRAIN_FILE, SyntheticSamplesAnnotated(samples=train_samples))
_write_samples_json(OUTPUT_SAMPLES_TEST_FILE, SyntheticSamplesAnnotated(samples=test_samples))



In [3]:
from time import perf_counter, sleep

# Measure, with perf_counter, how long a 1.4 s sleep actually takes.
start = perf_counter()
sleep(1.4)
end = perf_counter()

elapsed = end - start
print(f"Elapsed time: {elapsed} seconds")

Elapsed time: 1.4002243390004878 seconds
