# Create annotator directories

Splits the examples into directories, one per annotator, ensuring a one-sample overlap between neighboring annotators so that inter-annotator agreement (IAA) can be calculated.

In [None]:
import math
import shutil
from pathlib import Path
from random import Random

In [None]:
# Filesystem layout: everything lives under ./outputs, resolved against the
# current working directory at the time this cell runs.
OUTPUT_DIR = Path.cwd().joinpath("outputs")
DATA_DIR = OUTPUT_DIR.joinpath("human_readable_responses")  # source sample directories
ANNOTATOR_DIR = OUTPUT_DIR.joinpath("annotators")  # destination for per-annotator copies

# Parameters of the split.
NUM_ANNOTATORS = 15
NUM_UNIQUE_SAMPLES = 4  # chunk size of the samples handed to a single annotator
NUM_SHARED_SAMPLES = 1  # factor used when sizing the pool of shared samples
RANDOM_SEED = 23  # seed for shuffling the sample directories

In [None]:
# Create the root folder for the annotator copies. Re-running the cell is
# harmless (exist_ok=True); parent directories are not created, so a missing
# OUTPUT_DIR raises FileNotFoundError here.
ANNOTATOR_DIR.mkdir(parents=False, exist_ok=True)

In [None]:
# Collect one directory per sample.
# ``Path.iterdir`` yields entries in arbitrary, filesystem-dependent order, so
# the listing is sorted first: without that, the seeded shuffle below would not
# give the same assignment on another machine or after the folder is re-created.
# Non-directory entries (e.g. a stray ``.DS_Store``) are skipped because every
# element is later copied with ``shutil.copytree``, which only accepts
# directories.
hadm_id_dirs = sorted(entry for entry in DATA_DIR.iterdir() if entry.is_dir())
Random(RANDOM_SEED).shuffle(hadm_id_dirs)
len(hadm_id_dirs)  # shown as the cell output: number of available samples

In [None]:
# Size of the shared pool: each shared directory is handed to two consecutive
# annotators (1 & 2, 3 & 4, ...), so roughly half as many directories as
# annotators are needed.
num_shared_dirs = math.ceil(NUM_SHARED_SAMPLES * NUM_ANNOTATORS / 2)

# One slot per annotator. Duplicating every shared directory produces
# 2 * num_shared_dirs slots, which exceeds NUM_ANNOTATORS whenever
# NUM_SHARED_SAMPLES * NUM_ANNOTATORS is odd; trimming keeps the later
# ``zip(shared_dirs, unique_dirs)`` from creating an extra annotator.
# NOTE: with an odd NUM_ANNOTATORS the last annotator's shared directory has no
# partner. The copy step consumes exactly one shared directory per annotator,
# so NUM_SHARED_SAMPLES values other than 1 do not increase the overlap.
shared_dirs = [
    shared_dir for shared_dir in hadm_id_dirs[:num_shared_dirs] for _ in range(2)
][:NUM_ANNOTATORS]

# Remaining directories are cut into chunks of NUM_UNIQUE_SAMPLES, one chunk
# per annotator and at most NUM_ANNOTATORS chunks. If there is not enough data
# the final chunk may be shorter (or missing), exactly as before.
unique_start = num_shared_dirs
unique_stop = min(
    len(hadm_id_dirs), unique_start + NUM_ANNOTATORS * NUM_UNIQUE_SAMPLES
)
unique_dirs = [
    hadm_id_dirs[start : start + NUM_UNIQUE_SAMPLES]
    for start in range(unique_start, unique_stop, NUM_UNIQUE_SAMPLES)
]

In [None]:
# Materialise one folder per annotator (annotator_1, annotator_2, ...) holding
# a copy of that annotator's shared sample followed by their unique samples.
# ``zip`` stops at the shorter of the two lists, so annotators without a full
# (shared, unique) pairing are silently omitted.
for annotator_idx, (annotator_shared_dir, annotator_unique_dirs) in enumerate(
    zip(shared_dirs, unique_dirs)
):
    annotator_dir = ANNOTATOR_DIR / f"annotator_{annotator_idx+1}"
    annotator_dir.mkdir(exist_ok=True)

    # The loop variable is deliberately not called ``dir``: at notebook top
    # level that would overwrite the ``dir()`` builtin for the whole session.
    for sample_dir in [annotator_shared_dir, *annotator_unique_dirs]:
        # Use ``.name`` rather than ``.stem``: ``.stem`` strips everything after
        # the last dot, which would rename (and possibly merge) sample
        # directories whose names contain a dot.
        shutil.copytree(
            sample_dir, annotator_dir / sample_dir.name, dirs_exist_ok=True
        )