# Create Train/Val/Test Splits

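Use this notebook to generate the train/val/test split files (`data/MMFW-UAV/splits/*.json`) from a dataset directory. Run it from a directory one level below the repository root (for example `notebooks/`), because the first cell resolves the repository root as `..` relative to the current working directory.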


In [None]:
import os
import sys
from pathlib import Path

# Resolve the parent of the current working directory to an absolute path and
# register it on sys.path (once) so that project packages such as `scripts`
# can be imported from this notebook.
REPO_ROOT = Path('..').resolve()
if sys.path.count(str(REPO_ROOT)) == 0:
    sys.path.append(str(REPO_ROOT))

from scripts.prepare_data import create_splits

print('✅ Imports ready')

## Configuration

In [None]:
# Input dataset directory and the directory that will receive the split files.
data_root = REPO_ROOT.joinpath('data', 'MMFW-UAV', 'raw')  # or 'sample'
output_dir = REPO_ROOT.joinpath('data', 'MMFW-UAV', 'splits')

# Split fractions and seed; all three are forwarded to create_splits below.
# NOTE(review): the remainder (1 - train - val) is presumably the test share;
# confirm against scripts.prepare_data.create_splits.
train_ratio, val_ratio = 0.8, 0.1
seed = 42

# Echo the effective settings so a run is self-describing.
print(
    f'Data root: {data_root}',
    f'Output dir: {output_dir}',
    f'Train/Val ratios: {train_ratio}/{val_ratio}',
    sep='\n',
)

## Create Splits

In [None]:
# Generate the splits. The Path objects are converted to plain strings before
# being handed to create_splits.
splits = create_splits(
    data_root=str(data_root), output_dir=str(output_dir),
    train_ratio=train_ratio, val_ratio=val_ratio, seed=seed,
)

# Report, per split, how many entries its 'uav_types' collection holds.
print('✅ Split summary:')
for name in splits:
    split = splits[name]
    print(f"  {name}: {len(split['uav_types'])} UAV types")

## Verify Files

In [None]:
# List every JSON file currently present in the output directory, sorted by
# path, so the result of the split step can be checked at a glance.
# Note: this lists whatever *.json files exist there, including any left over
# from earlier runs.
print('Files created:')
json_paths = sorted(Path(output_dir).glob('*.json'))
for path in json_paths:
    print(f'  {path.name}')
if not json_paths:
    # Without this, an empty match would print only the header and look like
    # a successful run.
    print(f'  (no .json files found in {output_dir})')