In [None]:
# Dataset/Test_Train/
#     ├── test_X.npy             # 10 % stratified hold-out
#     ├── test_y.npy
#     ├── signals_train_X.npy    # 90 % of the data, shuffled
#     └── signals_train_y.npy



#!/usr/bin/env python3
"""
split_train_test.py: split DeepSig RadioML 2018.01A into
a stratified 90 % train set and 10 % test set.

Run:  python split_train_test.py
"""

import numpy as np
import os
from pathlib import Path
from sklearn.model_selection import train_test_split

# --- Settings ---------------------------------------------------------------
TEST_RATIO = 0.10                       # fraction of samples held out for testing
ROOT = Path('/Users/.../Dataset')       # directory holding signals.npy and labels.npy
OUT_DIR = ROOT / "Test_Train"           # destination directory for the split arrays
OUT_DIR.mkdir(exist_ok=True)

# Open both inputs as read-only memory maps instead of reading them eagerly.
print("Loading mmap’ed arrays …")
X = np.load(ROOT / "signals.npy", mmap_mode='r')    # expected shape (2 555 904, 1024, 2)
y = np.load(ROOT / "labels.npy", mmap_mode='r')     # expected shape (2 555 904, 24), one-hot

# Collapse every label row to the position of its largest entry; these integer
# ids are what the split is stratified on.
class_ids = y.argmax(axis=1).astype(np.int16)

print("Splitting with stratification …")
# Only row indices are split here; the sample arrays themselves are not copied.
train_idx, test_idx = train_test_split(
    np.arange(X.shape[0]),
    stratify=class_ids,
    test_size=TEST_RATIO,
    random_state=42,
)

#  helper to write contiguous blocks to disk 
def _copy_rows(path, src, idx, chunk_size):
    """Write ``src[idx]`` to a new ``.npy`` file at *path*, ``chunk_size`` rows at a time."""
    shape = (len(idx),) + tuple(src.shape[1:])
    if len(idx) == 0:
        # Nothing to stream; np.save writes a valid zero-row file, whereas
        # memory-mapping a zero-byte payload is not supported everywhere.
        np.save(path, np.empty(shape, dtype=src.dtype))
        return
    out = np.lib.format.open_memmap(path, mode="w+", dtype=src.dtype, shape=shape)
    try:
        for start in range(0, len(idx), chunk_size):
            rows = idx[start:start + chunk_size]
            # Fancy indexing copies only this chunk's rows into memory.
            out[start:start + len(rows)] = src[rows]
        out.flush()
    finally:
        # Drop the reference so the underlying memory map is released.
        del out


def save_subset(name, idx, chunk_size=16_384):
    """Save the rows of ``X`` and ``y`` selected by *idx* as ``.npy`` files.

    Writes ``OUT_DIR/{name}_X.npy`` and ``OUT_DIR/{name}_y.npy``. Rows are
    stored in the order given by *idx*. The data is copied in chunks so that
    at most ``chunk_size`` rows of a source array are held in memory at once,
    instead of materialising the whole subset before saving.

    Parameters
    ----------
    name : str
        Prefix of the two output file names.
    idx : array-like of int
        Row indices into the module-level ``X`` and ``y`` arrays.
    chunk_size : int, optional
        Number of rows copied per step; must be at least 1.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    idx = np.asarray(idx)
    print(f"   writing {name}  ({len(idx):,} samples)")
    _copy_rows(OUT_DIR / f"{name}_X.npy", X, idx, chunk_size)
    _copy_rows(OUT_DIR / f"{name}_y.npy", y, idx, chunk_size)

# Write the test split first, then the training split; the training files end
# up as signals_train_X.npy and signals_train_y.npy.
for subset_name, subset_idx in (("test", test_idx), ("signals_train", train_idx)):
    save_subset(subset_name, subset_idx)

print("\nDone!  Train/Test sets are in", OUT_DIR)

Loading mmap’ed arrays …
Splitting with stratification …
   writing test  (255,591 samples)
   writing signals_train  (2,300,313 samples)

Done!  Train/Test sets are in /Users/saurabhtiwari/Library/CloudStorage/OneDrive-TrinityCollege/Tinity/Summer 2025/RF Signal Classification/Dataset/Test_Train
