In [None]:
# Dataset/Test_Train/chunks/
#     ├── signals_chunk0.npy
#     ├── labels_chunk0.npy
#     ├── signals_chunk1.npy
#     ├── labels_chunk1.npy
#     └── …


#!/usr/bin/env python3
"""
chunk_train_set.py: split the 90 % training set into ~1 GB chunks.
Run after split_train_test.py.
"""

import numpy as np
import os
from pathlib import Path

# Setup
TRAIN_DIR  = Path('/Users/.../Dataset')
CHUNK_DIR  = TRAIN_DIR / "chunks"
CHUNK_SIZE = 125_000          # ~1 GB with (1024,2) float32+label

sig_path = TRAIN_DIR / "signals_train_X.npy"
lab_path = TRAIN_DIR / "signals_train_y.npy"


def chunk_dataset(sig_path, lab_path, chunk_dir, chunk_size=CHUNK_SIZE):
    """Split a signals/labels ``.npy`` pair into fixed-size chunk files.

    Both inputs are opened with ``mmap_mode='r'`` so they are not read into
    memory up front; each chunk is sliced along the first axis and written as
    ``signals_chunk{i}.npy`` / ``labels_chunk{i}.npy`` inside *chunk_dir*.
    The last chunk holds the remainder and may be shorter than *chunk_size*.

    Args:
        sig_path: Path to the signals ``.npy`` file.
        lab_path: Path to the labels ``.npy`` file.
        chunk_dir: Output directory; created (with parents) if missing.
        chunk_size: Maximum number of samples per chunk; must be positive.

    Returns:
        The number of chunk pairs written.

    Raises:
        ValueError: If *chunk_size* is not positive, or if the two arrays
            differ in length along the first axis.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    print("Loading training mmap…")
    X = np.load(sig_path, mmap_mode='r')
    y = np.load(lab_path, mmap_mode='r')

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if len(X) != len(y):
        raise ValueError(
            f"signals/labels length mismatch: {len(X)} != {len(y)}"
        )

    # Create the output directory only once the inputs are known to be
    # usable, so a bad input path does not leave an empty directory behind.
    chunk_dir = Path(chunk_dir)
    chunk_dir.mkdir(parents=True, exist_ok=True)

    n_total = len(X)
    n_chunks = (n_total + chunk_size - 1) // chunk_size  # ceiling division
    print(f"Total train samples: {n_total:,} → {n_chunks} chunks")

    for i in range(n_chunks):
        s = i * chunk_size
        e = min(s + chunk_size, n_total)
        print(f"  Saving chunk {i}  [{s}:{e}] ({e-s:,} samples)")
        np.save(chunk_dir / f"signals_chunk{i}.npy", np.asarray(X[s:e]))
        np.save(chunk_dir / f"labels_chunk{i}.npy",  np.asarray(y[s:e]))

    print("\nAll chunks written to", chunk_dir)
    return n_chunks


# Runs when executed as a script or as a notebook cell, but not on import.
if __name__ == "__main__":
    chunk_dataset(sig_path, lab_path, CHUNK_DIR, CHUNK_SIZE)


Loading training mmap…
Total train samples: 2,300,313 → 19 chunks
  Saving chunk 0  [0:125000] (125,000 samples)
  Saving chunk 1  [125000:250000] (125,000 samples)
  Saving chunk 2  [250000:375000] (125,000 samples)
  Saving chunk 3  [375000:500000] (125,000 samples)
  Saving chunk 4  [500000:625000] (125,000 samples)
  Saving chunk 5  [625000:750000] (125,000 samples)
  Saving chunk 6  [750000:875000] (125,000 samples)
  Saving chunk 7  [875000:1000000] (125,000 samples)
  Saving chunk 8  [1000000:1125000] (125,000 samples)
  Saving chunk 9  [1125000:1250000] (125,000 samples)
  Saving chunk 10  [1250000:1375000] (125,000 samples)
  Saving chunk 11  [1375000:1500000] (125,000 samples)
  Saving chunk 12  [1500000:1625000] (125,000 samples)
  Saving chunk 13  [1625000:1750000] (125,000 samples)
  Saving chunk 14  [1750000:1875000] (125,000 samples)
  Saving chunk 15  [1875000:2000000] (125,000 samples)
  Saving chunk 16  [2000000:2125000] (125,000 samples)
  Saving chunk 17  [2125000:2