In [1]:
import os
import glob
import numpy as np

# --- 1) Paths ----------------------------------------------------------------
# NOTE: despite its name, `test_dir` currently points at the *train* split.
# Everything else (fake inputs, real input, output folder) lives beneath it.
test_dir = '/home/ml/Documents/voice/ResData/wavefake128_2048split/train'
fake_dir, real_file, out_dir = (
    os.path.join(test_dir, leaf) for leaf in ('fake', 'real.npy', 'aggregated')
)

# Create the output folder up front; this is a no-op when it already exists.
os.makedirs(out_dir, exist_ok=True)

# --- 2) Load & save real ----------------------------------------------------
# The real-class array is copied unchanged into the output folder. Its length
# along axis 0 (N_real) is the total number of fake samples drawn further down.
real = np.load(real_file)
N_real = real.shape[0]

real_out = os.path.join(out_dir, 'real.npy')
np.save(real_out, real)
print(f"→ Saved real.npy with {N_real} samples")

# --- 3) Gather fake file list -----------------------------------------------
# Sorted so that the file → quota assignment below is the same on every run.
fake_paths = sorted(glob.glob(os.path.join(fake_dir, '*.npy')))
K = len(fake_paths)
if K == 0:
    # Report the directory that was actually searched, so the message stays
    # correct whichever split `fake_dir` points at.
    raise RuntimeError(f"No .npy files found in {fake_dir}")

# Compute per-file quotas so sum(quota_i) == N_real and |quota_i - quota_j| ≤ 1:
# every file contributes `base` samples and the first `remainder` files
# contribute one extra.
base, remainder = divmod(N_real, K)

np.random.seed(42)       # fixed seed so the same subset is drawn on every run
fake_parts = []
for i, path in enumerate(fake_paths):
    data = np.load(path)
    Ni   = data.shape[0]
    qi   = base + (1 if i < remainder else 0)
    if qi > Ni:
        # Sampling is without replacement, so a file cannot supply more
        # samples than it contains.
        raise ValueError(
            f"File {os.path.basename(path)} has only {Ni} samples, "
            f"but {qi} were requested"
        )
    # Randomly pick qi distinct samples: shuffle the index range 0..Ni-1 and
    # keep the first qi indices.
    idx = np.random.permutation(Ni)[:qi]
    fake_parts.append(data[idx])
    print(f"  • {os.path.basename(path)} → selected {qi}/{Ni}")

# --- 4) Concatenate & save fake ---------------------------------------------
# Stack the per-file selections along the sample axis (axis 0, the default).
fake_agg = np.concatenate(fake_parts)

# Internal sanity check: the quotas were built to add up to N_real, so the
# aggregated fake set must have exactly as many samples as the real set.
assert len(fake_agg) == N_real, (
    f"Total fake ({len(fake_agg)}) ≠ real ({N_real})"
)

fake_out = os.path.join(out_dir, 'fake.npy')
np.save(fake_out, fake_agg)
print(f"→ Saved fake.npy with {len(fake_agg)} samples")


→ Saved real.npy with 18320 samples
  • ljspeech_full_band_melgan.npy → selected 2618/18337
  • ljspeech_hifiGAN.npy → selected 2617/18297
  • ljspeech_melgan.npy → selected 2617/18337
  • ljspeech_melgan_large.npy → selected 2617/18337
  • ljspeech_multi_band_melgan.npy → selected 2617/18337
  • ljspeech_parallel_wavegan.npy → selected 2617/18337
  • ljspeech_waveglow.npy → selected 2617/18337
→ Saved fake.npy with 18320 samples
