In [10]:
import os
import torch.nn.functional as F
import torchaudio
import numpy as np
import Preprocessor as pp
from tqdm import tqdm
import glob

In [11]:
DATASET = "./Dataset"
OUTPUT = "./calibration"

SAMPLE_RATE = 16000
DURATION = 1
NUM_SAMPLES = SAMPLE_RATE * DURATION
WINDOW_SIZE = 512
HOP_SIZE = 160
MEL_BINS = 64
FMIN = 50
FMAX = 8000

In [12]:
def logmel_transform(file_path):
    waveform, sr = torchaudio.load(file_path)
    # print(waveform.shape) # torch.Size([1, 16000])
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    if sr != SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(sr, SAMPLE_RATE)
        waveform = resampler(waveform)

    if waveform.shape[1] < NUM_SAMPLES:
        waveform = F.pad(waveform, (0, NUM_SAMPLES - waveform.shape[1]))
    else:
        waveform = waveform[:, :NUM_SAMPLES]

    waveform = waveform / (waveform.abs().max() + 1e-9)

    logmel = pp.logmel(waveform)
    logmel = logmel.unsqueeze(0)
    # print(logmel.shape)  # torch.Size([1, 1, 64, 101])

    if logmel.shape[3] < 101:
        logmel = F.pad(logmel, (0, 101 - logmel.shape[2]))

    elif logmel.shape[3] > 101:
        logmel = logmel[:, :, :101]

    return logmel.numpy()

In [16]:
filepaths = []

for root, _, files in os.walk(DATASET):
    
    for file in files:
        if file.lower().endswith(".wav"):
            filepaths.append(os.path.join(root, file))

print(len(filepaths))

3000


In [17]:
logmel = logmel_transform(filepaths[0])
print(logmel.shape)
print(type(logmel))

(1, 1, 64, 101)
<class 'numpy.ndarray'>


In [18]:
os.makedirs(OUTPUT, exist_ok=True)
for path in tqdm(filepaths, desc="file convert", leave=True):
    file = path.split("\\")[-1]
    # print(file)
    data = logmel_transform(path)
    # print(data.shape)
    output = os.path.join(OUTPUT, file.replace(".wav", ".npy"))
    # print(output)
    np.save(output, data)

file convert: 100%|██████████| 3000/3000 [00:16<00:00, 179.28it/s]


In [19]:
file_list = sorted(glob.glob('./calibration/*.npy'))
arrays = [np.load(f) for f in file_list]
for i in range(len(arrays)):
    print(arrays[i].shape)

(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 64, 101)
(1, 1, 6

In [83]:
calib_dataset = np.concatenate(arrays, axis=0)
min_val = calib_dataset.min()
max_val = calib_dataset.max()
print(calib_dataset.shape)
print(min_val, max_val)

(3000, 1, 64, 101)
-23.02585 5.632151


In [85]:
calib_dataset_nom = (calib_dataset- min_val) / (max_val - min_val) * 255

In [86]:
np.save("./calib_dataset_float32.npy", calib_dataset_nom)

In [87]:
cal_float = np.load("./calib_dataset_float32.npy")
cf_min = cal_float.min()
cf_max = cal_float.max()
print(cal_float.shape, cal_float.dtype)
print(cf_min, cf_max)

(3000, 1, 64, 101) float32
0.0 255.0


In [88]:
cal_int = (cal_float - 128).astype(np.uint8)
ci_min = cal_int.min()
ci_max = cal_int.max()
print(cal_int.shape, cal_int.dtype)
print(ci_min, ci_max)

(3000, 1, 64, 101) uint8
0 255


In [89]:
np.save("./calib_dataset_uint8.npy", cal_int)

In [90]:
cal = np.load("./calib_dataset_uint8.npy")
cal_min = cal.min()
cal_max = cal.max()
print(cal.shape, cal.dtype)
print(cal_min, cal_max)

(3000, 1, 64, 101) uint8
0 255
