In [1]:
# (Re)load IPython's autoreload extension and switch it to "all" mode, in which
# imported modules are reloaded before each cell is executed.
%reload_ext autoreload
%autoreload all

# Exclude these third-party libraries from autoreload (the leading "-" marks a
# module as "do not autoreload").
%aimport -torch
%aimport -matplotlib
%aimport -seaborn
%aimport -numpy
%aimport -pandas
%aimport -scipy
%aimport -scipy

In [2]:
from dataset import CHBMITDataset

# Location handed to CHBMITDataset, relative to the notebook's working directory.
# NOTE(review): presumably a directory of already-preprocessed CHB-MIT files -- confirm.
data_path = "./CHB-MIT/processed"
# Later cells read `dataset.data` and `dataset.labels` from this object.
dataset = CHBMITDataset(data_path)

In [3]:
import torch

from utils.preprocess import VectorizeSTFT, normalize


def preprocess_data(data: torch.Tensor):
    """Apply the STFT -> magnitude -> normalization pipeline to ``data``.

    The input is passed through ``VectorizeSTFT``, the element-wise absolute
    value of that result is taken with ``torch.abs``, and the outcome is handed
    to ``normalize``.

    Args:
        data: Tensor to transform. The expected layout is defined by
            ``VectorizeSTFT`` and is not visible here -- TODO confirm.

    Returns:
        Whatever ``normalize`` returns for the magnitude tensor. Note that no
        reshaping or flattening is performed in this function.
    """
    return normalize(torch.abs(VectorizeSTFT(data)))

In [4]:
# Preprocess the dataset in fixed-size slices along dim 0 so that only one
# slice at a time has to live on the compute device; results are collected on
# the host and concatenated once at the end.
chunk_size = 500
# Use the GPU when one is available, otherwise fall back to the CPU instead of
# raising on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
outputs = []

# The number of rows is loop-invariant, so look it up once.
num_samples = dataset.data.shape[0]
for start in range(0, num_samples, chunk_size):
    end = min(start + chunk_size, num_samples)
    chunk = dataset.data[start:end, ...].to(device=device)

    chunk_processed = preprocess_data(chunk)
    # Keep only the host copy of the result.
    outputs.append(chunk_processed.cpu())
    # Drop the reference to the on-device result before the next iteration.
    del chunk_processed

processed_data = torch.cat(outputs, dim=0)
# The per-chunk list is no longer needed once concatenated.
del outputs

In [6]:
import h5py

# Write the processed tensor and the dataset labels to "processed_data.h5" as
# two datasets. Mode "w" creates the file, truncating it if it already exists.
with h5py.File("processed_data.h5", "w") as h5_file:
    for dataset_name, tensor in (("data", processed_data), ("labels", dataset.labels)):
        # Each tensor is converted to a NumPy array right before it is written.
        h5_file.create_dataset(dataset_name, data=tensor.numpy())