In [None]:
!unzip /content/drive/MyDrive/shiraz-university-fetal-heart-sounds-database-1.0.1.zip

In [None]:
import os
import random
import shutil
from glob import glob
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt

In [None]:
def spec_augment(mel_spec, freq_masking=0.15, time_masking=0.30, mask_value=None):
    """Return a copy of a 2-D spectrogram with one row band and one column span masked.

    One contiguous block of rows and one contiguous block of columns are
    overwritten with a constant fill value. Block positions are drawn from the
    global ``random`` module, so call ``random.seed(...)`` first if the output
    must be reproducible. The input array is never modified.

    Args:
        mel_spec: 2-D array exposing ``.copy()`` and ``.shape``; the first axis
            is masked by ``freq_masking`` and the second by ``time_masking``.
        freq_masking: Fraction of the first axis to mask, clamped to [0, 1].
        time_masking: Fraction of the second axis to mask, clamped to [0, 1].
        mask_value: Value written into the masked cells. ``None`` (default)
            uses the smallest value present in the input. A literal 0 is only
            the quietest value for linear power spectrograms; on a dB scale
            computed relative to the peak, 0 is the *loudest* value, so a
            zero-filled mask would show up as a full-intensity stripe. Pass
            ``mask_value=0`` explicitly to get zero-filled masks.

    Returns:
        A new array with the same shape and dtype as ``mel_spec``.

    Raises:
        ValueError: If ``mel_spec`` is not 2-D (shape cannot be unpacked).
    """
    spec = mel_spec.copy()
    num_mels, num_frames = spec.shape

    # Nothing to mask, and min() of an empty array is undefined.
    if spec.size == 0:
        return spec

    # Take the fill value before any cell is overwritten.
    fill = spec.min() if mask_value is None else mask_value

    # Clamp the widths so fractions outside [0, 1] cannot produce a negative
    # width or an empty range for randint (which would raise ValueError).
    band = min(max(int(freq_masking * num_mels), 0), num_mels)
    band_start = random.randint(0, num_mels - band)
    spec[band_start:band_start + band, :] = fill

    span = min(max(int(time_masking * num_frames), 0), num_frames)
    span_start = random.randint(0, num_frames - span)
    spec[:, span_start:span_start + span] = fill

    return spec

In [None]:

def create_augmented_dataset(input_folder, output_folder, num_augments=10):
    """Build masked mel-spectrogram copies of every ``*.wav`` file in a folder.

    Each recording is loaded at its native sampling rate, converted to a
    128-band mel spectrogram in dB relative to its own peak, and passed through
    ``spec_augment`` ``num_augments`` times. Every result is saved as
    ``<output_folder>/<label>/<stem>_aug<i>.npy``. The label is ``'mother'``
    when the file name starts with ``'m'`` and ``'fetus'`` otherwise.

    WARNING: ``output_folder`` is deleted and recreated on every call.

    Args:
        input_folder: Directory searched (non-recursively) for ``*.wav`` files.
        output_folder: Directory that receives the per-label ``.npy`` files.
        num_augments: Number of augmented copies written per recording.

    Returns:
        List of ``(npy_path, label)`` tuples, ordered by source file name and
        then by augmentation index.

    Raises:
        ValueError: If ``output_folder`` is ``input_folder`` or one of its
            parent directories, because wiping it would delete the recordings.
    """
    # Wiping the output tree must never take the source recordings with it.
    src_root = os.path.abspath(input_folder)
    out_root = os.path.abspath(output_folder)
    if src_root == out_root or src_root.startswith(out_root.rstrip(os.sep) + os.sep):
        raise ValueError(
            "output_folder must not be input_folder or one of its parent directories"
        )

    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)
    os.makedirs(output_folder, exist_ok=True)

    # glob order is filesystem-dependent; sort so the returned list (and any
    # seeded shuffle applied to it later) is the same on every run.
    wav_files = sorted(glob(os.path.join(input_folder, "*.wav")))
    dataset = []

    for wav_path in wav_files:
        filename = os.path.basename(wav_path)
        stem = os.path.splitext(filename)[0]
        label = 'mother' if filename.startswith('m') else 'fetus'

        # The label folder is the same for every augmentation of this file.
        save_dir = os.path.join(output_folder, label)
        os.makedirs(save_dir, exist_ok=True)

        y, sr = librosa.load(wav_path, sr=None)
        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        mel_db = librosa.power_to_db(mel, ref=np.max)
        # NOTE(review): with ref=np.max the peak of mel_db is 0 dB, so confirm
        # that the fill value spec_augment writes is suitable for dB-scaled input.

        for i in range(num_augments):
            aug = spec_augment(mel_db, freq_masking=0.15, time_masking=0.30)

            out_path = os.path.join(save_dir, f"{stem}_aug{i}.npy")
            np.save(out_path, aug)

            dataset.append((out_path, label))

    return dataset




In [None]:

def _augment_group_key(path, label):
    """Return a key identifying the source recording an augmented file came from."""
    stem = os.path.splitext(os.path.basename(path))[0]
    # Augmented files are named "<recording>_aug<i>"; strip that suffix so all
    # augmentations of one recording share a key. Other names keep their stem.
    head, sep, tail = stem.rpartition("_aug")
    if sep and tail.isdigit():
        stem = head
    return (label, stem)


def split_and_save(dataset, train_folder, test_folder, train_ratio=0.8, seed=None):
    """Copy a labelled file list into train/test folders without leaking recordings.

    Files are grouped by source recording (the ``_aug<i>`` suffix of the file
    name is ignored) and whole groups are assigned to one side of the split.
    Splitting individual augmented files instead would put near-identical
    copies of the same recording in both sets and inflate test scores. Files
    whose names do not carry the suffix form single-file groups.

    Existing files in the destination folders are left in place; clear the
    folders before re-running if a previous split must not be mixed in.

    Args:
        dataset: Iterable of ``(file_path, label)`` tuples. It is not modified.
        train_folder: Destination root for the training files.
        test_folder: Destination root for the test files.
        train_ratio: Fraction of *recordings* (groups) assigned to training.
        seed: Optional seed for a private RNG. ``None`` uses the global
            ``random`` state, so ``random.seed(...)`` still controls the split.

    Returns:
        None. Files are copied to ``<folder>/<label>/<file name>``.
    """
    rng = random.Random(seed) if seed is not None else random

    # dicts keep insertion order, so the pre-shuffle order is deterministic.
    groups = {}
    for src_path, label in dataset:
        key = _augment_group_key(src_path, label)
        groups.setdefault(key, []).append((src_path, label))

    # Shuffle the group keys, not the caller's list.
    group_keys = list(groups)
    rng.shuffle(group_keys)

    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    split_idx = int(len(group_keys) * train_ratio)
    train_set = [item for key in group_keys[:split_idx] for item in groups[key]]
    test_set = [item for key in group_keys[split_idx:] for item in groups[key]]

    for dest_folder, split in [(train_folder, train_set), (test_folder, test_set)]:
        for src_path, label in split:
            label_folder = os.path.join(dest_folder, label)
            os.makedirs(label_folder, exist_ok=True)
            shutil.copy(src_path, label_folder)


In [None]:
# Seed Python's RNG so the randomly placed masks are identical on every run.
random.seed(42)

dataset = create_augmented_dataset(
    input_folder="/content/drive/MyDrive/shiraz-university-fetal-heart-sounds-database-1.0.1/base",
    output_folder="/content/drive/MyDrive/SUFHSDB/augmented_data",
    num_augments=10
)

# An empty result means no .wav file matched; stop here instead of silently
# producing empty train/test folders later.
assert dataset, "No .wav files found in input_folder - check the extraction path."

In [None]:
TRAIN_DIR = "/content/drive/MyDrive/SUFHSDB/training_data"
TEST_DIR = "/content/drive/MyDrive/SUFHSDB/testing_data"

# Each run copies a new random split into these folders. Remove the output of
# any previous run first, otherwise files from the old split stay behind and
# the same file can end up in both the training and the testing folder.
for stale_dir in (TRAIN_DIR, TEST_DIR):
    if os.path.isdir(stale_dir):
        shutil.rmtree(stale_dir)

# Seed the shuffle so re-running this cell reproduces the same split.
random.seed(42)

split_and_save(
    dataset,
    train_folder=TRAIN_DIR,
    test_folder=TEST_DIR,
    train_ratio=0.8
)