In [1]:
import json
import os
import math
import librosa
import pandas as pd
import random
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
from librosa import power_to_db

In [2]:
# Load the three participant split tables in one pass.
# The "dev" split file is bound to the name `val_labels_df`.
train_labels_df, test_labels_df, val_labels_df = (
    pd.read_csv(split_csv)
    for split_csv in (
        "edaicwoz/train_split.csv",
        "edaicwoz/test_split.csv",
        "edaicwoz/dev_split.csv",
    )
)

In [3]:
# --- Locations --------------------------------------------------------------
# Root directory that is walked for the input recordings.
DATASET_PATH = "edaicwoz/merge"
# Not referenced by the extraction loop that walks DATASET_PATH.
JSON_PATH = "data_10.json"

# --- Segmentation -----------------------------------------------------------
# Sampling rate (Hz) every recording is loaded at.
SAMPLE_RATE = 16000
# Length of one extracted segment, in seconds.
SEGMENT_DUR = 15
# Number of audio samples contained in one segment.
SAMPLES_PER_TRACK = SEGMENT_DUR * SAMPLE_RATE

# --- STFT settings (in samples) ---------------------------------------------
# At 16 kHz these correspond to a 250 ms window and a 100 ms hop.
N_FFT, HOP_LENGTH = 4000, 1600

In [4]:
# Extract randomly positioned, fixed-length segments from every participant
# recording, convert each one to a min-max scaled log-mel spectrogram and save
# it as `<out_path>/<id>/<id>_MEL_<k>.npy`.
out_path = "MELs_40100_MM_SCA_CROP"

# Only the file named "<id>_AUDIO_M.wav" is processed for a participant.
AUDIO_SUFFIX = "_AUDIO_M.wav"
# Segments drawn per recording, and the multiplier applied to recordings whose
# PHQ_Binary label is 1 (oversampling of the positive class).
SEGMENTS_PER_RECORDING = 40
POSITIVE_OVERSAMPLING = 3
N_MELS = 80

# Fixed seed so the same segments are drawn on every Restart & Run All.
SEGMENT_SEED = 42
segment_rng = np.random.default_rng(SEGMENT_SEED)

# Build the split lookups once instead of scanning the DataFrames per file.
test_ids = set(test_labels_df["Participant_ID"].tolist())
# Labels of the non-test participants; train entries are applied last so they
# take precedence over dev entries.
phq_binary_by_id = dict(
    zip(val_labels_df["Participant_ID"].tolist(), val_labels_df["PHQ_Binary"].tolist())
)
phq_binary_by_id.update(
    zip(train_labels_df["Participant_ID"].tolist(), train_labels_df["PHQ_Binary"].tolist())
)

for dirpath, _dirnames, filenames in tqdm(os.walk(DATASET_PATH)):
    for file in filenames:
        sample_id = file.split("_")[0]
        # Process each recording exactly once: skip every file that is not the
        # "<id>_AUDIO_M.wav" recording itself (other per-participant files,
        # hidden files, ...). Previously every file in the folder re-triggered
        # a full load and re-sampling of the same recording.
        if file != sample_id + AUDIO_SUFFIX:
            continue

        participant_id = int(sample_id)
        if participant_id in test_ids:
            # Test recordings are never oversampled; their labels are not read.
            sample_label = 0
        elif participant_id in phq_binary_by_id:
            sample_label = 1 if phq_binary_by_id[participant_id] == 1 else 0
        else:
            raise ValueError(
                f"Participant {participant_id} is not listed in the train, dev or test split"
            )

        num_segments = SEGMENTS_PER_RECORDING * (POSITIVE_OVERSAMPLING if sample_label == 1 else 1)

        file_path = os.path.join(dirpath, file)
        signal, _ = librosa.load(file_path, sr=SAMPLE_RATE)

        # Number of whole, non-overlapping segments that fit in the recording.
        num_windows = signal.shape[0] // SAMPLES_PER_TRACK
        if num_windows == 0:
            # Sampling from an empty range would raise and abort the whole run.
            tqdm.write(f"Skipping {file_path}: shorter than one {SEGMENT_DUR} s segment")
            continue

        # Sampling is done with replacement, so a segment can be drawn more
        # than once (always the case when num_segments > num_windows).
        random_indices = segment_rng.choice(num_windows, size=num_segments, replace=True)

        out_file_dir = os.path.join(out_path, sample_id)
        os.makedirs(out_file_dir, exist_ok=True)

        for idx, index in enumerate(random_indices):
            start = index * SAMPLES_PER_TRACK
            end = start + SAMPLES_PER_TRACK
            split_audio = signal[start:end]

            mel_spec = librosa.feature.melspectrogram(
                y=split_audio,
                sr=SAMPLE_RATE,
                n_fft=N_FFT,
                hop_length=HOP_LENGTH,
                window='hann',
                n_mels=N_MELS,
            )
            logps = power_to_db(mel_spec)

            # NOTE(review): MinMaxScaler rescales every column on its own. With
            # the (n_mels, n_frames) layout returned by melspectrogram this
            # normalises each time frame independently to [0, 1] rather than
            # the spectrogram as a whole - confirm this is intended.
            scaler = MinMaxScaler((0, 1))
            scaled_mel_spec = scaler.fit_transform(logps)

            out_file_path = os.path.join(out_file_dir, f"{sample_id}_MEL_{idx}.npy")
            np.save(out_file_path, scaled_mel_spec)


276it [02:48,  1.64it/s]
