In [61]:
import os
import librosa
import numpy as np
np.set_printoptions(suppress = True)
import tensorflow as tf
import pandas as pd

In [62]:
# The train and dev CSVs name their label columns PHQ8_*; map them to the
# PHQ_* names that the rest of the notebook reads.
_PHQ8_TO_PHQ = {"PHQ8_Binary": "PHQ_Binary", "PHQ8_Score": "PHQ_Score"}

# Every split gets the same cleaning: missing cells become 0 and any
# non-digit character is stripped from string cells.
train_labels_df = (
    pd.read_csv("daicwoz/train_split.csv")
    .fillna(0)
    .replace('[^0-9]', '', regex=True)
    .rename(columns=_PHQ8_TO_PHQ)
)
# The test split is loaded without the rename.
# NOTE(review): presumably its columns are already called PHQ_* -- confirm.
test_labels_df = (
    pd.read_csv("daicwoz/test_split.csv")
    .fillna(0)
    .replace('[^0-9]', '', regex=True)
)
val_labels_df = (
    pd.read_csv("daicwoz/dev_split.csv")
    .fillna(0)
    .replace('[^0-9]', '', regex=True)
    .rename(columns=_PHQ8_TO_PHQ)
)

In [63]:
def load_audio_files(data_dir, sr=16000, label_splits=None):
    """Index the participant folders under ``data_dir`` and attach their labels.

    Each usable entry of ``data_dir`` is a folder whose name is a participant
    id; its audio is expected at ``<data_dir>/<id>/<id>_AUDIO.wav`` (the path
    is built here, the file itself is not opened).

    Parameters
    ----------
    data_dir : str
        Directory containing one sub-directory per participant.
    sr : int, optional
        Unused by this function; kept so existing calls keep working.
    label_splits : sequence of (int, DataFrame), optional
        ``(split_code, labels)`` pairs searched in order. Every frame needs
        the columns ``Participant_ID``, ``PHQ_Score`` and ``PHQ_Binary``.
        Defaults to the notebook-level tables:
        ``0 -> train_labels_df``, ``1 -> test_labels_df``, ``2 -> val_labels_df``.

    Returns
    -------
    tuple of five parallel lists
        ``(file_ids, file_paths, types, labels, labels_binary)``, where
        ``types`` holds the split code of each participant. Only entries that
        could be labelled are returned, so all five lists stay aligned.

    Notes
    -----
    Directory entries that are not integer names (e.g. hidden files) or that
    appear in none of the label tables are skipped with a warning instead of
    raising ``ValueError`` / ``IndexError``. The listing is sorted so the
    result does not depend on filesystem ordering.
    """
    import warnings

    if label_splits is None:
        label_splits = (
            (0, train_labels_df),
            (1, test_labels_df),
            (2, val_labels_df),
        )

    # One pass over the label tables instead of several DataFrame scans per
    # participant. setdefault keeps the first hit, i.e. earlier splits win and
    # the first matching row of a table is used.
    lookup = {}
    for split_code, split_df in label_splits:
        rows = zip(
            split_df["Participant_ID"].values,
            split_df["PHQ_Score"].values,
            split_df["PHQ_Binary"].values,
        )
        for participant_id, score, binary in rows:
            lookup.setdefault(int(participant_id), (split_code, score, binary))

    file_ids = []
    file_paths = []
    types = []
    labels = []
    labels_binary = []
    skipped = []

    for file_id in sorted(os.listdir(data_dir)):
        try:
            participant_id = int(file_id)
        except ValueError:
            # Not a participant folder (stray file, hidden entry, ...).
            skipped.append(file_id)
            continue

        entry = lookup.get(participant_id)
        if entry is None:
            # Participant has audio but no row in any label table.
            skipped.append(file_id)
            continue

        split_code, score, binary = entry
        file_ids.append(file_id)
        file_paths.append(os.path.join(data_dir, file_id, file_id + "_AUDIO.wav"))
        types.append(split_code)
        labels.append(score)
        labels_binary.append(binary)

    if skipped:
        warnings.warn(
            "load_audio_files skipped {} unlabelled or non-participant entries: {}".format(
                len(skipped), skipped
            )
        )

    return file_ids, file_paths, types, labels, labels_binary

# Root folder with one sub-directory per participant; load_audio_files builds
# each audio path as "<data_dir>/<id>/<id>_AUDIO.wav".
# NOTE(review): audio is read from "edaicwoz/" while the label CSVs above come
# from "daicwoz/" -- confirm the participant ids of the two line up.
data_dir = "edaicwoz/wav"

# Parallel lists, one entry per participant folder: ids, wav paths, split code
# (0 = train, 1 = test, 2 = validation), PHQ score and binary PHQ label.
file_ids, file_paths, types, labels, labels_binary = load_audio_files(data_dir)

In [64]:
# Sampling rate (Hz) every recording is resampled to when loaded.
sr = 16000
# Length of one training example in seconds.
chunk_secs = 15
# Number of audio samples per chunk (sampling rate * seconds).
chunk_floats = sr * chunk_secs


def prepare_audio_set(file_paths):
    """Load every recording and cut it into fixed-length, non-overlapping chunks.

    Each file is loaded as mono audio at the notebook-level rate ``sr`` and
    split into consecutive chunks of ``chunk_floats`` samples; the trailing
    remainder that does not fill a whole chunk is dropped. Every chunk
    inherits the split code and labels of its source file, looked up by
    position in the notebook-level lists ``types``, ``labels`` and
    ``labels_binary`` (so ``file_paths`` must be index-aligned with them).

    Recordings shorter than one chunk contribute nothing. They are skipped
    explicitly because ``np.array_split`` raises ``ValueError`` when asked
    for zero sections.

    Parameters
    ----------
    file_paths : sequence of str
        Paths of the audio files to load.

    Returns
    -------
    samples : np.ndarray, shape (n_chunks, chunk_floats, 1)
    samples_types : np.ndarray, shape (n_chunks,)
    samples_labels : np.ndarray, shape (n_chunks,)
    samples_labels_binary : np.ndarray, shape (n_chunks,)
    """
    chunk_batches = []
    samples_types = []
    samples_labels = []
    samples_labels_binary = []

    for i, file_path in enumerate(file_paths):
        audio, _ = librosa.load(file_path, sr=sr, mono=True)
        num_segments = audio.shape[0] // chunk_floats
        if num_segments == 0:
            # Too short to yield even one chunk.
            continue

        # The truncated signal is an exact multiple of chunk_floats, so a
        # reshape produces the same equal, ordered chunks as splitting it.
        usable = audio[:num_segments * chunk_floats]
        chunk_batches.append(usable.reshape(num_segments, chunk_floats))

        samples_types.extend([types[i]] * num_segments)
        samples_labels.extend([labels[i]] * num_segments)
        samples_labels_binary.extend([labels_binary[i]] * num_segments)

    if chunk_batches:
        samples = np.concatenate(chunk_batches, axis=0)
    else:
        # Keep the documented rank even when nothing could be chunked.
        # float32 is assumed to match librosa.load's default dtype -- TODO confirm.
        samples = np.empty((0, chunk_floats), dtype=np.float32)

    # Append a trailing channel axis: (n_chunks, chunk_floats, 1).
    samples = samples[:, :, None]

    samples_types = np.array(samples_types)
    samples_labels = np.array(samples_labels)
    samples_labels_binary = np.array(samples_labels_binary)

    return samples, samples_types, samples_labels, samples_labels_binary

print("[INFO] preparing data...")
# Loads and chunks every recording listed in file_paths; the four returned
# arrays are aligned per chunk (audio, split code, PHQ score, binary label).
samples, samples_types, samples_labels, samples_labels_binary = prepare_audio_set(file_paths)

[INFO] preparing data...


In [65]:
# Sanity check: expect (n_chunks, chunk_floats, 1), i.e. 240000 samples per chunk.
samples.shape

(11953, 240000, 1)

In [66]:
# Boolean masks over the chunk axis, one per split code
# (0 = train, 1 = test, 2 = validation).
is_train = samples_types == 0
is_test = samples_types == 1
is_val = samples_types == 2

# Only the binary PHQ label is used as the target from here on.
training_samples, training_labels = samples[is_train], samples_labels_binary[is_train]

test_samples, test_labels = samples[is_test], samples_labels_binary[is_test]

val_samples, val_labels = samples[is_val], samples_labels_binary[is_val]

In [79]:
import numpy as np
    

class DataGenerator(tf.keras.utils.Sequence):
    """Serve ``(x, y)`` mini-batches from two in-memory arrays.

    Parameters
    ----------
    x_set, y_set : np.ndarray
        Inputs and targets with the same length along axis 0.
    batch_size : int, optional
        Number of examples per batch; the last batch may be smaller.
    shuffle : bool, optional
        When True (default) the example order is shuffled once at
        construction and again after every epoch. Pass False for generators
        whose batch order must stay aligned with the input arrays.
    **kwargs
        Forwarded to the Keras base class.

    Raises
    ------
    ValueError
        If ``x_set`` and ``y_set`` differ in length or ``batch_size`` is not
        positive.
    """

    def __init__(self, x_set, y_set, batch_size=16, shuffle=True, **kwargs):
        # Initialise the Keras base class; recent Keras versions warn when a
        # Sequence subclass skips this call.
        super().__init__(**kwargs)
        if len(x_set) != len(y_set):
            raise ValueError(
                "x_set and y_set must have the same length, got {} and {}".format(
                    len(x_set), len(y_set)
                )
            )
        if batch_size <= 0:
            raise ValueError("batch_size must be positive, got {}".format(batch_size))

        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indices = np.arange(len(self.x))
        if self.shuffle:
            # Shuffle before the first epoch too; otherwise epoch 1 is served
            # in the original array order.
            np.random.shuffle(self.indices)

    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return batch number ``idx`` as ``(batch_x, batch_y)``."""
        inds = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_x = self.x[inds]
        batch_y = self.y[inds]
        return batch_x, batch_y

    def on_epoch_end(self):
        """Reshuffle the example order between epochs when shuffling is on."""
        if self.shuffle:
            np.random.shuffle(self.indices)

print("[INFO] constructing training/testing split...")

# All three generators share one batch size.
BATCH_SIZE = 64

train_gen = DataGenerator(training_samples, training_labels, batch_size=BATCH_SIZE)
test_gen = DataGenerator(test_samples, test_labels, batch_size=BATCH_SIZE)
val_gen = DataGenerator(val_samples, val_labels, batch_size=BATCH_SIZE)

[INFO] constructing training/testing split...


In [80]:
from kapre.composed import get_melspectrogram_layer
from kapre import LogmelToMFCC

# Filter counts of the five Conv2D layers, in order.
combination = (32, 64, 128, 256, 192)
# L2 weight-decay factor shared by every regularised layer below.
mylambda = 0.1

# Raw-waveform classifier: the audio front end runs inside the model, so the
# input is one chunk of shape (chunk_floats, 1) and the output is a single
# sigmoid probability.
model = tf.keras.models.Sequential([
    # Mel spectrogram in decibels, 26 mel bands. At 16 kHz, n_fft=400 and
    # hop_length=160 correspond to a 25 ms window and a 10 ms hop.
    get_melspectrogram_layer(n_fft=400, hop_length=160, input_data_format='channels_last', output_data_format='channels_last'
                                       , input_shape=(chunk_floats, 1), sample_rate=16000, return_decibel=True, n_mels= 26),
    # NOTE(review): n_mfccs=40 is larger than n_mels=26 above; a transform over
    # 26 mel bands cannot yield more than 26 coefficients -- confirm what kapre
    # actually returns here and whether 40 was intended.
    LogmelToMFCC(n_mfccs=40),

    # Normalise the front-end features before the convolutional stack.
    tf.keras.layers.BatchNormalization(),


    # Conv block 1: 32 filters, halve both spatial dimensions.
    tf.keras.layers.Conv2D(combination[0], (3, 3), activation='relu', kernel_regularizer = tf.keras.regularizers.l2(mylambda), padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.BatchNormalization(),

    # Conv block 2: 64 filters, with light dropout.
    tf.keras.layers.Conv2D(combination[1], (3, 3), activation='relu', kernel_regularizer = tf.keras.regularizers.l2(mylambda), padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),

    # Conv block 3: 128 filters; unlike the other pooled blocks it has no
    # batch normalisation.
    tf.keras.layers.Conv2D(combination[2], (3, 3), activation='relu', kernel_regularizer = tf.keras.regularizers.l2(mylambda), padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    # Conv block 4: 256 filters, heaviest dropout in the network.
    tf.keras.layers.Conv2D(combination[3], (3, 3), activation='relu', kernel_regularizer = tf.keras.regularizers.l2(mylambda), padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.4),


    # Final convolution (192 filters), not followed by pooling.
    tf.keras.layers.Conv2D(combination[4], (3, 3), activation='relu', kernel_regularizer = tf.keras.regularizers.l2(mylambda), padding='same'),

    # Classifier head: flatten, one hidden dense layer, sigmoid output.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu', kernel_regularizer = tf.keras.regularizers.l2(mylambda)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

print("[INFO] compiling model...")
# Binary cross-entropy matches the single sigmoid output; Adam with a small
# fixed learning rate.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# model.summary()

# Stop once validation loss has not improved for 20 epochs and roll the model
# back to the weights of the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, min_delta=0, restore_best_weights=True)

print("[INFO] fitting model...")
# class_weight makes label 1 count 0.72 against 0.28 for label 0 in the loss.
# NOTE(review): presumably chosen to offset class imbalance -- confirm against
# the actual label distribution of the training chunks.
history = model.fit(train_gen, epochs=200, validation_data=val_gen, callbacks=[early_stopping], verbose = 1, class_weight={0: .28, 1: .72}) # , class_weight={0: .25, 1:1}

[INFO] compiling model...
[INFO] fitting model...
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200


In [82]:
from tqdm import tqdm

# Mean predicted probability per participant, filled in by the loop below so
# the final print shows real values instead of an empty list.
means = []

# NOTE(review): neither of the next two names is used in this cell; they are
# kept only in case later cells rely on them. Rebinding file_ids also discards
# the list returned by load_audio_files.
file_ids = os.listdir("edaicwoz/merge")
test_label_file = pd.read_csv("daicwoz/dev_split.csv")

# Score every participant of the validation (dev) split on their full
# recording: chunk it exactly as for training, predict each chunk, then
# summarise the chunk-level probabilities.
for file_id in val_labels_df["Participant_ID"].values:
    file_path = os.path.join(data_dir, str(file_id), str(file_id) + "_AUDIO.wav")
    audio, _ = librosa.load(file_path, sr=sr, mono=True)
    num_segments = audio.shape[0] // chunk_floats
    if num_segments == 0:
        # Shorter than one chunk: nothing to predict on, and splitting into
        # zero sections would raise.
        print("Skipping " + str(file_id) + ": recording is shorter than one chunk")
        print("--------------------------------------------")
        continue

    # Drop the trailing partial chunk; the rest is an exact multiple of
    # chunk_floats, so a reshape yields the equal, ordered chunks. The extra
    # axis is the channel dimension the model expects.
    samples_test = audio[:num_segments * chunk_floats].reshape(num_segments, chunk_floats)
    samples_test = samples_test[:, :, None]

    print("True Label = " + str(val_labels_df[val_labels_df["Participant_ID"] == int(file_id)]['PHQ_Binary'].values[0]))
    predictions = model.predict(samples_test, verbose=0)
    print("Min Prediction = " + str(predictions.min()))
    print("Max Prediction = " + str(predictions.max()))
    print("Mean Prediction = " + str(predictions.mean()))
    means.append(float(predictions.mean()))
    # print(predictions)
    print("--------------------------------------------")

print(means)

True Label = 0
Min Prediction = 0.02110052
Max Prediction = 0.10322046
Mean Prediction = 0.058699366
--------------------------------------------
True Label = 0
Min Prediction = 0.0033402515
Max Prediction = 0.30538842
Mean Prediction = 0.045766268
--------------------------------------------
True Label = 0
Min Prediction = 0.024381027
Max Prediction = 0.7800975
Mean Prediction = 0.23049656
--------------------------------------------
True Label = 1
Min Prediction = 0.27553907
Max Prediction = 0.88052225
Mean Prediction = 0.5702885
--------------------------------------------
True Label = 1
Min Prediction = 0.106617555
Max Prediction = 0.7931887
Mean Prediction = 0.3410061
--------------------------------------------
True Label = 1
Min Prediction = 0.037092187
Max Prediction = 0.8245288
Mean Prediction = 0.3301889
--------------------------------------------
True Label = 1
Min Prediction = 0.2865608
Max Prediction = 0.93598014
Mean Prediction = 0.6841847
-------------------------------