In [1]:
import os
import librosa
import numpy as np
np.set_printoptions(suppress = True)
import tensorflow as tf
import pandas as pd

In [2]:
# This notebook runs on E-DAIC; the DAIC-WOZ experiments did not give good results either.

# Label tables for the three splits, read in the order train, test, dev.
train_labels_df, test_labels_df, val_labels_df = map(
    pd.read_csv,
    (
        "edaicwoz/train_split.csv",
        "edaicwoz/test_split.csv",
        "edaicwoz/dev_split.csv",
    ),
)

In [3]:
def load_audio_files(data_dir, sr=16000):
    """Index the per-participant chunk files under ``data_dir`` and attach labels.

    Every entry of ``data_dir`` is treated as one participant. The text before
    the first ``_`` in the entry name is the participant id, and the chunks are
    expected at ``<data_dir>/<id>/<id>_MEL_<i>.npy`` for ``i = 0 .. n-1``.
    Labels are looked up in the notebook-level frames ``train_labels_df``,
    ``test_labels_df`` and ``val_labels_df``, in that order.

    Args:
        data_dir: Directory holding one sub-directory per participant.
        sr: Unused; kept only so that existing calls keep working.

    Returns:
        A 6-tuple ``(file_ids, subject_ids, file_paths, types, labels,
        labels_binary)``. ``file_ids`` is the raw ``os.listdir(data_dir)``
        result. The other five lists hold one entry per participant, in
        listing order: the integer id, the list of chunk paths, the split
        code (0 = train, 1 = test, 2 = dev), ``PHQ_Score`` and ``PHQ_Binary``.

    Raises:
        IndexError: If a participant is listed in none of the three splits.
    """
    # (split code, label frame), checked in this order.
    split_frames = (
        (0, train_labels_df),
        (1, test_labels_df),
        (2, val_labels_df),
    )

    file_ids = os.listdir(data_dir)
    subject_ids = []
    file_paths = []
    types = []
    labels = []
    labels_binary = []

    for entry in file_ids:
        file_id = entry.split("_")[0]
        subject_id = int(file_id)

        # Count only real chunk files so that stray files in the participant
        # directory (editor backups, OS metadata, ...) do not produce paths
        # to chunks that do not exist.
        subject_dir = data_dir + "/" + file_id
        chunk_prefix = file_id + "_MEL_"
        n_chunks = sum(
            1
            for name in os.listdir(subject_dir)
            if name.startswith(chunk_prefix)
            and name.endswith(".npy")
            and os.path.isfile(subject_dir + "/" + name)
        )
        file_path = [
            subject_dir + "/" + chunk_prefix + str(i) + ".npy"
            for i in range(n_chunks)
        ]

        # Filter each label frame once and stop at the first split that
        # knows this participant.
        for split_code, labels_df in split_frames:
            rows = labels_df[labels_df["Participant_ID"] == subject_id]
            if len(rows) > 0:
                break
        else:
            # Same exception type as the former bare `.values[0]` failure,
            # but with a message that names the offending participant.
            raise IndexError(
                f"Participant {subject_id} (entry {entry!r} in {data_dir!r}) "
                "is not listed in the train, test or dev split"
            )

        types.append(split_code)
        labels.append(rows["PHQ_Score"].values[0])
        labels_binary.append(rows["PHQ_Binary"].values[0])
        subject_ids.append(subject_id)
        file_paths.append(file_path)

    return file_ids, subject_ids, file_paths, types, labels, labels_binary

# Root directory with one sub-directory of pre-computed chunk files per participant.
data_dir = "MELs_40100_MM_SCA_CROP"

# Build the participant index; `sr` keeps its default and is not used by load_audio_files.
file_ids, subject_ids, file_paths, types, labels, labels_binary = load_audio_files(data_dir)

In [4]:
def prepare_audio_set(file_paths):
    """Load every chunk from disk and expand the per-participant metadata per chunk.

    Reads the notebook-level lists ``subject_ids``, ``types``, ``labels`` and
    ``labels_binary``; entry ``k`` of each list describes ``file_paths[k]``.

    Args:
        file_paths: One list of ``.npy`` chunk paths per participant.

    Returns:
        A 5-tuple of NumPy arrays ``(samples, samples_ids, samples_types,
        samples_labels, samples_labels_binary)`` with one entry per loaded
        chunk; each metadata value is repeated once for every chunk of its
        participant.
    """
    chunk_list = []
    id_list = []
    type_list = []
    score_list = []
    binary_list = []

    for subject_idx, chunk_paths in enumerate(file_paths):
        subject_chunks = np.array([np.load(path) for path in chunk_paths])
        n_chunks = len(subject_chunks)

        chunk_list.extend(subject_chunks)
        id_list += [subject_ids[subject_idx]] * n_chunks
        type_list += [types[subject_idx]] * n_chunks
        score_list += [labels[subject_idx]] * n_chunks
        binary_list += [labels_binary[subject_idx]] * n_chunks

    return (
        np.array(chunk_list),
        np.array(id_list),
        np.array(type_list),
        np.array(score_list),
        np.array(binary_list),
    )

print("[INFO] preparing data...")
# Load every chunk listed in file_paths into memory; the per-participant
# metadata is repeated once per chunk.
samples, samples_ids, samples_types, samples_labels, samples_labels_binary = prepare_audio_set(file_paths)
# Swap the last two axes of every chunk: (n, a, b) -> (n, b, a).
samples = np.swapaxes(samples, 1, 2)

[INFO] preparing data...


In [5]:
# Sanity check: shape of the stacked sample array after the axis swap.
samples.shape

(14920, 151, 80)

In [6]:
# Partition the chunks by the split code assigned in load_audio_files:
# 0 = listed in train_split.csv, 1 = listed in test_split.csv, 2 = the remaining (dev) participants.
# Only the binary PHQ label (PHQ_Binary) is kept as the target.
training_samples = samples[samples_types == 0]
training_labels = samples_labels_binary[samples_types == 0]

test_samples = samples[samples_types == 1]
test_labels = samples_labels_binary[samples_types == 1]

val_samples = samples[samples_types == 2]
val_labels = samples_labels_binary[samples_types == 2]

In [7]:
import numpy as np
    

class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` serving ``(x, y)`` mini-batches from in-memory arrays.

    Batches follow ``self.indices``, which starts in array order and is
    reshuffled in place by ``on_epoch_end``; the first pass over the data is
    therefore in the original order.
    """

    def __init__(self, x_set, y_set, batch_size=16, **kwargs):
        """Store the arrays and build the initial (unshuffled) index order.

        Args:
            x_set: Array of samples, indexed along its first axis.
            y_set: Array of labels aligned with ``x_set``.
            batch_size: Number of samples per batch; the last batch may be
                smaller.
            **kwargs: Forwarded to the base-class constructor.

        Raises:
            ValueError: If ``x_set`` and ``y_set`` differ in length or
                ``batch_size`` is smaller than 1.
        """
        # Newer Keras releases expect subclasses to run the base constructor.
        super().__init__(**kwargs)

        if len(x_set) != len(y_set):
            raise ValueError(
                f"x_set and y_set must have the same length, "
                f"got {len(x_set)} and {len(y_set)}"
            )
        if batch_size < 1:
            # Fail here instead of with a division by zero inside __len__.
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.indices = np.arange(self.x.shape[0])

    def __len__(self):
        """Return the number of batches per epoch, counting a final partial batch."""
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return batch number ``idx`` as a ``(batch_x, batch_y)`` pair."""
        inds = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_x = self.x[inds]
        batch_y = self.y[inds]
        return batch_x, batch_y

    def on_epoch_end(self):
        """Reshuffle the sample order in place for the next epoch."""
        np.random.shuffle(self.indices)

print("[INFO] constructing training/testing split...")
# One generator per split, all serving batches of 4 chunks.
train_gen = DataGenerator(training_samples, training_labels, batch_size=4)
test_gen = DataGenerator(test_samples, test_labels, batch_size=4)
val_gen = DataGenerator(val_samples, val_labels, batch_size=4)

[INFO] constructing training/testing split...


In [8]:
# Shape check before building the model: the Input layer reads samples.shape[1] and samples.shape[2].
samples.shape

(14920, 151, 80)

In [9]:
from kapre.composed import get_melspectrogram_layer
from kapre import LogmelToMFCC

# Architecture details are specified on page 7 of the paper.
model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(samples.shape[1], samples.shape[2])),
    # Append a channel axis for Conv2D. The target shape is derived from the
    # data, exactly as the Input layer does, instead of hard-coding
    # (151, 80, 1), so the model keeps building if the chunk size changes.
    tf.keras.layers.Reshape((samples.shape[1], samples.shape[2], 1)),
    # The (1, 7) kernels convolve along the second sample axis only.
    tf.keras.layers.Conv2D(filters=32, kernel_size=(1, 7), strides=(1, 1), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(pool_size=(4, 3), strides=(1, 3)),
    tf.keras.layers.Conv2D(filters=32, kernel_size=(1, 7), strides=(1, 1), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(pool_size=(1, 3), strides=(1, 3)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dropout(rate=0.5),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dropout(rate=0.5),
    # A single sigmoid unit for the binary PHQ target.
    tf.keras.layers.Dense(units=1, activation='sigmoid')
])

print("[INFO] compiling model...")
model.compile(optimizer=tf.keras.optimizers.Adadelta(learning_rate=1), loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

# Stop after 5 epochs without improvement in validation accuracy and roll
# back to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, min_delta=0, restore_best_weights=True)

print("[INFO] fitting model...")
history = model.fit(train_gen, epochs=30, validation_data=val_gen, callbacks=[early_stopping])

[INFO] compiling model...
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape (Reshape)           (None, 151, 80, 1)        0         
                                                                 
 conv2d (Conv2D)             (None, 151, 80, 32)       256       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 148, 26, 32)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 148, 26, 32)       7200      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 148, 8, 32)       0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (