In [1]:
import os
import librosa
import numpy as np
import tensorflow as tf
import pandas as pd

In [2]:
# Per-split label tables; each is later queried for the Participant_ID,
# PHQ_Score and PHQ_Binary columns.
train_labels_df, test_labels_df, val_labels_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "edaicwoz/train_split.csv",
        "edaicwoz/test_split.csv",
        "edaicwoz/dev_split.csv",
    )
)

In [3]:
def load_audio_files(data_dir, sr=16000):
    """Index the MFCC segment files and PHQ labels of every entry in ``data_dir``.

    Each entry of ``data_dir`` is expected to be a folder whose name starts with
    the numeric participant id (the text before the first ``_``) and which holds
    files named ``<id>_MFCC_<i>.npy`` for ``i = 0 .. n-1``.

    Labels come from the module-level ``train_labels_df``, ``test_labels_df``
    and ``val_labels_df`` frames (columns ``Participant_ID``, ``PHQ_Score``,
    ``PHQ_Binary``), searched in that order.

    Parameters
    ----------
    data_dir : str
        Root folder containing one sub-folder per participant.
    sr : int, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    tuple
        ``(file_ids, subject_ids, file_paths, types, labels, labels_binary)``.
        ``file_ids`` is the raw directory listing. The other five lists are
        aligned with it: the integer participant id, the list of segment paths,
        the split code (0 = train, 1 = test, 2 = dev/validation), the
        ``PHQ_Score`` and the ``PHQ_Binary`` value.

    Raises
    ------
    KeyError
        If a participant id appears in none of the three label frames.
    FileNotFoundError
        If the participant folder derived from an entry name does not exist.
    """
    # Split code -> label frame, in the same priority order as before.
    split_frames = (
        (0, train_labels_df),
        (1, test_labels_df),
        (2, val_labels_df),
    )

    file_ids = os.listdir(data_dir)
    subject_ids = []
    file_paths = []
    types = []
    labels = []
    labels_binary = []

    for entry_name in file_ids:
        file_id = entry_name.split("_")[0]
        subject_id = int(file_id)
        subject_dir = data_dir + "/" + file_id + "/"

        # Number of segments == number of non-directory entries in the folder.
        with os.scandir(subject_dir) as entries:
            n_segments = sum(1 for entry in entries if not entry.is_dir())
        segment_paths = [
            data_dir + "/" + file_id + "/" + file_id + "_MFCC_" + str(i) + ".npy"
            for i in range(n_segments)
        ]

        # Find the first split that knows this participant.
        for split_code, frame in split_frames:
            rows = frame[frame["Participant_ID"] == subject_id]
            if len(rows) > 0:
                break
        else:
            # Previously this surfaced as an opaque IndexError from `.values[0]`.
            raise KeyError(
                f"Participant {subject_id} (from '{entry_name}') is not listed in "
                "the train, test or dev label files"
            )

        types.append(split_code)
        labels.append(rows['PHQ_Score'].values[0])
        labels_binary.append(rows['PHQ_Binary'].values[0])
        subject_ids.append(subject_id)
        file_paths.append(segment_paths)

    return file_ids, subject_ids, file_paths, types, labels, labels_binary

# Root folder that load_audio_files lists; each entry is treated as one participant folder.
data_dir = "MFCCs_1030"

# Build the per-participant index: raw entry names, integer ids, segment paths,
# split codes (0 = train, 1 = test, 2 = dev) and the PHQ score / binary labels.
file_ids, subject_ids, file_paths, types, labels, labels_binary = load_audio_files(data_dir)

In [4]:
def prepare_audio_set(file_paths):
    """Load every MFCC segment and repeat the subject metadata once per segment.

    ``file_paths`` holds one list of ``.npy`` paths per subject. The subject at
    position ``k`` takes its metadata from the module-level lists
    ``subject_ids[k]``, ``types[k]``, ``labels[k]`` and ``labels_binary[k]``,
    which must already exist when this function is called.

    Returns five numpy arrays, aligned on their first axis: the loaded segments,
    the subject id, the split code, the PHQ score and the binary PHQ label of
    each segment.
    """
    segments = []
    segment_ids = []
    segment_types = []
    segment_scores = []
    segment_binary = []

    for subject_pos, segment_paths in enumerate(file_paths):
        # Stack this subject's segments first, exactly as the per-subject array did before.
        subject_mfccs = np.array([np.load(segment_paths[k]) for k in range(len(segment_paths))])
        n_segments = len(subject_mfccs)

        segments.extend(subject_mfccs)
        segment_ids.extend([subject_ids[subject_pos]] * n_segments)
        segment_types.extend([types[subject_pos]] * n_segments)
        segment_scores.extend([labels[subject_pos]] * n_segments)
        segment_binary.extend([labels_binary[subject_pos]] * n_segments)

    return (
        np.array(segments),
        np.array(segment_ids),
        np.array(segment_types),
        np.array(segment_scores),
        np.array(segment_binary),
    )

print("[INFO] preparing data...")
(
    samples,
    samples_ids,
    samples_types,
    samples_labels,
    samples_labels_binary,
) = prepare_audio_set(file_paths)
# Exchange axes 1 and 2 of the stacked segments; the model cells later use
# (shape[1], shape[2]) of the result as the GRU input shape.
samples = samples.swapaxes(1, 2)

[INFO] preparing data...


In [5]:
# Slice the per-segment arrays by split code: 0 = train, 1 = test, 2 = dev/validation.
training_samples, training_labels, training_subject_ids = (
    per_segment[samples_types == 0]
    for per_segment in (samples, samples_labels_binary, samples_ids)
)

test_samples, test_labels = (
    per_segment[samples_types == 1]
    for per_segment in (samples, samples_labels_binary)
)

# NOTE(review): `val_labels_df` is rebound here from the dev-split DataFrame to an
# array of binary labels. The generator cell consumes it under this name, but
# re-running the load_audio_files cell after this one will fail because the
# DataFrame is gone. Rename both uses together (e.g. to `val_labels`).
val_samples, val_labels_df = (
    per_segment[samples_types == 2]
    for per_segment in (samples, samples_labels_binary)
)

In [6]:
# Sanity check on the training tensor; axes 1 and 2 are later passed to the GRU as its input shape.
training_samples.shape

(2350, 15001, 13)

In [7]:
import numpy as np

class DataGeneratorVanilla(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves ``(x, y)`` mini-batches from in-memory arrays.

    Parameters
    ----------
    x_set, y_set :
        Arrays indexed along their first axis. They must have the same length
        and support indexing with an integer index array (e.g. numpy arrays).
    batch_size : int, optional
        Number of samples per batch; the last batch may be smaller.
    shuffle : bool, optional
        When True (the default, matching the previous behaviour) the sample
        order is reshuffled at the end of every epoch. Pass False to keep a
        fixed order, e.g. when predictions must line up with the labels.

    Raises
    ------
    ValueError
        If ``x_set`` and ``y_set`` differ in length or ``batch_size`` is not
        positive.
    """

    def __init__(self, x_set, y_set, batch_size=256, shuffle=True):
        # Let the Keras base class set up its own state (Keras 3 warns when this is skipped).
        super().__init__()
        if len(x_set) != len(y_set):
            raise ValueError(
                f"x_set and y_set must have the same length, got {len(x_set)} and {len(y_set)}"
            )
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Order in which samples are served; permuted in on_epoch_end.
        self.indices = np.arange(self.x.shape[0])

    def __len__(self):
        """Return the number of batches per epoch (ceil of samples / batch_size)."""
        return (len(self.x) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Return batch number ``idx`` as a ``(batch_x, batch_y)`` pair."""
        inds = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_x = self.x[inds]
        batch_y = self.y[inds]
        return batch_x, batch_y

    def on_epoch_end(self):
        """Reshuffle the sample order between epochs when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

print("[INFO] constructing training/testing split...")
# One generator per split, all using batches of 32 segments.
train_gen = DataGeneratorVanilla(x_set=training_samples, y_set=training_labels, batch_size=32)
test_gen = DataGeneratorVanilla(x_set=test_samples, y_set=test_labels, batch_size=32)
# `val_labels_df` holds the validation binary labels at this point (it was
# rebound in the split-selection cell), not the dev-split DataFrame.
val_gen = DataGeneratorVanilla(x_set=val_samples, y_set=val_labels_df, batch_size=32)

[INFO] constructing training/testing split...


In [8]:
# Number of training segments whose PHQ_Binary label is 0 (class-balance check).
(training_labels == 0).sum()

1240

In [9]:
# First three dimensions of the training tensor; the last two are used as the model input shape.
(training_samples.shape[0], training_samples.shape[1], training_samples.shape[2])

(2350, 15001, 13)

In [11]:
def create_cnn_model(input_shape):
    """Build the (uncompiled) recurrent binary classifier.

    Despite the name there is no convolutional layer: the stack is a 64-unit
    GRU, a 32-unit ReLU dense layer, dropout of 0.3 and a single sigmoid output.

    ``input_shape`` is forwarded to the GRU as its ``input_shape`` argument.
    """
    layers = tf.keras.layers
    return tf.keras.models.Sequential([
        layers.GRU(units=64, input_shape=input_shape),
        layers.Dense(32, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(1, activation='sigmoid'),
    ])

# Learning-rate sweep: train one fresh model per candidate rate and keep the
# fit history so the runs can be compared on validation loss afterwards.
histories = []
learning_rates = [0.0001, 0.0003, 0.0005, 0.001]

for lr in learning_rates:
    # Reset Keras' global state before each run; `del model` alone leaves the
    # state of earlier models (graphs, layer-name counters) in place.
    tf.keras.backend.clear_session()
    model = create_cnn_model((training_samples.shape[1], training_samples.shape[2]))

    print("[INFO] compiling model...")
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=0.1), metrics=['accuracy'])

    model.summary()

    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    print("[INFO] fitting model...")
    history = model.fit(train_gen, epochs=100, validation_data=val_gen, callbacks=[early_stopping])

    histories.append(history)

    del model

lowest_loss = float('inf')
lowest_loss_index = -1

for i, history in enumerate(histories):
    # Score each run by its best epoch. Indexing with [-6] is only the best
    # epoch when early stopping (patience 5) actually fired; a run that reaches
    # the epoch cap can have its best epoch anywhere. nanmin ignores NaN losses.
    val_loss = np.nanmin(history.history['val_loss'])
    if val_loss < lowest_loss:
        lowest_loss = val_loss
        lowest_loss_index = i

if lowest_loss_index < 0:
    # Without this guard the -1 sentinel would silently report the last learning rate.
    raise RuntimeError("No run produced a finite validation loss; cannot pick a learning rate")

print(f"The learning rate with the lowest validation loss is {learning_rates[lowest_loss_index]}")

[INFO] compiling model...
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_4 (GRU)                 (None, 64)                15168     
                                                                 
 dense_8 (Dense)             (None, 32)                2080      
                                                                 
 dropout_4 (Dropout)         (None, 32)                0         
                                                                 
 dense_9 (Dense)             (None, 1)                 33        
                                                                 
Total params: 17,281
Trainable params: 17,281
Non-trainable params: 0
_________________________________________________________________
[INFO] fitting model...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/10

In [18]:
def create_cnn_model(input_shape):
    """Build the (uncompiled) GRU binary classifier used by the label-smoothing sweep.

    This repeats the definition from the learning-rate sweep cell unchanged.
    The name is historical; the network has no convolutional layers.

    ``input_shape`` is forwarded to the GRU as its ``input_shape`` argument.
    """
    stack = [
        tf.keras.layers.GRU(units=64, input_shape=input_shape),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ]
    return tf.keras.models.Sequential(stack)

# Label-smoothing sweep at a fixed learning rate of 0.0003: train one fresh
# model per smoothing value and compare the runs on validation loss.
histories = []
label_smoothings = [0, 0.05, 0.1]

for ls in label_smoothings:
    # Reset Keras' global state before each run; `del model` alone leaves the
    # state of earlier models (graphs, layer-name counters) in place.
    tf.keras.backend.clear_session()
    model = create_cnn_model((training_samples.shape[1], training_samples.shape[2]))

    print("[INFO] compiling model...")
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003), loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=ls), metrics=['accuracy'])

    model.summary()

    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    print("[INFO] fitting model...")
    history = model.fit(train_gen, epochs=100, validation_data=val_gen, callbacks=[early_stopping])

    histories.append(history)

    del model

lowest_loss = float('inf')
lowest_loss_index = -1

for i, history in enumerate(histories):
    # Score each run by its best epoch. Indexing with [-6] is only the best
    # epoch when early stopping (patience 5) actually fired; a run that reaches
    # the epoch cap can have its best epoch anywhere. nanmin ignores NaN losses.
    # NOTE(review): the validation loss itself is computed with each run's own
    # smoothing value, so losses across runs are not on an identical scale.
    val_loss = np.nanmin(history.history['val_loss'])
    if val_loss < lowest_loss:
        lowest_loss = val_loss
        lowest_loss_index = i

if lowest_loss_index < 0:
    # Without this guard the -1 sentinel would silently report the last smoothing value.
    raise RuntimeError("No run produced a finite validation loss; cannot pick a label smoothing value")

print(f"The label smoothing value with the lowest validation loss is {label_smoothings[lowest_loss_index]}")

[INFO] compiling model...
Model: "sequential_30"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_30 (GRU)                (None, 64)                15168     
                                                                 
 dense_60 (Dense)            (None, 32)                2080      
                                                                 
 dropout_30 (Dropout)        (None, 32)                0         
                                                                 
 dense_61 (Dense)            (None, 1)                 33        
                                                                 
Total params: 17,281
Trainable params: 17,281
Non-trainable params: 0
_________________________________________________________________
[INFO] fitting model...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
[INFO] com