<a href="https://colab.research.google.com/github/shetty7019/Bankers-algorithm-deadlock-avoidance/blob/main/final_depression_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import fnmatch

import os

import zipfile


def extract_files(zip_file, out_dir, delete_zip=False):
    """
    Extract the audio and transcript members of a zip archive.

    Members whose name ends with '.wav' are extracted into
    ``<out_dir>/audio`` and members matching ``*TRANSCRIPT.csv`` into
    ``<out_dir>/transcripts``. Every other member is ignored.

    Parameters
    ----------
    zip_file : str
        Path of the zip archive to read.
    out_dir : str
        Directory under which the 'audio' and 'transcripts' directories
        are created.
    delete_zip : bool
        When True, the archive is removed after extraction.
    """
    audio_dir = os.path.join(out_dir, 'audio')
    transcripts_dir = os.path.join(out_dir, 'transcripts')

    # Create each target directory independently so that both always exist,
    # even when the archive holds no member of one kind.
    for directory in (audio_dir, transcripts_dir):
        os.makedirs(directory, exist_ok=True)

    # The context manager closes the archive even if an extraction fails.
    with zipfile.ZipFile(zip_file) as zip_ref:
        for member in zip_ref.namelist():
            if member.endswith('.wav'):
                zip_ref.extract(member, audio_dir)
            elif fnmatch.fnmatch(member, '*TRANSCRIPT.csv'):
                zip_ref.extract(member, transcripts_dir)

    if delete_zip:
        os.remove(zip_file)


if __name__ == '__main__':

    # directory that holds the downloaded zip archives
    dir_name = '/Volumes/Seagate Backup Plus Drive/'

    # destination for the extracted audio and transcript files
    out_dir = '../../depression-detect/data/raw'

    # Whether to remove each archive once extracted. This name is passed to
    # extract_files below and must be defined; False keeps the archives,
    # matching extract_files' own default.
    delete_zip = False

    # extract every zip archive found directly inside dir_name
    for file in os.listdir(dir_name):
        if file.endswith('.zip'):
            zip_file = os.path.join(dir_name, file)
            extract_files(zip_file, out_dir, delete_zip=delete_zip)


In [None]:
import os
from pyAudioAnalysis import audioBasicIO as aIO
from pyAudioAnalysis import audioSegmentation as aS
import scipy.io.wavfile as wavfile
import wave



def remove_silence(filename, out_dir, smoothing=1.0, weight=0.3, plot=False):
    """
    Write a silence-free version of one participant's recording.

    The participant id ('P' plus the part of the file name before the first
    underscore) selects the directory ``<out_dir>/<partic_id>``. Unless
    is_segmentable() rejects the participant, the recording is split with
    aS.silence_removal, each returned segment is written there as
    ``<partic_id>_<start>-<end>.wav``, and the segments are then merged by
    concatenate_segments().

    Parameters
    ----------
    filename : str
        Path of the wav file to process.
    out_dir : str
        Directory under which the participant directory is created.
    smoothing : float
        Forwarded to aS.silence_removal as ``smooth_window``.
    weight : float
        Forwarded to aS.silence_removal as ``weight``.
    plot : bool
        Forwarded to aS.silence_removal as ``plot``.
    """
    # os.path.basename copes with the platform's path separators
    partic_id = 'P' + os.path.basename(filename).split('_')[0]  # PXXX
    if not is_segmentable(partic_id):
        return

    # Resolve both paths before changing directory so that relative
    # arguments still point at the right place afterwards.
    filename = os.path.abspath(filename)
    participant_dir = os.path.abspath(os.path.join(out_dir, partic_id))
    os.makedirs(participant_dir, exist_ok=True)

    # Segment files are written with bare names, so work inside the
    # participant directory and restore the caller's directory afterwards.
    previous_dir = os.getcwd()
    os.chdir(participant_dir)
    try:
        [Fs, x] = aIO.read_audio_file(filename)
        # 0.020, 0.020 are the 3rd/4th positional arguments of
        # silence_removal -- presumably window and step in seconds; confirm
        # against the pyAudioAnalysis version in use.
        segments = aS.silence_removal(x, Fs, 0.020, 0.020,
                                      smooth_window=smoothing,
                                      weight=weight,
                                      plot=plot)

        for s in segments:
            # s[0] and s[1] are scaled by Fs to index the sample array
            seg_name = "{:s}_{:.2f}-{:.2f}.wav".format(partic_id, s[0], s[1])
            wavfile.write(seg_name, Fs, x[int(Fs * s[0]):int(Fs * s[1])])

        # concatenate segmented wave files within participant directory
        concatenate_segments(participant_dir, partic_id)
    finally:
        os.chdir(previous_dir)


def is_segmentable(partic_id):
    """
    Tell whether a participant's recording should be segmented.

    Returns False for the participant ids hard-coded below and True for
    any other id.
    """
    excluded_ids = {
        'P300', 'P305', 'P306', 'P308', 'P315', 'P316', 'P343',
        'P354', 'P362', 'P375', 'P378', 'P381', 'P382', 'P385',
        'P387', 'P388', 'P390', 'P392', 'P393', 'P395', 'P408',
        'P413', 'P421', 'P438', 'P473', 'P476', 'P479', 'P490',
        'P492',
    }
    if partic_id in excluded_ids:
        return False
    return True


def _segment_sort_key(name):
    """Order segment files by the start time in '<id>_<start>-<end>.wav'."""
    span = os.path.splitext(name)[0].rsplit('_', 1)[-1]
    try:
        return (0, float(span.split('-')[0]), name)
    except ValueError:
        # names that do not follow the pattern go last, in name order
        return (1, 0.0, name)


def concatenate_segments(participant_dir, partic_id, remove_segment=True):
    """
    Merge the wav segments of one participant into a single wav file.

    Every '.wav' file in ``participant_dir`` except the output file itself
    is read, and the frames are written, in order of segment start time,
    to ``<participant_dir>/<partic_id>_no_silence.wav``. The output takes
    its wave parameters from the first segment.

    Parameters
    ----------
    participant_dir : str
        Directory containing the segment files; the output is written here.
    partic_id : str
        Participant id used to name the output file.
    remove_segment : bool
        When True, the segment files are deleted once the output has been
        written.

    Raises
    ------
    ValueError
        If the directory contains no segment files.
    """
    outfile = '{}_no_silence.wav'.format(partic_id)
    out_path = os.path.join(participant_dir, outfile)

    # os.listdir returns names in arbitrary order, so sort by start time.
    # An output left over from an earlier run is not treated as a segment.
    infiles = sorted(
        (name for name in os.listdir(participant_dir)
         if name.endswith('.wav') and name != outfile),
        key=_segment_sort_key)
    if not infiles:
        raise ValueError(
            'no wav segments found in {}'.format(participant_dir))

    # Paths are joined with participant_dir so the result does not depend
    # on the current working directory.
    data = []
    for infile in infiles:
        with wave.open(os.path.join(participant_dir, infile), 'rb') as w:
            data.append([w.getparams(), w.readframes(w.getnframes())])

    with wave.open(out_path, 'wb') as output:
        output.setparams(data[0][0])
        # write each segment to output
        for _, frames in data:
            output.writeframes(frames)

    # Delete the segments only after the merged file exists, so a failure
    # above never loses audio.
    if remove_segment:
        for infile in infiles:
            os.remove(os.path.join(participant_dir, infile))


if __name__ == '__main__':
    # where the raw participant recordings are read from
    dir_name = '/Users/bhumikashetty/Desktop/depression-detect/src/data/raw/audio/'

    # where the per-participant silence-free audio is written
    out_dir = '/Users/bhumikashetty/Desktop/depression-detect/src/data/interim/'

    # strip the silence from every wav recording found in dir_name
    for file in os.listdir(dir_name):
        if not file.endswith('.wav'):
            continue
        filename = os.path.join(dir_name, file)
        remove_silence(filename, out_dir)


In [None]:
#spectrograms.py
import numpy as np
from numpy.lib import stride_tricks
import os
from PIL import Image
import scipy.io.wavfile as wav



#This script creates spectrogram matrices from wav files that can be passed to the CNN.


def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """
    Short-time Fourier transform of a signal.

    The signal is padded with half a frame of zeros in front and a full
    frame of zeros behind, cut into overlapping frames of ``frameSize``
    samples, multiplied by ``window(frameSize)`` and transformed with a
    real FFT.

    Parameters
    ----------
    sig : array-like
        Input samples.
    frameSize : int
        Number of samples per frame.
    overlapFac : float
        Fraction of a frame shared with the next frame.
    window : callable
        Called with ``frameSize``; returns the window coefficients.

    Returns
    -------
    Complex array with one row per frame and ``frameSize // 2 + 1``
    columns.
    """
    taper = window(frameSize)
    step = int(frameSize - np.floor(overlapFac * frameSize))

    # leading zeros put the centre of the first frame on sample 0
    lead = np.zeros(int(np.floor(frameSize / 2.0)))
    padded = np.append(lead, sig)

    # frame count is fixed before the trailing padding is added
    n_frames = int(np.ceil((len(padded) - frameSize) / float(step)) + 1)

    # trailing zeros let the last frame run past the end of the signal
    padded = np.append(padded, np.zeros(frameSize))

    # view the padded signal as overlapping rows, then copy so the
    # windowing below does not write through the shared memory
    item_stride = padded.strides[0]
    frames = stride_tricks.as_strided(
        padded,
        shape=(n_frames, frameSize),
        strides=(item_stride * step, item_stride),
    ).copy()
    frames *= taper

    return np.fft.rfft(frames)

def logscale_spec(spec, sr=44100, factor=20.):
    """
    Regroup the frequency bins of a spectrogram onto a power-law scale.

    Bin edges are taken from ``linspace(0, 1, freqbins) ** factor``,
    stretched to the bin range, rounded and de-duplicated. Each output
    column is the sum of the input columns between two consecutive edges;
    the last output column runs to the end of the spectrum.

    Parameters
    ----------
    spec : 2-D array
        Spectrogram with time bins as rows and frequency bins as columns.
    sr : number
        Sample rate used to compute the frequencies.
    factor : float
        Exponent applied to the linear scale.

    Returns
    -------
    (newspec, freqs): the regrouped complex spectrogram and a list with
    the mean frequency of each output column.
    """
    n_frames, n_bins = np.shape(spec)

    edges = np.linspace(0, 1, n_bins) ** factor
    edges *= (n_bins - 1) / max(edges)
    edges = np.unique(np.round(edges)).astype(int)  # integer bin indices

    # upper edge of every band; None leaves the last band open-ended
    upper_edges = list(edges[1:]) + [None]
    bands = list(zip(edges, upper_edges))

    # sum the input columns that fall inside each band
    newspec = np.zeros([n_frames, len(edges)], dtype=np.complex128)
    for col, (low, high) in enumerate(bands):
        newspec[:, col] = np.sum(spec[:, low:high], axis=1)

    # mean frequency of each band
    allfreqs = np.abs(np.fft.fftfreq(n_bins * 2, 1. / sr)[:n_bins + 1])
    freqs = [np.mean(allfreqs[low:high]) for low, high in bands]

    return newspec, freqs


def stft_matrix(wavfile, binsize=2**10):
    """
    Build the decibel spectrogram matrix of a wav file.

    The file is read with scipy, transformed with stft() using frames of
    ``binsize`` samples, regrouped with logscale_spec(factor=1) and
    converted to decibels relative to 10e-6.

    Parameters
    ----------
    wavfile : str
        Path of the wav file.
    binsize : int
        Frame size handed to stft(); defaults to 1024 samples.

    Returns
    -------
    2-D float array with frequency bins as rows (flipped vertically) and
    time bins as columns.
    """
    samplerate, samples = wav.read(wavfile)
    if samples.ndim > 1:
        # stft() flattens its input, which would interleave the channels
        # of a multi-channel file; average them into one channel instead.
        samples = samples.mean(axis=1)

    x = stft(samples, binsize)
    sshow, _ = logscale_spec(x, factor=1, sr=samplerate)

    # Clamp exact zeros to the smallest positive float so log10 never
    # returns -inf, which would turn into NaN in any later min-max scaling.
    magnitude = np.maximum(np.abs(sshow), np.finfo(np.float64).tiny)
    ims = 20. * np.log10(magnitude / 10e-6)  # amplitude to decibel

    # rows become frequency bins, then the row order is reversed
    ims = np.transpose(ims)
    ims = np.flipud(ims)
    return ims

# Notebook check: compute the spectrogram matrix of one participant's
# silence-removed recording. The result is not assigned to anything.
stft_matrix("/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/interim/P301/P301_no_silence.wav")

In [None]:
#dev_data.py
import pandas as pd

# Load the two AVEC2017 split files. Note that df_test holds the *dev*
# split file, not a held-out test set.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/drive/MyDrive/Colab_Notebooks/Major_project/csv/train_split_Depression_AVEC2017.csv",
        "/content/drive/MyDrive/Colab_Notebooks/Major_project/csv/dev_split_Depression_AVEC2017.csv",
    )
)

# Stack both splits row-wise into the single table used for label lookups.
df_dev = pd.concat([df_train, df_test], axis=0)


In [None]:
#spectrograms_dict.py
#from spectrograms import stft_matrix
import os
#from dev_data import df_dev


"""
This script builds dictionaries for the depressed and non-depressed classes
with each participant id as the key, and the associated segmented matrix
spectrogram representation as the value.
"""


def build_class_dictionaries(dir_name):
    """
    Collect spectrogram matrices per class for the participants in df_dev.

    Walks ``dir_name`` recursively. For every file ending in
    'no_silence.wav' the integer participant id is parsed from the file
    name (the text between the leading letter and the first underscore).
    Participants accepted by in_dev_split() have their spectrogram built
    with stft_matrix() and stored under their id in the dictionary that
    matches get_depression_label().

    Returns
    -------
    (depressed_dict, normal_dict), each mapping participant id to its
    spectrogram matrix.
    """
    depressed_dict, normal_dict = {}, {}

    for subdir, _, files in os.walk(dir_name):
        for file in files:
            if not file.endswith('no_silence.wav'):
                continue

            partic_id = int(file.split('_')[0][1:])
            if not in_dev_split(partic_id):
                continue

            # matrix representation of the spectrogram
            mat = stft_matrix(os.path.join(subdir, file))

            # a truthy label means depressed
            if get_depression_label(partic_id):
                target = depressed_dict
            else:
                target = normal_dict
            target[partic_id] = mat

    return depressed_dict, normal_dict


def in_dev_split(partic_id):
    """Return True when partic_id occurs in df_dev's 'Participant_ID' column."""
    known_ids = set(df_dev['Participant_ID'].values)
    return partic_id in known_ids


def get_depression_label(partic_id):
    """
    Return the 'PHQ8_Binary' value of the df_dev row for partic_id.

    ``.item()`` requires exactly one matching row and raises ValueError
    otherwise.
    """
    is_participant = df_dev['Participant_ID'] == partic_id
    return df_dev.loc[is_participant, 'PHQ8_Binary'].item()


if __name__ == '__main__':
    # root directory that is searched recursively for '*no_silence.wav' files
    dir_name = "/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/interim"
    # participant id -> spectrogram matrix, one dictionary per class
    depressed_dict, normal_dict = build_class_dictionaries(dir_name)


In [None]:
#random_sampling.py

import numpy as np
import os
import random
#from spectrogram_dicts import build_class_dictionaries
# Seed both generators this cell draws from: NumPy's (np.random.choice) and
# the standard library's (random.sample). Seeding only NumPy leaves the
# random.sample draws different on every run.
np.random.seed(15)  # for reproducibility
random.seed(15)

def determine_num_crops(depressed_dict, normal_dict, crop_width=125):
    """
    Return how many whole crops fit into the shortest spectrogram.

    The shortest clip across both classes limits the number of samples
    taken from every clip, which keeps the classes balanced.

    Parameters
    ----------
    depressed_dict, normal_dict : dict
        Participant id -> 2-D spectrogram matrix; the width is
        ``shape[1]``.
    crop_width : int
        Width of one crop in columns.

    Returns
    -------
    int
        Number of complete crops of ``crop_width`` columns that fit into
        the narrowest matrix.

    Raises
    ------
    ValueError
        If both dictionaries are empty.
    """
    merged_dict = normal_dict.copy()
    merged_dict.update(depressed_dict)
    if not merged_dict:
        raise ValueError('no spectrograms were given')

    shortest_pixel_width = min(mat.shape[1] for mat in merged_dict.values())
    # floor division: a sample count must be a whole number
    return shortest_pixel_width // crop_width

def build_class_sample_dict(segmented_audio_dict, n_samples, crop_width):
    """
    Draw random crops for every participant of one class.

    Returns a dictionary that maps each participant id of
    ``segmented_audio_dict`` to the result of get_random_samples() for
    that participant's matrix.
    """
    return {
        partic_id: get_random_samples(clip_mat, n_samples, crop_width)
        for partic_id, clip_mat in segmented_audio_dict.items()
    }

def get_random_samples(matrix, n_samples, crop_width):
    """
    Get N random samples with width of crop_width from the numpy matrix
    representing the participant's audio spectrogram.

    The leading ``width % crop_width`` columns are dropped so that the
    remainder splits evenly into crops of ``crop_width`` columns.

    Parameters
    ----------
    matrix : 2-D array
        Spectrogram; crops are taken along axis 1.
    n_samples : number
        Requested number of crops. It is capped at the number of crops
        available and truncated to an int.
    crop_width : int
        Width of one crop in columns.

    Returns
    -------
    list
        The selected crops, drawn without replacement with
        ``random.sample``. Empty when the matrix is narrower than
        ``crop_width``.
    """
    # crop full spectrogram into segments of width = crop_width
    clipped_mat = matrix[:, (matrix.shape[1] % crop_width):]
    # floor division keeps the section count an int, as np.split expects
    n_splits = clipped_mat.shape[1] // crop_width
    if n_splits == 0:
        # not even one whole crop fits; np.split would reject 0 sections
        return []
    cropped_sample_ls = np.split(clipped_mat, n_splits, axis=1)

    # get random samples
    n_samples = int(min(n_samples, len(cropped_sample_ls)))
    return random.sample(cropped_sample_ls, n_samples)

def create_sample_dicts(
        crop_width,
        interim_dir="/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/interim",
        processed_dir="/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/processed/"):
    """
    Sample random spectrogram crops per participant and save them as npz.

    Builds the per-class spectrogram dictionaries from ``interim_dir``,
    derives the number of crops per participant from the shortest clip,
    draws that many random crops from every participant and writes one
    file per participant to ``processed_dir``: ``D<id>.npz`` for the
    depressed class and ``N<id>.npz`` for the normal class. Each crop is
    stored as a separate positional array in the file.

    Parameters
    ----------
    crop_width : int
        Width of one crop in columns.
    interim_dir : str
        Directory searched for the silence-removed wav files.
    processed_dir : str
        Directory that receives the npz files; created when missing.
    """
    # build dictionaries of participants and segmented audio matrix
    depressed_dict, normal_dict = build_class_dictionaries(interim_dir)
    n_samples = determine_num_crops(depressed_dict, normal_dict,
                                    crop_width=crop_width)
    # get n_sample random samples from each depressed participant
    depressed_samples = build_class_sample_dict(depressed_dict, n_samples,
                                                crop_width)
    # get n_sample random samples from each non-depressed participant
    normal_samples = build_class_sample_dict(normal_dict, n_samples,
                                             crop_width)

    os.makedirs(processed_dir, exist_ok=True)

    # Both classes share one naming scheme, '<prefix><id>.npz', joined with
    # os.path.join so no stray separator ends up in the path.
    for prefix, class_samples in (('D', depressed_samples),
                                  ('N', normal_samples)):
        for partic_id, samples in class_samples.items():
            filename = '{}{}.npz'.format(prefix, partic_id)
            outfile = os.path.join(processed_dir, filename)
            np.savez(outfile, *samples)

def _load_npz_arrays(npz_file_dir, file_names):
    """Return every array stored in the named npz files, in file order."""
    arrays = []
    for name in file_names:
        with np.load(os.path.join(npz_file_dir, name)) as data:
            for key in data.keys():
                arrays.append(data[key])
    return arrays


def rand_samp_train_test_split(npz_file_dir, test_size=0.2):
    """
    Build class-balanced train and test sets from per-participant npz files.

    Files named ``D*.npz`` are treated as the depressed class (label 1) and
    ``N*.npz`` as the normal class (label 0). The same number of files is
    drawn at random from each class, and the split is made per file, so all
    crops of one participant land on the same side.

    Parameters
    ----------
    npz_file_dir : str
        Directory containing the npz files.
    test_size : float
        Fraction of the selected files of each class that goes to the
        test set (truncated to a whole number of files).

    Returns
    -------
    (train_samples, train_labels, test_samples, test_labels) as numpy
    arrays. In each set the depressed samples come first, followed by the
    normal samples.
    """
    # Sort the listing: os.listdir order is arbitrary, and the seeded draw
    # below is only repeatable when its input order is.
    npz_files = sorted(f for f in os.listdir(npz_file_dir)
                       if f.endswith('.npz'))

    dep_samps = [f for f in npz_files if f.startswith('D')]
    norm_samps = [f for f in npz_files if f.startswith('N')]
    # calculate how many samples to balance classes
    max_samples = min(len(dep_samps), len(norm_samps))

    # randomly select max participants from each class without replacement
    dep_select_samps = np.random.choice(dep_samps, size=max_samples,
                                        replace=False)
    norm_select_samps = np.random.choice(norm_samps, size=max_samples,
                                         replace=False)

    num_test_samples = int(max_samples * test_size)
    # Split on an explicit index: slicing with -num_test_samples would put
    # everything into the test set when num_test_samples is 0.
    split_at = max_samples - num_test_samples

    dep_train = _load_npz_arrays(npz_file_dir, dep_select_samps[:split_at])
    norm_train = _load_npz_arrays(npz_file_dir, norm_select_samps[:split_at])
    dep_test = _load_npz_arrays(npz_file_dir, dep_select_samps[split_at:])
    norm_test = _load_npz_arrays(npz_file_dir, norm_select_samps[split_at:])

    # Labels are counted per class rather than assumed to be half and half,
    # so they stay aligned even if the files hold different crop counts.
    train_labels = np.concatenate((np.ones(len(dep_train)),
                                   np.zeros(len(norm_train))))
    test_labels = np.concatenate((np.ones(len(dep_test)),
                                  np.zeros(len(norm_test))))

    return np.array(dep_train + norm_train), train_labels, \
        np.array(dep_test + norm_test), test_labels

if __name__ == '__main__':

    create_sample_dicts(crop_width=125)

    # draw a class-balanced train/test split from the participants' npz files
    (train_samples, train_labels,
     test_samples, test_labels) = rand_samp_train_test_split( "/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/processed/")

    # write each of the four arrays to its own npz file
    print("Saving npz file locally...")
    for _outfile, _array in (
        ( "/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/processed/train_samples.npz", train_samples),
        ( "/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/processed/train_labels.npz", train_labels),
        ( "/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/processed/test_samples.npz", test_samples),
        ( "/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/processed/test_labels.npz", test_labels),
    ):
        np.savez(_outfile, _array)


In [None]:
#plot_metrics.py
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc


"""
Plots of test/train accuracy, loss, ROC curve.
"""


def plot_accuracy(history, model_id):
    """
    Save a plot of training and validation accuracy per epoch.

    Reads the 'accuracy' and 'val_accuracy' series from
    ``history.history`` and writes the figure to the models/images
    directory as ``cnn<model_id>_accuracy.png``.
    """
    for series in ('accuracy', 'val_accuracy'):
        plt.plot(history.history[series])

    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    # legend entries follow the plotting order above
    plt.legend(['train', 'test'], loc='upper left')

    target = '/content/drive/MyDrive/Colab_Notebooks/Major_project/models/images/cnn{}_accuracy.png'.format(model_id)
    plt.savefig(target)
    plt.close()


def plot_loss(history, model_id):
    """
    Save a plot of training and validation loss per epoch.

    Reads the 'loss' and 'val_loss' series from ``history.history`` and
    writes the figure to the models/images directory as
    ``cnn<model_id>_loss.png``.
    """
    for series in ('loss', 'val_loss'):
        plt.plot(history.history[series])

    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    # legend entries follow the plotting order above
    plt.legend(['train', 'test'], loc='upper left')

    target = '/content/drive/MyDrive/Colab_Notebooks/Major_project/models/images/cnn{}_loss.png'.format(model_id)
    plt.savefig(target)
    plt.close()


def plot_roc_curve(y_test, y_score, model_id):
    """
    Save the ROC curve of a set of scores.

    Computes the curve and its area from ``y_test`` and ``y_score`` with
    scikit-learn, draws it together with the diagonal reference line and
    writes the figure to the models/images directory as
    ``cnn<model_id>_roc.png``.
    """
    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, y_score)
    area = auc(false_pos_rate, true_pos_rate)
    curve_label = 'ROC curve (area = %0.2f)' % area

    plt.figure()
    plt.plot(false_pos_rate, true_pos_rate, label=curve_label)
    plt.plot([0, 1], [0, 1], 'k--')  # diagonal reference line

    # both axes share the same limits
    axis_limits = [0.0, 1.05]
    plt.xlim(axis_limits)
    plt.ylim(axis_limits)

    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic curve')
    plt.legend(loc="lower right")

    target = '/content/drive/MyDrive/Colab_Notebooks/Major_project/models/images/cnn{}_roc.png'.format(model_id)
    plt.savefig(target)
    plt.close()


In [None]:
#cnn.py
from __future__ import print_function
import os
import numpy as np
from sklearn.metrics import confusion_matrix
#from plot_metrics import plot_accuracy, plot_loss, plot_roc_curve
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K


# Seeds NumPy's global random generator only.
# NOTE(review): the Keras/TensorFlow backend presumably keeps its own random
# state for weight initialisation and dropout -- confirm whether it needs a
# separate seed for repeatable training runs.
np.random.seed(15)  # for reproducibility



def retrieve_from_drive(file):
    """
    Load a numpy file from the file system.

    Returns whatever ``np.load`` returns for ``file``. Raises
    FileNotFoundError when the path does not exist.
    """
    if os.path.exists(file):
        return np.load(file)
    raise FileNotFoundError(f"File '{file}' does not exist.")


def _min_max_scale(sample):
    """Scale one array to [0, 1]; a constant array maps to all zeros."""
    low = sample.min()
    span = sample.max() - low
    if span == 0:
        # every value is equal: dividing would give 0/0 = NaN
        return np.zeros_like(sample)
    return (sample - low) / span


def preprocess(X_train, X_test):
    """
    Min-max scale every sample of the train and test sets to [0, 1].

    Both arrays are converted to float32, then each sample (each entry
    along the first axis) is scaled with its own minimum and maximum. A
    sample whose values are all equal becomes all zeros.

    Returns
    -------
    (X_train, X_test) as new float32 arrays.
    """
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    X_train = np.array([_min_max_scale(X) for X in X_train])
    X_test = np.array([_min_max_scale(X) for X in X_test])
    return X_train, X_test


def prep_train_test(X_train, y_train, X_test, y_test, nb_classes):
    """
    Scale the samples and one-hot encode the labels.

    Prints the number of train and test samples, passes both sample
    arrays through preprocess() and converts both label vectors to binary
    class matrices with ``nb_classes`` columns.

    Returns
    -------
    (X_train, X_test, Y_train, Y_test)
    """
    n_train, n_test = X_train.shape[0], X_test.shape[0]
    print('Train on {} samples, validate on {}'.format(n_train, n_test))

    # per-sample scaling, see preprocess()
    X_train, X_test = preprocess(X_train, X_test)

    # class vectors -> binary class matrices
    Y_train, Y_test = (np_utils.to_categorical(labels, nb_classes)
                       for labels in (y_train, y_test))

    return X_train, X_test, Y_train, Y_test


def keras_img_prep(X_train, X_test, img_dep, img_rows, img_cols):
    """
    Reshape the sample arrays to the image layout the Keras backend uses.

    The channel axis is placed first when ``K.image_data_format()`` is
    'channels_first' and last otherwise.

    Parameters
    ----------
    X_train, X_test : arrays
        Samples; the first axis indexes the samples.
    img_dep : int
        Number of channels per image.
    img_rows, img_cols : int
        Image height and width.

    Returns
    -------
    (X_train, X_test, input_shape), where ``input_shape`` is the shape of
    a single image in the chosen layout.
    """
    # img_dep sets the channel count, so the function honours its own
    # parameter instead of a fixed single channel
    if K.image_data_format() == 'channels_first':
        input_shape = (img_dep, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, img_dep)

    X_train = X_train.reshape((X_train.shape[0],) + input_shape)
    X_test = X_test.reshape((X_test.shape[0],) + input_shape)
    return X_train, X_test, input_shape


from tensorflow.keras import regularizers

def cnn(X_train, y_train, X_test, y_test, batch_size,
        nb_classes, epochs, input_shape):
    """
    Build, train and evaluate the convolutional network that classifies
    audio clips as normal (0) or depressed (1).

    The network stacks two convolution + max-pooling stages, a 512-unit
    dense layer with L2 regularisation and dropout, and a softmax output
    with ``nb_classes`` units. It is trained with categorical
    cross-entropy and the 'adadelta' optimizer, using the test set as
    validation data. Train and test accuracy are printed after training.

    Returns
    -------
    (model, history): the fitted model and the object returned by
    ``model.fit``.
    """
    layers = [
        Conv2D(32, (3, 3), padding='valid', strides=1,
               input_shape=input_shape, activation='relu'),
        MaxPooling2D(pool_size=(4, 3), strides=(1, 3)),
        Conv2D(32, (1, 3), padding='valid', strides=1,
               input_shape=input_shape, activation='relu'),
        MaxPooling2D(pool_size=(1, 3), strides=(1, 3)),
        Flatten(),
        Dense(512, activation='relu',
              kernel_regularizer=regularizers.l2(0.01)),
        Dropout(0.5),
        Dense(nb_classes),
        Activation('softmax'),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)

    model.compile(loss='categorical_crossentropy',
                  optimizer='adadelta',
                  metrics=['accuracy'])

    history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                        verbose=1, validation_data=(X_test, y_test))

    # report accuracy on the train set first, then on the test set;
    # index 1 of the evaluation result is the accuracy metric
    for caption, features, targets in (('Train accuracy:', X_train, y_train),
                                       ('Test accuracy:', X_test, y_test)):
        score = model.evaluate(features, targets, verbose=1)
        print(caption, score[1])

    return model, history



def model_performance(model, X_train, X_test, y_train, y_test):
    """
    Predict on both sets and print the test confusion matrix.

    Class predictions are the argmax of the predicted probabilities. The
    true test classes are read from column 1 of the one-hot ``y_test``.
    ``y_train`` is accepted but not used.

    Returns
    -------
    (y_train_pred, y_test_pred, y_train_pred_proba, y_test_pred_proba,
    conf_matrix)
    """
    # probabilities first (test, then train), then the predicted classes
    y_test_pred_proba = model.predict(X_test)
    y_train_pred_proba = model.predict(X_train)
    y_test_pred = np.argmax(y_test_pred_proba, axis=1)
    y_train_pred = np.argmax(y_train_pred_proba, axis=1)

    # column 1 of the one-hot labels is the 1-D vector of true classes
    true_classes = y_test[:, 1]
    conf_matrix = standard_confusion_matrix(true_classes, y_test_pred)

    print("Confusion Matrix:")
    print(conf_matrix)

    return (y_train_pred, y_test_pred, y_train_pred_proba,
            y_test_pred_proba, conf_matrix)


def standard_confusion_matrix(y_test, y_test_pred):
    """
    Return the binary confusion matrix as [[tp, fp], [fn, tn]].

    Parameters
    ----------
    y_test : array-like
        True class labels (0 or 1).
    y_test_pred : array-like
        Predicted class labels (0 or 1).
    """
    # labels=[0, 1] forces a 2x2 result even when only one class occurs in
    # the data; without it the unpacking below fails on a 1x1 matrix.
    [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred,
                                            labels=[0, 1])
    return np.array([[tp, fp], [fn, tn]])




if __name__ == '__main__':
    model_id = input("Enter model id: ")

    print('Retrieving from drive...')
    X_train = retrieve_from_drive("/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/processed/train_samples.npz")
    y_train = retrieve_from_drive("/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/processed/train_labels.npz")
    X_test = retrieve_from_drive("/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/processed/test_samples.npz")
    y_test = retrieve_from_drive("/content/drive/MyDrive/Colab_Notebooks/Major_project/Dataset/processed/test_labels.npz")

    # each npz file holds a single positional array, stored as 'arr_0'
    X_train, y_train, X_test, y_test = \
        X_train['arr_0'], y_train['arr_0'], X_test['arr_0'], y_test['arr_0']

    # CNN parameters
    batch_size = 6
    nb_classes = 2
    epochs = 36

    # normalize data and prep for Keras
    print('Processing images for Keras...')
    X_train, X_test, y_train, y_test = prep_train_test(X_train, y_train,
                                                       X_test, y_test,
                                                       nb_classes=nb_classes)

    # 513x125x1 for spectrogram with crop size of 125 pixels
    img_rows, img_cols, img_depth = X_train.shape[1], X_train.shape[2], 1

    # reshape image input for Keras; keras_img_prep picks the channel
    # position from K.image_data_format()
    X_train, X_test, input_shape = keras_img_prep(X_train, X_test, img_depth,
                                                  img_rows, img_cols)

    # run CNN
    print('Fitting model...')
    model, history = cnn(X_train, y_train, X_test, y_test, batch_size,
                         nb_classes, epochs, input_shape)

    # evaluate model
    print('Evaluating model...')
    y_train_pred, y_test_pred, y_train_pred_proba, y_test_pred_proba, \
        conf_matrix = model_performance(model, X_train, X_test, y_train, y_test)

    # save the model locally
    print('Saving model locally...')
    model_name = '/content/drive/MyDrive/Colab_Notebooks/Major_project/models/cnn_{}.h5'.format(model_id)
    model.save(model_name)

    # custom evaluation metrics; conf_matrix is laid out [[tp, fp], [fn, tn]]
    print('Calculating additional test metrics...')
    tp, fp = conf_matrix[0][0], conf_matrix[0][1]
    fn, tn = conf_matrix[1][0], conf_matrix[1][1]
    accuracy = float(tp + tn) / np.sum(conf_matrix)
    # A metric whose denominator is zero (no predicted or no actual
    # positives) is reported as 0.0 instead of nan.
    precision = float(tp) / (tp + fp) if (tp + fp) else 0.0
    recall = float(tp) / (tp + fn) if (tp + fn) else 0.0
    if precision + recall:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0.0
    print("Accuracy: {}".format(accuracy))
    print("Precision: {}".format(precision))
    print("Recall: {}".format(recall))
    print("F1-Score: {}".format(f1_score))

    # plot train/test loss and accuracy; the plot helpers write the images
    # to the models/images directory
    print('Saving plots...')
    plot_loss(history, model_id)
    plot_accuracy(history, model_id)
    plot_roc_curve(y_test[:, 1], y_test_pred_proba[:, 1], model_id)

