In [1]:
import os
import glob
import pickle
import logging
import sklearn
import librosa
import itertools
import numpy as np
import pandas as pd
from PIL import Image
import librosa.display
import tensorflow as tf
from tensorflow import keras
from scipy.io.wavfile import write
from matplotlib import pyplot as plt
from keras.utils import plot_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
SAMPLR_RATE = 16000
!pip install h5py
import h5py



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
def read_transcript(all_files):
  print("ALLL Files: ",all_files)
  all_transcripts = {}
  for filename in all_files:
    id = int(filename.split("/")[-1].split("_")[0])
    transcript = pd.read_csv(filename, sep='\t')
    participants_lines = transcript[transcript.speaker == "Participant"][["start_time", "stop_time"]]
    all_transcripts[id] = participants_lines
  print("All transcripts keys: ",all_transcripts.keys())
  return all_transcripts

In [4]:
def exctract_participant_lines(all_files, all_transcripts):
  all_participants_audios = {}
  for filename in all_files:
    a_participant_audio = []
    id = int(filename.split("/")[-1].split("_")[0])
    audio, _ = librosa.load(filename, sr = SAMPLR_RATE)
    transcript = all_transcripts[id]
    for i in range(len(transcript)):
      audio_start_time = round(transcript.iloc[i].start_time * SAMPLR_RATE)
      audio_stop_time = round(transcript.iloc[i].stop_time * SAMPLR_RATE)
      a_participant_audio.append(audio[audio_start_time:audio_stop_time])

    # a_participant_audio = np.array(a_participant_audio, dtype=object)
    # b_participant_audio = np.array(a_participant_audio, dtype=object)
    a_participant_audio = np.hstack(a_participant_audio)
    all_participants_audios[id] = a_participant_audio
  print("all_participants_audios keys: ",all_participants_audios.keys())
  return all_participants_audios

In [5]:
# sliding window 10 sec
from scipy import stats

def slidingWindow(X,window_size=32, step=10):
    windowed_mels = []
    for i in range(0, len(X) - window_size, step):
      x = X[i:(i + window_size)]
      windowed_mels.append(x)
    return np.array(windowed_mels)

In [6]:
# spectogram code
# widowns_size = 2 * SAMPLR_RATE
# step_size = 2 * SAMPLR_RATE
# def split_audios(audio_dict):
#   not_dep_participants_mels = {}

#   for key in audio_dict.keys():
#     mels = []
#     not_dep_participants_mels = {}
#     filename = key
#     # print(f"{filename} : Started")
#     audio = audio_dict[key]
#     audios_part = slidingWindow(audio, widowns_size, step_size)
#     # print('Audio part',audios_part)
#     # print(f"{filename} : Splited")

#     for i in range(len(audios_part)):
#       mel_spectogram = librosa.feature.melspectrogram(y=audios_part[i], sr=SAMPLR_RATE, n_fft=400, hop_length=160, win_length=400, window='hamming', n_mels = 32).T
#       # print('mel_spectogram',mel_spectogram)
#       mels.append(mel_spectogram)
#     mels = np.array(mels)

#     not_dep_participants_mels[key] = mels

#     # print(f"{filename} : Done")
#   # print(f"All_Audios : Done")
#   print("not_dep_participants_mels: ",not_dep_participants_mels.keys())
#   return not_dep_participants_mels


# spectogram code

widowns_size = 2 * SAMPLR_RATE
step_size = 2 * SAMPLR_RATE
def split_audios(audio_dict):
  not_dep_participants_mels = {}
  for key in audio_dict.keys():
    mels = []
    filename = key
    print(f"{filename} : Started")
    audio = audio_dict[key]
    audios_part = slidingWindow(audio, widowns_size, step_size)
    # print('Audio part',audios_part)
    print(f"{filename} : Splited")

    for i in range(len(audios_part)):
      mel_spectogram = librosa.feature.melspectrogram(y=audios_part[i], sr=SAMPLR_RATE,
                                      n_fft=400, hop_length=160, win_length=400, window='hamming', n_mels = 32).T
      # print('mel_spectogram',mel_spectogram)
      mels.append(mel_spectogram)
    mels = np.array(mels)

    not_dep_participants_mels[key] = mels

    print(f"{filename} : Done")
  print(f"All_Audios : Done")
  return not_dep_participants_mels

In [7]:
model = keras.models.Sequential([
  keras.layers.Input((201, 32)),
  keras.layers.Reshape((201, 32, 1)),
  keras.layers.Conv2D(1, (1, 3), activation="relu", padding="same"),
  keras.layers.MaxPooling2D(2),
  keras.layers.Conv2D(1, (3, 1), activation="relu", padding="same"),
  keras.layers.MaxPooling2D(2),
  keras.layers.Conv2D(1, (1, 3), activation="relu", padding="same"),
  keras.layers.MaxPooling2D(2),
  keras.layers.Flatten(),
  keras.layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [8]:
from sklearn.utils.class_weight import compute_class_weight
X_train = []
Y_train = []
for chunk in pd.read_csv('/content/drive/My Drive/Dataset/train_split_Depression_AVEC2017.csv', chunksize=40, delimiter=',', encoding='utf-8'):
  train = np.array(chunk)[:, 0:2]
  all_text_files = []
  all_audio_files = []
  for row in train:
    all_text_files.append('/content/drive/My Drive/Dataset/'+str(int(row[0]))+ '_TRANSCRIPT.csv')
    all_audio_files.append('/content/drive/My Drive/Dataset/'+str(int(row[0]))+ '_AUDIO.wav')

  all_transcripts = read_transcript(all_text_files)
  all_participants_audios =exctract_participant_lines(all_audio_files, all_transcripts)
  participants_mels = split_audios(all_participants_audios)
  print(participants_mels)
  for row in train:
    key = int(row[0])
    binary_value = int(row[1])
    print('key',key, 'bin_value', binary_value)
    # Get the corresponding 3D array from the dictionary
    array_3d = participants_mels.get(key)
    if array_3d is not None:
      n = array_3d.shape[0]  # Get the number of frames for this key
      print('n', n)
      frames = array_3d.reshape((-1, 201, 32))  # Flatten 3D array into 2D frames
      X_train.append(frames)  # Append frames to X_train
      # Y_train.extend(np.array(([binary_value] * n)))  # Append binary value n times to y_train
      if binary_value==0:
        Y_train.append(np.zeros((n,1)))
      else:
        Y_train.append(np.ones((n,1)))
    else:
      print("Here--------------------------------------------------------------------")

  # Convert lists to NumPy arrays
X_train = np.vstack(X_train)
Y_train = np.vstack(Y_train)
print('xtrainshape',X_train.shape)
print('ytrainshape',Y_train.shape)
print('ytrain', Y_train)
  # model.modelFit(X_train,Y_train, 10)
  # model.fit(X_train,Y_train, 10)

# Calculate class weights
# class_weights = compute_class_weight('balanced', np.unique(Y_train), Y_train)
# Compile the model with class weights
# class_weights = {0: 1, 1: 2}

# Define sample weights based on class weights
# sample_weights = np.array([class_weights[label] for label in Y_train])



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
        [5.89274510e-04, 1.72879777e-06, 4.78295760e-07, ...,
         1.49651473e-08, 1.21424675e-08, 7.16175563e-09],
        ...,
        [1.69684775e-02, 1.79635175e-02, 2.19688658e-02, ...,
         9.37647997e-07, 2.01938178e-06, 4.04129338e-07],
        [2.45662909e-02, 2.19699573e-02, 2.50500906e-02, ...,
         1.37340101e-06, 1.77179982e-06, 5.90692764e-07],
        [6.55988278e-03, 1.50809688e-02, 1.46909263e-02, ...,
         9.34280433e-06, 4.97941710e-06, 4.51750248e-06]],

       [[8.18617642e-03, 1.41209373e-02, 1.18848430e-02, ...,
         5.75495778e-06, 5.03507727e-06, 4.50588050e-06],
        [2.01415289e-02, 2.56878864e-02, 2.80270223e-02, ...,
         3.22127107e-06, 1.81160192e-06, 1.18256958e-06],
        [3.27633061e-02, 2.44755987e-02, 1.32755581e-02, ...,
         2.83566146e-06, 2.94411825e-06, 1.76755589e-06],
        ...,
        [1.39051797e-02, 3.02765761e-02, 1.73143353e-02, ...,
     

In [9]:
num_zeros = 0
num_ones = 0

# Iterate through the array
for label in Y_train:
    if label == 0:
        num_zeros += 1
    elif label == 1:
        num_ones += 1
print(1)
print("Number of zeros:", num_zeros)
print("Number of ones:", num_ones)
total_samples = len(Y_train)
weight_zero = total_samples / (1.0*num_zeros)
weight_one = total_samples / (1.0* num_ones)

class_weights = {0: weight_zero, 1: weight_one}

# Train the model with sample weights
model.fit(X_train, Y_train, epochs=5, batch_size=32, class_weight = class_weights)


1
Number of zeros: 16621
Number of ones: 6516
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7ed216a5c2e0>

In [10]:
# using 'a' variable for length
# train = np.array(pd.read_csv('/content/drive/My Drive/Dataset/train_split_Depression_AVEC2017.csv',delimiter=',',encoding='utf-8'))[:, 0:2]
# for a in range(0, len(train), len(train)//5):
#   X_train = []
#   Y_train = []
#   all_text_files = []
#   all_audio_files = []
#   for i in range(a, min(a+len(train)//5, len(train))):
#     all_text_files.append('/content/drive/My Drive/Dataset/'+str(int(train[i][0]))+ "_TRANSCRIPT.wav")
#     all_audio_files.append('/content/drive/My Drive/Dataset/'+str(int(train[i][0]))+ "_AUDIO.wav")

#   all_transcripts = read_transcript(all_text_files)
#   all_participants_audios =exctract_participant_lines(all_audio_files, all_transcripts)
#   participants_mels = split_audios(all_participants_audios)

#   for row in range(a, min(a+len(train)//5, len(train)))
#       key = int(row[0])
#       binary_value = row[1]
#       # Get the corresponding 3D array from the dictionary
#       array_3d = participants_mels.get(key)
#       if array_3d is not None:
#           n = array_3d.shape[0]  # Get the number of frames for this key
#           frames = array_3d.reshape((-1, 201, 32))  # Flatten 3D array into 2D frames
#           X_train.extend(frames)  # Append frames to X_train
#           Y_train.extend([binary_value] * n)  # Append binary value n times to y_train

#   # Convert lists to NumPy arrays
#   X_train = np.array(X_train)
#   Y_train = np.array(Y_train)
#   model.modelFit(X_train,Y_train, 10)

In [11]:
for key in all_participants_audios.keys():
  print(key)

441
443
444
445
446
447
448
449
454
455
456
457
459
463
464
468
471
473
474
475
478
479
485
486
487
488
491


In [12]:
# testing
test = np.array(pd.read_csv('/content/drive/My Drive/Dataset/test_split_Depression_AVEC2017.csv',delimiter=',',encoding='utf-8'))[:, 0:2]
X_test = []
Y_test = []
test_all_text_files = []
test_all_audio_files = []
for row in test:
  test_all_text_files.append('/content/drive/My Drive/Dataset/'+str(int(row[0]))+ "_TRANSCRIPT.csv")
  test_all_audio_files.append('/content/drive/My Drive/Dataset/'+str(int(row[0]))+ "_AUDIO.wav")

test_all_transcripts = read_transcript(test_all_text_files)
test_all_participants_audios =exctract_participant_lines(test_all_audio_files, test_all_transcripts)
test_participants_mels = split_audios(test_all_participants_audios)

for row in test:
  key = int(row[0])
  binary_value = int(row[1])
  # Get the corresponding 3D array from the dictionary
  array_3d = test_participants_mels.get(key)
  if array_3d is not None:
      n = array_3d.shape[0]  # Get the number of frames for this key
      frames = array_3d.reshape((-1, 201, 32))  # Flatten 3D array into 2D frames
      X_test.append(frames)  # Append frames to X_train
      # Y_test.extend([binary_value] * n)  # Append binary value n times to y_train
      if binary_value==0:
        Y_test.append(np.zeros((n,1)))
      else:
        Y_test.append(np.ones((n,1)))
  else:
    print("here in else")
# Convert lists to NumPy arrays
X_test = np.vstack(X_test)
Y_test = np.vstack(Y_test)
print(X_test)
print(Y_test)

ALLL Files:  ['/content/drive/My Drive/Dataset/300_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/301_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/306_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/308_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/309_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/311_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/314_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/323_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/329_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/332_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/334_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/337_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/349_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/354_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/359_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/361_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/365_TRANSCRIPT.csv', '/content/drive/My Drive/Dataset/373_TRANSCRIPT.csv', '/content/driv

In [15]:
# prediction of cnn model
test_loss, test_accuracy = model.evaluate(X_test, Y_test)
print('Test Loss:', test_loss)
print('Test Accuracy:', test_accuracy)
ones = 0
zeros = 0
for label in Y_test:
  if(label == 0 ):
    zeros+=1
  else:
    ones+=1

print('zeros in test',zeros)
print('ones in test', ones)
from sklearn.metrics import classification_report
# Get predictions from the model
y_pred = model.predict(X_test)

# Convert probabilities to class labels
y_pred_classes = (y_pred > 0.6).astype(int)

# Compute precision, recall, and F1-score
report = classification_report(Y_test, y_pred_classes)

print(report)

Test Loss: 0.6942363977432251
Test Accuracy: 0.43011918663978577
zeros in test 6977
ones in test 5022
              precision    recall  f1-score   support

         0.0       0.58      1.00      0.74      6977
         1.0       0.00      0.00      0.00      5022

    accuracy                           0.58     11999
   macro avg       0.29      0.50      0.37     11999
weighted avg       0.34      0.58      0.43     11999



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
from sklearn.metrics import roc_curve, auc

# Assuming you have trained your model and obtained predicted probabilities
y_pred_proba = 1 / (1 + np.exp(-y_pred))

# Calculate ROC curve and AUC
fpr, tpr, thresholds = roc_curve(Y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

# Find the threshold that maximizes the difference between TPR and FPR
optimal_idx = np.argmax(tpr - fpr)
optimal_threshold = thresholds[optimal_idx]

print("Optimal threshold:", optimal_threshold)


Optimal threshold: 1.6232352
