<a href="https://colab.research.google.com/github/tabaraei/depression-detection/blob/master/depression_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab runtime so that files stored on the
# Drive are reachable through the local filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import os
import wave
import numpy as np
import librosa
import tensorflow as tf
import LOUPE_Keras.loupe_keras as lpk

In [None]:
# Root folder of the corpus on the mounted Drive; every entry inside it is
# treated as one sample folder by the loading loop.
DATA_DIR = '/content/drive/MyDrive/Data/DepressionDetection/EATD-Corpus'

# Feature / label accumulators for the two splits (four independent lists).
X_train, y_train = [], []
X_test, y_test = [], []

def load_wave(data_path):
    """Read a WAV file and return its samples together with basic metadata.

    Args:
        data_path: Path of the ``.wav`` file to read.

    Returns:
        A 5-tuple ``(data_file, wave_data, length, nframes, sr)``:

        * ``data_file`` -- the ``wave.Wave_read`` object. It is already closed
          when returned and is kept in the tuple only so existing callers
          that unpack five values keep working.
        * ``wave_data`` -- 1-D ``float`` array holding the decoded samples.
        * ``length`` -- duration in seconds (``nframes / sr``).
        * ``nframes`` -- number of audio frames in the file.
        * ``sr`` -- frame rate reported by the file header.

    Raises:
        wave.Error: If the file is not a WAV file the ``wave`` module can parse.
        OSError: If the file cannot be opened.
    """
    # The context manager closes the underlying file as soon as the frames are
    # read; without it every call left an open file handle behind.
    with wave.open(data_path) as data_file:
        sr = data_file.getframerate()
        nframes = data_file.getnframes()
        raw_frames = data_file.readframes(nframes)

    # NOTE(review): np.short decodes the bytes as 16-bit integers and channels
    # are not de-interleaved, so this presumably expects 16-bit mono
    # recordings -- confirm against the corpus.
    wave_data = np.frombuffer(raw_frames, dtype=np.short).astype(float)
    length = nframes / sr
    return data_file, wave_data, length, nframes, sr

def wav2mel(wave_data, sr, n_mels=80):
    """Convert a waveform into a log-mel spectrogram.

    Args:
        wave_data: 1-D float array of audio samples.
        sr: Sampling rate of ``wave_data``.
        n_mels: Number of mel bands; defaults to 80, the value that used to be
            hard-coded.

    Returns:
        ``float32`` array of natural-log mel values, transposed so that time
        frames run along axis 0 and mel bands along axis 1.
    """
    melspec = librosa.feature.melspectrogram(y=wave_data, sr=sr, n_mels=n_mels)
    melspec = melspec.astype(np.float32).T
    # Clamp to 1e-6 before taking the log so empty bins cannot produce -inf.
    return np.log(np.maximum(1e-6, melspec))

# The three recordings read from every sample folder, in feature order.
RECORDING_NAMES = ('positive_out.wav', 'neutral_out.wav', 'negative_out.wav')

# Number of sample folders to load. The loop used to end with an unconditional
# `break`, i.e. only the first listed folder was ever loaded; the default of 1
# keeps that behaviour. Set to None to load the whole corpus.
MAX_SAMPLES = 1


def _sample_features(sample_dir):
    """Return the log-mel features of the three recordings in ``sample_dir``.

    The result is a list with one ``wav2mel`` array per entry of
    ``RECORDING_NAMES``, in that order.
    """
    features = []
    for recording in RECORDING_NAMES:
        wave_file, wave_data, _, _, sr = load_wave(f'{sample_dir}/{recording}')
        # Only the decoded samples are needed; release the file handle now
        # (closing an already closed Wave_read is a no-op).
        wave_file.close()
        if wave_data.shape[0] < 1:
            # Empty recording: substitute 5 seconds of a small constant signal
            # so that a spectrogram can still be computed.
            wave_data = np.array([1e-4] * sr * 5)
        features.append(wav2mel(wave_data, sr))
    return features


loaded_samples = 0
for sample in os.listdir(DATA_DIR):
    sample_dir = f'{DATA_DIR}/{sample}'
    if not os.path.isdir(sample_dir):
        # Stray files next to the sample folders would otherwise crash the load.
        continue

    with open(f'{sample_dir}/new_label.txt') as label:
        target = float(label.readline())

    audio_features = _sample_features(sample_dir)
    # Disabled binarisation of the score, kept for reference:
    # targets.append(1 if target >= 53 else 0)

    if sample.startswith('t'):
        # Training Data
        X_train.append(audio_features)
        y_train.append(target)
    else:
        # Test Data
        X_test.append(audio_features)
        y_test.append(target)

    loaded_samples += 1
    if MAX_SAMPLES is not None and loaded_samples >= MAX_SAMPLES:
        break

# Show the feature shapes of one loaded sample. Prefer the test split (as
# before) but fall back to the training split instead of raising IndexError
# when no test sample has been loaded.
preview_split = X_test or X_train
if preview_split:
    for recording_features in preview_split[0]:
        print(recording_features.shape)

(1068, 80)
(1201, 80)
(930, 80)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define the CNN architecture
class CNN(nn.Module):
    """Small image classifier: three conv + max-pool stages and a two-layer head.

    Each stage halves the spatial size, and the head expects 128 feature maps
    of size 4x4, which is what a 32x32 input produces.

    Args:
        num_classes: Number of output logits.
        in_channels: Number of channels of the input images; defaults to 3,
            the value that used to be hard-coded.
    """

    # (channels, height, width) the classifier head is sized for.
    _FEATURE_SHAPE = (128, 4, 4)

    def __init__(self, num_classes=10, in_channels=3):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Image tensor whose spatial size reduces to 4x4 after the three
                poolings (e.g. 32x32).

        Returns:
            Tensor of logits with ``num_classes`` columns.

        Raises:
            ValueError: If the feature maps reaching the classifier head are
                not 128x4x4, i.e. the input has an unsupported spatial size.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))

        # Without this guard, view(-1, 2048) silently folds a wrong-sized
        # feature map into extra batch rows (a 64x64 input would quadruple the
        # batch) instead of failing.
        feature_shape = tuple(x.shape[-3:])
        if feature_shape != self._FEATURE_SHAPE:
            raise ValueError(
                'CNN expects feature maps of shape %s before the classifier '
                '(e.g. from 32x32 inputs), got %s'
                % (self._FEATURE_SHAPE, feature_shape)
            )

        x = x.view(-1, 128 * 4 * 4)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# Input pipeline: convert images to tensors, then normalise every channel
# with mean 0.5 and standard deviation 0.5.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
transform = transforms.Compose([to_tensor, normalize])

# Settings shared by the two CIFAR-10 splits and their loaders.
cifar_kwargs = dict(root='./data', download=True, transform=transform)
loader_kwargs = dict(batch_size=4, num_workers=2)

# Training split: reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(train=True, **cifar_kwargs)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_kwargs)

# Test split: kept in its stored order.
testset = torchvision.datasets.CIFAR10(train=False, **cifar_kwargs)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)

# Class names; the model below gets one output per entry.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Use the first GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# One output logit per entry of `classes`.
model = CNN(num_classes=len(classes))
model = model.to(device)

# Cross-entropy loss, optimised with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.001)

# Train for two passes over the training set.
for epoch in range(2):
    running_loss = 0.0

    # `step` is the 1-based mini-batch counter within the current epoch.
    for step, (inputs, labels) in enumerate(trainloader, start=1):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # One optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Report the mean loss of the last 2000 mini-batches, then restart the tally.
        if step % 2000 == 0:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, step, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

# Measure accuracy on the test loader; no gradients are needed here.
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # The predicted class is the index of the largest logit in each row.
        predicted = outputs.max(dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 56997282.86it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
[1,  2000] loss: 2.111
[1,  4000] loss: 1.692
[1,  6000] loss: 1.494
[1,  8000] loss: 1.380
[1, 10000] loss: 1.318
[1, 12000] loss: 1.228
[2,  2000] loss: 1.105
[2,  4000] loss: 1.048
[2,  6000] loss: 1.006
[2,  8000] loss: 0.959
[2, 10000] loss: 0.945
[2, 12000] loss: 0.907
Finished Training
Accuracy of the network on the 10000 test images: 68 %
