In [1]:
import numpy as np
import os
import pickle
from utils import *

train_dir = 'FeaturesTrain'
# test_dir = 'FeatruesTest'
# NOTE(review): 'FeatruesTest' looks like a typo of 'FeaturesTest' -- confirm
# against the directory name on disk before re-enabling the line above.

# Directory whose pickled feature files are loaded into DATA.
d = train_dir

# Each element is whatever object was pickled into one file; later cells index
# these objects by key (e.g. 'emotion' and the feature names).
# NOTE(security): pickle.load can execute arbitrary code -- only load files
# that were produced by a trusted feature-extraction run.
DATA = []
# Sort the listing: os.listdir returns entries in arbitrary order, which would
# otherwise make the sample order (and any seeded split) differ between runs.
for i in sorted(os.listdir(d)):
    path = os.path.join(d, i)
    # Skip sub-directories such as .ipynb_checkpoints; open() would fail on them.
    if not os.path.isfile(path):
        continue
    with open(path, 'rb') as f:
        DATA.append(pickle.load(f))



In [2]:
import torch
from utils import AudioData

# Prefer the second GPU (the original hard-coded choice), but fall back to the
# first GPU or the CPU so the notebook still runs on machines with fewer devices.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [12]:
from sklearn.model_selection import train_test_split
from utils import *


# Feature groups that are scaled and concatenated (in this order) into a single
# vector per sample.
x_keys = ['mfccCoeffs', 'chromaCoeffs','contrastCoeffs', 'tonnetz','melspectCoeffs']

# One label per sample, as a flat 1-D array.
Y = np.array([emotion_labels[i['emotion']] for i in DATA]).ravel()

# Each sample becomes a (1, n_features) row, so X has shape (n_samples, 1, n_features).
X = np.stack([
    np.concatenate([standard_scale[j](i[j]) for j in x_keys]).reshape(1, -1)
    for i in DATA
])

# Stratified 80/20 split. random_state is fixed so the split is reproducible
# from run to run.
x_train, x_val, y_train, y_val = train_test_split(
    X, Y, train_size=0.8, stratify=Y, random_state=42
)


from imblearn.over_sampling import RandomOverSampler 

# Balance the training classes by random oversampling. The sampler is fitted on
# a column of row indices rather than on x_train itself, and the resampled
# indices are then used to gather the matching rows from x_train / y_train.
idxs = np.arange(len(x_train))[:, None]
ros = RandomOverSampler(random_state=42)
rs_idx, y_idx = ros.fit_resample(idxs, y_train)

# Collapse the (n, 1) index column back to 1-D before using it for indexing.
rs_idx = rs_idx.ravel()
x_train, y_train = x_train[rs_idx], y_train[rs_idx]



In [13]:
from torch.utils.data import Dataset, DataLoader
# Mini-batch size shared by the training and validation loaders.
BATCH_SIZE = 8

# Wrap the numpy splits in the project's AudioData dataset.
AD_train = AudioData(x_train, y_train)
AD_val = AudioData(x_val, y_val)

# Shuffle only the training data; validation is iterated in a fixed order so
# evaluation is deterministic.
trainloader = DataLoader(AD_train, batch_size=BATCH_SIZE, shuffle=True)
valloader = DataLoader(AD_val, batch_size=BATCH_SIZE, shuffle=False)



In [14]:
from models import ConvNet
import torch.nn as nn
# ConvNet is constructed with the size of X's second axis; what that argument
# means is defined in models.ConvNet (not shown here).
CNN = ConvNet(X.shape[1])
CNN = CNN.to(device)

# Cross-entropy loss over the model outputs, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    CNN.parameters(),
    lr=1e-5,
    weight_decay=0.1,
)


In [15]:

# Train CNN for up to n_epochs, validating after every epoch and stopping early
# when the validation loss stops improving.
#
# EarlyStopping is not imported explicitly in this notebook; presumably it comes
# from `from utils import *` -- TODO confirm.
early_stopping = EarlyStopping(patience=10)

n_epochs = 100

# Per-epoch metrics, one entry appended per completed epoch.
history = {'train_loss': [],
           'train_acc': [],
           'val_loss': [],
           'val_acc': []}

for epoch in range(n_epochs):  # loop over the dataset multiple times
    # ---- training pass ----
    # Explicitly select training mode: a later cell calls eval() on this same
    # model, so re-running this cell must not inherit that mode.
    CNN.train()
    correct = 0
    total = 0
    train_loss = 0.0
    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        # Labels are passed to the loss as-is (2-D; argmax(1) below recovers
        # the class index) -- presumably one-hot rows from AudioData.
        outputs = CNN(inputs.float())
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by batch size so the epoch value is a
        # true per-sample average even when the last batch is smaller.
        train_loss += loss.item()*len(inputs)
        predicted = outputs.argmax(1)

        total += labels.size(0)
        correct += (predicted == labels.argmax(1)).sum().item()

    train_loss = train_loss/total
    train_acc = correct/total
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)

    # ---- validation pass ----
    # Evaluation mode, so layers that behave differently at train time (e.g.
    # dropout / batch-norm, if the model has any) use inference behaviour.
    CNN.eval()
    correct = 0
    total = 0
    val_loss = 0.0

    with torch.no_grad():
        for inputs, labels in valloader:
            # Here the loss is given class indices rather than the 2-D labels.
            labels = labels.argmax(1)
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = CNN(inputs.float())
            predicted = outputs.argmax(1)

            loss = criterion(outputs, labels)
            val_loss += loss.item()*len(inputs)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = correct/total
    val_loss /= total
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    print(f'Epoch: {epoch}\ttrain loss: {train_loss:.4f}\ttrain acc: {train_acc:.4f}\tval loss: {val_loss:.4f}\tval acc: {val_acc:.4f}')

    # Feed the epoch's validation loss to the stopper and stop once it signals.
    early_stopping(val_loss)
    if early_stopping.early_stop:
        break

print('Finished Training')



Epoch: 0	train loss: 1.5900	train acc: 0.4346	val loss: 1.2776	val acc: 0.5792
Epoch: 1	train loss: 0.9042	train acc: 0.7393	val loss: 1.0980	val acc: 0.6375
Epoch: 2	train loss: 0.5191	train acc: 0.8896	val loss: 1.0140	val acc: 0.6667
Epoch: 3	train loss: 0.3145	train acc: 0.9688	val loss: 0.9098	val acc: 0.6958
Epoch: 4	train loss: 0.1876	train acc: 0.9902	val loss: 0.8651	val acc: 0.7125
Epoch: 5	train loss: 0.1501	train acc: 0.9902	val loss: 0.8574	val acc: 0.7250
Epoch: 6	train loss: 0.1171	train acc: 1.0000	val loss: 0.8013	val acc: 0.7167
Epoch: 7	train loss: 0.1143	train acc: 0.9980	val loss: 0.8478	val acc: 0.6917
INFO: Early stopping counter 1 of 10
Epoch: 8	train loss: 0.0891	train acc: 0.9990	val loss: 0.8217	val acc: 0.7125
INFO: Early stopping counter 2 of 10
Epoch: 9	train loss: 0.0822	train acc: 1.0000	val loss: 0.8207	val acc: 0.6958
INFO: Early stopping counter 3 of 10
Epoch: 10	train loss: 0.0831	train acc: 1.0000	val loss: 0.7552	val acc: 0.7375
Epoch: 11	train los

In [16]:

from sklearn.metrics import confusion_matrix

# Evaluate the trained model on the validation set and report the confusion
# matrix (raw counts and row-normalised) plus overall accuracy.
net = CNN
# Switch to evaluation mode once, instead of on every batch.
net.eval()

y_pred = []
y_true = []

with torch.no_grad():
    for inputs, labels in valloader:
        # Labels are 2-D here; argmax over axis 1 recovers the class index.
        labels = labels.argmax(1)
        # Flatten each sample and re-insert a singleton axis at dim 1, giving
        # (batch, 1, features).
        # NOTE(review): the training loop feeds inputs without this reshape --
        # confirm both paths give the model the same shape.
        inputs = inputs.to(device).reshape(inputs.shape[0], -1).unsqueeze(1)
        outputs = net(inputs.float())
        y_pred.append(outputs.argmax(1).cpu())
        y_true.append(labels)

y_true, y_pred = np.concatenate(y_true), np.concatenate(y_pred)

cm = confusion_matrix(y_true, y_pred)

# Row-normalise (fraction of each true class assigned to each prediction).
# Clamp the divisor so a class that is predicted but never true yields a row
# of zeros instead of a divide-by-zero warning and NaNs.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = cm / np.maximum(row_totals, 1)

print(cm)
print(cm_normalized)

print(f'Val accuracy: {np.diag(cm).sum()/cm.sum()}')



[[27  0  2  0  1  2  0  0]
 [ 1 25  0  0  1  3  2  0]
 [ 1  0 23  3  1  0  3  1]
 [ 0  0  2 21  3  1  4  1]
 [ 2  1  0  1 24  2  0  2]
 [ 0  1  0  0  0 14  1  0]
 [ 0  3  0  2  2  5 19  1]
 [ 1  0  0  5  1  3  0 22]]
[[0.84375 0.      0.0625  0.      0.03125 0.0625  0.      0.     ]
 [0.03125 0.78125 0.      0.      0.03125 0.09375 0.0625  0.     ]
 [0.03125 0.      0.71875 0.09375 0.03125 0.      0.09375 0.03125]
 [0.      0.      0.0625  0.65625 0.09375 0.03125 0.125   0.03125]
 [0.0625  0.03125 0.      0.03125 0.75    0.0625  0.      0.0625 ]
 [0.      0.0625  0.      0.      0.      0.875   0.0625  0.     ]
 [0.      0.09375 0.      0.0625  0.0625  0.15625 0.59375 0.03125]
 [0.03125 0.      0.      0.15625 0.03125 0.09375 0.      0.6875 ]]
Val accuracy: 0.7291666666666666


In [17]:
# torch.save does not create missing parent directories, so make sure the
# output folder exists first.
os.makedirs('models', exist_ok=True)
# NOTE: net.cpu() moves the model to the CPU in place; move it back with
# net.to(device) before re-running the training or evaluation cells.
torch.save(net.cpu().state_dict(), 'models/cnn.pt')