In [1]:
from utils import *


In [2]:
import numpy as np
import os
import pickle

train_dir = 'FeaturesTrain'
# test_dir = 'FeatruesTest'

# Load every pickled feature record found in the training directory into DATA.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only point this at feature files you produced yourself.
DATA = []
d = train_dir
# os.listdir returns entries in arbitrary order; sorting keeps DATA (and any
# array built from it) in the same order on every run and on every machine.
for fname in sorted(os.listdir(d)):
    path = os.path.join(d, fname)
    if not os.path.isfile(path):
        # Skip sub-directories (e.g. .ipynb_checkpoints) instead of crashing in open().
        continue
    with open(path, 'rb') as f:
        DATA.append(pickle.load(f))

In [3]:
import torch
# Prefer the second GPU (the original setting), but degrade gracefully so the
# notebook still runs on single-GPU and CPU-only machines.
if torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [4]:
from models import NN

In [5]:
from sklearn.model_selection import train_test_split

# x_keys = ['mfccCoeffs', 'chromaCoeffs', 'melspectCoeffs', 'contrastCoeffs', 'tonnetz']



# Feature groups that are scaled, concatenated and flattened into one vector per record.
x_keys = ['mfccCoeffs', 'chromaCoeffs','contrastCoeffs', 'tonnetz','melspectCoeffs']

# One seed for both the split and the over-sampler so the whole cell is reproducible.
SEED = 42

# Y: one label per record, looked up in emotion_labels (from utils) and flattened to 1-D.
Y = np.asarray([emotion_labels[rec['emotion']] for rec in DATA]).ravel()
# X: one row per record; each feature group goes through its own entry of
# standard_scale (from utils) before being concatenated and flattened.
X = np.stack([
    np.concatenate([standard_scale[key](rec[key]) for key in x_keys]).flatten()
    for rec in DATA
])
# Stratified 80/20 split; random_state makes the partition repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    X, Y, train_size=0.8, stratify=Y, random_state=SEED
)


from imblearn.over_sampling import RandomOverSampler 

# Over-sample the training split only (validation is left untouched).
# Row indices are resampled instead of the feature rows themselves, then used
# to index x_train / y_train, so the sampler never has to copy the feature matrix.
idxs = np.arange(len(x_train)).reshape(-1,1)
ros = RandomOverSampler(random_state=SEED)
rs_idx = ros.fit_resample(idxs, y_train)[0].flatten()
x_train = x_train[rs_idx]
y_train = y_train[rs_idx]


In [6]:
from torch.utils.data import Dataset, DataLoader
# Wrap the numpy splits in the project's AudioData dataset (imported from utils).
AD_train = AudioData(x_train, y_train)
AD_val = AudioData(x_val, y_val)

# Training batches are reshuffled every epoch; validation is only ever
# evaluated, so it is iterated in a fixed order to keep evaluation deterministic.
trainloader = DataLoader(AD_train, batch_size=8, shuffle=True)
valloader = DataLoader(AD_val, batch_size=8, shuffle=False)



In [8]:
import torch.nn as nn

# Instantiate the network from models.NN with the number of columns of X
# (presumably its input width - confirm against models.NN) and move it to
# the selected device.
net = NN(X.shape[1])
net = net.to(device)

# Cross-entropy objective, optimised with Adam (small step size, L2 weight decay).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    net.parameters(),
    weight_decay=0.1,
    lr=1e-5,
)

In [9]:
from utils import EarlyStopping
# Stop once the validation loss has failed to improve for 10 consecutive epochs
# (EarlyStopping comes from utils; it is called with the epoch's val loss below).
early_stopping = EarlyStopping(patience=10)


n_epochs = 100

# Per-epoch metrics, appended once per epoch.
history = {'train_loss': [],
           'train_acc': [],
           'val_loss': [],
           'val_acc': []}

for epoch in range(n_epochs):  # loop over the dataset multiple times
    # ---- training pass ----
    # Explicitly enter training mode: the evaluation below (and the later
    # confusion-matrix cell) switch the model to eval mode, and layers such as
    # dropout / batch-norm behave differently in the two modes.
    net.train()
    correct = 0
    total = 0
    train_loss = 0.0
    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs.float())
        # NOTE(review): labels are passed to the loss as delivered by the
        # dataset, while accuracy uses labels.argmax(1) - this assumes the
        # dataset yields one-hot / probability targets; confirm in AudioData.
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # loss.item() is a batch mean; weight it by batch size so the epoch
        # figure is a true per-sample average even when the last batch is short.
        train_loss += loss.item()*len(inputs)
        predicted = outputs.argmax(1)

        total += labels.size(0)
        correct += (predicted == labels.argmax(1)).sum().item()

    train_loss = train_loss/total
    train_acc = correct/total
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)

    # ---- validation pass ----
    # Evaluate in eval mode so the validation loss that drives early stopping
    # is not perturbed by training-only layer behaviour.
    net.eval()
    correct = 0
    total = 0
    val_loss = 0.0

    with torch.no_grad():
        for inputs, labels in valloader:
            labels = labels.argmax(1)
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = net(inputs.float())
            predicted = outputs.argmax(1)

            loss = criterion(outputs, labels)
            val_loss += loss.item()*len(inputs)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = correct/total
    val_loss /= total
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)


    print(f'Epoch: {epoch}\ttrain loss: {train_loss:.4f}\ttrain acc: {train_acc:.4f}\tval loss: {val_loss:.4f}\tval acc: {val_acc:.4f}')

    early_stopping(val_loss)
    if early_stopping.early_stop:
        break

print('Finished Training')



Epoch: 0	train loss: 1.9840	train acc: 0.3037	val loss: 1.9343	val acc: 0.3458
Epoch: 1	train loss: 1.8630	train acc: 0.4521	val loss: 1.8678	val acc: 0.4083
Epoch: 2	train loss: 1.7567	train acc: 0.5791	val loss: 1.8338	val acc: 0.4167
Epoch: 3	train loss: 1.6440	train acc: 0.6533	val loss: 1.7389	val acc: 0.4750
Epoch: 4	train loss: 1.5209	train acc: 0.7080	val loss: 1.6701	val acc: 0.5458
Epoch: 5	train loss: 1.3871	train acc: 0.7559	val loss: 1.5886	val acc: 0.5500
Epoch: 6	train loss: 1.2196	train acc: 0.8213	val loss: 1.5232	val acc: 0.5500
Epoch: 7	train loss: 1.0718	train acc: 0.8633	val loss: 1.4924	val acc: 0.5250
Epoch: 8	train loss: 0.9147	train acc: 0.9033	val loss: 1.4013	val acc: 0.5542
Epoch: 9	train loss: 0.7909	train acc: 0.9336	val loss: 1.3844	val acc: 0.5542
Epoch: 10	train loss: 0.6720	train acc: 0.9414	val loss: 1.3920	val acc: 0.5458
INFO: Early stopping counter 1 of 10
Epoch: 11	train loss: 0.5871	train acc: 0.9580	val loss: 1.3448	val acc: 0.5625
Epoch: 12	tra

In [10]:

from sklearn.metrics import confusion_matrix

# Evaluate the trained network on the validation split and report the
# confusion matrix (raw counts, then row-normalised) plus overall accuracy.

# Switch to eval mode once, up front, rather than on every batch.
net.eval()

y_pred = []
y_true = []
with torch.no_grad():
    for inputs, labels in valloader:
        # Targets stay on the CPU; they are reduced to class indices with argmax(1).
        labels = labels.argmax(1)
        inputs = inputs.to(device)
        outputs = net(inputs.float())
        # Bring predictions back to the CPU so they can be concatenated with numpy.
        predicted = outputs.argmax(1).cpu()
        y_pred.append(predicted)
        y_true.append(labels)

y_true, y_pred = np.concatenate(y_true), np.concatenate(y_pred)

cm = confusion_matrix(y_true, y_pred)

print(cm)
# Divide each row by its total so every row shows the per-true-class distribution.
print(cm/cm.sum(1).reshape(-1,1))

# Correct predictions lie on the diagonal.
print(f'Val accuracy: {np.diag(cm).sum()/cm.sum()}')


[[25  0  0  1  4  1  1  0]
 [ 0 21  3  0  1  2  3  2]
 [ 3  0 23  2  0  2  1  1]
 [ 2  0  0 23  1  0  1  5]
 [ 2  1  0  7 18  1  2  1]
 [ 0  5  0  0  3  7  0  1]
 [ 0  4  3  0  7  3 13  2]
 [ 0  0  6  4  5  0  0 17]]
[[0.78125 0.      0.      0.03125 0.125   0.03125 0.03125 0.     ]
 [0.      0.65625 0.09375 0.      0.03125 0.0625  0.09375 0.0625 ]
 [0.09375 0.      0.71875 0.0625  0.      0.0625  0.03125 0.03125]
 [0.0625  0.      0.      0.71875 0.03125 0.      0.03125 0.15625]
 [0.0625  0.03125 0.      0.21875 0.5625  0.03125 0.0625  0.03125]
 [0.      0.3125  0.      0.      0.1875  0.4375  0.      0.0625 ]
 [0.      0.125   0.09375 0.      0.21875 0.09375 0.40625 0.0625 ]
 [0.      0.      0.1875  0.125   0.15625 0.      0.      0.53125]]
Val accuracy: 0.6125


In [11]:
# Persist the trained weights as CPU tensors so the checkpoint loads on any machine.
# torch.save does not create missing directories, so make sure the target exists.
os.makedirs('models', exist_ok=True)
torch.save(net.cpu().state_dict(), 'models/nn.pt')
# net.cpu() moves the live model in place; put it back on the working device so
# cells that send inputs to `device` keep working after the save.
net.to(device);