In [1]:
from utils import *
import numpy as np
import os
import pickle

train_dir = 'FeaturesTrain'
# test_dir = 'FeatruesTest'


# Load every pickled feature record found in `train_dir` into DATA.
# - Entries are visited in sorted order: os.listdir() returns names in
#   arbitrary order, which would make the sample order (and everything
#   derived from it) differ between machines and runs.
# - Non-file entries (e.g. checkpoint folders created by Jupyter) are skipped
#   instead of crashing open().
# NOTE(security): pickle.load can execute arbitrary code; only point
# `train_dir` at files you produced yourself.
DATA = []
for fname in sorted(os.listdir(train_dir)):
    fpath = os.path.join(train_dir, fname)
    if not os.path.isfile(fpath):
        continue
    with open(fpath, 'rb') as f:
        DATA.append(pickle.load(f))

In [2]:
import torch
# Pick the compute device. The second GPU (index 1) is preferred when it
# exists; otherwise fall back to the first GPU, and to the CPU when CUDA is
# not available at all, so the notebook still runs on other machines.
if torch.cuda.is_available():
    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    device = torch.device('cpu')

In [3]:
import torch.nn as nn
from sklearn.model_selection import train_test_split

class BiRNN(nn.Module):
    """Bidirectional recurrent classifier over a sequence of feature vectors.

    A bidirectional LSTM encodes the input sequence; the LSTM output at the
    last time step is passed through a ReLU-activated linear layer and a final
    linear layer that produces ``num_classes`` scores.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Size of the output vector.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # The attribute is called `gru` but holds an LSTM. The name is kept on
        # purpose: renaming it would change the state_dict keys.
        self.gru = nn.LSTM(input_size, hidden_size, num_layers,
                           batch_first=True, bidirectional=True)
        # The LSTM output has 2 * hidden_size features (forward + backward).
        self.fc1 = nn.Linear(hidden_size * 2, hidden_size * 2)
        self.fc2 = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        """Return class scores for a batch of sequences.

        Args:
            x: Tensor laid out as (batch, seq_len, input_size), as required by
                the ``batch_first=True`` LSTM.

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # No explicit initial state is passed: the LSTM then starts from zeros
        # on the same device as its input, so no global `device` is needed.
        out, _ = self.gru(x)
        # Classify from the LSTM output at the last time step.
        out = torch.relu(self.fc1(out[:, -1, :]))
        return self.fc2(out)

    

In [4]:
from sklearn.model_selection import train_test_split

# x_keys = ['mfccCoeffs', 'chromaCoeffs', 'melspectCoeffs', 'contrastCoeffs', 'tonnetz']



# Feature groups that are scaled and concatenated, in this order, for every
# sample.
x_keys = ['mfccCoeffs', 'chromaCoeffs','contrastCoeffs', 'tonnetz', 'melspectCoeffs']

# One seed shared by the split and the oversampler so that the train/val
# partition is reproducible across runs (the split was previously unseeded).
SEED = 42

# Y: one label per sample, looked up through `emotion_labels`.
Y = np.array([emotion_labels[sample['emotion']] for sample in DATA]).flatten()
# X: per-sample feature groups are scaled with their own `standard_scale`
# entry, concatenated, then transposed. X.shape[2] is later used as the
# network input size.
X = np.array([
    np.concatenate([standard_scale[key](sample[key]) for key in x_keys]).T
    for sample in DATA
])
x_train, x_val, y_train, y_val = train_test_split(
    X, Y, train_size=0.8, stratify=Y, random_state=SEED)


from imblearn.over_sampling import RandomOverSampler

# Oversample the training split only. The sampler works on row indices rather
# than on the 3-D feature array; the resampled indices are then used to gather
# both features and labels.
idxs = np.arange(len(x_train)).reshape(-1, 1)
ros = RandomOverSampler(random_state=SEED)
rs_idx, y_idx = ros.fit_resample(idxs, y_train)
rs_idx = rs_idx.flatten()
x_train = x_train[rs_idx]
y_train = y_train[rs_idx]

In [5]:
from torch.utils.data import Dataset, DataLoader
# Mini-batch size shared by all loaders.
BATCH_SIZE = 32

# Wrap the full set and the train/validation splits in AudioData datasets.
AD_all = AudioData(X, Y)
AD_train = AudioData(x_train, y_train)
AD_val = AudioData(x_val, y_val)

dataloader = DataLoader(AD_all, batch_size=BATCH_SIZE, shuffle=True)
trainloader = DataLoader(AD_train, batch_size=BATCH_SIZE, shuffle=True)
# Validation only accumulates metrics, so the batch order is irrelevant;
# keeping it fixed makes the evaluation deterministic.
valloader = DataLoader(AD_val, batch_size=BATCH_SIZE, shuffle=False)


In [6]:
from models import BiRNN

# Model hyper-parameters.
hidden_size = 128
num_layers = 1
num_classes = 8
# Per-time-step feature count, taken from the last axis of X.
input_size = X.shape[2]

# NOTE(review): the `from models import BiRNN` directly above rebinds the
# name, so this instantiates the imported class, not the BiRNN defined earlier
# in this notebook.
net = BiRNN(input_size, hidden_size, num_layers, num_classes)
net = net.to(device)

# Adam optimiser over all network parameters, with a cross-entropy objective.
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()



In [7]:
from utils import EarlyStopping
early_stopping = EarlyStopping(patience=10, reset_patience=3)


n_epochs = 100

# Per-epoch metrics, appended once per epoch.
history = {'train_loss': [],
           'train_acc': [],
           'val_loss': [],
           'val_acc': []}


def _run_epoch(loader, training):
    """Run one pass over `loader` and return (mean loss, accuracy).

    Uses the notebook-level `net`, `criterion`, `optimizer` and `device`.

    Args:
        loader: Iterable yielding (inputs, labels) batches. Labels are
            compared through ``labels.argmax(1)``, i.e. they are expected to
            carry one score per class.
        training: When True the network is put in train mode and the
            optimizer is stepped on every batch; when False the network is put
            in eval mode and gradients are disabled.

    Returns:
        Tuple ``(loss, accuracy)`` averaged over all samples seen.
    """
    # Put the network in the right mode explicitly: another cell may have
    # left it in eval mode, and mode-dependent layers behave differently
    # between training and validation.
    net.train(training)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs = inputs.to(device).float()
            labels = labels.to(device)

            if training:
                optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # criterion returns a batch mean; weight it by the batch size so
            # the epoch average is per sample.
            loss_sum += loss.item() * len(inputs)
            n_seen += labels.size(0)
            n_correct += (outputs.argmax(1) == labels.argmax(1)).sum().item()

    return loss_sum / n_seen, n_correct / n_seen


for epoch in range(n_epochs):  # loop over the dataset multiple times
    train_loss, train_acc = _run_epoch(trainloader, training=True)
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)

    val_loss, val_acc = _run_epoch(valloader, training=False)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    print(f'Epoch: {epoch}\ttrain loss: {train_loss:.4f}\ttrain acc: {train_acc:.4f}\tval loss: {val_loss:.4f}\tval acc: {val_acc:.4f}')

    # Stop once the validation loss has stopped improving.
    early_stopping(val_loss)
    if early_stopping.early_stop:
        break

print('Finished Training')



Epoch: 0	train loss: 2.0778	train acc: 0.1191	val loss: 2.0691	val acc: 0.1750
Epoch: 1	train loss: 2.0521	train acc: 0.2227	val loss: 2.0222	val acc: 0.2750
Epoch: 2	train loss: 1.9470	train acc: 0.2578	val loss: 1.8924	val acc: 0.2417
Epoch: 3	train loss: 1.8121	train acc: 0.2949	val loss: 1.8243	val acc: 0.2583
Epoch: 4	train loss: 1.7417	train acc: 0.3408	val loss: 1.7522	val acc: 0.2958
Epoch: 5	train loss: 1.7257	train acc: 0.3320	val loss: 1.7621	val acc: 0.3292
INFO: Early stopping counter 1 of 10
Epoch: 6	train loss: 1.6523	train acc: 0.3672	val loss: 1.7056	val acc: 0.3458
Epoch: 7	train loss: 1.6184	train acc: 0.3750	val loss: 1.7138	val acc: 0.3417
INFO: Early stopping counter 2 of 10
Epoch: 8	train loss: 1.5930	train acc: 0.3887	val loss: 1.7052	val acc: 0.3792
Epoch: 9	train loss: 1.5542	train acc: 0.4102	val loss: 1.6091	val acc: 0.3792
Epoch: 10	train loss: 1.5403	train acc: 0.4141	val loss: 1.7343	val acc: 0.3875
INFO: Early stopping counter 3 of 10
Epoch: 11	train los

In [8]:
from sklearn.metrics import confusion_matrix

# Evaluate on the validation split and report a confusion matrix.
# Switch to eval mode once, instead of on every batch.
net.eval()

y_pred = []
y_true = []
with torch.no_grad():
    for inputs, labels in valloader:
        outputs = net(inputs.to(device).float())
        # Collect class indices as numpy arrays on the CPU.
        y_pred.append(outputs.argmax(1).cpu().numpy())
        y_true.append(labels.argmax(1).cpu().numpy())

y_true, y_pred = np.concatenate(y_true), np.concatenate(y_pred)

cm = confusion_matrix(y_true, y_pred)

# Raw counts, then each row normalised by the number of true samples of that
# class.
print(cm)
print(cm/cm.sum(1).reshape(-1,1))

print(f'Val accuracy: {np.diag(cm).sum()/cm.sum()}')

[[22  0  3  2  1  0  1  3]
 [ 0 16  0  1  0  2 12  1]
 [ 2  1 18  2  1  0  1  7]
 [ 1  0  0 18  3  2  2  6]
 [ 2  1  2 10  5  1  5  6]
 [ 0  2  0  0  0  6  4  4]
 [ 4  4  0  6  1  4  9  4]
 [ 1  0  1  4  2  0  2 22]]
[[0.6875  0.      0.09375 0.0625  0.03125 0.      0.03125 0.09375]
 [0.      0.5     0.      0.03125 0.      0.0625  0.375   0.03125]
 [0.0625  0.03125 0.5625  0.0625  0.03125 0.      0.03125 0.21875]
 [0.03125 0.      0.      0.5625  0.09375 0.0625  0.0625  0.1875 ]
 [0.0625  0.03125 0.0625  0.3125  0.15625 0.03125 0.15625 0.1875 ]
 [0.      0.125   0.      0.      0.      0.375   0.25    0.25   ]
 [0.125   0.125   0.      0.1875  0.03125 0.125   0.28125 0.125  ]
 [0.03125 0.      0.03125 0.125   0.0625  0.      0.0625  0.6875 ]]
Val accuracy: 0.48333333333333334


In [9]:
# Save a CPU copy of the weights without moving the live model.
# `net.cpu()` would relocate `net` itself, so re-running the training or
# evaluation cells afterwards would hit a device mismatch with inputs sent to
# `device`. state_dict() returns a fresh mapping, so rebinding its values
# leaves the model untouched.
cpu_state = net.state_dict()
for name, tensor in list(cpu_state.items()):
    cpu_state[name] = tensor.cpu()
torch.save(cpu_state, 'models/rnn.pt')

In [10]:
# Display the layer summary of the network that was trained and saved above.
# NOTE(review): the recorded output lists `lstm1`/`lstm2` layers, so it
# presumably comes from the `BiRNN` imported from `models`, not from the class
# defined earlier in this notebook (which has a single `gru` layer) — confirm.
net

BiRNN(
  (lstm1): LSTM(173, 128, batch_first=True, bidirectional=True)
  (lstm2): LSTM(256, 256, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=512, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=8, bias=True)
)