In [1]:
# Format Data for CNN and NN

import numpy as np
import os
import pickle
from utils import *
import torch

train_dir = 'FeaturesTrain' # samples used for training and validation
# test_dir = 'FeatruesTest'  # NOTE(review): spelled "Featrues" here; confirm the on-disk directory name before enabling


# Load one pickled feature record per file found in `train_dir`.
# - Entries are visited in sorted order: os.listdir() returns names in
#   arbitrary order, so sorting makes DATA (and X / Y) reproducible.
# - Non-file entries (e.g. a `.ipynb_checkpoints` directory created by
#   Jupyter) are skipped instead of crashing open().
# SECURITY: pickle.load can execute arbitrary code; only point `train_dir`
# at feature files you produced yourself.
DATA = []
d = train_dir
for fname in sorted(os.listdir(d)):
    path = os.path.join(d, fname)
    if not os.path.isfile(path):
        continue
    with open(path, 'rb') as f:
        DATA.append(pickle.load(f))

# Feature groups that are scaled and concatenated for every sample.
x_keys = ['mfccCoeffs', 'chromaCoeffs','contrastCoeffs', 'tonnetz','melspectCoeffs']

# Labels: one entry per record, mapped through utils.emotion_labels and
# flattened to 1-D (same result as the previous np.c_[...].flatten()).
Y = np.array([emotion_labels[rec['emotion']] for rec in DATA]).flatten()

# Inputs: per record, scale each feature group with its own scaler from
# utils.standard_scale, concatenate the groups, and reshape to a single row
# (1, n_values). np.stack adds the leading sample axis and fails loudly if
# records disagree on shape.
X = np.stack([
    np.concatenate([standard_scale[key](rec[key]) for key in x_keys]).reshape(1, -1)
    for rec in DATA
])

# Dataset / loader shared by the NN and CNN evaluation cells.
AD_data = AudioData(X, Y)
dataloader = DataLoader(AD_data, batch_size=8, shuffle=True)


device = torch.device('cpu')

# Logistic Regression

In [2]:
import pickle

# Load the saved logistic-regression bundle: a dict holding the fitted model
# under 'model' and the feature keys it was built with under 'keys'.
# SECURITY: pickle.load can execute arbitrary code; only load bundles that
# were produced by this project.
with open('models/LogisticRegression.pkl', 'rb') as f:
    LR_pkl = pickle.load(f)

x_keys = LR_pkl['keys']
LR = LR_pkl['model']

# Rebuild labels and inputs for this model. NOTE: this rebinds the notebook
# level X / Y / x_keys defined in the data-formatting cell.
Y = np.array([emotion_labels[rec['emotion']] for rec in DATA]).flatten()

# One flat 1-D feature vector per record, using only the keys stored with
# the model; np.stack yields a 2-D (n_records, n_values) array.
X = np.stack([
    np.concatenate([standard_scale[key](rec[key]) for key in x_keys]).flatten()
    for rec in DATA
])
# NOTE(review): no split is applied here; the former
# `train_test_split(X, Y, train_size=0.8, stratify=Y)` call is disabled,
# so X / Y cover every record in DATA.

In [3]:
# Display the feature keys that were stored alongside the pickled model
# (x_keys was set from LR_pkl['keys'] in the previous cell).
x_keys

array(['mfccCoeffs', 'chromaCoeffs', 'melspectCoeffs'], dtype='<U14')

In [4]:
# Score the unpickled model on X / Y built from every record in DATA.
# Presumably this is mean accuracy (scikit-learn classifier convention) - confirm.
# NOTE(review): DATA comes from train_dir ("samples used for training and
# validation"), so this is likely not a held-out estimate - confirm.
LR.score(X, Y)

0.9866666666666667

## NN

In [5]:
from models import NN
import torch

# The constructor argument shows up as the first Linear layer's in_features
# in the repr below. NOTE(review): it presumably has to equal the flattened
# per-sample length of X from the data-formatting cell - consider deriving it
# instead of hard-coding.
nn = NN(141860)

# map_location=device (CPU, defined in the data-formatting cell) lets a
# checkpoint that was saved from a GPU run load on a CPU-only machine.
# torch.load unpickles the file, so only load checkpoints you trust.
nn.load_state_dict(torch.load('models/nn.pt', map_location=device))

# Switch to inference mode (affects BatchNorm) and display the architecture.
nn.eval()


NN(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=141860, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=512, out_features=512, bias=True)
    (4): ReLU()
    (5): Linear(in_features=512, out_features=512, bias=True)
    (6): ReLU()
    (7): Linear(in_features=512, out_features=8, bias=True)
  )
)

In [6]:
# Evaluate the loaded NN over `dataloader`; the cell output is whatever
# utils.confusionMatrix returns (an 8x8 integer array in the saved output).
# NOTE(review): row/column orientation (true vs. predicted) is defined inside
# utils.confusionMatrix - confirm before interpreting the matrix.
from utils import confusionMatrix

confusionMatrix(nn, dataloader)

array([[153,   0,   0,   1,   4,   1,   1,   0],
       [  0, 149,   3,   0,   1,   2,   3,   2],
       [  3,   0, 151,   2,   0,   2,   1,   1],
       [  2,   0,   0, 151,   1,   0,   1,   5],
       [  2,   2,   0,   7, 145,   1,   2,   1],
       [  0,   5,   0,   0,   3,  71,   0,   1],
       [  0,   4,   3,   0,   7,   3, 141,   2],
       [  0,   0,   6,   4,   5,   0,   0, 145]])

## CNN

In [7]:
from models import ConvNet
import torch


# NOTE(review): the meaning of the constructor argument (173) is defined in
# models.ConvNet; confirm it matches the features built in the
# data-formatting cell.
cnn = ConvNet(173)

# map_location=device (CPU, defined in the data-formatting cell) lets a
# checkpoint that was saved from a GPU run load on a CPU-only machine.
# torch.load unpickles the file, so only load checkpoints you trust.
cnn.load_state_dict(torch.load('models/cnn.pt', map_location=device))

# Switch to inference mode (affects BatchNorm) and display the architecture.
cnn.eval()

ConvNet(
  (convnet): Sequential(
    (0): Conv1d(1, 128, kernel_size=(5,), stride=(1,), padding=same)
    (1): ReLU()
    (2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (3): Conv1d(128, 128, kernel_size=(5,), stride=(1,), padding=same)
    (4): ReLU()
    (5): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (6): Conv1d(128, 128, kernel_size=(5,), stride=(1,), padding=same)
    (7): ReLU()
    (8): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (9): Flatten(start_dim=1, end_dim=-1)
    (10): Linear(in_features=283648, out_features=512, bias=True)
    (11): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): Linear(in_features=512, out_features=8, bias=True)
  )
)

In [8]:
# Evaluate the loaded CNN over the same `dataloader` used for the NN.
# The import repeats the one in the NN evaluation cell; it is redundant but harmless.
# NOTE(review): row/column orientation (true vs. predicted) is defined inside
# utils.confusionMatrix - confirm before interpreting the matrix.
from utils import confusionMatrix

confusionMatrix(cnn, dataloader)

array([[155,   0,   2,   0,   1,   2,   0,   0],
       [  1, 153,   0,   0,   1,   3,   2,   0],
       [  1,   0, 151,   3,   1,   0,   3,   1],
       [  0,   0,   2, 149,   3,   1,   4,   1],
       [  2,   1,   0,   1, 153,   2,   0,   1],
       [  0,   1,   0,   0,   0,  78,   1,   0],
       [  0,   3,   0,   2,   2,   5, 147,   1],
       [  1,   0,   0,   5,   1,   3,   0, 150]])

## RNN

In [9]:
from models import BiRNN
import torch

# Hyper-parameters passed to BiRNN; they must match the ones the checkpoint
# was trained with, otherwise load_state_dict reports shape mismatches.
hidden_size = 128
num_classes = 8
num_layers = 1
input_size = 173

rnn = BiRNN(input_size, hidden_size, num_layers, num_classes)

# map_location=device (CPU, defined in the data-formatting cell) lets a
# checkpoint that was saved from a GPU run load on a CPU-only machine.
# torch.load unpickles the file, so only load checkpoints you trust.
rnn.load_state_dict(torch.load('models/rnn.pt', map_location=device))

# Switch to inference mode and display the architecture.
rnn.eval()

BiRNN(
  (lstm1): LSTM(173, 128, batch_first=True, bidirectional=True)
  (lstm2): LSTM(256, 256, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=512, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=8, bias=True)
)

In [10]:
# Rebuild the inputs for the RNN: same five feature groups as the NN / CNN,
# but each record's concatenated feature array is transposed instead of being
# flattened to a single row. NOTE: this rebinds the notebook-level X / x_keys.
x_keys = ['mfccCoeffs', 'chromaCoeffs','contrastCoeffs', 'tonnetz','melspectCoeffs']
X = np.stack([
    np.concatenate([standard_scale[key](rec[key]) for key in x_keys]).T
    for rec in DATA
])

# Y is reused from the earlier cells (labels in DATA order), so those cells
# must have been run first.
AD_data_rnn = AudioData(X,Y)
dataloader_rnn = DataLoader(AD_data_rnn, batch_size=8, shuffle=True)


In [11]:
# Evaluate the loaded BiRNN over the RNN-specific loader built in the previous cell.
# Relies on confusionMatrix having been imported by an earlier cell.
confusionMatrix(rnn, dataloader_rnn)

array([[111,   2,  15,   9,   5,   0,   5,  13],
       [  0,  93,   3,   1,   1,  27,  33,   2],
       [ 18,   4, 101,   4,   2,   6,   9,  16],
       [  3,   0,   0, 109,   9,   5,  11,  23],
       [ 13,   2,   5,  48,  37,   4,  14,  37],
       [  0,  12,   0,   0,   4,  37,  18,   9],
       [  8,  16,   3,  18,   9,  11,  71,  24],
       [  8,   0,   7,  16,   9,   9,   5, 106]])