In [1]:
import numpy as np

from config import DATASET_PATH
from datasets import (Dataset, RAVDESSLabel, TESSLabel, 
                     EMOVOLabel, SAVEELabel, MFCCData, WAVData,
                     RAVDESSUnifiedLabel, TESSUnifiedLabel, SAVEEUnifiedLabel,
                     EMOVOUnifiedLabel)

from tools import add_margin, IndexPicker

from classifiers import Sequential

import torch

### Load Datasets

In [2]:
ravdess_path = DATASET_PATH.format(language="english", name="RAVDESS", form="mfcc")

ravdess_mfcc_unified = Dataset(ravdess_path, MFCCData(), RAVDESSUnifiedLabel())

In [3]:
tess_path = DATASET_PATH.format(language="english", name="TESS", form="mfcc")

tess_mfcc_unified = Dataset(tess_path, MFCCData(), TESSUnifiedLabel())

In [4]:
savee_path = DATASET_PATH.format(language="english", name="SAVEE", form="mfcc")

savee_mfcc_unified = Dataset(savee_path, MFCCData(), SAVEEUnifiedLabel())

In [5]:
emovo_path = DATASET_PATH.format(language="italian", name="EMOVO", form="mfcc")

emovo_mfcc_unified = Dataset(emovo_path, MFCCData(), EMOVOUnifiedLabel())

### Combine Datasets

In [6]:
ravdess_mfcc_unified.combine(savee_mfcc_unified, tess_mfcc_unified, emovo_mfcc_unified)

In [7]:
dataset = ravdess_mfcc_unified

In [8]:
samples = dataset.samples

In [9]:
X = np.array(list(samples['coefficients']))

y = np.array(list(samples['emotion']))

In [10]:
n_classes = len(np.unique(y))

In [11]:
y.shape

(1282163,)

In [12]:
X.shape

(1282163, 39)

## Mass Initialization

In [13]:
index_picker = IndexPicker(25, 25)  # 25 25

hidden_sizes = [128, 128, 128]

batch_size = 512

learning_rate = 0.0001

n_epochs = 30

### Cuda

In [14]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


### Add Margin

In [15]:
%time X_margined = np.array(add_margin(X, index_picker))

CPU times: user 4.68 s, sys: 679 ms, total: 5.36 s
Wall time: 5.41 s


In [16]:
X_margined.shape

(1282163, 3, 39)

### Reshape

In [17]:
n_samples,window_length,n_features = X_margined.shape
%time X_reshaped = np.array(np.reshape(X_margined, (n_samples, -1)))

CPU times: user 390 ms, sys: 244 ms, total: 634 ms
Wall time: 639 ms


In [18]:
X_reshaped.shape

(1282163, 117)

## Modeling

In [19]:
from sklearn.base import TransformerMixin,BaseEstimator


class Scaler(BaseEstimator,TransformerMixin):

    def __init__(self, scaler):
        self.scaler = scaler

    def fit(self,X,y=None):
        self.scaler.fit(X.reshape(-1, 1))
        return self

    def transform(self,X):
        return self.scaler.transform(X.reshape(-1, 1)).reshape(X.shape)

### Prepere for model fitting

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

X_train_full, X_test, y_train_full, y_test = train_test_split(
    X_reshaped,
    y,
    stratify=y,
    test_size=0.05,
    random_state=42
)

X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full,
    y_train_full,
    stratify=y_train_full,
    test_size=0.05,
    random_state=42
)

# scaler = Scaler(StandardScaler())
# %time X_train = scaler.fit_transform(X_train)    ## fit it only on train data
# %time X_valid = scaler.transform(X_valid)
# %time X_test = scaler.transform(X_test)

In [21]:
X_train.shape

(1157151, 117)

In [22]:
n_features = X_train.shape[1]

In [23]:
from torch.utils.data import TensorDataset, DataLoader


def prepare_torch_dataset(batch_size, X, y):
    tensor_x = torch.Tensor(X)
    tensor_y = torch.Tensor(y).type(torch.LongTensor)

    dataset = TensorDataset(tensor_x, tensor_y)
    datasetloader = DataLoader(dataset, batch_size=batch_size)
    return dataset, datasetloader

In [24]:
trainset, train_loader = prepare_torch_dataset(batch_size, X_train, y_train)

valset, val_loader = prepare_torch_dataset(batch_size, X_valid, y_valid)

testset, test_loader = prepare_torch_dataset(batch_size, X_test, y_test)

### Create model

In [25]:
import torch.nn as nn


input_size = n_features
output_size = n_classes

input_layer = [
    nn.Linear(input_size, hidden_sizes[0]),
    nn.ReLU()
]

output_layer = [nn.Linear(hidden_sizes[-1], output_size)]

hidden_layers = []
for i in range(len(hidden_sizes) - 1):
    hidden_layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i + 1]))
    hidden_layers.append(nn.ReLU())

layers = tuple(input_layer + hidden_layers + output_layer)
    

net = Sequential(*layers)

In [26]:
print(net)

Sequential(
  (0): Linear(in_features=117, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=128, bias=True)
  (3): ReLU()
  (4): Linear(in_features=128, out_features=128, bias=True)
  (5): ReLU()
  (6): Linear(in_features=128, out_features=7, bias=True)
)


### Define Loss function and optimizer

In [27]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(net.parameters(), lr=0.001)  # momentum=0.9
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

### Fit Model

In [28]:
%time net.fit(train_loader, val_loader, criterion, optimizer, device, n_epochs)

Epoch 1/5
train_loss: 1.655 - train_accuracy: 0.352  - val_loss: 1.538 - val_accuracy: 0.402
Epoch 2/5
train_loss: 1.492 - train_accuracy: 0.425  - val_loss: 1.456 - val_accuracy: 0.439
Epoch 3/5
train_loss: 1.428 - train_accuracy: 0.452  - val_loss: 1.409 - val_accuracy: 0.458
Epoch 4/5
train_loss: 1.387 - train_accuracy: 0.469  - val_loss: 1.375 - val_accuracy: 0.473
Epoch 5/5
train_loss: 1.356 - train_accuracy: 0.482  - val_loss: 1.350 - val_accuracy: 0.484
Finished Training
CPU times: user 4min 1s, sys: 1.35 s, total: 4min 2s
Wall time: 1min 2s


### Plot History

In [29]:
#import pandas as pd
#import matplotlib.pyplot as plt

#pd.DataFrame(history.history).plot(figsize=(8, 5))
#plt.grid(True)
#plt.gca().set_ylim(0, 1)
#plt.show()

### Evaluate Model

In [30]:
# model.evaluate(X_test, y_test)

In [31]:
# model.save('models/tensorflow_keras/ravdess_savee_tess_emovo--10_10--StandardScaler--300_relu-100_relu-10_softmax--scc_sgd_accuracy--30')