In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

from config import DATASET_PATH
from datasets import (Dataset, RAVDESSLabel, TESSLabel, 
                     EMOVOLabel, SAVEELabel, MFCCData, WAVData,
                     RAVDESSUnifiedLabel, TESSUnifiedLabel, SAVEEUnifiedLabel,
                     EMOVOUnifiedLabel)

from tools import add_margin, IndexPicker

from classifiers import Sequential

### Load Datasets

In [2]:
ravdess_path = DATASET_PATH.format(language="english", name="RAVDESS", form="mfcc")

ravdess_mfcc_unified = Dataset(ravdess_path, MFCCData(), RAVDESSUnifiedLabel())

In [3]:
tess_path = DATASET_PATH.format(language="english", name="TESS", form="mfcc")

tess_mfcc_unified = Dataset(tess_path, MFCCData(), TESSUnifiedLabel())

In [4]:
savee_path = DATASET_PATH.format(language="english", name="SAVEE", form="mfcc")

savee_mfcc_unified = Dataset(savee_path, MFCCData(), SAVEEUnifiedLabel())

In [5]:
emovo_path = DATASET_PATH.format(language="italian", name="EMOVO", form="mfcc")

emovo_mfcc_unified = Dataset(emovo_path, MFCCData(), EMOVOUnifiedLabel())

### Combine Datasets

In [6]:
ravdess_mfcc_unified.combine(savee_mfcc_unified, tess_mfcc_unified, emovo_mfcc_unified)

In [7]:
dataset = ravdess_mfcc_unified

In [8]:
samples = dataset.samples

In [9]:
X = np.array(list(samples['coefficients']))

y = np.array(list(samples['emotion']))

In [10]:
n_classes = len(np.unique(y))

In [11]:
y.shape

(1282163,)

In [12]:
X.shape

(1282163, 39)

### Add Margin

In [13]:
index_picker = IndexPicker(10, 10)
%time X_margined = np.array(add_margin(X, index_picker))

CPU times: user 12 s, sys: 4.39 s, total: 16.4 s
Wall time: 16.5 s


In [14]:
X_margined.shape

(1282163, 21, 39)

### Reshape

In [15]:
n_samples,window_length,n_features = X_margined.shape
%time X_reshaped = np.array(np.reshape(X_margined, (n_samples, -1)))

CPU times: user 2.44 s, sys: 1.5 s, total: 3.94 s
Wall time: 3.95 s


In [16]:
X_reshaped.shape

(1282163, 819)

## Modeling

In [17]:
from sklearn.base import TransformerMixin,BaseEstimator


class Scaler(BaseEstimator,TransformerMixin):

    def __init__(self, scaler):
        self.scaler = scaler

    def fit(self,X,y=None):
        self.scaler.fit(X.reshape(-1, 1))
        return self

    def transform(self,X):
        return self.scaler.transform(X.reshape(-1, 1)).reshape(X.shape)

### Prepere for model fitting

In [18]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

X_train_full, X_test, y_train_full, y_test = train_test_split(X_reshaped, y, stratify=y, test_size=0.05)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, stratify=y_train_full, test_size=0.05)

# scaler = Scaler(StandardScaler())
# %time X_train = scaler.fit_transform(X_train)    ## fit it only on train data
# %time X_valid = scaler.transform(X_valid)
# %time X_test = scaler.transform(X_test)

In [19]:
X_train.shape

(1157151, 819)

In [20]:
n_features = X_train.shape[1]

In [21]:
from torch.utils.data import TensorDataset, DataLoader


def prepare_torch_dataset(batch_size, X, y):
    tensor_x = torch.Tensor(X)
    tensor_y = torch.Tensor(y).type(torch.LongTensor)

    dataset = TensorDataset(tensor_x, tensor_y)
    datasetloader = DataLoader(dataset, batch_size=batch_size)
    return dataset, datasetloader

In [22]:
import torch
import numpy as np

batch_size = 32

trainset, train_loader = prepare_torch_dataset(batch_size, X_train, y_train)

valset, val_loader = prepare_torch_dataset(batch_size, X_valid, y_valid)

testset, test_loader = prepare_torch_dataset(batch_size, X_test, y_test)

### Create model

In [23]:
from torch import nn


input_size = n_features
hidden_sizes = [1024, 512, 64]
output_size = n_classes

net = Sequential(
    nn.Linear(input_size, hidden_sizes[0]),
    nn.ReLU(),
    nn.Linear(hidden_sizes[0], hidden_sizes[1]),
    nn.ReLU(),
    nn.Linear(hidden_sizes[1], hidden_sizes[2]),
    nn.ReLU(),
    nn.Linear(hidden_sizes[2], output_size),
    nn.Softmax(dim=1)
)

In [24]:
print(net)

Sequential(
  (0): Linear(in_features=819, out_features=1024, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1024, out_features=512, bias=True)
  (3): ReLU()
  (4): Linear(in_features=512, out_features=64, bias=True)
  (5): ReLU()
  (6): Linear(in_features=64, out_features=7, bias=True)
  (7): Softmax(dim=1)
)


### Define Loss function and optimizer

In [25]:
import torch.optim as optim


criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

### Fit Model

In [26]:
n_epochs = 30

%time net.fit(train_loader, val_loader, criterion, optimizer, n_epochs)

Epoch 1/30
train_loss: 1.711 - train_accuracy: 0.446  - val_loss: 1.633 - val_accuracy: 0.527
Epoch 2/30
train_loss: 1.608 - train_accuracy: 0.552  - val_loss: 1.588 - val_accuracy: 0.573
Epoch 3/30
train_loss: 1.567 - train_accuracy: 0.593  - val_loss: 1.560 - val_accuracy: 0.601
Epoch 4/30
train_loss: 1.541 - train_accuracy: 0.620  - val_loss: 1.537 - val_accuracy: 0.624
Epoch 5/30
train_loss: 1.521 - train_accuracy: 0.640  - val_loss: 1.520 - val_accuracy: 0.642
Epoch 6/30
train_loss: 1.505 - train_accuracy: 0.657  - val_loss: 1.504 - val_accuracy: 0.657
Epoch 7/30
train_loss: 1.491 - train_accuracy: 0.671  - val_loss: 1.492 - val_accuracy: 0.671
Epoch 8/30
train_loss: 1.480 - train_accuracy: 0.682  - val_loss: 1.486 - val_accuracy: 0.676
Epoch 9/30
train_loss: 1.470 - train_accuracy: 0.692  - val_loss: 1.479 - val_accuracy: 0.683
Epoch 10/30
train_loss: 1.462 - train_accuracy: 0.700  - val_loss: 1.472 - val_accuracy: 0.689
Epoch 11/30
train_loss: 1.455 - train_accuracy: 0.708  - va

### Plot History

In [27]:
#import pandas as pd
#import matplotlib.pyplot as plt

#pd.DataFrame(history.history).plot(figsize=(8, 5))
#plt.grid(True)
#plt.gca().set_ylim(0, 1)
#plt.show()

### Evaluate Model

In [28]:
# model.evaluate(X_test, y_test)

In [25]:
# model.save('models/tensorflow_keras/ravdess_savee_tess_emovo--10_10--StandardScaler--300_relu-100_relu-10_softmax--scc_sgd_accuracy--30')