In [None]:
# import modules
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sbn
%matplotlib inline

from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn

In [None]:
def get_default_device():
    '''Return the CUDA device when one is available, otherwise the CPU device.'''
    use_gpu = torch.cuda.is_available()
    return torch.device('cuda' if use_gpu else 'cpu')
    
def to_device(data, device):
    '''Move a tensor, or every tensor inside a (possibly nested) list/tuple, to the chosen device.

    Lists and tuples are both returned as lists; anything else must expose `.to()`.
    '''
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

In [None]:
# init device: picked once here and passed to to_device() for the tensors and the model
device = get_default_device()

### Data exploration

In [None]:
# read the data: load the emotions CSV into a DataFrame
data = pd.read_csv('../input/eeg-brainwave-dataset-feeling-emotions/emotions.csv')

#### First look at the dataset

In [None]:
# preview the first rows of the DataFrame
data.head()

#### How many data points and features?

In [None]:
# (number of rows, number of columns), label column included
data.shape

First observations:
- there are many more features than data points
- it is not entirely clear how the features were derived, but we know that the data was collected from two people, for three minutes per state (3 states) plus 6 minutes of resting time.
- the waves are described mathematically
- feature names carry an `a` or `b` postfix, which could correspond to data from the 2 respondents.

#### Is the data complete? Are there duplicates in the data?

In [None]:
# rows that contain at least one missing value (an empty frame means nothing is missing)
has_missing = data.isna().any(axis=1)
data[has_missing]

In [None]:
# True if at least one row duplicates an earlier row
data.duplicated().any()

There are no duplicates or missing values in the dataset.

#### Is the data balanced?

In [None]:
# number of samples per class label
labels = data['label'].value_counts()
labels

#### Does it look like a time series?

In [None]:
# take a sample: row 5, feature columns fft_0_a .. fft_479_a
sample = data.loc[5, 'fft_0_a':'fft_479_a']

# explicit axes API so the figure carries its own axis labels
fig, ax = plt.subplots(figsize=(20, 7))
ax.plot(range(len(sample)), sample)
ax.set_title('Features range fft_0_a - fft_479_a for one data point')
ax.set_xlabel('feature position within fft_0_a .. fft_479_a')
ax.set_ylabel('feature value')
plt.show()

We are not completely sure, but it looks like time is incorporated in this plot, and hence in the data as well.

#### What are the principal components?

In [None]:
# Fit a 10-component PCA on the feature columns only.
# random_state pins the result when scikit-learn selects its randomized SVD solver,
# so the explained-variance numbers are reproducible across runs.
pca = PCA(n_components=10, random_state=42).fit(data.drop(columns='label'))
explained_variance = pca.explained_variance_ratio_

In [None]:
# plot cumulative explained variance against the number of components kept;
# the x values start at 1 so the axis label matches what is plotted
n_components = np.arange(1, len(explained_variance) + 1)
fig, ax = plt.subplots()
ax.plot(n_components, np.cumsum(explained_variance), marker='o')
ax.set_xticks(n_components)
ax.set_xlabel('number of components')
ax.set_ylabel('cumulative explained variance')
plt.show()

The plot clearly indicates that the first two components contain most of the information (variance) in the data.

In [None]:
# direction vectors of the first two principal components
first_c, second_c = pca.components_[:2]

### Preprocessing

In [None]:
# keep a copy of the dataset as loaded (labels not yet encoded) before `data` is modified
data_copy = data.copy()

In [None]:
# label encoding: overwrite the 'label' column in place with integer category codes
data['label'] = data['label'].astype('category').cat.codes

In [None]:
# stratified 80/20 split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    data.drop('label', axis=1),
    data['label'],
    test_size=0.2,
    stratify=data['label'],
    random_state=42,
)

In [None]:
# class distribution in the training split
y_train.value_counts()

In [None]:
# class distribution in the test split
y_test.value_counts()

In [None]:
# convert test labels to a numpy array; np.asarray also accepts an ndarray,
# so re-running this cell does not fail the way a second .to_numpy() call would
y_test = np.asarray(y_test)

### Build a model

In [None]:
# convert the training features and training labels to numpy arrays
inputs_array = X_train.to_numpy()
targets_array = y_train.to_numpy()

In [None]:
# convert to tensors with explicit dtypes: float32 features and int64 class indices
# (int64 is the target dtype nn.CrossEntropyLoss expects for class-index targets)
inputs = to_device(torch.as_tensor(inputs_array, dtype=torch.float32), device)
targets = to_device(torch.as_tensor(targets_array, dtype=torch.long), device)

In [None]:
# dataset = torch.utils.data.TensorDataset(inputs, targets)

In [None]:
# define batch size
# NOTE(review): batch_size is not referenced by the cells visible here (training runs on the full tensor) — confirm before removing
batch_size = 64

In [None]:
# build a model
class Model(nn.Module):
    '''Stacked GRU followed by ReLU and a linear layer applied at every time step.

    Args:
        input_size: number of features per time step.
        output_size: size of the output (logit) vector per time step.
        hidden_dim: width of the GRU hidden state.
        n_layers: number of stacked GRU layers; dropout of 0.2 is applied
            between layers while the module is in training mode.
    '''
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        self.gru = nn.GRU(input_size, hidden_dim, n_layers, batch_first=True, dropout=0.2)
        self.fc = nn.Linear(hidden_dim, output_size)
        self.relu = nn.ReLU()

    def forward(self, x, h=None):
        '''Run the network on a batch-first input.

        Args:
            x: tensor of shape (batch, seq_len, input_size).
            h: optional initial hidden state of shape (n_layers, batch, hidden_dim),
                e.g. the result of `init_hidden`; when None the GRU starts from zeros.

        Returns:
            (out, h): `out` has shape (batch, seq_len, output_size); `h` is the final
            hidden state of shape (n_layers, batch, hidden_dim).
        '''
        out, h = self.gru(x, h)
        out = self.fc(self.relu(out))
        return out, h

    def init_hidden(self, batch_size):
        '''Return an all-zero hidden state on the same device/dtype as the model parameters.'''
        ref = next(self.parameters())
        # torch.zeros replaces the legacy `.data` / `Tensor.new(...).zero_()` construction
        return torch.zeros(self.n_layers, batch_size, self.hidden_dim,
                           dtype=ref.dtype, device=ref.device)

In [None]:
# seed torch's RNG so the weight initialisation below is the same on every Restart & Run All
torch.manual_seed(42)

# model dimensions
input_size = X_train.shape[1]
output_size = y_train.nunique()
hidden_dim = 128
n_layers = 2
# training hyperparameters
n_epochs = 430
# init model and move it to the selected device (nn.Module.to moves parameters in place)
model = Model(input_size, output_size, hidden_dim, n_layers)
to_device(model, device)

### Train the model

In [None]:
# define loss and optimizer
losses = []  # per-epoch training loss, stored as plain Python floats
l_rates = [1e-1, 1e-2, 1e-3, 1e-4]
l_r_i = 2  # index into l_rates
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=l_rates[l_r_i])

# training mode: enables the GRU's inter-layer dropout
model.train()

# loop-invariant tensors, built once instead of on every epoch
train_sequence = inputs.unsqueeze(0)  # adds a leading batch dimension of size 1
train_targets = targets.long()        # class indices as int64 for CrossEntropyLoss

for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad()
    output, _ = model(train_sequence)
    loss = criterion(output.squeeze(0), train_targets)

    loss.backward()
    optimizer.step()

    # .item() already detaches and copies the scalar to the host
    loss_value = loss.item()
    losses.append(loss_value)

    if epoch % 10 == 0:
        print(f'Epoch: {epoch}/{n_epochs}............. Loss: {loss_value:.4f}')

### Prediction

In [None]:
# inference: eval mode turns the GRU's dropout off, no_grad skips building the autograd graph
model.eval()
test_data = to_device(torch.FloatTensor(X_test.to_numpy()).unsqueeze(0), device)
with torch.no_grad():
    output = model(test_data)[0]
output = output.squeeze(0)  # drop the size-1 batch dimension -> one row of logits per test sample
output_ = output.detach().cpu()
# 1-D array of predicted class codes (the shape sklearn's metric functions expect)
predictions = torch.argmax(output_, dim=1).numpy()

### Model validation

In [None]:
# Class names in the same order as the integer codes: the codes were produced by
# astype('category') on this same column, so category i corresponds to code i.
# (value_counts() order is by frequency and does not have to match the codes.)
class_names = list(data_copy['label'].astype('category').cat.categories)

# sklearn's signature is confusion_matrix(y_true, y_pred): rows = true class, columns = predicted class.
# `labels` pins the matrix size even if a class is absent from the test predictions.
c_m = confusion_matrix(y_test, np.ravel(predictions), labels=np.arange(len(class_names)))

fig, ax = plt.subplots(figsize=(10, 10))
sbn.heatmap(c_m, annot=True, cmap='YlGnBu', fmt='g',
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('predicted label')
ax.set_ylabel('true label')
ax.set_title('Confusion matrix on the test set')
plt.show()

In [None]:
# per-class precision / recall / f1 on the test set; print() keeps the table's line breaks
report = classification_report(y_test, predictions)
print(report)