In [1]:
import torch
from torch import nn
import torch.nn.functional as F

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import os
import pickle
import numpy as np
import math
import matplotlib.pyplot as plt

In [2]:
class DeapS2SDatasetClassification(torch.utils.data.Dataset):
    
    def __init__(self, path):

        _, _, filenames = next(os.walk(path))
        filenames = sorted(filenames)
        all_data = []
        all_label = []
        for dat in filenames:
            temp = pickle.load(open(os.path.join(path, dat), 'rb'), encoding='latin1')
            all_data.append(temp['data'][:, :32, :])
            all_label.append(temp['labels'][:,:2])

        self.data = np.vstack(all_data)
        self.label = np.vstack(all_label)
        del temp, all_data, all_label

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        single_data = self.data[idx]
        single_label = (self.label[idx] > 5).astype(float)
        
        batch = {
            'data': torch.Tensor(single_data),
            'label': torch.Tensor(single_label)
        }

        return batch

In [3]:
#@title Dataset Parameters { vertical-output: true }
batch_size = 32 #@param {type:"integer"}

dataset = DeapS2SDatasetClassification('data_preprocessed_python')


torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
train_ind = int(0.75 * len(dataset))
train_set = torch.utils.data.Subset(dataset, indices[:train_ind])
val_set = torch.utils.data.Subset(dataset, indices[train_ind:])
del dataset

print(len(train_set))
print(len(val_set))

train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, pin_memory=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False, pin_memory=True)

960
320


In [4]:
class ClassificationLSTM(nn.Module):
    def __init__(self, in_features=32, emb_dim1=64, emb_dim2=32, out_features=2):
        super(ClassificationLSTM, self).__init__()

        self.emb_dim1 = emb_dim1
        self.emb_dim2 = emb_dim2

        self.lstm1 = nn.LSTM(in_features, emb_dim1, batch_first=True)
        self.relu = nn.ReLU()
        self.drop = nn.Dropout(p=0.2)
        
        self.lstm2 = nn.LSTM(emb_dim1, emb_dim2, batch_first=True)
        self.out = nn.Linear(emb_dim2, out_features)
        self.sig = nn.Sigmoid()
    
    def forward(self, x):
        h0, c0 = self.init_hidden(x, self.emb_dim1)
        out1, (h1, c1) = self.lstm1(x, (h0,c0))
        out1 = self.drop(self.relu(out1))

        h0, c0 = self.init_hidden(x, self.emb_dim2)
        _, (h2, c2) = self.lstm2(out1, (h0,c0))
        hidden = self.sig(h2.squeeze())
        return self.sig(self.out(hidden))

    def init_hidden(self, x, hidden_dim):
        h0 = torch.zeros(1, x.size(0), hidden_dim)
        c0 = torch.zeros(1, x.size(0), hidden_dim)
        return [t.cuda() for t in (h0, c0)]

In [6]:
model = ClassificationLSTM()
loss_fn = nn.BCELoss()

model.cuda()

EPOCH = 30
lr = 0.001
optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)

In [9]:
train_loss_list = []
val_loss_list = []
val_over_all = np.inf
for epoch in range(EPOCH):
    model.train()
    train_loss = 0

    for i, batch in enumerate(train_loader):
        data = batch['data'].permute(0,2,1).cuda()
        label = batch['label'].cuda()

        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, label)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss_list.append(train_loss/len(train_loader))

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for i, batch in enumerate(val_loader):

            data = batch['data'].permute(0,2,1).cuda()
            label = batch['label'].cuda()
            output = model(data)
            loss = loss_fn(output, label)
            val_loss += loss.item()

    val_loss_list.append(val_loss/len(val_loader))
    print('Epoch : {} train_loss : {} val_loss : {}'.format(epoch, train_loss/len(train_loader), val_loss/len(val_loader)))  

    if val_loss_list[-1] < val_over_all:
        val_over_all = val_loss_list[-1]
        ckpt = {
            'model_dict': model.state_dict(),
            'eval_loss': val_loss_list[-1]
        }

        torch.save(ckpt, 'base_lstm.pt') 

Epoch : 0 train_loss : 0.6858895162741343 val_loss : 0.6831320583820343
Epoch : 1 train_loss : 0.675112928946813 val_loss : 0.6751194655895233
Epoch : 2 train_loss : 0.6604529281457265 val_loss : 0.6766663432121277
Epoch : 3 train_loss : 0.6465837200482686 val_loss : 0.6720701456069946
Epoch : 4 train_loss : 0.6315569778283437 val_loss : 0.6795907080173492
Epoch : 5 train_loss : 0.6218773603439331 val_loss : 0.6777112424373627
Epoch : 6 train_loss : 0.5995577911535899 val_loss : 0.6812864303588867
Epoch : 7 train_loss : 0.5851494034131368 val_loss : 0.6853281795978546
Epoch : 8 train_loss : 0.5712579270203908 val_loss : 0.6986408233642578
Epoch : 9 train_loss : 0.5511759559313456 val_loss : 0.7008939206600189
Epoch : 10 train_loss : 0.5434943576653798 val_loss : 0.7075961589813232
Epoch : 11 train_loss : 0.5267815818389256 val_loss : 0.7224518597126007
Epoch : 12 train_loss : 0.5171008308728536 val_loss : 0.7251432538032532
Epoch : 13 train_loss : 0.5102237741152446 val_loss : 0.738189

In [10]:
# Load the best model
ckpt = torch.load('base_lstm.pt')
model.load_state_dict(ckpt['model_dict'])
model.cuda()
model.eval()


# calculate Accuracy only

fin_targets = []
fin_outputs = []
with torch.no_grad():
    for i, batch in enumerate(val_loader):

        data = batch['data'].permute(0,2,1).cuda()
        label = batch['label']
        output = model(data)
        fin_targets.append(label.numpy())
        fin_outputs.append(np.asarray((output.cpu().detach().numpy()>0.5), dtype=np.int))

acc = accuracy_score(np.vstack(fin_outputs).flatten(), np.vstack(fin_targets).flatten())
precision = precision_score(np.vstack(fin_outputs).flatten(), np.vstack(fin_targets).flatten())
recall = recall_score(np.vstack(fin_outputs).flatten(), np.vstack(fin_targets).flatten())
f1score = f1_score(np.vstack(fin_outputs).flatten(), np.vstack(fin_targets).flatten())

print('Accuracy : {}'.format(acc))
print('Precision: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1score: {}'.format(f1score))


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  fin_outputs.append(np.asarray((output.cpu().detach().numpy()>0.5), dtype=np.int))


Accuracy : 0.584375
Precision: 0.8842975206611571
Recall: 0.5889908256880734
F1score: 0.7070484581497798
