Final_multi_head_attention
 : batch_size=1, number of heads =8, , hidden dim=256, RMS prop, regularisation, cross entropy

In [None]:
import torch
import torch.nn as nn
import os
import pickle
import numpy as np
import math
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [None]:
import torch.nn.functional as F

In [None]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
path='/content/drive/My Drive/'

In [None]:
class DeapS2SDatasetClassification(torch.utils.data.Dataset):
    
    def __init__(self, path):

        _, _, filenames = next(os.walk(path))
        filenames = sorted(filenames)
        all_data = []
        all_label = []
        for dat in filenames:
            temp = pickle.load(open(os.path.join(path,dat), 'rb'), encoding='latin1')
            all_data.append(temp['data'])
            all_label.append(temp['labels'][:,:2])

        self.data = np.vstack(all_data)
        self.label = np.vstack(all_label)
        del temp, all_data, all_label

    def __len__(self):
        return self.data.shape[0]

   
    def __getitem__(self, idx):
        single_data = self.data[idx]
        single_label = (self.label[idx] > 5).astype(float)
        
        batch = {
            'data': torch.Tensor(single_data),
            'label': torch.Tensor(single_label)
        }

        return batch

In [None]:
dataset = DeapS2SDatasetClassification(path+'data_preprocessed_python')

torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
train_ind = int(0.7 * len(dataset))
train_set = torch.utils.data.Subset(dataset, indices[:train_ind])
val_set = torch.utils.data.Subset(dataset, indices[train_ind:])
del dataset

print(len(train_set))
print(len(val_set))

train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, pin_memory=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=1, shuffle=False, pin_memory=True)

896
384


In [None]:
#model hyperparameters
input_dim = 40
hidden_dim = 256

output_dim = 2

num_layers = 1
bidirectional = True
dropout = 0.5


batch_size = 1
num_epochs = 15
lr=0.0001


n_heads = 8
head_dimensions = (hidden_dim * 2) // n_heads 

In [None]:
def calcuate_accuracy(val_loader,model,data):
  fin_targets = []
  fin_outputs = []
  with torch.no_grad():
      for i, batch in enumerate(val_loader):

          data = batch['data'].permute(2, 0, 1).cuda()
          label = batch['label']
          output = model(data)
          fin_targets.append(label.numpy())
          fin_outputs.append(np.asarray((output.cpu().detach().numpy()>0.5)[0].reshape((1,2)), dtype=np.int))
          
          # print(len(fin_outputs),len(fin_targets))
  # print(len(fin_outputs),len(fin_targets))
  # print(fin_outputs[0].shape,fin_targets[0].shape)


  acc = round((accuracy_score(np.vstack(fin_outputs).flatten(), np.vstack(fin_targets).flatten())),3)
  precision = round(precision_score(np.vstack(fin_outputs).flatten(), np.vstack(fin_targets).flatten()),3)
  recall = round(recall_score(np.vstack(fin_outputs).flatten(), np.vstack(fin_targets).flatten()),3)
  f1score = round(f1_score(np.vstack(fin_outputs).flatten(), np.vstack(fin_targets).flatten()),3)

  print('Accuracy : {}'.format(acc))
  print('Precision: {}'.format(precision))
  print('Recall: {}'.format(recall))
  print('F1score: {}'.format(f1score))

In [None]:
class LSTM(nn.Module):
    def __init__(self, len_reduction='mean'):
        super().__init__()
        
        
        self.lstm = nn.LSTM(input_dim, 
                           hidden_dim, 
                           num_layers=num_layers, 
                           bidirectional=bidirectional, 
                           dropout=dropout,
                           batch_first=True)
        
        
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.softmax       = nn.Softmax(dim=1)
        self.len_reduction = len_reduction
        self.lin_Q = nn.Linear(hidden_dim * 2, hidden_dim * 2)
        self.lin_K = nn.Linear(hidden_dim * 2, hidden_dim * 2)
        self.lin_V = nn.Linear(hidden_dim * 2, hidden_dim * 2)
        self.layer_norm = nn.LayerNorm(hidden_dim * 2)
        
    # lstm_output : [batch_size, seq len, n_hidden * num_directions(=2)]
    def multi_head_Attention(self, lstm_output):           
        residual, batch_size = lstm_output, lstm_output.size(0) 
        q = self.lin_Q(torch.clone(lstm_output))
        k = self.lin_K(torch.clone(lstm_output))
        v = self.lin_V(torch.clone(lstm_output))
       
        
        #split into heads
        q = q.view(batch_size, -1, n_heads, head_dimensions).transpose(1,2)  # q: [batch_size x n_heads x seq_len x d_k]
        k = k.view(batch_size, -1, n_heads, head_dimensions).transpose(1,2)  # k: [batch_size x n_heads x seq_len x d_k]
        v = v.view(batch_size, -1, n_heads, head_dimensions).transpose(1,2)  # v: [batch_size x n_heads x seq_len x d_k]
        
        
        # dot production attention
        attn_w = torch.matmul(q, k.transpose(-1, -2)) / np.sqrt(head_dimensions) # [batch_size x n_heads x seq_len x seq_len]
                
     
        sfmx_attn_w = self.softmax(attn_w)
        context = torch.matmul(sfmx_attn_w, v) # [batch_size x n_heads x seq_len x d_k]
        
        # concatenate heads
        context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * head_dimensions) 
        
        # doing skip connection

        context = self.layer_norm(residual + context)

        if self.len_reduction == "mean":
            return torch.mean(context, dim=1)
        elif self.len_reduction == "sum":
            return torch.sum(context, dim=1)
        elif self.len_reduction == "last":
            return context[:, -1, :]
        
    def forward(self, x):
       
        
        
        output, (hn, cn) = self.lstm(x)  
        

        
                
        attn_output = self.multi_head_Attention(output)        
        
        
        return self.softmax(self.fc(attn_output))

In [None]:
def initialize(model):
    if isinstance(model, nn.Linear):
        nn.init.xavier_normal_(model.weight)
        nn.init.zeros_(model.bias)
    elif isinstance(model, nn.RNN):
        for name, param in model.named_parameters():
            if 'bias' in name:
                nn.init.zeros_(param)
            elif 'weight' in name:
                nn.init.orthogonal_(param) 

In [None]:
weight = torch.empty(2, 2)
nn.init.orthogonal_(weight)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTM().to(device)
model.apply(initialize)
loss_fn = nn.BCELoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=lr, weight_decay=1e-5)

  "num_layers={}".format(dropout, num_layers))


In [None]:
for epoch in range(15):
    model.train()
    train_loss = 0

    for i, batch in enumerate(train_loader):
        data = batch['data'].permute(0,2,1).to(device)
        label = batch['label'].to(device)
        
        optimizer.zero_grad()
        output = model(data)
        
        
        loss = loss_fn(output, label)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for i, batch in enumerate(val_loader):

            data = batch['data'].permute(0,2,1).to(device)
            label = batch['label'].to(device)
            output = model(data)
            
            loss = loss_fn(output, label)
            val_loss += loss.item()

    print('Epoch : {} train_loss : {} val_loss : {}'.format(epoch, train_loss/len(train_loader), val_loss/len(val_loader)))  

Epoch : 0 train_loss : 0.8951234519481659 val_loss : 0.8411302839716276
Epoch : 1 train_loss : 0.8240546872174102 val_loss : 0.7939899898289392
Epoch : 2 train_loss : 0.7802981531047928 val_loss : 0.7854116858604053
Epoch : 3 train_loss : 0.7563631638684976 val_loss : 0.7985597400305172
Epoch : 4 train_loss : 0.7469965152309409 val_loss : 0.8004039193037897
Epoch : 5 train_loss : 0.7351229381082314 val_loss : 0.7917842714426419
Epoch : 6 train_loss : 0.7231129079258868 val_loss : 0.7920262437934676
Epoch : 7 train_loss : 0.7134338446027998 val_loss : 0.7939569146838039
Epoch : 8 train_loss : 0.7031935340325747 val_loss : 0.7989229888189584
Epoch : 9 train_loss : 0.6998732374714953 val_loss : 0.8060413359198719
Epoch : 10 train_loss : 0.6915650189122451 val_loss : 0.8107185553138455
Epoch : 11 train_loss : 0.684821803388851 val_loss : 0.8073807554319501
Epoch : 12 train_loss : 0.6763399712342236 val_loss : 0.8177017191580186
Epoch : 13 train_loss : 0.6745381571818143 val_loss : 0.796395

In [None]:
calcuate_accuracy(val_loader,model,data)

Accuracy : 0.53
Precision: 0.527
Recall: 0.591
F1score: 0.557


In [None]:
print(model)

LSTM(
  (lstm): LSTM(40, 256, batch_first=True, dropout=0.5, bidirectional=True)
  (fc): Linear(in_features=512, out_features=2, bias=True)
  (softmax): Softmax(dim=1)
  (lin_Q): Linear(in_features=512, out_features=512, bias=True)
  (lin_K): Linear(in_features=512, out_features=512, bias=True)
  (lin_V): Linear(in_features=512, out_features=512, bias=True)
  (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)


In [None]:
dataiter = iter(train_loader)
data = dataiter.next()
images, labels = data['data'],data['label']
print(images.shape)
print(labels.shape)

torch.Size([1, 40, 8064])
torch.Size([1, 2])
