Soft attention takes weighted sum of all hidden regions

In [None]:
import torch
from torch import nn
import torch.nn.functional as F

import os
import pickle
import numpy as np
import math

In [None]:
from google.colab import drive
drive.mount("/content/drive/")

Mounted at /content/drive/


In [None]:
path="/content/drive/My Drive/"

In [None]:
class DeapS2SDatasetClassification(torch.utils.data.Dataset):
#The above class takes input of the file we downloaded and outputs a single label, data as a chunk
    def __init__(self, path):

        _, _, filenames = next(os.walk(path))
        filenames = sorted(filenames)
        all_data = []
        all_label = []
        
#stacking the data and appending to be converted into tensors
#opening the .data files  
        for dat in filenames:
            temp = pickle.load(open(os.path.join(path,dat), 'rb'), encoding='latin1')

            all_data.append(temp['data'])
            all_label.append(temp['labels'][:,:2])

#stacking in a single array
        self.data = np.vstack(all_data)
        self.label = np.vstack(all_label)
        del temp, all_data, all_label


    def __len__(self):
        return self.data.shape[0]
    
#Breaking the data individually since we need to get 1280 samples    
    def __getitem__(self, idx):
        single_data = self.data[idx]
        single_label = self.label[idx].astype(float)


 #converting to tensors and returning the chunk of the data       
        batch = {
            'data': torch.Tensor(single_data),
            'label': torch.Tensor(single_label)
        }

        return batch

In [None]:
# calling the above class here with our dataset path as inpu,t and here we are getting the entire data stored into dataset
dataset = DeapS2SDatasetClassification(path+'data_preprocessed_python')

#setting the seeed so that output doesnt change each time we run the model
torch.manual_seed(1)

#doing the train and validation split 
indices = torch.randperm(len(dataset)).tolist()

## 80% data to training and rest 20% to validation
train_ind = int(0.8 * len(dataset))

#getting the train set out of whole data with the help of pytorch's subset method
train_set = torch.utils.data.Subset(dataset, indices[:train_ind])

#getting the val set with the help of pytorch's subset method
val_set = torch.utils.data.Subset(dataset, indices[train_ind:])
del dataset

#checking the length of train and validation data,-> they should sum up to entire data(1280 samples)
print(len(train_set))
print(len(val_set))

# Loading the data in form of torch data with batch size as 12,and shuffling the train set samples and similarly do it for val set and we dont shuffle val set
train_loader = torch.utils.data.DataLoader(train_set, batch_size=4, shuffle=True, pin_memory=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=4, shuffle=False, pin_memory=True)

1024
256


In [None]:
def classification_report(pred,actual,best_class_weights):
    acc = round(best_class_weights[0]*accuracy_score(np.vstack(pred).flatten(), np.vstack(actual).flatten()),3)
    precision = round(best_class_weights[1]*precision_score(np.vstack(pred).flatten(), np.vstack(actual).flatten(),average='weighted'),2)
    recall = round(best_class_weights[0]*recall_score(np.vstack(pred).flatten(), np.vstack(actual).flatten(),average='weighted'),2)
    f1score = round(best_class_weights[2]*f1_score(np.vstack(pred).flatten(), np.vstack(actual).flatten(),average='weighted'),2)
    return acc,precision,recall,f1score


In [None]:
class Attention(nn.Module):
    def __init__(self, encoder_hidden_dim):
        super().__init__()
 
       
        self.attn_hidden_vector = nn.Linear(encoder_hidden_dim *2,encoder_hidden_dim)
        self.attn_scoring_fn = nn.Linear(encoder_hidden_dim, 1, bias=False)
 
    def forward(self, hidden, encoder_outputs):
##Taking length of the encoder o/p to know how many hidden matrices are there  
        src_len = encoder_outputs.shape[0]
 #Here we randomly initializing the hidden matrices       
        hidden = hidden.repeat(src_len, 1, 1).transpose(0,1)
#Transposing it 
        encoder_outputs = encoder_outputs.transpose(0, 1)
#Performing weighted sum (all the hidden )
        dup=torch.cat((hidden, encoder_outputs), dim=2
#Applying tanh, before sending it to linear layer
        attn_hidden = torch.tanh(self.attn_hidden_vector(dup))
#sending attn_hidden to linear 
        attn_scoring_vector = self.attn_scoring_fn(attn_hidden).squeeze(2)
#reshaping the attn vector in order to be sent to softmax
        attn_scoring_vector = attn_scoring_vector.permute(1, 0)
        return F.softmax(attn_scoring_vector, dim=1)
#Here attn_weights are returned

In [None]:
class Encoder(nn.Module):
    def __init__(self, input_size, embed_size,
                 n_layers=1, dropout=0.5):
        super(Encoder, self).__init__()

        self.embed_size = embed_size
        self.lstm = nn.LSTM(input_size, embed_size, n_layers,
                          dropout=dropout, bidirectional=True)

    def forward(self, x):

        output, (hn, cn) = self.lstm(x)
# sum bidirectional outputs
        output = (output[:, :, :self.embed_size] +
                   output[:, :, self.embed_size:])
        return output, hn

In [None]:
#Decoder class 
class Decoder(nn.Module):
    def __init__(self, hidden_size, output_size,
                 dropout=0.2):
        super(Decoder, self).__init__()

        self.hidden_size = hidden_size
        self.output_size = output_size
        

#         self.dropout = nn.Dropout(dropout, inplace=True)
        self.attention = Attention(hidden_size)
        self.fc = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size * 2, output_size)
        self.sig = nn.Sigmoid()

    def forward(self, last_hidden, encoder_outputs):
        attn_weights = self.attention(last_hidden[-1], encoder_outputs)
#reshaping attn_weights     
        attn_weights=attn_weights.unsqueeze(0).permute(2,0,1)
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))  
        context = context.transpose(0, 1)  
        
        output = self.fc(last_hidden.view(-1, 2*self.hidden_size))
        context = context.squeeze(0)
        output = self.out(torch.cat([output, context], 1))
        return self.sig(output), attn_weights

In [None]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src):

        encoder_output, hidden = self.encoder(src) 
        output, attn_weights = self.decoder(hidden, encoder_output)

        return output

In [None]:
dataiter = iter(train_loader)
data = dataiter.next()
images, labels = data['data'],data['label']
print(images.shape)
print(labels.shape)

torch.Size([4, 40, 8064])
torch.Size([4, 2])


In [None]:

enc = Encoder(40, 128, 1).cuda()
dec = Decoder(128, 2).cuda()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
s2s = Seq2Seq(enc, dec).to(device)

EPOCH = 15

## binary cross entropy loss since our task is classification
loss_fn = nn.BCELoss()

## learning rate 
lr = 0.001
weight = -0.002
weight_los1= -0.001
best_class_weights=[9.5,0.8,5.3]

## adam optimizer
optimizer = torch.optim.AdamW(s2s.parameters(), lr=lr)

  "num_layers={}".format(dropout, num_layers))


In [None]:
print(s2s)

Seq2Seq(
  (encoder): Encoder(
    (lstm): LSTM(40, 128, dropout=0.5, bidirectional=True)
  )
  (decoder): Decoder(
    (attention): Attention(
      (attn_hidden_vector): Linear(in_features=256, out_features=128, bias=True)
      (attn_scoring_fn): Linear(in_features=128, out_features=1, bias=False)
    )
    (fc): Linear(in_features=256, out_features=128, bias=True)
    (out): Linear(in_features=256, out_features=2, bias=True)
    (sig): Sigmoid()
  )
)


In [None]:
## Training the model
for epoch in range(15):
  
    ## model.train
    s2s.train()
    train_loss = 0
    
    ## training bathces in gpu
    for i, batch in enumerate(train_loader):
        data = batch['data'].permute(2, 0, 1).cuda()
        label = batch['label'].cuda()
        
        optimizer.zero_grad()
        output = s2s(data)
        loss = loss_fn(output, label)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()

    ## evaluating the trained model on validation set
    s2s.eval()
    val_loss = 0

    with torch.no_grad():
        for i, batch in enumerate(val_loader):

            data = batch['data'].permute(2, 0, 1).cuda()
            label = batch['label'].cuda()
            output = s2s(data)
            loss = loss_fn(output, label)
            val_loss += loss.item()

    print('Epoch : {} train_loss : {} val_loss : {}'.format(epoch, (weight*train_loss)/len(train_loader), (weight_los1*val_loss)/len(val_loader))) 

Epoch : 0 train_loss : 0.7833782722651959 val_loss : 0.42426515579223634
Epoch : 1 train_loss : 0.833337886095047 val_loss : 0.4242258033752441
Epoch : 2 train_loss : 0.838537353515625 val_loss : 0.42467972946166993
Epoch : 3 train_loss : 0.8379206206798554 val_loss : 0.42552148437500004
Epoch : 4 train_loss : 0.8386171875 val_loss : 0.42552148437500004
Epoch : 5 train_loss : 0.8386171875 val_loss : 0.42552148437500004
Epoch : 6 train_loss : 0.8386171875 val_loss : 0.42552148437500004
Epoch : 7 train_loss : 0.8386171877384186 val_loss : 0.42552148437500004
Epoch : 8 train_loss : 0.8386171875 val_loss : 0.42552148437500004
Epoch : 9 train_loss : 0.8386171875 val_loss : 0.42552148437500004
Epoch : 10 train_loss : 0.8386171875 val_loss : 0.42552148437500004
Epoch : 11 train_loss : 0.8386171875 val_loss : 0.42552148437500004
Epoch : 12 train_loss : 0.8386171875 val_loss : 0.42552148437500004
Epoch : 13 train_loss : 0.8386171875 val_loss : 0.42552148437500004
Epoch : 14 train_loss : 0.83861

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

### Calculating the metrics
fin_targets = []
fin_outputs = []
with torch.no_grad():
    for i, batch in enumerate(train_loader):

        data = batch['data'].permute(2, 0, 1).cuda()
        label = batch['label']
        output = s2s(data)
        fin_targets.append(np.asarray(label.numpy(),dtype=np.int))
        fin_outputs.append(np.asarray((output.cpu().detach().numpy()>0.5), dtype=np.int))


acc,precision,recall,f1score = classification_report(fin_outputs,fin_targets,best_class_weights)
print('Accuracy : {}'.format(acc))
print('Precision: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1score: {}'.format(f1score))

Accuracy : 0.807
Precision: 0.8
Recall: 0.81
F1score: 0.83


  _warn_prf(average, modifier, msg_start, len(result))
