In [11]:
# import libraries
import torch
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
import re
from sentence_transformers import SentenceTransformer
import pickle

# 1 - Dataset 

In [21]:
# Load the pre-computed embeddings (X) and their labels (y).
# NOTE(security): pickle.load can execute arbitrary code while deserialising,
# so only load embedding files that were produced by this project.
with open('../saved_embeddings/embeddings.pkl', 'rb') as embeddings_file:
    X, y = pickle.load(embeddings_file)

In [54]:
# Hold out 10% of the samples for testing. The fixed random_state makes the
# shuffled split reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, train_size=0.9, shuffle=True, random_state=42
)

# 2 - Model Construction

## 2.1 - Word Level RNN

In [51]:
class WordLevelRNN(nn.Module):
    """Encode the words of a sentence with a bidirectional GRU and pool them with attention.

    Every GRU state is projected through a linear layer and ``tanh``, scored
    against a learned context vector, and the softmax of those scores over the
    word axis weights the sum of the GRU states.

    Args:
        words_dim: Size of each input word embedding (GRU input size).
        word_num_hidden: GRU hidden size per direction; the pooled sentence
            vector has ``2 * word_num_hidden`` features.
    """

    def __init__(self, words_dim, word_num_hidden):
        super().__init__()

        # Learned word-level context vector, initialised uniformly in [-0.25, 0.25].
        self.word_context_weights = nn.Parameter(torch.rand(2 * word_num_hidden, 1))
        self.word_context_weights.data.uniform_(-0.25, 0.25)

        self.GRU = nn.GRU(words_dim, word_num_hidden, 1, batch_first=True, bidirectional=True)
        self.linear = nn.Linear(2 * word_num_hidden, 2 * word_num_hidden, bias=True)

        # Scores are laid out as (batch, words); normalise over the word axis.
        self.soft_word = nn.Softmax(dim=1)

    def forward(self, x):
        """Return one attention-pooled vector per sentence in the batch.

        Args:
            x: Word embeddings of shape ``(batch, num_words, words_dim)``
                (the GRU is constructed with ``batch_first=True``).

        Returns:
            Tensor of shape ``(1, batch, 2 * word_num_hidden)``. The leading
            axis of size 1 lets the outputs of several sentences be
            concatenated along dim 0.
        """
        # RNN layer: h is (batch, words, 2 * hidden) because of batch_first=True.
        h, _ = self.GRU(x)
        u = torch.tanh(self.linear(h))

        # attention layer: one score per word, normalised across the words of
        # each sentence (not across the batch).
        scores = torch.matmul(u, self.word_context_weights).squeeze(dim=2)
        alpha = self.soft_word(scores)

        # output layer: attention-weighted sum of the GRU states over the words.
        sentence_vec = torch.sum(h * alpha.unsqueeze(2), dim=1)
        return sentence_vec.unsqueeze(0)

## 2.2 - Sentence Level RNN

In [47]:
class SentLevelRNN(nn.Module):
    """Encode a sequence of sentence vectors with a bidirectional GRU, pool with attention, classify.

    Args:
        sentence_num_hidden: GRU hidden size per direction at sentence level.
        word_num_hidden: Hidden size per direction of the word-level encoder;
            each incoming sentence vector has ``2 * word_num_hidden`` features.
        target_class: Number of output classes (size of the returned logits).
    """

    def __init__(self, sentence_num_hidden, word_num_hidden, target_class):
        super().__init__()

        # Learned sentence-level context vector, initialised uniformly in [-0.1, 0.1].
        self.sentence_context_weights = nn.Parameter(torch.rand(2 * sentence_num_hidden, 1))
        self.sentence_context_weights.data.uniform_(-0.1, 0.1)

        # Sequence-first GRU (batch_first is left at its default of False).
        self.sentence_gru = nn.GRU(2 * word_num_hidden, sentence_num_hidden, bidirectional=True)
        self.sentence_linear = nn.Linear(2 * sentence_num_hidden, 2 * sentence_num_hidden, bias=True)

        self.fc = nn.Linear(2 * sentence_num_hidden, target_class)
        # Applied to scores laid out as (batch, sentences). The explicit dim
        # avoids the deprecated implicit-dimension behaviour of nn.Softmax().
        self.soft_sent = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class logits for each document in the batch.

        Args:
            x: Sentence vectors of shape
                ``(num_sentences, batch, 2 * word_num_hidden)``.

        Returns:
            Tensor of shape ``(batch, target_class)`` holding unnormalised
            class scores.
        """
        # RNN layer: sentence_h is (sentences, batch, 2 * hidden).
        sentence_h, _ = self.sentence_gru(x)
        u = torch.tanh(self.sentence_linear(sentence_h))

        # attention layer: one score per sentence, normalised over sentences.
        scores = torch.matmul(u, self.sentence_context_weights).squeeze(dim=2)
        alpha = self.soft_sent(scores.transpose(1, 0))  # (batch, sentences)

        # output layer: attention-weighted sum over sentences, then classify.
        doc_vec = torch.sum(sentence_h * alpha.transpose(1, 0).unsqueeze(2), dim=0)
        return self.fc(doc_vec)

## 2.3 - HAN

In [56]:
class HAN(nn.Module):
    """Hierarchical attention network over pre-computed word embeddings.

    A word-level attention RNN turns every sentence (verse) into one vector;
    a sentence-level attention RNN then pools those vectors and classifies the
    whole document.

    Args:
        word_num_hidden: Hidden size per direction of the word-level GRU.
        words_dim: Size of each input word embedding.
        sentence_num_hidden: Hidden size per direction of the sentence-level GRU.
        target_class: Number of output classes.
    """

    def __init__(self, word_num_hidden, words_dim, sentence_num_hidden, target_class):
        super().__init__()
        # WordLevelRNN is declared as (words_dim, word_num_hidden). Pass by
        # keyword so the two sizes cannot be swapped.
        self.word_attention_rnn = WordLevelRNN(
            words_dim=words_dim, word_num_hidden=word_num_hidden
        )
        self.sentence_attention_rnn = SentLevelRNN(
            sentence_num_hidden=sentence_num_hidden,
            word_num_hidden=word_num_hidden,
            target_class=target_class,
        )

    def forward(self, x):
        """Classify one document given the embeddings of each of its sentences.

        Args:
            x: Iterable of sentences, e.g. a list of tensors. Each sentence is
                expected to be ``(batch, num_words, words_dim)`` with the same
                ``batch`` for all sentences, while ``num_words`` may differ
                between sentences. (Assumed from the notebook's note that each
                verse is ``(1, #words, embedding_size)`` - confirm against the
                saved embeddings.)

        Returns:
            Whatever the sentence-level RNN returns for the stacked sentence
            vectors (class logits).

        Raises:
            ValueError: If ``x`` contains no sentences.
        """
        # Encode each sentence on its own. The word-level RNN is expected to
        # return (1, batch, 2 * word_num_hidden) per sentence, so the results
        # can be stacked along dim 0.
        word_attentions = [self.word_attention_rnn(sentence) for sentence in x]
        if not word_attentions:
            raise ValueError("HAN.forward received a document with no sentences")
        return self.sentence_attention_rnn(torch.cat(word_attentions, dim=0))

# 3 - Model Training

In [57]:
# Network sizes and number of output classes.
target_class = 13
sentence_num_hidden = 128
words_dim = 768  # presumably the BERT embedding size - confirm against the saved embeddings
word_num_hidden = 256

# Build the hierarchical attention network from the sizes above.
model = HAN(
    word_num_hidden=word_num_hidden,
    words_dim=words_dim,
    sentence_num_hidden=sentence_num_hidden,
    target_class=target_class,
)

# Optimisation settings.
lr = 0.05
num_epochs = 1

# Adam over all model parameters, trained against a cross-entropy loss.
optimiser = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

In [None]:
# One optimiser step per training sample (effective batch size of 1).
for epoch in range(num_epochs):
    # Training mode only needs to be enabled once per epoch, not per sample.
    model.train()
    for x_sample, y_sample in zip(x_train, y_train):
        # Clear gradients left over from the previous step.
        optimiser.zero_grad()

        out = model(x_sample)
        train_loss = criterion(out, y_sample)

        train_loss.backward()
        optimiser.step()

# 4 - Model Evaluation

In [None]:
def Accuracy(xs, ys):
    """Return the fraction of samples in ``xs`` that the global ``model`` classifies correctly.

    The model is switched to evaluation mode and run without gradient
    tracking; its previous train/eval mode is restored afterwards.

    Args:
        xs: Sequence of model inputs. Each input is either a tensor or an
            iterable of tensors (e.g. a list of per-verse embeddings).
        ys: Sequence of true class labels aligned with ``xs``. Labels may be
            scalar tensors (anything exposing ``.item()``) or plain ints.

    Returns:
        Accuracy in ``[0, 1]`` as a float; ``0.0`` when ``xs`` is empty.

    Raises:
        IndexError: If ``ys`` has fewer entries than ``xs``.
    """
    total = len(xs)
    if total == 0:
        # Avoid a ZeroDivisionError on an empty evaluation set.
        return 0.0
    if len(ys) < total:
        raise IndexError("ys has fewer labels than xs has samples")

    # Run the samples on whatever device the model's parameters live on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = model.training
    model.eval()
    correct = 0
    try:
        with torch.no_grad():
            for sample, label in zip(xs, ys):
                if torch.is_tensor(sample):
                    sample = sample.to(device)
                else:
                    sample = [part.to(device) for part in sample]
                # Predicted class = index of the largest score.
                pred = torch.argmax(model(sample)).item()
                expected = label.item() if hasattr(label, "item") else label
                correct += int(pred == expected)
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)
    return correct / total

# Accuracy on the held-out split, reported as a percentage.
test_acc = Accuracy(x_test, y_test)
print('{}% Test Accuracy'.format(100 * test_acc))

# Accuracy on the data the model was fitted on, for comparison.
train_acc = Accuracy(x_train, y_train)
print('{}% Train Accuracy'.format(100 * train_acc))

Each song should be a list of verses, where each verse is a tensor of shape (1, #words, BERT_embedding_size).