In [1]:
import pandas as pd
from gensim import models
import numpy as np
import gensim
import gensim.downloader
from tqdm import tqdm
import time
import os

## Dataset

In [2]:
train_df = pd.read_csv('data/cola_public/tokenized/in_domain_train.tsv', header=None, delimiter='\t')
eval_df = pd.read_csv('data/cola_public/tokenized/in_domain_dev.tsv', header=None, delimiter='\t')

In [3]:
train_sentences = train_df[3].to_list()
train_sentences = [sentence.split() for sentence in train_sentences]
train_y = train_df[1].to_list()

eval_sentences = eval_df[3].to_list()
eval_sentences = [sentence.split() for sentence in eval_sentences]
eval_y = eval_df[1].to_list()

## Word Embeddings

In [None]:
root_dir = 'logs/cola'
os.makedirs(root_dir, exist_ok=True)

In [4]:
vector_size = 50
window_size = 5
negative_size = 15
sentence_size = 45

wv_model_file = root_dir + '/' + 'wv_bilstm.pth'

In [5]:
wv_model = gensim.downloader.load('glove-wiki-gigaword-50')
# wv_model = models.Word2Vec(sentences=train_sentences, vector_size=vector_size, window=window_size, negative=negative_size).wv
# wv_model = models.Word2Vec(corpus_file='data/corpus.txt', vector_size=vector_size, window=window_size, negative=negative_size).wv

wv_model.save(wv_model_file)
del wv_model

In [6]:
def vectorize_sentences(sentences, wv, sentence_size):
    vec_sentences = []
    for sentence in sentences:
        vec_sentence = []
        for token in sentence:
            if token in wv:
                vec_sentence.append(wv[token])
            else:
                vec_sentence.append(wv['<unk>'])
        
        while len(vec_sentence) < sentence_size:
            vec_sentence.append(wv['<eos>'])
        vec_sentence = vec_sentence[:sentence_size]
        
        vec_sentences.append(vec_sentence)
        
    return vec_sentences

In [7]:
wv = models.KeyedVectors.load(wv_model_file)

In [8]:
wv.add_vectors(
    ['<unk>', '<eos>'],
    [np.zeros(wv.vector_size), np.ones(wv.vector_size)]
)

## BiLSTM

In [9]:
import torch
from torch import nn

In [10]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [11]:
def save_model(model, file_name):
    torch.save(model.state_dict(), file_name)
def load_model(model, file_name):
    return model.load_state_dict(torch.load(file_name))

In [12]:
hidden_size = 128
num_layers = 6

bidirectional = True

batch_size = 32

lr = 0.0001
num_epochs = 20

model_file = root_dir + '/' +'lstm_model.pth'

os.makedirs(root_dir, exist_ok=True)

In [13]:
train_x = vectorize_sentences(train_sentences, wv, sentence_size)
eval_x = vectorize_sentences(eval_sentences, wv, sentence_size)


train_x = torch.tensor(train_x, dtype=torch.float)
eval_x = torch.tensor(eval_x, dtype=torch.float)

train_y = torch.tensor(train_y, dtype=torch.long)
eval_y = torch.tensor(eval_y, dtype=torch.long)

train_loader = torch.utils.data.DataLoader(list(zip(train_x, train_y)), batch_size, shuffle=True)
eval_loader = torch.utils.data.DataLoader(list(zip(eval_x, eval_y)), batch_size)

  train_x = torch.tensor(train_x, dtype=torch.float)


In [14]:
class Classifier(nn.Module):
    
    def __init__(self):
        super(Classifier, self).__init__()
        
        self.bilstm = nn.LSTM(input_size=vector_size,
                              hidden_size=hidden_size,
                              num_layers=num_layers,
                              bidirectional=bidirectional,
                             )
        
        self.fcnn = nn.Linear(in_features=hidden_size * (2 if bidirectional else 1), out_features=2)
        
    def forward(self, sentences):
        x = sentences.transpose(1, 0)
        
        output, _ = self.bilstm(x)
        output = output[-1, :, :]
        
        output = self.fcnn(output)
        
        return output


In [15]:
classifier = Classifier().to(device)

In [16]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=lr)

In [17]:
min_loss = np.inf

for i in range(num_epochs):
    print(f'---> Epoch {i} <---')
    time.sleep(0.5)
    
    classifier.train()
    loader = tqdm(train_loader, postfix={'Epoch': i})
    train_losses = []
    
    for sentences, targets in loader:
        sentences = sentences.to(device)
        targets = targets.to(device)
        
        optimizer.zero_grad()
        
        outputs = classifier(sentences)
        
        loss = criterion(outputs, targets)
        train_losses.append(loss.item())
        
        loss.backward()
        
        optimizer.step()
        
        
        loader.set_postfix({
            'Epoch': i,
            'Train loss': np.mean(train_losses)
        }, refresh=True)
    
    
    time.sleep(0.5)
    
    classifier.eval()
    loader = tqdm(eval_loader, postfix={'Epoch': i,}, colour='green')
    eval_losses = []
    eval_scores = []
    
    for sentences, targets in loader:
        sentences = sentences.to(device)
        targets = targets.to(device)
        
        outputs = classifier(sentences)
        print(outputs)
        loss = criterion(outputs, targets)
        
        score = (outputs.argmax(dim=1) == targets).detach().cpu().numpy()
        eval_scores.append(score)
        
        eval_losses.append(loss.item())
        
        loader.set_postfix({
            'Epoch': i,
            'Eval loss': np.mean(eval_losses),
            'Eval score': np.concatenate(eval_scores).mean()
        }, refresh=True)
        
    
    eval_loss = np.mean(eval_losses)
    if eval_loss <= min_loss:
        min_loss = eval_loss
        save_model(classifier, model_file)
        loader.write('*** save ***')
        
    time.sleep(0.5)

---> Epoch 0 <---


100%|██████████| 268/268 [00:09<00:00, 29.41it/s, Epoch=0, Train loss=0.616]
 76%|[32m███████▋  [0m| 13/17 [00:00<00:00, 64.38it/s, Epoch=0, Eval loss=0.606, Eval score=0.705]

tensor([[-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.4072,  0.5019],
        [-0.4072,  0.5019],
    

100%|[32m██████████[0m| 17/17 [00:00<00:00, 65.80it/s, Epoch=0, Eval loss=0.619, Eval score=0.693]


tensor([[-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019],
        [-0.4072,  0.5019]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.4072,  0.5019],
        [-0.4072,  0.5019],
    

100%|██████████| 268/268 [00:09<00:00, 29.17it/s, Epoch=1, Train loss=0.609]
 41%|[32m████      [0m| 7/17 [00:00<00:00, 60.42it/s, Epoch=1, Eval loss=0.607, Eval score=0.705]

tensor([[-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.3608,  0.4545],
        [-0.3608,  0.4545],
    

100%|[32m██████████[0m| 17/17 [00:00<00:00, 69.95it/s, Epoch=1, Eval loss=0.618, Eval score=0.693]


tensor([[-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545],
        [-0.3608,  0.4545]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.3608,  0.4545],
        [-0.3608,  0.4545],
    

100%|██████████| 268/268 [00:08<00:00, 30.40it/s, Epoch=2, Train loss=0.609]
 94%|[32m█████████▍[0m| 16/17 [00:00<00:00, 77.26it/s, Epoch=2, Eval loss=0.617, Eval score=0.693]

tensor([[-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3820,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4851],
        [-0.3821,  0.4851],
        [-0.3821,  0.4850],
        [-0.3821,  0.4851],
        [-0.3821,  0.4851],
        [-0.3821,  0.4850],
        [-0.3821,  0.4851],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4851],
        [-0.3821,  0.4851],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4851]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.3821,  0.4850],
        [-0.3821,  0.4850],
    

100%|[32m██████████[0m| 17/17 [00:00<00:00, 77.00it/s, Epoch=2, Eval loss=0.618, Eval score=0.693]


tensor([[-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4851],
        [-0.3821,  0.4850],
        [-0.3821,  0.4851],
        [-0.3821,  0.4850],
        [-0.3821,  0.4850],
        [-0.3821,  0.4851],
        [-0.3821,  0.4851],
        [-0.3821,  0.4851],
        [-0.3821,  0.4851],
        [-0.3821,  0.4851],
        [-0.3821,  0.4851],
        [-0.3821,  0.4851],
        [-0.3821,  0.4851]], device='cuda:0', grad_fn=<AddmmBackward0>)
---> Epoch 3 <---


  3%|▎         | 8/268 [00:00<00:09, 27.45it/s, Epoch=3, Train loss=0.637]


KeyboardInterrupt: 