In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import numpy as np
from nltk.corpus import treebank
from tqdm import tqdm
from sklearn.preprocessing import LabelBinarizer, OrdinalEncoder
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import nltk

nltk.download('treebank')
nltk.download('universal_tagset')
device = "cuda" if torch.cuda.is_available() else "cpu"

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package treebank to
[nltk_data]     C:\Users\Pedram\AppData\Roaming\nltk_data...
[nltk_data]   Package treebank is already up-to-date!
[nltk_data] Downloading package universal_tagset to
[nltk_data]     C:\Users\Pedram\AppData\Roaming\nltk_data...
[nltk_data]   Package universal_tagset is already up-to-date!


In [2]:
all_sents = []
for file in tqdm(treebank.fileids()):
    all_sents.extend(treebank.tagged_sents(file, tagset='universal'))
tags = set()
words = set()
np.random.seed(42)
train_data, data = train_test_split(all_sents, train_size=0.8, shuffle=True)
test_data, valid_data = train_test_split(data, train_size=0.75, shuffle=False)
for sent in train_data:
    for (word, tag) in sent:
        tags.add(tag)
        words.add(word.lower())
tags = list(tags)
words = list(words)
output_encoder = LabelBinarizer()
output_encoder.fit(tags)
input_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1, dtype=int)
input_encoder.fit(np.array(words).reshape(-1, 1))

100%|██████████| 199/199 [00:06<00:00, 32.27it/s]


OrdinalEncoder(dtype=<class 'int'>, handle_unknown='use_encoded_value',
               unknown_value=-1)

In [3]:
class TreebankDataset(Dataset):
    def __init__(self, sents, input_encoder, output_encoder, device):
        self.sents = sents
        self.input_encoder = input_encoder
        self.output_encoder = output_encoder
        self.device = device
    
    def __len__(self,):
        return len(self.sents)
    
    def __getitem__(self, idx):
        X , y = [], []
        for (word, tag) in self.sents[idx]:
            X.append(word.lower())
            y.append(tag)
        X = self.input_encoder.transform(np.array(X).reshape(-1, 1)).reshape(-1) + 1
        y = self.output_encoder.transform(y)
        return torch.from_numpy(X).to(self.device), torch.from_numpy(y).to(self.device)

In [28]:
class RNNTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(RNNTagger, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim)
        self.hidden_to_tag = nn.Linear(hidden_dim, tagset_size)
    
    def forward(self, sent):
        embedings = self.embedding(sent)
        rnn_out, _ = self.rnn(embedings.view(len(sent), 1, -1))
        tag_space = self.hidden_to_tag(rnn_out.view(len(sent), -1))
        tag_scores = F.softmax(tag_space, dim=1)
        return tag_scores

class LSTMTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden_to_tag = nn.Linear(hidden_dim, tagset_size)
    
    def forward(self, sent):
        embedings = self.embedding(sent)
        lstm_out, _ = self.lstm(embedings.view(len(sent), 1, -1))
        tag_space = self.hidden_to_tag(lstm_out.view(len(sent), -1))
        tag_scores = F.softmax(tag_space, dim=1)
        return tag_scores

class GRUTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(GRUTagger, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.gru = nn.GRU(embedding_dim, hidden_dim)
        self.hidden_to_tag = nn.Linear(hidden_dim, tagset_size)
    
    def forward(self, sent):
        embedings = self.embedding(sent)
        gru_out, _ = self.gru(embedings.view(len(sent), 1, -1))
        tag_space = self.hidden_to_tag(gru_out.view(len(sent), -1))
        tag_scores = F.softmax(tag_space, dim=1)
        return tag_scores



In [22]:
import torch.optim as optim

train_dataset = TreebankDataset(train_data, input_encoder, output_encoder, device)
test_dataset = TreebankDataset(test_data, input_encoder, output_encoder, device)
valid_dataset = TreebankDataset(valid_data, input_encoder, output_encoder, device)

def train(model, loss_function, optimizer, n_epochs, train_dataset, valid_dataset, test_dataset):
    patience = 2
    best_acc = 0.0
    history = []
    for epoch in range(n_epochs):
        epoch_info = {}
        model.train()
        correct, total, epoch_loss = 0, 0, 0.0
        for (X_train, y_train) in tqdm(train_dataset, desc=f"Training epoch {epoch + 1}/{n_epochs}"):
            model.zero_grad()
            tag_scores = model(X_train)
            y_preds = torch.argmax(tag_scores, dim=1)
            y_trues = torch.argmax(y_train, dim=1)
            total += y_trues.size()[0]
            correct += (y_preds == y_trues).sum().item()
            loss = loss_function(tag_scores, torch.argmax(y_train, dim=1))
            epoch_loss += loss
            loss.backward()
            optimizer.step()
        epoch_info = {'train_acc': float(correct) / total, 'train_loss': epoch_loss / total}
        model.eval()
        correct, total, epoch_loss = 0, 0, 0.0
        for (X_val, y_val) in tqdm(valid_dataset, desc="Scores on validation data"):
            tag_scores = model(X_val)
            y_preds = torch.argmax(tag_scores, dim=1)
            y_trues = torch.argmax(y_val, dim=1)
            total += y_trues.size()[0]
            correct += (y_preds == y_trues).sum().item()
            loss = loss_function(tag_scores, torch.argmax(y_val, dim=1))
            epoch_loss += loss
        epoch_info['val_acc'], epoch_info['val_loss'] = float(correct) / total, epoch_loss / total
        print("val Accuracy: ", epoch_info['val_acc'])
        history.append(epoch_info)
        if epoch_info['val_acc'] > best_acc:
            best_acc = epoch_info['val_acc']
        else:
            patience -= 1
            if patience == 0:
                print(f"Early stopping on epoch {epoch + 1}")
                break
    model.eval()
    correct, total, epoch_loss = 0, 0, 0.0
    for (X_test, y_test) in tqdm(test_dataset, desc="Scores on test data"):
        tag_scores = model(X_test)
        y_preds = torch.argmax(tag_scores, dim=1)
        y_trues = torch.argmax(y_test, dim=1)
        total += y_trues.size()[0]
        correct += (y_preds == y_trues).sum().item()
        loss = loss_function(tag_scores, torch.argmax(y_test, dim=1))
        epoch_loss += loss
    test_acc, test_loss = float(correct) / total, epoch_loss / total
    print("test Accuracy: ", test_acc)
    return history, (test_acc, test_loss)    

RNN Model - Test #1 : hidden dim = 64

In [17]:
model = RNNTagger(256, 64, input_encoder.categories_[0].shape[0] + 1, len(output_encoder.classes_))
model.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
n_epochs = 10
train(model, loss_function, optimizer, n_epochs, train_dataset, valid_dataset, test_dataset)
pass

Training epoch 1/10: 100%|██████████| 3131/3131 [01:35<00:00, 32.62it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.43it/s]


val Accuracy:  0.8255633255633256


Training epoch 2/10: 100%|██████████| 3131/3131 [01:42<00:00, 30.59it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 31.74it/s]


val Accuracy:  0.8638306138306139


Training epoch 3/10: 100%|██████████| 3131/3131 [01:44<00:00, 29.99it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.85it/s]


val Accuracy:  0.87995337995338


Training epoch 4/10: 100%|██████████| 3131/3131 [01:43<00:00, 30.27it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.99it/s]


val Accuracy:  0.8918026418026418


Training epoch 5/10: 100%|██████████| 3131/3131 [01:43<00:00, 30.35it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.87it/s]


val Accuracy:  0.8982128982128982


Training epoch 6/10: 100%|██████████| 3131/3131 [01:43<00:00, 30.27it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.06it/s]


val Accuracy:  0.9065656565656566


Training epoch 7/10: 100%|██████████| 3131/3131 [01:43<00:00, 30.26it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.08it/s]


val Accuracy:  0.89996114996115


Training epoch 8/10: 100%|██████████| 3131/3131 [01:43<00:00, 30.34it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 34.40it/s]


val Accuracy:  0.9024864024864024
Early stopping on epoch 7


Scores on test data: 100%|██████████| 587/587 [00:18<00:00, 31.14it/s]

test Accuracy:  0.9090968118953314





RNN Model - Test #2 : hidden dim = 128

In [18]:
model = RNNTagger(256, 128, input_encoder.categories_[0].shape[0] + 1, len(output_encoder.classes_))
model.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
n_epochs = 10
train(model, loss_function, optimizer, n_epochs, train_dataset, valid_dataset, test_dataset)
pass

Training epoch 1/10: 100%|██████████| 3131/3131 [01:37<00:00, 32.26it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 34.76it/s]


val Accuracy:  0.822066822066822


Training epoch 2/10: 100%|██████████| 3131/3131 [01:39<00:00, 31.54it/s]
Scores on validation data: 100%|██████████| 196/196 [00:07<00:00, 25.73it/s]


val Accuracy:  0.8591686091686092


Training epoch 3/10: 100%|██████████| 3131/3131 [01:38<00:00, 31.78it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 35.19it/s]


val Accuracy:  0.878982128982129


Training epoch 4/10: 100%|██████████| 3131/3131 [01:38<00:00, 31.80it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 31.72it/s]


val Accuracy:  0.8886946386946387


Training epoch 5/10: 100%|██████████| 3131/3131 [01:40<00:00, 31.03it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 29.72it/s]


val Accuracy:  0.9011266511266511


Training epoch 6/10: 100%|██████████| 3131/3131 [01:37<00:00, 32.25it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.18it/s]


val Accuracy:  0.9055944055944056


Training epoch 7/10: 100%|██████████| 3131/3131 [01:39<00:00, 31.31it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 35.57it/s]


val Accuracy:  0.9098679098679099


Training epoch 8/10: 100%|██████████| 3131/3131 [01:36<00:00, 32.46it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 37.46it/s]


val Accuracy:  0.912004662004662


Training epoch 9/10: 100%|██████████| 3131/3131 [01:33<00:00, 33.51it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 37.19it/s]


val Accuracy:  0.9137529137529138


Training epoch 10/10: 100%|██████████| 3131/3131 [01:43<00:00, 30.33it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 28.04it/s]


val Accuracy:  0.9125874125874126


Scores on test data: 100%|██████████| 587/587 [00:21<00:00, 27.67it/s]


test Accuracy:  0.9177975456139211


RNN Model - Test #3 : hidden dim = 256

In [19]:
model = RNNTagger(256, 256, input_encoder.categories_[0].shape[0] + 1, len(output_encoder.classes_))
model.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
n_epochs = 10
train(model, loss_function, optimizer, n_epochs, train_dataset, valid_dataset, test_dataset)
pass

Training epoch 1/10: 100%|██████████| 3131/3131 [01:40<00:00, 31.10it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.64it/s]


val Accuracy:  0.8162393162393162


Training epoch 2/10: 100%|██████████| 3131/3131 [01:37<00:00, 32.06it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 32.84it/s]


val Accuracy:  0.8607226107226107


Training epoch 3/10: 100%|██████████| 3131/3131 [01:43<00:00, 30.25it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 31.91it/s]


val Accuracy:  0.8714063714063714


Training epoch 4/10: 100%|██████████| 3131/3131 [01:43<00:00, 30.13it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.64it/s]


val Accuracy:  0.8820901320901321


Training epoch 5/10: 100%|██████████| 3131/3131 [01:45<00:00, 29.82it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.51it/s]


val Accuracy:  0.8898601398601399


Training epoch 6/10: 100%|██████████| 3131/3131 [01:45<00:00, 29.75it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 30.02it/s]


val Accuracy:  0.8958818958818959


Training epoch 7/10: 100%|██████████| 3131/3131 [01:44<00:00, 29.89it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 31.66it/s]


val Accuracy:  0.896076146076146


Training epoch 8/10: 100%|██████████| 3131/3131 [01:46<00:00, 29.52it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.29it/s]


val Accuracy:  0.8986013986013986


Training epoch 9/10: 100%|██████████| 3131/3131 [01:45<00:00, 29.66it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 31.81it/s]


val Accuracy:  0.903069153069153


Training epoch 10/10: 100%|██████████| 3131/3131 [01:44<00:00, 30.10it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 29.89it/s]


val Accuracy:  0.9057886557886557


Scores on test data: 100%|██████████| 587/587 [00:19<00:00, 29.48it/s]


test Accuracy:  0.9100707746250244


LSTM Model - Test #1 : hidden dim = 64

In [23]:
model = LSTMTagger(256, 64, input_encoder.categories_[0].shape[0] + 1, len(output_encoder.classes_))
model.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
n_epochs = 10
train(model, loss_function, optimizer, n_epochs, train_dataset, valid_dataset, test_dataset)
pass

Training epoch 1/10: 100%|██████████| 3131/3131 [01:52<00:00, 27.82it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.29it/s]


val Accuracy:  0.8346930846930847


Training epoch 2/10: 100%|██████████| 3131/3131 [01:44<00:00, 29.83it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.34it/s]


val Accuracy:  0.8813131313131313


Training epoch 3/10: 100%|██████████| 3131/3131 [01:45<00:00, 29.54it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.09it/s]


val Accuracy:  0.9048174048174048


Training epoch 4/10: 100%|██████████| 3131/3131 [01:42<00:00, 30.62it/s]
Scores on validation data: 100%|██████████| 196/196 [00:08<00:00, 21.85it/s]


val Accuracy:  0.9145299145299145


Training epoch 5/10: 100%|██████████| 3131/3131 [01:52<00:00, 27.91it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 28.93it/s]


val Accuracy:  0.9197746697746698


Training epoch 6/10: 100%|██████████| 3131/3131 [01:48<00:00, 28.78it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.64it/s]


val Accuracy:  0.9153069153069153


Training epoch 7/10: 100%|██████████| 3131/3131 [01:53<00:00, 27.50it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 30.96it/s]


val Accuracy:  0.925019425019425


Training epoch 8/10: 100%|██████████| 3131/3131 [01:45<00:00, 29.62it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.58it/s]


val Accuracy:  0.9172494172494172
Early stopping on epoch 8


Scores on test data: 100%|██████████| 587/587 [00:19<00:00, 29.49it/s]


test Accuracy:  0.9212388805921693


LSTM Model - Test #2 : hidden dim = 128

In [24]:
model = LSTMTagger(256, 128, input_encoder.categories_[0].shape[0] + 1, len(output_encoder.classes_))
model.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
n_epochs = 10
train(model, loss_function, optimizer, n_epochs, train_dataset, valid_dataset, test_dataset)
pass

Training epoch 1/10: 100%|██████████| 3131/3131 [01:39<00:00, 31.33it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 30.31it/s]


val Accuracy:  0.8298368298368298


Training epoch 2/10: 100%|██████████| 3131/3131 [01:40<00:00, 31.22it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 32.91it/s]


val Accuracy:  0.8714063714063714


Training epoch 3/10: 100%|██████████| 3131/3131 [01:55<00:00, 27.16it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 30.77it/s]


val Accuracy:  0.9011266511266511


Training epoch 4/10: 100%|██████████| 3131/3131 [01:48<00:00, 28.84it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 35.05it/s]


val Accuracy:  0.9048174048174048


Training epoch 5/10: 100%|██████████| 3131/3131 [01:50<00:00, 28.44it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.11it/s]


val Accuracy:  0.9143356643356644


Training epoch 6/10: 100%|██████████| 3131/3131 [01:58<00:00, 26.50it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 32.70it/s]


val Accuracy:  0.9193861693861693


Training epoch 7/10: 100%|██████████| 3131/3131 [01:50<00:00, 28.41it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.33it/s]


val Accuracy:  0.9224941724941725


Training epoch 8/10: 100%|██████████| 3131/3131 [01:52<00:00, 27.73it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.59it/s]


val Accuracy:  0.9269619269619269


Training epoch 9/10: 100%|██████████| 3131/3131 [01:51<00:00, 28.10it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 32.91it/s]


val Accuracy:  0.9298756798756799


Training epoch 10/10: 100%|██████████| 3131/3131 [01:46<00:00, 29.51it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.16it/s]


val Accuracy:  0.932983682983683


Scores on test data: 100%|██████████| 587/587 [00:19<00:00, 30.87it/s]


test Accuracy:  0.9381858320888254


LSTM Model - Test #3 : hidden dim = 256

In [25]:
model = LSTMTagger(256, 256, input_encoder.categories_[0].shape[0] + 1, len(output_encoder.classes_))
model.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
n_epochs = 10
train(model, loss_function, optimizer, n_epochs, train_dataset, valid_dataset, test_dataset)
pass

Training epoch 1/10: 100%|██████████| 3131/3131 [01:45<00:00, 29.80it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 29.45it/s]


val Accuracy:  0.8484848484848485


Training epoch 2/10: 100%|██████████| 3131/3131 [01:43<00:00, 30.27it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 34.56it/s]


val Accuracy:  0.8916083916083916


Training epoch 3/10: 100%|██████████| 3131/3131 [01:48<00:00, 28.91it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 30.79it/s]


val Accuracy:  0.91006216006216


Training epoch 4/10: 100%|██████████| 3131/3131 [01:49<00:00, 28.70it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 31.41it/s]


val Accuracy:  0.9203574203574204


Training epoch 5/10: 100%|██████████| 3131/3131 [01:50<00:00, 28.28it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 29.91it/s]


val Accuracy:  0.9257964257964258


Training epoch 6/10: 100%|██████████| 3131/3131 [01:48<00:00, 28.98it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 28.21it/s]


val Accuracy:  0.9285159285159286


Training epoch 7/10: 100%|██████████| 3131/3131 [01:48<00:00, 28.91it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 31.64it/s]


val Accuracy:  0.9327894327894328


Training epoch 8/10: 100%|██████████| 3131/3131 [01:49<00:00, 28.60it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 30.85it/s]


val Accuracy:  0.9353146853146853


Training epoch 9/10: 100%|██████████| 3131/3131 [01:47<00:00, 29.05it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 29.45it/s]


val Accuracy:  0.9374514374514374


Training epoch 10/10: 100%|██████████| 3131/3131 [01:46<00:00, 29.39it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.06it/s]


val Accuracy:  0.9362859362859363


Scores on test data: 100%|██████████| 587/587 [00:21<00:00, 26.91it/s]


test Accuracy:  0.9386403480293487


GRU Model - Test #1 : hidden dim = 64

In [29]:
model = GRUTagger(256, 64, input_encoder.categories_[0].shape[0] + 1, len(output_encoder.classes_))
model.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
n_epochs = 10
train(model, loss_function, optimizer, n_epochs, train_dataset, valid_dataset, test_dataset)
pass

Training epoch 1/10: 100%|██████████| 3131/3131 [01:43<00:00, 30.11it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 38.36it/s]


val Accuracy:  0.8222610722610723


Training epoch 2/10: 100%|██████████| 3131/3131 [01:36<00:00, 32.48it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 34.90it/s]


val Accuracy:  0.8512043512043512


Training epoch 3/10: 100%|██████████| 3131/3131 [01:45<00:00, 29.61it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.63it/s]


val Accuracy:  0.8952991452991453


Training epoch 4/10: 100%|██████████| 3131/3131 [02:04<00:00, 25.19it/s]
Scores on validation data: 100%|██████████| 196/196 [00:08<00:00, 22.00it/s]


val Accuracy:  0.8933566433566433


Training epoch 5/10: 100%|██████████| 3131/3131 [02:09<00:00, 24.11it/s]
Scores on validation data: 100%|██████████| 196/196 [00:07<00:00, 27.40it/s]


val Accuracy:  0.8997668997668997


Training epoch 6/10: 100%|██████████| 3131/3131 [01:53<00:00, 27.49it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.62it/s]


val Accuracy:  0.9077311577311578


Training epoch 7/10: 100%|██████████| 3131/3131 [01:43<00:00, 30.21it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.70it/s]


val Accuracy:  0.9067599067599068
Early stopping on epoch 7


Scores on test data: 100%|██████████| 587/587 [00:19<00:00, 30.78it/s]


test Accuracy:  0.9103954288682553


GRU Model - Test #2 : hidden dim = 128

In [30]:
model = GRUTagger(256, 128, input_encoder.categories_[0].shape[0] + 1, len(output_encoder.classes_))
model.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
n_epochs = 10
train(model, loss_function, optimizer, n_epochs, train_dataset, valid_dataset, test_dataset)
pass

Training epoch 1/10: 100%|██████████| 3131/3131 [01:43<00:00, 30.20it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 29.86it/s]


val Accuracy:  0.8360528360528361


Training epoch 2/10: 100%|██████████| 3131/3131 [01:45<00:00, 29.58it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 32.73it/s]


val Accuracy:  0.8756798756798757


Training epoch 3/10: 100%|██████████| 3131/3131 [01:46<00:00, 29.49it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 31.02it/s]


val Accuracy:  0.8871406371406372


Training epoch 4/10: 100%|██████████| 3131/3131 [01:45<00:00, 29.57it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 32.90it/s]


val Accuracy:  0.8995726495726496


Training epoch 5/10: 100%|██████████| 3131/3131 [01:44<00:00, 29.97it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.05it/s]


val Accuracy:  0.9081196581196581


Training epoch 6/10: 100%|██████████| 3131/3131 [01:46<00:00, 29.52it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 29.19it/s]


val Accuracy:  0.9009324009324009


Training epoch 7/10: 100%|██████████| 3131/3131 [01:45<00:00, 29.68it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.23it/s]


val Accuracy:  0.9013209013209014
Early stopping on epoch 7


Scores on test data: 100%|██████████| 587/587 [00:20<00:00, 28.22it/s]


test Accuracy:  0.9079280566197


GRU Model - Test #3 : hidden dim = 256

In [31]:
model = GRUTagger(256, 256, input_encoder.categories_[0].shape[0] + 1, len(output_encoder.classes_))
model.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
n_epochs = 10
train(model, loss_function, optimizer, n_epochs, train_dataset, valid_dataset, test_dataset)
pass

Training epoch 1/10: 100%|██████████| 3131/3131 [01:42<00:00, 30.55it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.18it/s]


val Accuracy:  0.8311965811965812


Training epoch 2/10: 100%|██████████| 3131/3131 [01:44<00:00, 29.83it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.59it/s]


val Accuracy:  0.8412975912975913


Training epoch 3/10: 100%|██████████| 3131/3131 [01:46<00:00, 29.27it/s]
Scores on validation data: 100%|██████████| 196/196 [00:08<00:00, 22.90it/s]


val Accuracy:  0.8673271173271173


Training epoch 4/10: 100%|██████████| 3131/3131 [01:48<00:00, 28.97it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 32.80it/s]


val Accuracy:  0.8760683760683761


Training epoch 5/10: 100%|██████████| 3131/3131 [01:48<00:00, 28.81it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 31.89it/s]


val Accuracy:  0.8879176379176379


Training epoch 6/10: 100%|██████████| 3131/3131 [01:44<00:00, 29.92it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 31.14it/s]


val Accuracy:  0.8933566433566433


Training epoch 7/10: 100%|██████████| 3131/3131 [01:46<00:00, 29.30it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.62it/s]


val Accuracy:  0.8984071484071484


Training epoch 8/10: 100%|██████████| 3131/3131 [01:45<00:00, 29.61it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 31.51it/s]


val Accuracy:  0.9048174048174048


Training epoch 9/10: 100%|██████████| 3131/3131 [01:46<00:00, 29.51it/s]
Scores on validation data: 100%|██████████| 196/196 [00:06<00:00, 32.43it/s]


val Accuracy:  0.9028749028749029


Training epoch 10/10: 100%|██████████| 3131/3131 [01:46<00:00, 29.50it/s]
Scores on validation data: 100%|██████████| 196/196 [00:05<00:00, 33.02it/s]


val Accuracy:  0.9038461538461539
Early stopping on epoch 10


Scores on test data: 100%|██████████| 587/587 [00:20<00:00, 28.97it/s]


test Accuracy:  0.9122134926303487
