In [1]:
import os
import torch
from utils import get_vocabs_dep_parser

In [2]:
data_dir = 'data_new'
path_train = os.path.join(data_dir, 'train.labeled')
path_test = os.path.join(data_dir, 'test.labeled')
# path_train = os.path.join(data_dir, 'train_short.labeled')
# path_test = os.path.join(data_dir, 'test_short.labeled')

# get only train vocabs to know which words are unknown in test
paths_list_train = [path_train]
word_dict_train, pos_dict_train = get_vocabs_dep_parser(paths_list_train)

paths_list_all = [path_train, path_test]
word_dict_all, pos_dict_all = get_vocabs_dep_parser(paths_list_all)

In [3]:
from data_handling import DepDataset
from torch.utils.data.dataloader import DataLoader
print("path_train -", path_train)
print("path_test -", path_test)
BATCH_SIZE = 1

train = DepDataset(word_dict_all, pos_dict_all, data_dir, 'train.labeled', padding=False)
train_dataloader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
test = DepDataset(word_dict_all, pos_dict_all, data_dir, 'test.labeled', padding=False, train_word_dict=word_dict_train)
test_dataloader = DataLoader(test, batch_size=BATCH_SIZE, shuffle=False)

# train = DepDataset(word_dict, pos_dict, data_dir, 'train_short', padding=False)
# train_dataloader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
# test = DepDataset(word_dict, pos_dict, data_dir, 'test_short', padding=False)
# test_dataloader = DataLoader(test, batch_size=BATCH_SIZE, shuffle=False)

path_train - data_new/train.labeled
path_test - data_new/test.labeled
idx_pos_mappings - [0, 1, 2, 3, '#', '$', "''", '(', ')', ',', '.', ':', 'CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD', 'NN', 'NNP', 'NNPS', 'NNS', 'PDT', 'POS', 'PRP', 'PRP$', 'RB', 'RBR', 'RBS', 'RP', 'SYM', 'TO', 'UH', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB', '``']
pos_idx_mappings - {'<pad>': 0, '<unk>': 1, '<root>': 2, '<root_pos>': 3, '#': 4, '$': 5, "''": 6, '(': 7, ')': 8, ',': 9, '.': 10, ':': 11, 'CC': 12, 'CD': 13, 'DT': 14, 'EX': 15, 'FW': 16, 'IN': 17, 'JJ': 18, 'JJR': 19, 'JJS': 20, 'LS': 21, 'MD': 22, 'NN': 23, 'NNP': 24, 'NNPS': 25, 'NNS': 26, 'PDT': 27, 'POS': 28, 'PRP': 29, 'PRP$': 30, 'RB': 31, 'RBR': 32, 'RBS': 33, 'RP': 34, 'SYM': 35, 'TO': 36, 'UH': 37, 'VB': 38, 'VBD': 39, 'VBG': 40, 'VBN': 41, 'VBP': 42, 'VBZ': 43, 'WDT': 44, 'WP': 45, 'WP$': 46, 'WRB': 47, '``': 48}
idx_pos_mappings - [0, 1, 2, 3, '#', '$', "''", '(', ')', ',', '.', ':', 'CC', '

In [4]:
print("Number of Train Sentences ", len(train))
print("Number of Test Sentences ",len(test))

Number of Train Sentences  5000
Number of Test Sentences  1000


In [5]:
#### TRAIN BASIC MODEL
from torch import nn
import torch.optim as optim
from model import DnnSepParser
from trainer import Trainer
from loss import NllLoss, HingeLoss
from utils import IGNORE_IDX
from torch.optim.lr_scheduler import StepLR


WORD_EMBEDDING_DIM = 100
TAG_EMBEDDING_DIM = 25
HIDDEN_FC_DIM = 100
STACK_LSTM_NUM = 2
word_vocab_size = len(train.word_idx_mappings)
tag_vocab_size = len(train.pos_idx_mappings)
max_sentence_len = max(train.max_seq_len, test.max_seq_len)
ACCUMULATE_GRAD_STEPS = 50
NUM_EPOCHS = 30
len_train = len(train)
len_test = len(test)



model = DnnSepParser(WORD_EMBEDDING_DIM, TAG_EMBEDDING_DIM, STACK_LSTM_NUM, word_vocab_size, tag_vocab_size, hidden_fc_dim=100)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

if use_cuda:
    model.cuda()

loss_function = NllLoss()

optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = StepLR(optimizer, step_size=2, gamma=0.1)
trainer = Trainer(model, optimizer, loss_function, device)

trainer.train_dep_parser(NUM_EPOCHS, train_dataloader, test_dataloader, ACCUMULATE_GRAD_STEPS, len_train, len_test, early_stopping=5)


1000
2000
3000
4000
5000
unknown accuracy: 89.80870025150469
Epoch: 1 | Training Loss: 1.2316 | Training accuracy: 62.865% | Test Loss: 0.4446 | Test accuracy: 86.891% | Epoch Time: 42.57 secs
saving model
1000
2000
3000
4000
5000
unknown accuracy: 91.59222696123065
Epoch: 2 | Training Loss: 0.3566 | Training accuracy: 89.407% | Test Loss: 0.3674 | Test accuracy: 88.918% | Epoch Time: 38.04 secs
saving model
1000
2000
3000
4000
5000
unknown accuracy: 91.94726655058759
Epoch: 3 | Training Loss: 0.2354 | Training accuracy: 92.760% | Test Loss: 0.3706 | Test accuracy: 89.217% | Epoch Time: 37.88 secs
1000
2000
3000
4000
5000
unknown accuracy: 90.98199975506249
Epoch: 4 | Training Loss: 0.1782 | Training accuracy: 94.522% | Test Loss: 0.3879 | Test accuracy: 89.116% | Epoch Time: 37.88 secs
1000
2000
3000
4000
5000
unknown accuracy: 92.20225005280356
Epoch: 5 | Training Loss: 0.1382 | Training accuracy: 95.767% | Test Loss: 0.3944 | Test accuracy: 89.637% | Epoch Time: 37.70 secs
1000
2000

In [6]:
#### TRAIN ADVANCED MODEL

WORD_EMBEDDING_DIM = 250
TAG_EMBEDDING_DIM = 25
HIDDEN_FC_DIM = 500
STACK_LSTM_NUM = 2
ACCUMULATE_GRAD_STEPS = 50
NUM_EPOCHS = 30

model = DnnSepParser(WORD_EMBEDDING_DIM, TAG_EMBEDDING_DIM, STACK_LSTM_NUM, word_vocab_size, tag_vocab_size, hidden_fc_dim=HIDDEN_FC_DIM, is_advanced=True)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

if use_cuda:
    model.cuda()

loss_function = NllLoss()

optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = StepLR(optimizer, step_size=2, gamma=0.1)
trainer = Trainer(model, optimizer, loss_function, device, scheduler)

trainer.train_dep_parser(NUM_EPOCHS, train_dataloader, test_dataloader, ACCUMULATE_GRAD_STEPS, len_train, len_test, early_stopping=5)


1000
2000
3000
4000
5000
unknown accuracy: 89.90104435676392
Epoch: 1 | Training Loss: 1.2086 | Training accuracy: 64.291% | Test Loss: 0.4402 | Test accuracy: 86.687% | Epoch Time: 56.76 secs
saving model
1000
2000
3000
4000
5000
unknown accuracy: 91.48669971548202
Epoch: 2 | Training Loss: 0.3432 | Training accuracy: 89.806% | Test Loss: 0.3638 | Test accuracy: 88.960% | Epoch Time: 51.91 secs
saving model
1000
2000
3000
4000
5000
unknown accuracy: 92.9754050787261
Epoch: 3 | Training Loss: 0.1830 | Training accuracy: 94.614% | Test Loss: 0.3242 | Test accuracy: 90.527% | Epoch Time: 51.65 secs
saving model
1000
2000
3000
4000
5000
unknown accuracy: 92.80051880605387
Epoch: 4 | Training Loss: 0.1205 | Training accuracy: 96.436% | Test Loss: 0.3305 | Test accuracy: 90.711% | Epoch Time: 51.69 secs
1000
2000
3000
4000
5000
unknown accuracy: 92.67063536436231
Epoch: 5 | Training Loss: 0.0875 | Training accuracy: 97.646% | Test Loss: 0.3340 | Test accuracy: 90.623% | Epoch Time: 51.49 se