## Import dependencies

In [1]:
import os
import random
import time
import warnings
from argparse import Namespace
from shutil import (
    copyfile,
)

import numpy as np
import torch

from data.loader import DataLoader
from model.trainer import GCNTrainer
from utils import (
    scorer,
    constant,
)
from utils.vocab import Vocab

# Suppress every warning category for the rest of the session so the training
# log below stays readable. NOTE: this also hides deprecation and numerical
# warnings -- comment it out while debugging.
warnings.simplefilter("ignore")




## Set arguments (hard-coded here instead of parsed from the command line)

In [2]:
# Every run setting lives in a single Namespace. Most keys are not read in this
# notebook directly; they travel inside `opt` to DataLoader and GCNTrainer,
# which define their meaning.
args = Namespace(
    # -- locations of the dataset splits and of vocab.pkl / embedding.npy
    data_dir='dataset/definition/textbook',
    vocab_dir='dataset/definition/textbook/vocab',

    # -- dimensions and dropout rates
    emb_dim=300,
    ner_dim=30,
    pos_dim=30,
    hidden_dim=200,
    num_layers=2,
    input_dropout=0.5,
    gcn_dropout=0.5,
    word_dropout=0.04,

    # -- data handling
    topn=1e10,
    lower=False,
    ratio=1,
    only_label=0,

    # -- loss terms
    sent_loss=100.0,
    dep_path_loss=100.0,
    consistency_loss=1.0,

    # -- graph / pooling / MLP settings
    prune_k=-1,
    conv_l2=0,
    pooling='max',
    pooling_l2=0.003,
    mlp_layers=2,
    no_adj=False,

    # -- recurrent layer settings
    rnn=True,
    rnn_hidden=200,
    rnn_layers=1,
    rnn_dropout=0.5,

    # -- optimisation; `lr_decay` and `decay_epoch` are not read by the
    #    training loop shown in this notebook
    lr=0.0003,
    lr_decay=0.9,
    decay_epoch=5,
    optim='adamax',
    num_epoch=10,
    batch_size=50,
    max_grad_norm=5.0,

    # -- logging and checkpointing; checkpoints go to <save_dir>/<id>
    log_step=20,
    log='logs.txt',
    save_dir='./saved_models',
    id='0507',
    info='',

    # -- reproducibility and device selection
    seed=0,
    cuda=torch.cuda.is_available(),
    cpu=not torch.cuda.is_available(),

    # -- resuming from an existing model
    load=False,
    model_file=None,
)

# vars() hands back the Namespace's own attribute dict, so `opt` and `args`
# share state: a key written to `opt` is also visible as an attribute of `args`.
opt = vars(args)

# The number of classes follows from the project's label map.
opt.update(num_class=len(constant.LABEL_TO_ID))




## Set random seed

In [3]:
# Seed every random number generator the run draws from (Python's `random`,
# NumPy's legacy global generator, and PyTorch) with the same configured value
# so that a Restart & Run All reproduces the same run.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

if torch.cuda.is_available():
    # Seed all visible GPUs rather than only the current device, so the run
    # stays reproducible regardless of which device ends up being used.
    torch.cuda.manual_seed_all(args.seed)



## Load vocab

In [4]:
# Vocabulary: the set of unique words the dataset contains, read from vocab.pkl.
vocab_path = os.path.join(opt['vocab_dir'], 'vocab.pkl')
vocab = Vocab(vocab_path)
opt['vocab_size'] = vocab.size

# Word embeddings: one vector per vocabulary word, stored next to the
# vocabulary as a NumPy array; the second axis is the embedding size
# reported in the message below.
embedding_path = os.path.join(opt['vocab_dir'], 'embedding.npy')
emb_matrix = np.load(embedding_path)

print("Loaded vocab with {} words and {} dims.".format(vocab.size, emb_matrix.shape[1]))

Loaded vocab with 26106 words and 300 dims.


## Load data

In [5]:
print("Loading data from {} with batch size {}...".format(opt['data_dir'], opt['batch_size']))

# Both splits share the same options and vocabulary; only the dev loader is
# built in evaluation mode.
train_batch = DataLoader(
    os.path.join(opt['data_dir'], 'train.json'),
    opt,
    vocab,
    evaluation=False,
)
dev_batch = DataLoader(
    os.path.join(opt['data_dir'], 'dev.json'),
    opt,
    vocab,
    evaluation=True,
)

# Checkpoints for this run go to <save_dir>/<id>; the path is also recorded in
# `opt` and the directory is created up front if it does not exist yet.
model_save_dir = os.path.join(opt['save_dir'], opt['id'])
opt['model_save_dir'] = model_save_dir
os.makedirs(model_save_dir, exist_ok=True)

Loading data from dataset/definition/textbook with batch size 50...
354 batches created for dataset/definition/textbook/train.json
45 batches created for dataset/definition/textbook/dev.json


## Train model

In [6]:
# Build the trainer from the option dict and the pre-loaded embedding matrix.
# The "Finetune all embeddings." line in this cell's output is printed while
# the trainer is being constructed.
trainer = GCNTrainer(opt, emb_matrix=emb_matrix)

Finetune all embeddings.


In [7]:
# Reverse of the project's label map: numeric id -> label string.
id2label = {label_id: label for label, label_id in constant.LABEL_TO_ID.items()}

# One dev score is appended per finished epoch; used to detect a new best model.
dev_score_history = []

# Learning rate shown in the progress log; starts at the configured value.
current_lr = opt['lr']

In [8]:
# Bookkeeping for the progress log: wall-clock start, number of optimisation
# steps taken so far, and the total planned (batches per epoch x epochs).
global_start_time = time.time()
global_step = 0
max_steps = opt['num_epoch'] * len(train_batch)

In [9]:
# Training loop. For every epoch:
#   1. run one optimisation step per training batch and log progress every
#      `log_step` steps,
#   2. score the model on the dev set,
#   3. save a checkpoint, and copy it to best_model.pt when the dev F1 beats
#      every earlier epoch.
# `current_lr` is only reported here; this loop never changes it.
for epoch in range(1, opt['num_epoch'] + 1):
    train_loss = 0
    train_sent_loss = 0
    train_dep_path_loss = 0
    for batch in train_batch:
        start_time = time.time()
        global_step += 1
        loss, sent_loss, dep_path_loss = trainer.update(batch)
        train_loss += loss
        train_sent_loss += sent_loss
        train_dep_path_loss += dep_path_loss
        if global_step % opt['log_step'] == 0:
            # time spent on this single batch, not an average
            duration = time.time() - start_time
            print(
                f"{time.strftime('%H:%M:%S', time.localtime())}: step {global_step}/{max_steps} "
                f"(epoch {epoch}/{opt['num_epoch']}), loss = {loss:.6f}, "
                f"sent_loss = {sent_loss:.6f}, dep_path_loss = {dep_path_loss:.6f} "
                f"({duration:.3f} sec/batch), lr: {current_lr:.6f}"
            )

    # eval on dev
    print("Evaluating on dev set...")
    predictions = []
    dev_loss = 0
    for batch in dev_batch:
        preds, _, loss, _ = trainer.predict(batch)
        predictions += preds
        dev_loss += loss

    # Flatten the per-sequence predictions into one single-label list per item
    # and map ids back to label strings.
    # NOTE(review): the `+ 1` offset presumably skips a reserved id 0 in
    # LABEL_TO_ID -- confirm against utils.constant.
    predictions = [[id2label[l + 1]] for p in predictions for l in p]

    # Summed loss divided by the number of batches, then multiplied by the
    # batch size (len(...) here is the batch count, see max_steps).
    # NOTE(review): this is the mean per-batch loss scaled by batch_size, not a
    # plain per-batch average -- confirm the scaling is intended.
    train_loss = train_loss / len(train_batch) * opt['batch_size']
    train_sent_loss = train_sent_loss / len(train_batch) * opt['batch_size']
    train_dep_path_loss = train_dep_path_loss / len(train_batch) * opt['batch_size']
    dev_loss = dev_loss / len(dev_batch) * opt['batch_size']

    dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions, method='macro')
    print(
        f"epoch {epoch}: train_loss = {train_loss:.6f}, "
        f"train_sent_loss = {train_sent_loss:.6f}, "
        f"train_dep_path_loss = {train_dep_path_loss:.6f}, "
        f"dev_loss = {dev_loss:.6f}, dev_f1 = {dev_f1:.4f}"
    )
    dev_score = dev_f1

    # save a checkpoint for every epoch
    model_file = os.path.join(model_save_dir, f'checkpoint_epoch_{epoch}.pt')
    trainer.save(model_file, epoch)

    # keep a copy of the best checkpoint so far (the first epoch always counts)
    if epoch == 1 or dev_score > max(dev_score_history):
        copyfile(model_file, os.path.join(model_save_dir, 'best_model.pt'))

        print(f"new best model saved at epoch {epoch}: {dev_p * 100:.2f}\t{dev_r * 100:.2f}\t{dev_score * 100:.2f}")

    dev_score_history += [dev_score]

# Printed once, after the last epoch. This used to sit inside the loop and was
# therefore emitted after every epoch. Reporting the configured epoch count
# also avoids an unbound `epoch` when num_epoch is 0.
print("Training ended with {} epochs.".format(opt['num_epoch']))

10:42:11: step 20/3540 (epoch 1/10), loss = 912.394592, sent_loss = 0.605355, dep_path_loss = 7.904532 (0.314 sec/batch), lr: 0.000300
10:42:16: step 40/3540 (epoch 1/10), loss = 780.796692, sent_loss = 0.650625, dep_path_loss = 8.824707 (0.284 sec/batch), lr: 0.000300
10:42:22: step 60/3540 (epoch 1/10), loss = 835.857971, sent_loss = 0.638335, dep_path_loss = 7.909427 (0.279 sec/batch), lr: 0.000300
10:42:30: step 80/3540 (epoch 1/10), loss = 571.141479, sent_loss = 0.607847, dep_path_loss = 5.441769 (0.227 sec/batch), lr: 0.000300
10:42:37: step 100/3540 (epoch 1/10), loss = 756.993103, sent_loss = 0.586071, dep_path_loss = 4.268195 (0.345 sec/batch), lr: 0.000300
10:42:43: step 120/3540 (epoch 1/10), loss = 810.769470, sent_loss = 0.441815, dep_path_loss = 7.167094 (0.320 sec/batch), lr: 0.000300
10:42:49: step 140/3540 (epoch 1/10), loss = 594.306335, sent_loss = 0.481428, dep_path_loss = 4.250006 (0.270 sec/batch), lr: 0.000300
10:42:55: step 160/3540 (epoch 1/10), loss = 503.317