In [1]:
import os
import sys
import time

from tqdm import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F

from src import utils
from src import bilstm
import src.dataset as dset
import src.pytorch_utils as ptu
import src.chu_liu_edmonds as chu

import warnings
warnings.filterwarnings('ignore')

# Reproducibility: a single seed drives both the NumPy and the PyTorch global RNGs.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Pin the cuDNN flags: autotuner (benchmark) off, deterministic mode on.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Directory name later passed to the checkpoint helper as `models_path`.
models_path = 'models'

cuda


In [2]:
# Training split: built first because the other two splits are constructed from it.
train_dataset = dset.DataSet(
    'data/train.labeled',
    tqdm_bar=True,
)

# Labeled evaluation split; it receives the training DataSet
# (presumably to reuse its vocabularies/indices -- TODO confirm in src/dataset.py).
test_dataset = dset.DataSet(
    'data/test.labeled',
    train_dataset=train_dataset,
    tqdm_bar=True,
)

# Unlabeled competition split, hence tagged=False.
comp_dataset = dset.DataSet(
    'data/comp.unlabeled',
    train_dataset=train_dataset,
    tagged=False,
    tqdm_bar=True,
)

100%|██████████| 125430/125430 [00:17<00:00, 7107.88it/s]
100%|██████████| 25325/25325 [00:03<00:00, 6968.51it/s]
100%|██████████| 24744/24744 [00:03<00:00, 7733.28it/s]


In [3]:
# Run configuration: `version` labels this run for the Checkpoint below and
# `save` is forwarded to it (and to the training call) as the `save` flag.
version = 'V1_final'
save = True

# BiLSTM model; it is handed the training DataSet object at construction time.
# NOTE(review): the meaning of each dimension/dropout argument is defined in
# src/bilstm.py and is not visible from this notebook.
model = bilstm.BiLSTM(train_dataset=train_dataset,
                      word_embed_dim=100,
                      tag_embed_dim=25,
                      hidden_dim=125,
                      num_layers=2,
                      bias=True,
                      mlp1_dim=100,
                      p_dropout=0.1,
                      word_dropout=0.25)

# Checkpoint wraps the model together with its scoring/decision callbacks,
# optimizer class and loss class.
checkpoint = ptu.Checkpoint(models_path=models_path,
                            version=version,
                            model=model,
                            # Score = fraction of positions where prediction equals target.
                            score=lambda y_true, y_pred: (np.array(y_true) == np.array(y_pred)).mean(),
                            loss_decision_func=utils.loss_decision_func,
                            # Prediction = argmax over axis 1 of the flattened scores;
                            # the other three arguments are accepted but unused.
                            out_decision_func=lambda y_pred, flat_y_pred, mask, padding: flat_y_pred.argmax(axis=1),
                            # Reuse the notebook-wide `seed` from the setup cell instead of a
                            # second hard-coded 42, so the two values cannot drift apart.
                            seed=seed,
                            optimizer=torch.optim.Adam,
                            criterion=nn.NLLLoss,
                            save=save,
                            prints=True)

model version: V1_final
Number of parameters 2097001 trainable 2097001


In [6]:
# Training sessions run back to back on the same checkpoint. Each entry holds
# the per-session keyword arguments that are unpacked into `checkpoint.train`.
# The logged epoch counter continues across sessions (5/5, then 6/10, ...).
hyperparam_list = [
    dict(train_epochs=5, batch_size=16, optimizer_params=dict(lr=1e-3)),
    dict(train_epochs=5, batch_size=64, optimizer_params=dict(lr=4e-4)),
]

for session in hyperparam_list:
    # Arguments shared by every session come first; the session-specific ones
    # (epochs, batch size, optimizer settings) are supplied by `**session`.
    checkpoint.train(
        device=device,
        train_dataset=train_dataset.dataset,
        val_dataset=test_dataset.dataset,
        prints=True,
        epochs_save=5,
        save=save,
        **session
    )

epoch   1/  5 | train_loss 1.09143 | val_loss 1.10195 | train_score 0.67990 | val_score 0.67782 | train_time   0.60 min
epoch   2/  5 | train_loss 0.71656 | val_loss 0.74116 | train_score 0.78503 | val_score 0.77821 | train_time   1.20 min
epoch   3/  5 | train_loss 0.57078 | val_loss 0.62577 | train_score 0.83035 | val_score 0.81369 | train_time   1.81 min
epoch   4/  5 | train_loss 0.48826 | val_loss 0.57920 | train_score 0.85198 | val_score 0.82590 | train_time   2.42 min
epoch   5/  5 | train_loss 0.43209 | val_loss 0.55634 | train_score 0.86679 | val_score 0.83404 | train_time   3.02 min
epoch   6/ 10 | train_loss 0.40247 | val_loss 0.53906 | train_score 0.87791 | val_score 0.83984 | train_time   3.58 min
epoch   7/ 10 | train_loss 0.39269 | val_loss 0.53840 | train_score 0.88097 | val_score 0.84070 | train_time   4.16 min
epoch   8/ 10 | train_loss 0.38523 | val_loss 0.54106 | train_score 0.88256 | val_score 0.84099 | train_time   4.73 min
epoch   9/ 10 | train_loss 0.37867 | val