In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from collections import namedtuple
import json
from argparse import Namespace
from neural_srl.shared.measurements import Timer
from neural_srl.shared.tagger_data import TaggerData
from neural_srl.shared import reader
import os
import shutil
import torch
import numpy as np
import torch.nn as nn
from neural_srl.theano.tagger import BiLSTMTaggerModel
from tqdm import tqdm
import sklearn.metrics
from datetime import datetime

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

GPU is available


In [4]:
Argv = namedtuple("Argv", "config dev gold labels model task train vocab")

In [5]:
args = Argv(config='../config/srl_small_config.json', dev='../data/srl/conll2012.devel.txt', gold='../data/srl/conll2012.devel.props.gold.txt', labels='', model='conll2012_small_model', task='srl', train='../data/srl/conll2012.train.txt', vocab='')

In [6]:
args.config

'../config/srl_small_config.json'

In [7]:
def get_config(config_filepath):
  with open(config_filepath, 'r') as config_file: 
    conf = json.load(config_file, object_hook=lambda d: Namespace(**d))
  return conf

In [8]:
config = get_config(args.config)

In [9]:
i = 0
global_step = 0
epoch = 0
train_loss = 0.0

In [10]:
with Timer('Data loading'):
    vocab_path = args.vocab if args.vocab != '' else None
    label_path = args.labels if args.labels != '' else None
    gold_props_path = args.gold if args.gold != '' else None
    
    print ('Task: {}'.format(args.task))
    
    data = TaggerData(config,
                        *reader.get_srl_data(config, args.train, args.dev, vocab_path, label_path))
    
    batched_dev_data = data.get_development_data(batch_size=config.dev_batch_size)
    print ('Dev data has {} batches.'.format(len(batched_dev_data)))

Task: srl
(['In', 'recent', 'years', ',', 'advanced', 'education', 'for', 'professionals', 'has', 'become', 'a', 'hot', 'topic', 'in', 'the', 'business', 'community', '.'], 5, ['O', 'O', 'O', 'O', 'B-ARGM-ADJ', 'B-V', 'B-ARG1', 'I-ARG1', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'])
(['President', 'Chen', 'Travels', 'Abroad'], 2, ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARGM-ADV'])


IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Extracting features
Extraced 43852 words and 129 tags
Max training sentence length: 210
Max development sentence length: 275
Dev data has 552 batches.
Data loading duration was 0:00:21.


In [11]:
data.train_sents[0][0]

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 14, 15, 16, 17]

In [12]:
" ".join([data.word_dict.idx2str[i] for i in data.train_sents[0][0]])

'in recent years , advanced education for professionals has become a hot topic in the business community .'

In [13]:
data.train_sents[0][1]

[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [14]:
data.train_sents[0][2]

[0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [15]:
" ".join([data.label_dict.idx2str[i] for i in data.train_sents[12][2]])

'O O O O O O O O O O O O O O O O O O O O O O O B-ARG0 O O O B-V B-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 O'

In [16]:
data.label_dict.idx2str[:3]

['O', 'B-ARGM-ADJ', 'B-V']

In [17]:
len(data.train_tensors)

252315

In [18]:
data.train_tensors[0]

(array([[ 1,  0],
        [ 2,  0],
        [ 3,  0],
        [ 4,  0],
        [ 5,  0],
        [ 6,  1],
        [ 7,  0],
        [ 8,  0],
        [ 9,  0],
        [10,  0],
        [11,  0],
        [12,  0],
        [13,  0],
        [ 1,  0],
        [14,  0],
        [15,  0],
        [16,  0],
        [17,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 0,  0],
        [ 

In [19]:
with Timer('Preparation'):
    if not os.path.isdir(args.model):
      print ('Directory {} does not exist. Creating new.'.format(args.model))
      os.makedirs(args.model)
    else:
      if len(os.listdir(args.model)) > 0:
        print ('[WARNING] Log directory {} is not empty, previous checkpoints might be overwritten'
             .format(args.model))
    shutil.copyfile(args.config, os.path.join(args.model, 'config'))
    # Save word and label dict to model directory.
    data.word_dict.save(os.path.join(args.model, 'word_dict'))
    data.label_dict.save(os.path.join(args.model, 'label_dict'))
    writer = open(os.path.join(args.model, 'checkpoints.tsv'), 'w')
    writer.write('step\tdatetime\tdev_loss\tdev_accuracy\tbest_dev_accuracy\n')

Preparation duration was 0:00:00.


In [20]:
args.model

'conll2012_small_model'

In [21]:
train_data = data.get_training_data(include_last_batch=True)

Extracted 252315 samples and 1972 batches.


In [22]:
np.count_nonzero(train_data[0][3][0])

12

In [23]:
np.count_nonzero(train_data[0][0][0])

13

In [24]:
" ".join([data.label_dict.idx2str[i] for i in train_data[312][1][0]])

'B-ARGM-DIS B-ARGM-TMP O O B-V B-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 I-ARG1 O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O'

In [25]:
with Timer('Building model'):
    model = BiLSTMTaggerModel(data, config=config)  
    for param in model.params:
      print(param, param.name, param.shape.eval())
    loss_function = model.get_loss_function()
    eval_function = model.get_eval_function()

Using 2 feature types, projected output dim=200.
lstm_0_rdrop 0.1 True
<neural_srl.theano.layer.HighwayLSTMLayer object at 0x7f3a1316fa60>
lstm_1_rdrop 0.1 True
<neural_srl.theano.layer.HighwayLSTMLayer object at 0x7f3a1316e9e0>
embedding_0 embedding_0 [43852   100]
embedding_1 embedding_1 [  2 100]
lstm_0_W lstm_0_W [ 200 1800]
lstm_0_U lstm_0_U [ 300 1500]
lstm_0_b lstm_0_b [1800]
lstm_1_W lstm_1_W [ 300 1800]
lstm_1_U lstm_1_U [ 300 1500]
lstm_1_b lstm_1_b [1800]
softmax_W softmax_W [300 129]
softmax_b softmax_b [129]




Building model duration was 0:00:12.




In [26]:
for batched_tensor in train_data:
    print("Start")
    x, y, _, weights = batched_tensor
    loss, *inputs = loss_function(x, weights, y)
    break

Start


In [27]:
inputs[0].shape

(100, 128, 200)

In [28]:
loss

array(120.36957783)

In [29]:
x[0].shape

(100, 2)

In [30]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 3070 Ti'

In [31]:
data.embedding_shapes

[[43852, 100], [2, 100]]

In [32]:
nn.Embedding.from_pretrained(torch.tensor(data.embeddings[0]))

Embedding(43852, 100)

In [33]:
data.word_dict.size()

43852

In [34]:
len(data.embeddings[0])

43852

In [35]:
data.label_dict.size()

129

In [36]:
class BIOModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_layers, hidden_dim, pretrained_embedding):
        super(BIOModel, self).__init__()
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.word_embed = nn.Embedding.from_pretrained(pretrained_embedding)
        self.mask_embed = nn.Embedding(2, embedding_dim, device=device)
        
        self.biLSTM = nn.LSTM(embedding_dim * 2, hidden_dim, n_layers, dropout=0.1, bidirectional=True, batch_first=True)
        self.linear = nn.Linear(hidden_dim * 2, data.label_dict.size())
        self.softmax = nn.Softmax(dim=-1)
        
    def forward(self, x):
        batch_size, sequence_length = x.size(0), x.size(1)
        x_w = self.word_embed(x[:,:,0])
        x_m = self.mask_embed(x[:,:,1])
        x = torch.concat((x_w, x_m), axis=2)
        
        hidden_state, cell_state = self.init_hidden(batch_size)
        
        output, (hidden_state, cell_state) = self.biLSTM(x, (hidden_state, cell_state))
        
        output = self.linear(output)
        scores = self.softmax(output)
        pred = torch.argmax(scores, dim=-1)
        
        return output, pred
    
    def init_hidden(self, batch_size):
        h0 = torch.zeros((self.n_layers * 2, batch_size, self.hidden_dim), device=device)
        c0 = torch.zeros((self.n_layers * 2, batch_size, self.hidden_dim), device=device)
        return h0, c0

In [37]:
pretrained_embedding = torch.tensor(data.embeddings[0], device=device)

In [38]:
bioModel = BIOModel(data.word_dict.size(), 
                    embedding_dim=len(data.embeddings[0][0]), 
                    n_layers=4, hidden_dim=model.lstm_hidden_size, 
                    pretrained_embedding=pretrained_embedding)

In [39]:
bioModel.to(device)

BIOModel(
  (word_embed): Embedding(43852, 100)
  (mask_embed): Embedding(2, 100)
  (biLSTM): LSTM(200, 300, num_layers=4, batch_first=True, dropout=0.1, bidirectional=True)
  (linear): Linear(in_features=600, out_features=129, bias=True)
  (softmax): Softmax(dim=-1)
)

In [40]:
x_tensor = torch.stack([torch.tensor(i, device=device) for i in x])
x_tensor.shape

torch.Size([128, 100, 2])

In [41]:
scores, preds = bioModel(x_tensor)

In [42]:
def loss(scores, y):
    # targets = torch.reshape(y, [y.shape[0] * y.shape[1]])
    # scores_flatten = torch.reshape(scores, (scores.shape[0] * scores.shape[1], -1))
    ce_loss = nn.CrossEntropyLoss()
    return ce_loss(scores.permute((0,2,1)), y)

In [43]:
y_tensor = torch.stack([torch.tensor(i, device=device) for i in y])

In [44]:
loss(scores, y_tensor)

tensor(4.8391, device='cuda:0', grad_fn=<NllLoss2DBackward0>)

In [45]:
from torch.optim import Adadelta, Adam

In [46]:
# optimizer = Adadelta(bioModel.parameters(), rho=0.95)

In [47]:
optimizer = Adam(bioModel.parameters(), lr=0.001)

In [52]:
def evaluation(model):
    flatten_ys = []
    flatten_preds = []
    for i, batch in enumerate(tqdm(batched_dev_data)):
        x, y, sq_lengths, weights = batch
        x_tensor = torch.stack([torch.tensor(i, device=device) for i in x])
        y_tensor = torch.stack([torch.tensor(i, device=device) for i in y])

        scores, preds = bioModel(x_tensor)
        
        for i, y in enumerate(y_tensor):
            
            flatten_y_list = y_tensor[i][:sq_lengths[i]].cpu().tolist()
            flatten_pred = preds[i][:sq_lengths[i]].cpu().tolist()
            flatten_ys = flatten_ys + flatten_y_list
            flatten_preds = flatten_preds + flatten_pred

    f1 = sklearn.metrics.f1_score(flatten_ys, flatten_preds, average="macro")
    
    return f1

In [53]:
# evaluation(bioModel)

In [54]:
def train(model, optimizer, loss_fn):
    train_data = data.get_training_data(include_last_batch=True)
    best_f1 = 0
    for epoch in range(config.max_epochs):
        print(f"Epoch: {epoch + 1}")
        train_loss = 0.0
        
        for i, batch in enumerate(tqdm(train_data)):
            x, y, _, weights = batched_tensor
            x_tensor = torch.stack([torch.tensor(i, device=device) for i in x])
            y_tensor = torch.stack([torch.tensor(i, device=device) for i in y])
            
            model.train()
            optimizer.zero_grad()
            
            scores, preds = model(x_tensor)
            
            # print(scores.shape)
            # print(y_tensor.shape)
            # return
            
            loss = loss_fn(scores, y_tensor)
            train_loss += loss
            
            loss.backward()
            # torch.nn.utils.clip_grad_norm(model.parameters(), float(config.max_grad_norm))
            optimizer.step()
            
        i += 1
        train_loss = train_loss / i
        print("Epoch {}, steps={}, loss={:.6f}".format(epoch + 1, i, train_loss))
        model.eval()
        f1_dev = evaluation(model)
        if f1_dev > best_f1:
            torch.save(model.state_dict(), os.path.join(args.model, f"out-epoch-{epoch}-{datetime.now().strftime('%d-%m-%Y_%H')}.pt"))
        print("Eval: macro f1={:.3f}, best f1={:.3f}".format(f1_dev, best_f1))

In [55]:
print("Loss ma van con giu nguyen la t dap may chet me may")

Loss ma van con giu nguyen la t dap may chet me may


In [None]:
train(bioModel, optimizer, loss)

Extracted 252315 samples and 1972 batches.
Epoch: 1


100%|████████████████████████████████████████████████████████████████| 1972/1972 [01:57<00:00, 16.83it/s]


Epoch 1, steps=1972, loss=0.111


100%|██████████████████████████████████████████████████████████████████| 552/552 [01:17<00:00,  7.13it/s]


Eval: macro f1=0.050
Epoch: 2


100%|████████████████████████████████████████████████████████████████| 1972/1972 [01:56<00:00, 16.97it/s]


Epoch 2, steps=1972, loss=0.014


100%|██████████████████████████████████████████████████████████████████| 552/552 [01:31<00:00,  6.01it/s]


Eval: macro f1=0.054
Epoch: 3


100%|████████████████████████████████████████████████████████████████| 1972/1972 [01:57<00:00, 16.73it/s]


Epoch 3, steps=1972, loss=0.010


100%|██████████████████████████████████████████████████████████████████| 552/552 [01:15<00:00,  7.32it/s]


Eval: macro f1=0.057
Epoch: 4


100%|████████████████████████████████████████████████████████████████| 1972/1972 [01:54<00:00, 17.26it/s]


Epoch 4, steps=1972, loss=0.004


100%|██████████████████████████████████████████████████████████████████| 552/552 [01:30<00:00,  6.12it/s]


Eval: macro f1=0.059
Epoch: 5


100%|████████████████████████████████████████████████████████████████| 1972/1972 [01:55<00:00, 17.12it/s]


Epoch 5, steps=1972, loss=0.000


100%|██████████████████████████████████████████████████████████████████| 552/552 [01:29<00:00,  6.14it/s]


Eval: macro f1=0.059
Epoch: 6


100%|████████████████████████████████████████████████████████████████| 1972/1972 [01:53<00:00, 17.40it/s]


Epoch 6, steps=1972, loss=0.001


100%|██████████████████████████████████████████████████████████████████| 552/552 [01:15<00:00,  7.34it/s]


Eval: macro f1=0.060
Epoch: 7


100%|████████████████████████████████████████████████████████████████| 1972/1972 [01:50<00:00, 17.87it/s]


Epoch 7, steps=1972, loss=0.000


100%|██████████████████████████████████████████████████████████████████| 552/552 [01:14<00:00,  7.42it/s]


Eval: macro f1=0.060
Epoch: 8


100%|████████████████████████████████████████████████████████████████| 1972/1972 [01:50<00:00, 17.87it/s]


Epoch 8, steps=1972, loss=0.000


100%|██████████████████████████████████████████████████████████████████| 552/552 [01:14<00:00,  7.42it/s]


Eval: macro f1=0.061
Epoch: 9


100%|████████████████████████████████████████████████████████████████| 1972/1972 [01:55<00:00, 17.06it/s]


Epoch 9, steps=1972, loss=0.000


100%|██████████████████████████████████████████████████████████████████| 552/552 [01:15<00:00,  7.34it/s]


Eval: macro f1=0.061
Epoch: 10


100%|████████████████████████████████████████████████████████████████| 1972/1972 [01:56<00:00, 16.89it/s]


Epoch 10, steps=1972, loss=0.000


100%|██████████████████████████████████████████████████████████████████| 552/552 [01:16<00:00,  7.22it/s]


Eval: macro f1=0.061
Epoch: 11


100%|████████████████████████████████████████████████████████████████| 1972/1972 [01:54<00:00, 17.27it/s]


Epoch 11, steps=1972, loss=0.000


100%|██████████████████████████████████████████████████████████████████| 552/552 [01:14<00:00,  7.37it/s]


Eval: macro f1=0.062
Epoch: 12


100%|████████████████████████████████████████████████████████████████| 1972/1972 [01:55<00:00, 17.01it/s]


Epoch 12, steps=1972, loss=0.001


100%|██████████████████████████████████████████████████████████████████| 552/552 [01:15<00:00,  7.30it/s]


Eval: macro f1=0.063
Epoch: 13


 21%|█████████████▉                                                   | 422/1972 [00:25<01:37, 15.93it/s]