In [1]:
import sys
sys.path.append("/work/multi_doc_analyzer")
sys.path.append("/work/relation_extraction/Bert_model/bert_modify_seq/data/")

import torch as T
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.cuda
from allennlp.nn import util as nn_util
from multi_doc_analyzer.structure.structure import *
from multi_doc_analyzer.tokenization.tokenizer import MDATokenizer
from tqdm import tqdm
from stanfordcorenlp import StanfordCoreNLP

from allennlp.data.dataset_readers import DatasetReader
from allennlp.data.tokenizers import Token
from allennlp.data.token_indexers import TokenIndexer

from allennlp.data.instance import Instance
from allennlp.data.fields import TextField, LabelField, ArrayField

from tacred_preprocess import DataLoader

from allennlp.data.vocabulary import Vocabulary
from allennlp.data.iterators import BucketIterator, DataIterator, BasicIterator
from allennlp.nn.util import get_text_field_mask
from allennlp.models import Model
from allennlp.modules.text_field_embedders import TextFieldEmbedder
import random

from allennlp.data.token_indexers import PretrainedBertIndexer
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders.bert_token_embedder import PretrainedBertEmbedder

from sklearn.metrics import precision_recall_fscore_support as prs
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import csv



In [2]:
train_path = "/work/tacred/data/json/train.json"
test_path = "/work/tacred/data/json/test.json"
model_folder = "/work/model_checkpoint/bert_model_checkpoint/bert_modify_seq/tacred/"
output_path = "/work/relation_extraction/Bert_model/bert_modify_seq/analysis/tacred/"

In [3]:
class Config(dict):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        for k, v in kwargs.items():
            setattr(self, k, v)
    
    def set(self, key, val):
        self[key] = val
        setattr(self, key, val)
        
config = Config(
    seed=1,
    batch_size=200,
    lr=3e-4,                # learning rate
    epochs=100,
    mlp_hidden_sz=300,
    lstm_hidden_sz=768,
    arg_sz=20,              # position embedding size
    max_seq_len=400,
    cuda=T.device("cuda:0")
)

In [4]:
USE_GPU = T.cuda.is_available()
USE_GPU

True

In [5]:
# set seed for both CPU and CUDA
T.manual_seed(config.seed)

<torch._C.Generator at 0x7f436353a890>

In [6]:
e_label2idx = {'PERSON': 0, 'ORGANIZATION': 1, 'DATE': 2, 'NUMBER': 3, 'TITLE': 4, 'COUNTRY': 5, 'LOCATION': 6, 'CITY': 7, 'MISC': 8, 'STATE_OR_PROVINCE': 9, 'DURATION': 10, 'NATIONALITY': 11, 'CAUSE_OF_DEATH': 12, 'CRIMINAL_CHARGE': 13, 'RELIGION': 14, 'URL': 15, 'IDEOLOGY': 16}

r_label2idx = {'no_relation': 0, 'per:title': 1, 'org:top_members/employees': 2, 'per:employee_of': 3, 'org:alternate_names': 4, 'org:country_of_headquarters': 5, 'per:countries_of_residence': 6, 'org:city_of_headquarters': 7, 'per:cities_of_residence': 8, 'per:age': 9, 'per:stateorprovinces_of_residence': 10, 'per:origin': 11, 'org:subsidiaries': 12, 'org:parents': 13, 'per:spouse': 14, 'org:stateorprovince_of_headquarters': 15, 'per:children': 16, 'per:other_family': 17, 'per:alternate_names': 18, 'org:members': 19, 'per:siblings': 20, 'per:schools_attended': 21, 'per:parents': 22, 'per:date_of_death': 23, 'org:member_of': 24, 'org:founded_by': 25, 'org:website': 26, 'per:cause_of_death': 27, 'org:political/religious_affiliation': 28, 'org:founded': 29, 'per:city_of_death': 30, 'org:shareholders': 31, 'org:number_of_employees/members': 32, 'per:date_of_birth': 33, 'per:city_of_birth': 34, 'per:charges': 35, 'per:stateorprovince_of_death': 36, 'per:religion': 37, 'per:stateorprovince_of_birth': 38, 'per:country_of_birth': 39, 'org:dissolved': 40, 'per:country_of_death': 41}


r_idx2label = {v: k for k, v in r_label2idx.items()}

class RelationDatasetReader(DatasetReader):
    """
    Reads Structure object formatted datasets files, and creates AllenNLP instances.
    """
    def __init__(self, token_indexers: Dict[str, TokenIndexer]=None, 
                 MAX_WORDPIECES: int=config.max_seq_len, 
                 is_training = False, reader: DataLoader=None):
        # make sure results may be reproduced when sampling...
        super().__init__(lazy=False)
        random.seed(0)
        self.is_training = is_training
        self.reader = reader
        
        # NOTE AllenNLP automatically adds [CLS] and [SEP] word peices in the begining and end of the context,
        # therefore we need to subtract 2
        self.MAX_WORDPIECES = MAX_WORDPIECES - 2
        
        # BERT specific init
        self._token_indexers = token_indexers

    def text_to_instance(self, sentence: Sentence) -> Instance:

        field = {}
        
        # tokens
        sentence_field = TextField(sentence["tokens"], self._token_indexers)
        fields = {"tokens": sentence_field}

#         arg_vec = T.tensor([[0, 0] for i in range(len(sentence.tokens) + 2)], dtype=T.long)   # long type to feed into embedding layer
        arg_vec = T.tensor([[config.max_seq_len-1, config.max_seq_len-1] for i in range(config.max_seq_len)], dtype=T.long)   # long type to feed into embedding layer

        # position 
        for i in range(sentence["len"]):
            arg_vec[i][0] = sentence["s_position"][i]    # arg_l position, i-1 for [CLS]
            arg_vec[i][1] = sentence["o_position"][i]   # arg_r position, i-1 for [CLS]
        fields["arg_idx"] = ArrayField(arg_vec)

        # information for each sentence length
        fields["sen_len"] = LabelField(sentence["len"], skip_indexing=True)

        # relation
        if self.is_training:
            fields["label"] = LabelField(sentence["relation"], skip_indexing=True)
        
        return Instance(fields)
    
    def _read(self, file_path: str)->Iterator: 
        sen_list = self.reader.data
        for s in sen_list:
            if s["len"] <= config.max_seq_len:
                yield self.text_to_instance(s)

In [7]:
class BERT(Model):
    def __init__(self, word_embeddings: TextFieldEmbedder,
                out_sz: int=len(r_label2idx)):
        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self._position_embeddings = T.nn.Embedding(num_embeddings=(config.max_seq_len)*2, embedding_dim=config.arg_sz, padding_idx=config.max_seq_len*2 - 1)
        
        # bert output is of dimension 768
        self.lstm = T.nn.LSTM(input_size=768 + 2*config.arg_sz, hidden_size=768, batch_first=True, bidirectional=True)
        self.projection1 = nn.Linear(config.lstm_hidden_sz * 2, config.mlp_hidden_sz)
        self.projection2 = nn.Linear(config.mlp_hidden_sz, out_sz)
        self.loss = nn.CrossEntropyLoss()
        
    def forward(self, tokens: Dict[str, T.tensor], arg_idx: T.tensor, sen_len: T.tensor, label: T.tensor = None) -> Dict[str, T.tensor]:
        
        # Bert as embedding
        embeddings = self.word_embeddings(tokens)
        
        # prepare for concatenate
        arg_idx = arg_idx.type(T.long)
        
        # cut both position and token embedding to precise length
        cut_len = max(i for i in sen_len)
        embeddings = embeddings[:,:cut_len,:]
        arg_idx = arg_idx[:,:cut_len,:]
        
        # mask of the extra part of token embedding after cut from cut_len 
        mask = T.tensor([[[1] if j < sen_len[i] else [0] for j in range(cut_len)] for i in range(len(label))], dtype = T.float, device=config.cuda)
        m_embeddings = T.mul(embeddings, mask)
        
        # cancatenate word and position embedding
        arg_emb = self._position_embeddings(arg_idx)
        arg_cat = T.cat((arg_emb[:,:,0,:], arg_emb[:,:,1,:]), -1)
        concat = T.cat((m_embeddings, arg_cat), -1)

        # BiLSTM layer
        ot, hs = self.lstm(concat)

        # MLP
        mlp_hs = self.projection1(T.cat((ot[:, 0, :config.lstm_hidden_sz], ot[:, -1, config.lstm_hidden_sz:]), -1))
        class_logits = self.projection2(mlp_hs)

        # Cross entropy
        output = {"class_logits": class_logits}
        output["loss"] = self.loss(class_logits, label)

        return output

In [8]:
from scipy.special import expit # the sigmoid function
def tonp(tsr): return tsr.detach().cpu().numpy()

In [9]:
# Predict
class Predictor:
    def __init__(self, model: Model, iterator: DataIterator,
                 cuda_device: int=-1) -> None:
        self.model = model
        self.iterator = iterator
        self.cuda_device = cuda_device
        
    def _extract_data(self, batch) -> np.ndarray:
        out_dict = self.model(**batch)
        return expit(tonp(out_dict["class_logits"]))
    
    def predict(self, ds: Iterable[Instance]) -> np.ndarray:
        pred_generator = self.iterator(ds, num_epochs=1, shuffle=False)
        self.model.eval()
        pred_generator_tqdm = tqdm(pred_generator, total=self.iterator.get_num_batches(ds))
        preds = []
        with T.no_grad():
            for batch in pred_generator_tqdm:
                batch = nn_util.move_to_device(batch, self.cuda_device)
                preds.append(self._extract_data(batch))
        return np.concatenate(preds, axis=0)

In [10]:
def plot_comfusion_matrix(label_classes, predict_classes, out_folder, file_name):
    label_types = list(r_idx2label.values())

    cm = confusion_matrix(label_classes, predict_classes, label_types)
    print(cm)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm)
    for (i, j), z in np.ndenumerate(cm):
        ax.text(j, i, '{:0.0f}'.format(z), ha='center', va='center', color='white')
    fig.colorbar(cax)
    ax.set_xticklabels([''] + label_types)
    ax.set_yticklabels([''] + label_types)
    plt.xlabel('Predicted')
    plt.ylabel('True')

    plt.savefig(out_folder + 'confusion_matrix_' + file_name + '.png')
    plt.show()
    
    # remove the data which is none or predict none
    
    pre, recall, f1, sup = prs(label_classes, predict_classes, average='macro')
    
    print("Accuracy:", sum(cm[i][i] for i in range(len(cm))) / len(label_classes))
    print("Precision:", pre)
    print("Recall:", recall)
    print("F1 score:", f1)

In [11]:
def err_analyze(ds, true, pred, opt):
    
    # classify different kinds of error
    detail = [[[] for j in range(len(r_label2idx))] for i in range(len(r_label2idx))]
    for i in range(len(ds)):
         if true[i] != pred[i]:
            detail[r_label2idx[true[i]]][r_label2idx[pred[i]]].append(i)
    
    # print into a csv file
    with open(output_path + "error_detail_" + opt + ".csv", "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["Sentence", "Two_Entity", "Predict", "Label", "idx"])
        for j in range(len(detail)):
            for k in range(len(detail)):
                with_element = 0
                if k == j:
                    continue
                for i in detail[j][k]:
                    with_element = 1
                    ent1 = []
                    ent2 = []
                    for g in range(len(vars(ds[i].fields['arg_idx'])['array'])):
                        if int(vars(ds[i].fields['arg_idx'])['array'][g][0]) == config.max_seq_len or int(vars(ds[i].fields['arg_idx'])['array'][g][1]) == config.max_seq_len:
                            if int(vars(ds[i].fields['arg_idx'])['array'][g][0]) == config.max_seq_len:
                                ent1.append(vars(ds[i].fields['tokens'])['tokens'][g-1])
                            else:
                                ent2.append(vars(ds[i].fields['tokens'])['tokens'][g-1])
    
                    tostr = lambda a: [str(a[i]) for i in range(len(a))] 
                    writer.writerow([" ".join(tostr(vars(ds[i].fields['tokens'])['tokens'])), [ent1, ent2], pred[i], true[i], i])
                if with_element == 1:
                    writer.writerow("")

In [12]:

if __name__ == '__main__':

    tacred_reader = DataLoader(train_path, list(e_label2idx.keys()), list(r_label2idx.keys()), {"lower": True, "len": config.max_seq_len})
    
    token_indexer = PretrainedBertIndexer(
        pretrained_model="bert-base-uncased",
#         do_lowercase=False               # for cased condition
    )
 
	# AllenNLP DatasetReader
    reader = RelationDatasetReader(
        is_training=True, 
        reader=tacred_reader, 
        token_indexers={"tokens": token_indexer}
    )

    train_ds = reader.read(train_path)
    print(len(train_ds))
#     print(type(train_ds[0]))
#     for e in range(20):
#         print(len(vars(train_ds[e].fields['tokens'])['tokens']))
#         print(vars(train_ds[e].fields['tokens']))
#         print(len(vars(train_ds[0].fields['arg_idx'])['array']))
#         print(vars(train_ds[e].fields['arg_idx']))
#         print(vars(train_ds[e].fields['sen_len']))
#         print(vars(train_ds[e].fields['label']))
    
    # user-defined new label
    vocab = Vocabulary()

    iterator = BucketIterator(batch_size=config.batch_size, sorting_keys=[("tokens", "num_tokens")])
    iterator.index_with(vocab)

    bert_embedder = PretrainedBertEmbedder(
        pretrained_model="bert-base-uncased",
        top_layer_only=True, # conserve memory   
    )
    
    word_embeddings: TextFieldEmbedder = BasicTextFieldEmbedder({"tokens": bert_embedder},
                                                                # we'll be ignoring masks so we'll need to set this to True
                                                               allow_unmatched_keys = True)

#     print(token_indexer.tokens_to_indices([Token(text="[unused100]")], vocab, "test"))

    model = BERT(word_embeddings)
    if USE_GPU:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=config.lr)

100%|██████████| 22631/22631 [00:44<00:00, 504.49it/s]
22631it [00:22, 1016.72it/s]


22631


In [13]:
    # training
    from allennlp.training.trainer import Trainer

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        iterator=iterator,
        train_dataset=train_ds,
        cuda_device=0 if USE_GPU else -1,
        num_epochs=config.epochs,
    )

In [14]:
    # train the model 
    metrics = trainer.train()

  0%|          | 0/114 [00:00<?, ?it/s]

tensor(24, device='cuda:0')


loss: 3.7134 ||:   1%|          | 1/114 [00:10<18:51, 10.01s/it]

tensor([-0.2518,  0.1652, -0.0495, -0.0221,  0.8059,  1.1774,  0.0598,  0.4681,
         0.5558,  0.6295, -0.5866,  0.8782, -0.1160, -0.6415,  0.6031,  0.1350,
        -0.3219,  1.3848,  0.4577,  0.0767,  0.2865,  0.4113, -0.1523,  0.2720,
        -0.0000, -0.0000,  0.0000, -0.0000, -0.0000], device='cuda:0')
tensor(55, device='cuda:0')


loss: 3.6259 ||:   2%|▏         | 2/114 [00:13<15:13,  8.16s/it]

tensor([-0.1307,  0.3477,  0.3502, -0.6668, -0.2149, -0.0994, -0.4518, -0.2232,
        -0.2339, -1.8533, -0.9194,  0.2991,  0.2871,  0.1352,  0.8881,  0.3445,
         0.0874,  0.7331,  0.9836,  0.3046,  0.2187, -0.2822,  0.0938,  0.5817,
        -0.4007,  0.1454, -0.8977,  0.3219,  0.1818,  0.3850, -0.1549, -0.8216,
        -1.2073, -0.0361, -0.0113,  1.0261,  0.9779,  0.2871, -0.0249,  0.3803,
        -0.5879, -0.2668,  0.4545, -0.0635,  0.3209,  0.4456,  0.0312,  0.6545,
        -0.1959,  0.0677, -0.0254,  0.1724,  0.1978,  0.7447, -0.2597,  0.0000,
         0.0000, -0.0000,  0.0000,  0.0000,  0.0000,  0.0000, -0.0000,  0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000], device='cuda:0')
tensor(60, device='cuda:0')


loss: 3.5423 ||:   3%|▎         | 3/114 [00:17<12:42,  6.87s/it]

tensor([-0.5141, -0.1442, -0.4955, -0.3965, -0.2301, -0.1228, -1.0978,  0.1005,
        -1.2142, -1.0719, -0.6482,  0.0142, -0.5522, -0.2994, -0.2505, -0.1149,
        -0.0480,  0.0636, -0.0442,  0.0421, -0.0691, -0.4543,  0.5734, -0.2916,
         0.3190, -1.3754, -1.4429, -1.6216, -0.6903, -0.1770, -1.3821, -0.4393,
        -0.6021, -0.3540, -0.4736, -1.0451, -0.3734, -0.3750, -0.6224,  0.9233,
        -0.0930, -1.1457,  0.3070, -0.2388, -0.2322, -1.7192, -1.5263, -1.6805,
        -0.4019, -0.0357, -0.3158, -0.0632,  0.4816, -0.6415,  0.0544,  0.6335,
        -0.2432,  0.1927,  0.5852,  0.5151, -0.0000, -0.0000, -0.0000,  0.0000,
        -0.0000, -0.0000,  0.0000, -0.0000, -0.0000, -0.0000], device='cuda:0')
tensor(25, device='cuda:0')


loss: 3.4611 ||:   4%|▎         | 4/114 [00:19<09:48,  5.35s/it]

tensor([-0.2496, -0.7045, -0.0302, -0.5849, -0.4721, -0.3607, -0.4444, -0.2626,
         0.4482, -0.6179, -0.7026, -0.5278,  0.1957, -0.3179, -0.5048, -0.2055,
        -0.3155, -0.3622, -0.2223, -0.2271, -0.8942, -0.5530,  1.1373,  0.8656,
         0.7394, -0.0000,  0.0000, -0.0000,  0.0000, -0.0000,  0.0000],
       device='cuda:0')
tensor(94, device='cuda:0')
tensor([-0.3125, -0.4871, -0.1229,  0.2564,  0.4757,  0.6137, -0.4090, -0.7419,
        -0.2765, -0.9448, -1.0754, -0.8632, -0.2144,  0.5882,  0.0046, -0.0704,
        -1.1385, -1.0024, -0.1040, -0.7501,  0.3452,  0.0778,  0.0347, -0.3314,
         0.1355,  0.2006, -0.3420, -0.0262, -0.4987, -0.6225, -1.4834,  1.0123,
         0.1080,  0.2170, -0.9338, -1.3123, -0.2108,  0.0523, -0.1835,  0.6623,
        -1.1437, -0.2493, -1.3436, -0.8286, -0.0700,  0.3581,  0.6237, -0.2984,
         0.2528,  1.0146, -0.2148, -0.7051, -0.1491,  0.2687, -0.9116, -0.2902,
        -0.8578, -0.8997, -0.8352, -0.0322, -1.0025, -0.5851, -0.8390, -0.81

loss: 3.3412 ||:   4%|▍         | 5/114 [00:25<10:12,  5.62s/it]

tensor(165, device='cuda:0')


loss: 3.1949 ||:   5%|▌         | 6/114 [00:28<08:49,  4.90s/it]

tensor([-0.9323, -0.3196, -0.5342, -0.5665, -0.5265, -0.2273,  0.0768, -0.2328,
        -0.1859, -0.6783, -0.4957, -0.7513, -0.5966,  0.2001, -1.0973,  0.1327,
        -0.9166, -0.1748, -0.8423,  0.2387,  0.6047, -0.4461, -0.5364, -0.1230,
         0.1519,  0.8460,  0.1942, -0.5369,  0.1563, -0.6674, -0.6881,  0.0075,
        -0.8417, -0.8072, -1.0045,  0.1428, -0.9778, -0.0189, -0.9643, -0.2484,
        -0.0739, -0.3664, -0.5974,  0.0299,  0.5126,  0.2668, -0.0058,  0.3257,
        -1.5339, -0.5076, -0.2820, -0.5653, -0.3252, -0.8410,  0.0653, -0.7325,
         0.1511, -0.8934,  0.1893,  0.0850, -0.1170, -0.3037,  0.6838,  0.6662,
         0.5313,  0.0805, -0.3026, -0.5717, -1.1850, -0.5607, -0.2371, -0.3950,
         0.3223, -0.4760,  0.2479, -0.6014, -0.2105, -0.8314, -0.0582,  0.9740,
        -0.3156, -0.6545,  0.2467,  0.0642,  0.8363, -0.0804, -0.4161, -0.0798,
        -1.1399, -0.5941,  0.2647, -0.1103,  0.0643, -0.1905,  0.3102, -0.9545,
        -0.2276, -0.9692, -0.3037, -1.04

loss: 3.1477 ||:   6%|▌         | 7/114 [00:29<06:36,  3.70s/it]

tensor([-0.0310, -0.0751, -0.4667,  0.2916, -0.2361,  0.3333, -0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000, -0.0000, -0.0000, -0.0000],
       device='cuda:0')
tensor(30, device='cuda:0')


loss: 3.0352 ||:   7%|▋         | 8/114 [00:31<05:30,  3.12s/it]

tensor([-0.3359, -0.3217,  0.1126, -0.3711,  0.1859, -0.2163, -0.3457,  0.6267,
         0.2330,  0.5270,  0.3430, -0.6960, -0.2712, -0.0364, -1.0072, -0.6879,
        -0.3835, -0.4511, -0.1394,  0.2572,  0.3805, -0.9487, -0.4201, -0.4127,
         0.5325, -0.1915, -0.6382, -0.3342,  0.5488,  0.7224, -0.0000,  0.0000,
         0.0000], device='cuda:0')
tensor(36, device='cuda:0')


loss: 2.9234 ||:   8%|▊         | 9/114 [00:33<04:53,  2.80s/it]

tensor([ 0.0700,  0.4593,  0.2643, -0.5324, -0.7552, -0.2970,  0.4641, -0.4512,
        -1.0232, -0.8316,  0.1828,  0.2178, -0.8394, -0.4122,  0.2679,  0.2674,
         0.2297,  0.2041,  0.2591, -0.0225, -0.5308,  0.2768,  0.4197,  0.7033,
         0.2613,  0.5097,  0.2743,  1.1159,  0.1432, -0.2925,  0.6167, -0.1819,
         0.6083,  0.9653,  0.5242, -0.3136,  0.0000, -0.0000], device='cuda:0')
tensor(63, device='cuda:0')


loss: 2.7888 ||:   9%|▉         | 10/114 [00:38<05:40,  3.27s/it]

tensor([-0.4213,  0.2446,  0.0549,  0.5463,  0.1927, -0.3501, -0.1787,  0.4457,
        -0.5498,  0.2741, -0.8916, -0.0225, -0.3589, -0.3040, -0.4100, -0.7097,
        -1.0770, -0.3005, -0.6235, -0.0331, -1.7928,  0.1683, -0.4478,  0.1075,
        -0.2479,  0.1179,  0.0580, -0.3590, -0.8626,  0.6260, -0.5398, -0.4385,
        -0.0364,  0.0539, -0.1308,  0.0787,  0.2529,  0.7965, -0.4109, -0.9574,
         0.5008,  0.2007,  0.0423,  0.0951, -0.2523, -0.1840,  0.3005, -0.3628,
         0.1330,  0.2093, -0.5027,  0.1386, -0.3299,  0.3039, -0.2748, -1.5860,
        -0.4322,  0.0422, -0.5339,  0.4386, -0.0111, -0.3155,  0.8161, -0.0000,
        -0.0000,  0.0000,  0.0000, -0.0000,  0.0000, -0.0000,  0.0000,  0.0000,
        -0.0000,  0.0000, -0.0000, -0.0000, -0.0000,  0.0000, -0.0000, -0.0000,
        -0.0000,  0.0000, -0.0000,  0.0000,  0.0000], device='cuda:0')
tensor(83, device='cuda:0')


loss: 2.6507 ||:  10%|▉         | 11/114 [00:43<06:37,  3.86s/it]

tensor([-0.3574, -0.1146,  0.3800,  0.2706,  0.2597,  0.5293,  0.1806,  0.3100,
         0.0815, -0.1756,  0.1648, -0.7919,  0.0376,  0.3410,  0.4432, -0.1385,
         0.9564,  0.7353,  0.4635, -0.4891, -0.3872, -0.2282,  0.9700,  0.6403,
        -0.1795, -0.8768,  0.1373, -0.3236,  1.2879,  0.5081, -1.0280, -0.3841,
         0.3703,  0.3085, -0.5501, -0.3474, -0.7982, -0.6542, -0.5712, -0.9659,
        -0.8749, -0.0280, -0.7070,  0.6887, -0.9831, -1.1973, -0.1574, -0.1949,
         0.1044,  0.1619, -0.5665, -0.5109, -0.4327, -0.3599, -1.1482, -0.4340,
        -0.1265, -0.3279, -0.5465, -0.7670, -0.4167,  0.2083, -0.1808,  0.8463,
        -0.6659,  0.0145, -0.3948, -0.1928,  0.2696, -0.0357, -0.1695, -0.0970,
         1.4220,  0.2821, -0.7729, -0.7014, -0.3059, -1.7529, -0.6667,  0.1984,
        -0.6198,  0.4858, -0.5756, -0.0000,  0.0000, -0.0000, -0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000, -0.0000,
         0.0000,  0.0000, -0.0000,  0.00

loss: 2.5627 ||:  11%|█         | 12/114 [00:44<05:09,  3.04s/it]

tensor([-0.2273,  0.4772,  0.1011, -0.4058,  0.1767,  0.0621, -0.0835, -0.2085,
         0.3378, -0.4734, -0.5774,  0.0409,  0.0193,  0.4055,  0.7745,  0.0000,
         0.0000, -0.0000], device='cuda:0')
tensor(32, device='cuda:0')


loss: 2.4845 ||:  11%|█▏        | 13/114 [00:46<04:39,  2.76s/it]

tensor([-0.8185, -0.4685, -0.2072,  0.2772, -0.5667, -0.2545, -0.2489,  0.2819,
        -0.4806, -0.4087,  0.1040, -0.2534, -0.3985,  0.5142, -0.9180, -0.1284,
        -0.4828, -0.2807, -0.5525, -0.1787,  0.3937, -0.5594, -0.7498, -0.5358,
         0.1392, -1.1682, -1.3527, -0.9307, -0.9250, -0.8353,  0.6762, -1.7179,
         0.0000, -0.0000,  0.0000, -0.0000,  0.0000], device='cuda:0')
tensor(23, device='cuda:0')


loss: 2.4260 ||:  12%|█▏        | 14/114 [00:48<04:00,  2.41s/it]

tensor([-0.1187, -0.1039,  0.0781,  0.0084, -0.3398, -0.1888, -0.2244,  1.1037,
         0.5777, -0.1922, -0.0630,  0.1162, -0.5892,  0.7965,  0.3522, -0.7679,
        -0.0222, -0.2010,  0.0990,  0.2949,  0.7500,  0.6433, -0.2783, -0.0000,
        -0.0000,  0.0000, -0.0000], device='cuda:0')
tensor(31, device='cuda:0')


loss: 2.3750 ||:  13%|█▎        | 15/114 [00:50<03:56,  2.38s/it]

tensor([-0.1090,  0.2152,  0.5615, -0.2302, -0.3604,  0.0236, -0.3075,  0.1941,
        -0.7865, -0.5035, -0.2051,  0.0229,  0.1189, -1.2423,  0.1143, -0.2499,
        -0.5310, -0.5313, -0.0356, -0.3292, -0.4201,  0.6357,  0.3388, -0.4779,
        -0.1703,  0.1807,  0.0194, -0.2068,  0.9573,  0.0624,  0.5778,  0.0000,
         0.0000,  0.0000,  0.0000, -0.0000, -0.0000, -0.0000,  0.0000, -0.0000],
       device='cuda:0')
tensor(26, device='cuda:0')


loss: 2.3393 ||:  14%|█▍        | 16/114 [00:52<03:30,  2.15s/it]

tensor([-0.6088, -0.2837,  0.3933, -0.3604, -0.7864, -0.3923, -0.1653,  0.4835,
        -1.1665, -0.2300,  0.1509, -0.4627, -0.7459,  0.1423,  0.3867,  0.4191,
         0.1266, -0.7217, -1.0786, -0.3601, -0.1292, -0.3538, -0.2342,  0.4814,
        -0.1512, -0.8600, -0.0000, -0.0000], device='cuda:0')
tensor(36, device='cuda:0')


loss: 2.2975 ||:  15%|█▍        | 17/114 [00:54<03:34,  2.21s/it]

tensor([-0.6495, -0.5946, -0.8965, -0.4933,  0.0267,  0.1737, -0.4027, -0.2071,
        -0.4631, -0.2877, -0.9817,  0.3170, -0.7423,  0.2342, -0.9032, -0.3429,
        -0.9705, -0.8604, -0.2179, -1.1329, -0.4148, -0.0484, -0.4757, -0.1323,
         0.2999, -0.3851, -1.1030, -0.9185,  0.5467, -0.1474, -1.6250, -1.0385,
        -0.8108, -0.1230, -0.4461,  0.1322, -0.0000,  0.0000, -0.0000,  0.0000,
        -0.0000, -0.0000], device='cuda:0')
tensor(47, device='cuda:0')


loss: 2.2517 ||:  16%|█▌        | 18/114 [00:57<04:04,  2.54s/it]

tensor([-0.5827, -0.4186, -0.2956,  0.1075, -0.6159, -0.3959, -0.0357, -0.5568,
        -0.4122, -0.2484, -0.0054,  0.1201, -0.8858, -0.5753, -0.5149,  0.6986,
         0.2031,  0.4732, -0.5296, -0.0974, -0.9514,  0.7220,  0.6454, -0.4417,
        -0.4842, -0.1234, -0.2991, -0.4472, -1.0917, -0.1548, -0.8399,  0.2515,
        -0.1561, -0.4688, -0.3038, -0.3245,  0.1892, -1.6274, -0.0057, -0.4611,
        -0.1545, -0.7867, -0.5439, -0.2543, -0.1329,  0.7151, -0.5998, -0.0000,
        -0.0000, -0.0000,  0.0000, -0.0000,  0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000], device='cuda:0')
tensor(38, device='cuda:0')


loss: 2.2362 ||:  17%|█▋        | 19/114 [01:00<04:08,  2.62s/it]

tensor([-0.3013,  0.1426, -0.1259, -0.4659, -0.1140, -0.2688,  0.1240, -0.1788,
        -0.5233, -0.0377,  0.7277, -0.1774, -0.2670, -0.2092, -0.2670, -0.4333,
        -0.3804, -0.2511, -0.2283, -0.0778, -0.3394,  0.1344,  0.5822,  0.0134,
         0.4541, -0.2952,  0.0769, -0.4665, -1.0273, -0.7126, -0.6957, -0.6260,
        -0.4612, -1.0670, -0.7815, -0.5746,  0.4065, -0.5182, -0.0000, -0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000, -0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0')
tensor(47, device='cuda:0')


loss: 2.2074 ||:  18%|█▊        | 20/114 [01:03<04:13,  2.69s/it]

tensor([-0.1018,  0.5282,  0.0360,  0.3835, -1.1442, -0.5932,  0.0655,  0.5135,
        -0.2532, -0.1048,  0.1912,  0.0045,  0.3458,  0.1485, -0.5240,  0.2343,
         0.1477,  0.5180,  0.1958, -0.2776, -0.1453,  0.1132, -0.2580,  0.0239,
         0.3147, -0.8758,  0.4298, -0.5199, -0.1262, -0.0982, -0.2127,  0.3860,
         0.2583, -0.0385,  0.0942, -0.8339, -0.4594,  0.4205,  0.1300, -0.7209,
        -1.1790, -1.3409, -0.5847,  0.8856,  0.1363, -0.4513,  0.7268, -0.0000,
         0.0000,  0.0000,  0.0000, -0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0')
tensor(17, device='cuda:0')


loss: 2.1760 ||:  18%|█▊        | 21/114 [01:04<03:29,  2.25s/it]

tensor([-0.4828, -0.1164,  0.2224, -0.5488, -0.4464, -0.2182,  0.3008, -0.3151,
         0.1787,  0.7081, -0.4212, -0.3347, -0.4419, -0.0604,  0.2041,  0.3055,
        -0.0054,  0.0000,  0.0000, -0.0000, -0.0000, -0.0000], device='cuda:0')
tensor(32, device='cuda:0')


loss: 2.1470 ||:  19%|█▉        | 22/114 [01:06<03:26,  2.25s/it]

tensor([-0.2959, -0.1040,  0.1184, -0.5399, -0.3465,  0.3653,  0.1057,  0.2416,
        -0.7254,  0.0901,  0.0569,  0.3261, -0.3283, -0.8882, -0.0678,  0.0995,
         0.1137,  0.2204, -0.7331,  0.0306, -0.0447, -0.1761,  0.4975, -0.4272,
        -0.2197,  0.0993, -0.2971,  0.0742, -0.5724, -0.3134,  0.7885, -0.3189,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,  0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000], device='cuda:0')
tensor(40, device='cuda:0')


loss: 2.1091 ||:  20%|██        | 23/114 [01:09<03:44,  2.46s/it]

tensor([-5.9056e-01,  4.9683e-01,  7.1626e-01, -1.8289e-01, -7.4251e-02,
        -1.4139e+00,  1.6014e-01, -1.2291e+00, -1.9129e-01,  4.0380e-01,
         4.5130e-01, -6.1619e-01, -5.5615e-01,  1.9865e-01, -9.2796e-01,
         3.7267e-01, -1.0672e+00, -9.9648e-01,  6.2264e-01,  2.7975e-01,
        -4.4868e-01, -5.9699e-01,  1.3530e-02, -1.2567e+00, -9.5470e-01,
        -9.8624e-01, -7.5276e-02,  6.4674e-02, -7.1862e-05,  6.8381e-02,
        -2.4261e-01,  1.2804e-01, -3.5291e-01,  9.1536e-02, -2.4293e-01,
        -1.5386e-01,  2.2278e-01,  2.9770e-01, -2.2988e-01,  1.7030e-01,
         0.0000e+00,  0.0000e+00, -0.0000e+00,  0.0000e+00,  0.0000e+00,
        -0.0000e+00,  0.0000e+00, -0.0000e+00,  0.0000e+00, -0.0000e+00,
        -0.0000e+00, -0.0000e+00, -0.0000e+00, -0.0000e+00, -0.0000e+00],
       device='cuda:0')
tensor(67, device='cuda:0')


loss: 2.0802 ||:  21%|██        | 24/114 [01:14<04:38,  3.10s/it]

tensor([-0.3752,  0.2348, -0.2214,  0.5285,  0.7529, -0.5713,  0.1720, -0.0688,
        -0.2315, -0.1011, -1.3751, -0.1801, -0.5515, -0.3268, -0.9099, -1.0234,
        -0.2033, -0.0620,  0.3569, -0.2109, -0.1290,  0.0503, -0.5997, -0.1492,
        -0.0138,  0.7186, -0.0550,  0.2298,  0.3692, -0.9238, -0.1801, -0.2178,
         0.1637,  0.2056, -0.1512, -0.2972, -0.3882, -0.2018, -0.2126, -0.2460,
         0.6640, -0.9931, -0.7504, -0.5517, -0.4823, -0.8655,  0.0893,  0.4196,
        -0.1761, -0.4303, -0.5311, -0.1817, -0.5809, -0.2736, -0.4974, -0.3323,
        -0.1678, -0.9626, -0.1337, -0.4503,  0.3740,  0.7161, -0.1206, -1.0261,
        -0.7538, -0.7946,  0.3036, -0.0000, -0.0000, -0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000], device='cuda:0')
tensor(60, device='cuda:0')


loss: 2.0538 ||:  22%|██▏       | 25/114 [01:18<04:49,  3.26s/it]

tensor([-0.3727,  0.0238, -0.4031, -0.3669, -0.2611, -0.1235, -0.5400,  0.3754,
        -0.5285, -1.0664, -0.0659,  0.3450, -0.3056, -0.2558, -0.5310, -0.9730,
        -0.4710, -0.3891, -0.9537, -0.9231, -0.5030,  0.0388,  0.2558, -0.3295,
        -0.5539,  0.4796, -1.1675, -0.2186,  0.2746, -0.5447, -1.1008, -0.5850,
        -1.0412, -1.1223,  0.3115, -0.0717,  0.6108, -0.3918,  0.7965, -0.5900,
        -0.6123, -0.7932,  0.0574, -0.0054, -0.5268, -0.4189, -0.2576, -0.6752,
         0.2362, -0.3860, -0.6212,  0.2131, -0.3184, -0.5209, -0.0695, -0.6485,
        -0.2603,  0.1662,  0.8775,  0.8537, -0.0000,  0.0000, -0.0000,  0.0000,
        -0.0000, -0.0000], device='cuda:0')
tensor(34, device='cuda:0')


KeyboardInterrupt: 

In [None]:
    # load model
#     model.load_state_dict(T.load(model_folder + "model.th"))

In [None]:
    # save 
    with open(model_folder+'model.th', 'wb') as f:
        T.save(model.state_dict(), f)

In [None]:
    # training data analysis
    seq_iterator = BasicIterator(batch_size=config.batch_size)
    seq_iterator.index_with(vocab)
    
    predictor = Predictor(model, seq_iterator, cuda_device=0 if USE_GPU else -1)
    train_preds = predictor.predict(train_ds) 
    
    label_types = [r_idx2label.get(i.fields['label'].label) for i in train_ds]
    predict_types = [r_idx2label.get(i) for i in np.argmax(train_preds, axis=-1)]
    err_analyze(train_ds, label_types, predict_types, "train")

In [None]:
    plot_comfusion_matrix(label_types, predict_types, output_path, "train_full")

In [None]:
    # testing data analysis
    tacred_reader = DataLoader(test_path, list(e_label2idx.keys()), list(r_label2idx.keys()), {"lower": True, "len": config.max_seq_len})
    
    # AllenNLP DatasetReader
    reader = RelationDatasetReader(
        is_training=True, 
        reader=tacred_reader, 
        token_indexers={"tokens": token_indexer}
    )
    
    test_ds = reader.read(test_path)
    print(len(test_ds))
    seq_iterator = BasicIterator(batch_size=config.batch_size)
    seq_iterator.index_with(vocab)
    
    predictor = Predictor(model, seq_iterator, cuda_device=0 if USE_GPU else -1)
    test_preds = predictor.predict(test_ds) 
    
    label_types = [r_idx2label.get(i.fields['label'].label) for i in test_ds]
    predict_types = [r_idx2label.get(i) for i in np.argmax(test_preds, axis=-1)]  
    
    err_analyze(test_ds, label_types, predict_types, "test")

In [None]:
    plot_comfusion_matrix(label_types, predict_types, output_path, "test_full")