In [None]:
!pip install conllu
!wget https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-3105/ud-treebanks-v2.5.tgz
!wget https://www.ida.liu.se/~TDDE09/commons/projectivize.py
!tar zxf ud-treebanks-v2.5.tgz

import os
import torch
import projectivize
import torch.nn as nn
from io import open
from conllu import parse_incr
import copy
import operator
import torch.nn.functional as F
import torch.optim as optim
import tqdm
import matplotlib.pyplot as plt
import numpy as np
import subprocess

Collecting conllu
  Downloading https://files.pythonhosted.org/packages/ae/be/be6959c3ff2dbfdd87de4be0ccdff577835b5d08b1d25bf7fd4aaf0d7add/conllu-4.4-py2.py3-none-any.whl
Installing collected packages: conllu
Successfully installed conllu-4.4
--2021-03-10 14:36:02--  https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-3105/ud-treebanks-v2.5.tgz
Resolving lindat.mff.cuni.cz (lindat.mff.cuni.cz)... 195.113.20.140
Connecting to lindat.mff.cuni.cz (lindat.mff.cuni.cz)|195.113.20.140|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 355216681 (339M) [application/x-gzip]
Saving to: ‘ud-treebanks-v2.5.tgz’


2021-03-10 14:36:18 (23.2 MB/s) - ‘ud-treebanks-v2.5.tgz’ saved [355216681/355216681]

--2021-03-10 14:36:19--  https://www.ida.liu.se/~TDDE09/commons/projectivize.py
Resolving www.ida.liu.se (www.ida.liu.se)... 130.236.57.103, 2001:6b0:17:2004::57:103
Connecting to www.ida.liu.se (www.ida.liu.se)|130.236.57.103|:443... connected.
HTTP request sent

In [None]:
 # Pick the CUDA device when one is available, otherwise fall back to the CPU.
 # NOTE(review): nothing visible in this notebook moves the model or any tensor
 # to `device` yet, so training runs on the CPU regardless of this value.
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 print(device)

cpu


In [None]:
#polish train data = UD_Polish-PDB/pl_pdb-ud-train.conllu
#english train data = UD_English-GUM/en_gum-ud-train.conllu

#arabic train data = UD_Arabic-PADT/ar_padt-ud-train.conllu
#arabic dev data = UD_Arabic-PADT/ar_padt-ud-dev.conllu

def datafiles(filename):
  """Read a CoNLL-U file into a list of tagged sentences.

  Args:
    filename: path to a UTF-8 encoded CoNLL-U file.

  Returns:
    A list with one entry per sentence; each sentence is a list of
    ``(form, upostag)`` tuples in file order.
  """
  data = []
  # The context manager closes the handle even if parsing fails part-way.
  with open(filename, "r", encoding="utf-8") as data_file:
    for tokenlist in parse_incr(data_file):
      # NOTE(review): 'upostag' is the key the original code reads; the recorded
      # run used conllu 4.4 - confirm the key still resolves if conllu is upgraded.
      data.append([(token['form'], token['upostag']) for token in tokenlist])
  return data

# Arabic (PADT) train/dev splits, each loaded as a list of sentences of (form, upostag) tuples
arabic_train_data = datafiles("ud-treebanks-v2.5/UD_Arabic-PADT/ar_padt-ud-train.conllu")
arabic_dev_data = datafiles("ud-treebanks-v2.5/UD_Arabic-PADT/ar_padt-ud-dev.conllu")
# English (GUM) train/dev splits, same structure
english_train_data = datafiles("ud-treebanks-v2.5/UD_English-GUM/en_gum-ud-train.conllu")
english_dev_data = datafiles("ud-treebanks-v2.5/UD_English-GUM/en_gum-ud-dev.conllu")

In [None]:
# Show the first sentence of each tagging dataset as a sanity check.
print("Data Examples")
print(english_train_data[0])
print(arabic_train_data[0])
# NOTE: these (form, tag) lists are only inspected here; the cells visible below
# read the treebank through the Dataset class instead.

Data Examples
[('Aesthetic', 'ADJ'), ('Appreciation', 'NOUN'), ('and', 'CCONJ'), ('Spanish', 'ADJ'), ('Art', 'NOUN'), (':', 'PUNCT')]
[('برلين', 'X'), ('ترفض', 'VERB'), ('حصول', 'NOUN'), ('شركة', 'NOUN'), ('اميركية', 'ADJ'), ('على', 'ADP'), ('رخصة', 'NOUN'), ('تصنيع', 'NOUN'), ('دبابة', 'NOUN'), ('"', 'PUNCT'), ('ليوبارد', 'X'), ('"', 'PUNCT'), ('الالمانية', 'ADJ')]


In [None]:
#Dataset Class
class Dataset():
    """Lazy, re-iterable reader for a CoNLL-U treebank file.

    Iterating yields one sentence at a time as a list of
    ``(form, upos, head)`` tuples. Every sentence starts with the pseudo-root
    entry ``Dataset.ROOT`` at position 0, so token ids from the file line up
    with list indices.
    """

    ROOT = ('<root>', '<root>', 0)  # Pseudo-root

    def __init__(self, filename):
        """Remember the path; the file is (re)opened on every iteration."""
        self.filename = filename

    def __iter__(self):
        """Yield each sentence of the file as ``[ROOT, (form, upos, head), ...]``.

        Comment lines (starting with '#') are skipped, as are lines whose ID
        column is not a plain integer (range tokens such as ``1-2``).
        A blank line ends the current sentence. Sentences with no token are
        never yielded, and a final sentence that is not followed by a blank
        line is still yielded.
        """
        with open(self.filename, 'rt', encoding='utf-8') as lines:
            tmp = [Dataset.ROOT]
            for line in lines:
                if line.startswith('#'):  # Skip lines with comments
                    continue
                line = line.rstrip()
                if line:
                    columns = line.split('\t')
                    if columns[0].isdigit():  # Skip range tokens
                        tmp.append((columns[1], columns[3], int(columns[6])))
                elif len(tmp) > 1:
                    # Blank line closes a sentence; ignore stray blank lines
                    # that would otherwise produce a root-only sentence.
                    yield tmp
                    tmp = [Dataset.ROOT]
            if len(tmp) > 1:
                # File ended without a trailing blank line: flush the last sentence.
                yield tmp

In [None]:
# Lazy readers over the English GUM train/dev treebanks (nothing is read until iteration).
dep_train_english_data = Dataset("ud-treebanks-v2.5/UD_English-GUM/en_gum-ud-train.conllu")
dep_dev_english_data = Dataset("ud-treebanks-v2.5/UD_English-GUM/en_gum-ud-dev.conllu")
#Example sentence list
# Take only the first sentence from the iterator instead of materialising the
# whole treebank in memory just to print one example.
print(next(iter(dep_train_english_data)))

[('<root>', '<root>', 0), ('Aesthetic', 'ADJ', 2), ('Appreciation', 'NOUN', 0), ('and', 'CCONJ', 5), ('Spanish', 'ADJ', 5), ('Art', 'NOUN', 2), (':', 'PUNCT', 2)]


In [None]:
PAD = '<pad>'
UNK = '<unk>'
def make_vocabs_head_tagger(gold_data):
    """Build the word and tag vocabularies from gold sentences.

    Args:
        gold_data: iterable of sentences, each an iterable of
            ``(word, tag, head)`` triples.

    Returns:
        ``(words, tags)``: two dicts mapping strings to consecutive integer
        ids in order of first appearance. ``words`` reserves 0 for PAD and 1
        for UNK; ``tags`` reserves 0 for PAD.
    """
    word_to_id = {PAD: 0, UNK: 1}
    tag_to_id = {PAD: 0}

    for sentence in gold_data:
      for form, pos, _ in sentence:
        # Ids are handed out densely, so the next free id is the current size.
        tag_to_id.setdefault(pos, len(tag_to_id))
        word_to_id.setdefault(form, len(word_to_id))

    return word_to_id, tag_to_id

In [None]:
# Build the word/tag -> id vocabularies from the English training treebank.
word_vocab, tag_vocab = make_vocabs_head_tagger(dep_train_english_data)

In [None]:
#Arc Parser and oracle move implementation
class Parser(object):
    """Minimal parser interface; subclasses are expected to override predict."""

    def predict(self, words, tags):
        """Predict a head for every word; not implemented in the base class."""
        raise NotImplementedError

class ArcStandardParser(Parser):
    """Arc-standard transition system.

    A configuration is a triple ``(i, stack, heads)``: ``i`` is the position of
    the next word to shift, ``stack`` is a list of word positions and
    ``heads[k]`` is the head currently assigned to word ``k`` (0 until set).
    """

    MOVES = tuple(range(3))

    SH, LA, RA = MOVES  # Parser moves are specified as integers.

    @staticmethod
    def initial_config(num_words):
        """Return the start configuration: nothing shifted, empty stack, all heads 0."""
        return (0, [], [0] * num_words)

    @staticmethod
    def valid_moves(config):
        """List the moves applicable in ``config``.

        SH needs an unread word; LA and RA both need two items on the stack.
        NOTE(review): LA is offered even when the lower stack item is position
        0 - confirm that is intended if position 0 is a pseudo-root.
        """
        next_pos, stack, heads = config
        allowed = []
        if next_pos < len(heads):
            allowed.append(ArcStandardParser.SH)
        if len(stack) > 1:
            allowed.extend((ArcStandardParser.LA, ArcStandardParser.RA))
        return allowed

    @staticmethod
    def next_config(config, move):
        """Apply ``move`` and return the resulting configuration.

        The stack and heads lists are copied first, so ``config`` itself is
        never modified. An unrecognised move returns an unchanged copy.
        """
        next_pos = config[0]
        stack = config[1][:]
        heads = config[2][:]
        if move == ArcStandardParser.SH:
            # Push the next unread word.
            stack.append(next_pos)
            next_pos += 1
        elif move == ArcStandardParser.LA:
            # Second-topmost item becomes a dependent of the topmost one.
            dependent = stack.pop(-2)
            heads[dependent] = stack[-1]
        elif move == ArcStandardParser.RA:
            # Topmost item becomes a dependent of the item below it.
            dependent = stack.pop()
            heads[dependent] = stack[-1]
        return (next_pos, stack, heads)

    @staticmethod
    def is_final_config(config):
        """A configuration is final when no move is applicable."""
        return not ArcStandardParser.valid_moves(config)

#new oracle 2
def has_no_remaining_arcs(word, config, gold_heads):
    """Tell whether ``word`` has already collected all of its gold dependents.

    Compares how often ``word`` occurs as a head in the configuration's heads
    list (``config[2]``) with how often it occurs in ``gold_heads``.
    """
    attached_so_far = config[2].count(word)
    expected_total = gold_heads.count(word)
    return attached_so_far == expected_total

def oracle_moves(gold_heads):
    """Generate the oracle transition sequence for one gold tree.

    Args:
        gold_heads: list where ``gold_heads[k]`` is the gold head position of
            word ``k``.

    Yields:
        ``(config, move)`` pairs: the configuration *before* the move and the
        arc-standard move (SH, LA or RA) the oracle takes in it.

    The oracle prefers LA, then RA, and only shifts when neither arc is both
    correct and complete (the dependent has already collected all of its own
    dependents). If no move applies before a final configuration is reached,
    a RuntimeWarning is issued and the sequence simply ends early, so callers
    still receive the prefix generated so far.
    """
    import warnings  # local import keeps this block self-contained

    parser = ArcStandardParser  # only static methods are used; no instance needed
    (SH, LA, RA) = parser.MOVES
    config = parser.initial_config(len(gold_heads))
    while not parser.is_final_config(config):
        valid_moves = parser.valid_moves(config)
        move = None
        if LA in valid_moves: #LA condition is the same as RA
            secmost = config[1][-2]
            topmost = config[1][-1]
            # LA would make `topmost` the head of `secmost`; RA the reverse.
            if gold_heads[secmost] == topmost and has_no_remaining_arcs(secmost, config, gold_heads):
                move = LA
            elif gold_heads[topmost] == secmost and has_no_remaining_arcs(topmost, config, gold_heads):
                move = RA

        if move is None and SH in valid_moves:
            move = SH

        if move is None:
            # Stuck: the buffer is empty but no gold arc can be built from the
            # stack. Python shows a repeated identical warning from one
            # location only once by default, which keeps training logs readable.
            warnings.warn(
                "oracle_moves: no oracle move applies before the final configuration "
                "(the gold tree is probably non-projective); move sequence truncated",
                RuntimeWarning,
                stacklevel=2,
            )
            break

        yield (config, move)
        config = parser.next_config(config, move)

In [None]:
#biLSTM concatenate words and tags (xi=e(wi)◦e(pi)) so there is no need to add ps2,1,0 here
def get_feats(config, word_ids):
  """Return the four position indices used as classifier features.

  Args:
    config: ``(b, s, h)`` triple - next buffer position, stack, heads.
    word_ids: accepted for interface compatibility; not read here.

  Returns:
    A LongTensor of shape [4] laid out as ``[vs2, vs1, vs0, b0]``, where
    ``vs0``/``vs1``/``vs2`` come from the topmost / second / third stack item
    and ``b0`` from the buffer position. Each value is the stored position
    minus one, clamped at 0; missing stack items map to 0.

  NOTE(review): the "- 1" shift means a position p selects index p - 1 -
  confirm this offset is intended for the sequence these indices address.
  """
  buffer_pos, stack, _heads = config

  b0 = buffer_pos - 1 if buffer_pos != 0 else 0

  # Up to three topmost stack items, topmost first, shifted and clamped.
  shifted = [max(pos - 1, 0) for pos in reversed(stack[-3:])]
  # Pad with 0 when fewer than three items are on the stack.
  shifted.extend([0] * (3 - len(shifted)))
  vs0, vs1, vs2 = shifted

  return torch.LongTensor([vs2, vs1, vs0, b0])

In [None]:
def training_example(vocab_words, vocab_tags, gold_data):
    """Yield one training example per gold sentence.

    Args:
        vocab_words: dict mapping word forms to ids.
        vocab_tags: dict mapping tags to ids.
        gold_data: iterable of sentences, each a sequence of
            ``(form, tag, head)`` entries.

    Yields:
        A 4-tuple of tensors for each sentence:
        word ids ``[1, n_words]``, tag ids ``[1, n_words]``, the stacked
        feature indices from ``get_feats`` (one row per oracle move) and the
        oracle moves ``[n_moves]``.

    Forms missing from ``vocab_words`` fall back to the '<unk>' id and tags
    missing from ``vocab_tags`` to the '<pad>' id (the same fallbacks
    Blackbox.predict uses), so data other than the vocabulary's own training
    set can be converted without a KeyError. The fallback entry is only looked
    up when it is actually needed.
    """
    for sentence in gold_data: #[('<root>', '<root>', 0), ('Aesthetic', 'ADJ', 2), ('Appreciation', 'NOUN', 0), ...]
        word_ids, tag_ids, gold_heads = [], [], []
        #create id-lists for the current sentence
        for tup in sentence:
            form, tag, head = tup[0], tup[1], tup[2]
            word_ids.append(vocab_words[form] if form in vocab_words else vocab_words['<unk>'])
            tag_ids.append(vocab_tags[tag] if tag in vocab_tags else vocab_tags['<pad>'])
            gold_heads.append(head) #[0, 2, ...]

        idxs, moves = [], []
        for config, move in oracle_moves(gold_heads): #for each sentence we get oracle moves
            idxs.append(get_feats(config, word_ids)) # LongTensor([vs2, vs1, vs0, b0])
            moves.append(move)

        yield torch.LongTensor(word_ids).unsqueeze(0), torch.LongTensor(tag_ids).unsqueeze(0), torch.stack(idxs), torch.LongTensor(moves)

In [None]:
#Example of one sentence in training example function
w_ids, t_ids, f_idx, mov = next(training_example(word_vocab, tag_vocab, dep_train_english_data))
print("word ids: ",w_ids)
print("tag ids: ",t_ids)
print("mlp features indexs: " ,f_idx.shape)
print("force trinning moves: ", mov.shape)

word ids:  tensor([[2, 3, 4, 5, 6, 7, 8]])
tag ids:  tensor([[1, 2, 3, 4, 2, 3, 5]])
mlp features indexs:  torch.Size([13, 4])
force trinning moves:  torch.Size([13])


In [None]:
#"Encoder" Implementation
class BiLSTM(nn.Module):
    """Sentence encoder: word + POS embeddings fed through a bidirectional LSTM."""

    def __init__(self, num_words, num_pos, word_emb_dim = 100, pos_emb_dim=25, hidden_dim = 125, n_layers=2):
        """
        Args:
            num_words: size of the word vocabulary.
            num_pos: size of the POS-tag vocabulary.
            word_emb_dim: word embedding width.
            pos_emb_dim: POS embedding width.
            hidden_dim: LSTM hidden size per direction.
            n_layers: number of stacked LSTM layers.
        """
        super().__init__()
        # Each token is represented by its word embedding concatenated with its POS embedding.
        lstm_input_dim = word_emb_dim + pos_emb_dim
        self.ew = nn.Embedding(num_words, word_emb_dim)
        self.ep = nn.Embedding(num_pos, pos_emb_dim)
        self.bi_lstm = nn.LSTM(
            input_size=lstm_input_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            batch_first=True,
            bidirectional=True,
        )

    def forward(self, word, pos):
        """Encode a batch of sentences.

        Args:
            word: LongTensor of word ids, shape [batch_size, n_words].
            pos: LongTensor of tag ids, shape [batch_size, n_words].

        Returns:
            Tensor of shape [batch_size, n_words, 2 * hidden_dim] (forward and
            backward states concatenated).
        """
        # [batch_size, n_words, word_emb_dim + pos_emb_dim]
        token_repr = torch.cat([self.ew(word), self.ep(pos)], dim=2)
        # Only the per-token outputs are needed; the final (h, c) state is dropped.
        contextual, _final_state = self.bi_lstm(token_repr)
        return contextual

In [None]:
#w_ids, t_ids, g_heads sentence example
model_lstm = BiLSTM(len(word_vocab), len(tag_vocab))
test_output = model_lstm.forward(w_ids, t_ids)
print("Shape BiLSTM feats: ",test_output.shape)

Shape BiLSTM feats:  torch.Size([1, 7, 250])


In [None]:
class MLP(nn.Module):
    """One-hidden-layer classifier that scores the parser moves."""

    def __init__(self, word_emb_dim = 100, pos_emb_dim=25, hidden_unit = 100, output_dim = 3, input_dim = None):
        """
        Args:
            word_emb_dim: word embedding width (used only for the default input size).
            pos_emb_dim: POS embedding width (used only for the default input size).
            hidden_unit: width of the hidden layer.
            output_dim: number of output scores (one per move).
            input_dim: width of the feature vector fed to the network. When
                None, the original default ``4*2*(word_emb_dim + pos_emb_dim)``
                is used. That default equals four concatenated BiLSTM outputs
                only while the encoder's hidden size is
                ``word_emb_dim + pos_emb_dim``; pass ``input_dim`` explicitly
                for any other encoder width.
        """
        super().__init__()
        if input_dim is None:
            input_dim = 4*2*(word_emb_dim + pos_emb_dim)
        self.hidden = nn.Linear(input_dim, hidden_unit)
        self.relu = nn.ReLU()
        self.output = nn.Linear(hidden_unit, output_dim)

    def forward(self, features):
        """Map features [n, input_dim] to unnormalised move scores [n, output_dim]."""
        hn = self.hidden(features)
        out = self.output(self.relu(hn))

        return out

In [None]:
# Demo: three ways to gather BiLSTM output vectors at feature indices and flatten them into MLP input rows.
# Case 1: a single list of four indices for one sentence (batch size 1).
# index_select picks those four positions along the word dimension.
print("Example for no batch | biLSTM output shape: ", test_output.shape)
feats_from_config = [0, 3, 4, 5] # indices for vs2, vs1, vs0, b0
feats_torch = torch.LongTensor(feats_from_config) # torch.Size([4])
test_input_mlp = test_output.index_select(1, feats_torch)
print("Shape input for mlp: " ,test_input_mlp.shape)
# Flatten the four selected vectors into one row
test_input_mlp = test_input_mlp.view(-1,test_input_mlp.size(1)*test_input_mlp.size(2))
print("Concat embeds shape: ", test_input_mlp.shape)

print("#################################")

# Case 2: batch size > 1, one list of four indices per sentence in the batch.
# The encoder output is duplicated here to fake a batch of two sentences.
test_output2 = torch.cat((test_output,test_output), 0)
print("Example for batch feats | biLSTM output shape: ", test_output2.shape)
feats_from_config2 = [[0, 3, 4, 5], [0, 3, 4, 5]]
feats_torch2 = torch.LongTensor(feats_from_config2)
# gather needs an index tensor with the same trailing size as the source, so the indices are expanded over the feature dimension
indexs = feats_torch2.unsqueeze(2).expand(feats_torch2.size(0), feats_torch2.size(1), test_output2.size(2))
test_input_mlp2 = torch.gather(test_output2, 1, indexs)
print("Shape input for mlp: " ,test_input_mlp2.shape)
test_input_mlp2 = test_input_mlp2.view(-1,test_input_mlp2.size(1)*test_input_mlp2.size(2))
print("Concat embeds shape: ",test_input_mlp2.shape)

print("################################")
# Case 3: one sentence (batch size 1) with several index lists.
# Each index list describes the configuration for one move (two moves here).
test_output3 = test_output
print("Example for no batch | biLSTM output shape: ", test_output3.shape)
feats_from_config3 = [[0, 3, 4, 5], [0, 3, 4, 5]]
feats_torch3 = torch.LongTensor(feats_from_config3) # torch.Size([2, 4])
# Select per move, stack the results, then drop the singleton batch dimension
test_input_mlp3 = [test_output3.index_select(1, idxs) for idxs in feats_torch3]
test_input_mlp3 = torch.stack(test_input_mlp3)
test_input_mlp3 = test_input_mlp3.squeeze(1)
print("Shape input for mlp: " ,test_input_mlp3.shape)
test_input_mlp3 = test_input_mlp3.view(-1,test_input_mlp3.size(1)*test_input_mlp3.size(2))
print("Concat embeds shape: ", test_input_mlp3.shape)

Example for no batch | biLSTM output shape:  torch.Size([1, 7, 250])
Shape input for mlp:  torch.Size([1, 4, 250])
Concat embeds shape:  torch.Size([1, 1000])
#################################
Example for batch feats | biLSTM output shape:  torch.Size([2, 7, 250])
Shape input for mlp:  torch.Size([2, 4, 250])
Concat embeds shape:  torch.Size([2, 1000])
################################
Example for no batch | biLSTM output shape:  torch.Size([1, 7, 250])
Shape input for mlp:  torch.Size([2, 4, 250])
Concat embeds shape:  torch.Size([2, 1000])


In [None]:
# Demo: feed the flattened feature rows built above (test_input_mlp, test_input_mlp2) through an untrained MLP.
model_MLP = MLP()
test_output_mlp = model_MLP.forward(test_input_mlp)
# NOTE(review): the label below says "BiLSTM feats" but the value printed is the MLP output shape.
print("Shape BiLSTM feats: ",test_output_mlp.shape)
test_output_mlp2 = model_MLP.forward(test_input_mlp2)
print("Shape BiLSTM feats: ",test_output_mlp2.shape)

Shape BiLSTM feats:  torch.Size([1, 3])
Shape BiLSTM feats:  torch.Size([2, 3])


In [None]:
class Blackbox(nn.Module): # TODO: pick a more descriptive name
    """Transition-based dependency parser: BiLSTM encoder + MLP move classifier.

    ``forward`` scores the three arc-standard moves for a set of parser
    configurations of one sentence; ``predict`` greedily parses a sentence
    given as strings.
    """

    def __init__(self, vocab_words, vocab_tags, num_words, num_pos):
        """
        Args:
            vocab_words: dict mapping word forms to ids (needs a '<unk>' entry
                for unseen words at prediction time).
            vocab_tags: dict mapping tags to ids (needs a '<pad>' entry for
                unseen tags at prediction time).
            num_words: size of the word vocabulary.
            num_pos: size of the tag vocabulary.
        """
        super().__init__()

        self.model_lstm = BiLSTM(num_words, num_pos)
        self.model_MLP = MLP()
        self.vocab_words = vocab_words
        self.vocab_tags = vocab_tags

    def _score_configs(self, lstm_output, idxs_feats):
        """Score moves for each row of feature indices, given the encoder output.

        Args:
            lstm_output: encoder output for one sentence, [1, n_words, dim].
            idxs_feats: LongTensor [n_configs, n_feats] of word positions.

        Returns:
            Tensor [n_configs, 3] of unnormalised move scores.
        """
        n_feats = idxs_feats.size(-1)
        # One gather for all configurations: [1, n_configs * n_feats, dim]
        selected = lstm_output.index_select(1, idxs_feats.reshape(-1))
        # Concatenate each configuration's vectors: [n_configs, n_feats * dim]
        input_mlp = selected.reshape(-1, n_feats * lstm_output.size(2))
        return self.model_MLP(input_mlp)

    def forward(self, word_ids, pos_ids, idxs_feats):
        """Encode one sentence and score the moves for every configuration.

        Args:
            word_ids: LongTensor [1, n_words].
            pos_ids: LongTensor [1, n_words].
            idxs_feats: LongTensor [n_configs, 4] as produced by get_feats.

        Returns:
            Tensor [n_configs, 3] of unnormalised move scores.
        """
        lstm_output = self.model_lstm(word_ids, pos_ids) # [1, n_words, dim]
        return self._score_configs(lstm_output, idxs_feats)

    def predict(self, words, tags):
        """Greedily parse one sentence.

        Args:
            words: list of word forms (position 0 is expected to be the
                pseudo-root, as in the training data).
            tags: list of tags, parallel to ``words``.

        Returns:
            List of predicted head positions, one per word. The input lists
            are not modified.
        """
        # Map strings to ids without touching the caller's lists. Unknown words
        # map to '<unk>', unknown tags to '<pad>'; the fallback entry is only
        # looked up when needed.
        word_ids = [self.vocab_words[w] if w in self.vocab_words else self.vocab_words['<unk>'] for w in words]
        tag_ids = [self.vocab_tags[t] if t in self.vocab_tags else self.vocab_tags['<pad>'] for t in tags]

        config = ArcStandardParser.initial_config(len(word_ids))
        if not word_ids:
            return config[2] # nothing to parse; the encoder cannot run on an empty sequence

        # Build inputs on the same device as the model parameters.
        device = self.model_lstm.ew.weight.device
        word_tensor = torch.LongTensor(word_ids).unsqueeze(0).to(device)
        tag_tensor = torch.LongTensor(tag_ids).unsqueeze(0).to(device)

        with torch.no_grad():
            # The encoder output does not depend on the configuration, so it is
            # computed once per sentence and reused for every transition.
            lstm_output = self.model_lstm(word_tensor, tag_tensor)
            while not ArcStandardParser.is_final_config(config):
                valid_moves = ArcStandardParser.valid_moves(config)
                idx_feats = get_feats(config, word_ids).unsqueeze(0).to(device) # [1, 4]
                output_move = int(torch.argmax(self._score_configs(lstm_output, idx_feats)))
                if output_move not in valid_moves:
                    # Best-scoring move is not applicable: fall back to the last valid one.
                    output_move = valid_moves[-1]
                config = ArcStandardParser.next_config(config, output_move)

        return config[2]

In [None]:
import torch
import torch.nn.functional as F
import torch.optim as optim
import tqdm
# Debugging aid: makes autograd record extra information so backward-pass errors can be traced.
# NOTE(review): PyTorch documents that this mode slows execution and is meant for
# debugging only - consider turning it off for full training runs.
torch.autograd.set_detect_anomaly(True)

def train_model(train_data, n_epochs = 1, batch_size = 0, lr=1e-2):
    """Train a Blackbox parser on oracle move sequences.

    Args:
        train_data: re-iterable collection of gold sentences (it is traversed
            once to build the vocabularies and once per epoch).
        n_epochs: number of passes over ``train_data``.
        batch_size: currently unused; one optimizer step is taken per sentence.
        lr: Adam learning rate.

    Returns:
        The trained Blackbox model.

    The loss of every 250th sentence is printed while a tqdm bar tracks
    progress over all epochs.
    """
    word_vocab, tag_vocab = make_vocabs_head_tagger(train_data)
    model = Blackbox(word_vocab, tag_vocab, len(word_vocab), len(tag_vocab))
    # NOTE: the model is left on its default device; moving it with
    # model.to(device) would also require moving every input tensor.
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Size the progress bar from the data instead of a hard-coded sentence count.
    try:
        n_sentences = len(train_data)
    except TypeError:
        n_sentences = sum(1 for _ in train_data)

    x = 0  # sentences processed so far, across all epochs
    with tqdm.tqdm(total=n_epochs * n_sentences) as pbar:
        for t in range(n_epochs):
            model.train()
            t_data = training_example(word_vocab, tag_vocab, train_data)

            for w_ids, t_ids, f_idx, move in t_data: # one sentence per step
                pbar.set_description(f'Sentence {x+1}')
                optimizer.zero_grad()
                # Call the module (not .forward) so nn.Module hooks are honoured.
                output_mlp = model(w_ids, t_ids, f_idx)
                loss = F.cross_entropy(output_mlp, move)
                loss.backward()
                optimizer.step()

                if(x%250 == 0):
                    print("Loss: "+str(loss.item()))

                x += 1
                pbar.update(1)

    return model

In [None]:
#model_BB = train_model(dep_train_english_data) #UAS: 0.6553


Sentence 3:   0%|          | 2/3753 [00:00<10:33,  5.92it/s]

Loss: 1.104910135269165


Sentence 7:   0%|          | 6/3753 [00:01<13:53,  4.49it/s]

?


Sentence 8:   0%|          | 7/3753 [00:01<14:29,  4.31it/s]

?


Sentence 14:   0%|          | 13/3753 [00:04<26:13,  2.38it/s]

?


Sentence 23:   1%|          | 22/3753 [00:09<37:16,  1.67it/s]

?


Sentence 27:   1%|          | 26/3753 [00:12<40:55,  1.52it/s]

?


Sentence 30:   1%|          | 29/3753 [00:15<46:57,  1.32it/s]

?


Sentence 32:   1%|          | 31/3753 [00:16<43:10,  1.44it/s]

?


Sentence 33:   1%|          | 32/3753 [00:17<50:12,  1.24it/s]

?


Sentence 34:   1%|          | 33/3753 [00:18<46:20,  1.34it/s]

?


Sentence 40:   1%|          | 39/3753 [00:22<39:22,  1.57it/s]

?


Sentence 41:   1%|          | 40/3753 [00:23<41:19,  1.50it/s]

?


Sentence 45:   1%|          | 44/3753 [00:25<40:01,  1.54it/s]

?


Sentence 87:   2%|▏         | 86/3753 [00:50<35:02,  1.74it/s]

?


Sentence 89:   2%|▏         | 88/3753 [00:51<33:12,  1.84it/s]

?


Sentence 94:   2%|▏         | 93/3753 [00:54<36:23,  1.68it/s]

?


Sentence 108:   3%|▎         | 107/3753 [01:00<29:22,  2.07it/s]

?


Sentence 129:   3%|▎         | 128/3753 [01:15<34:56,  1.73it/s]

?


Sentence 147:   4%|▍         | 146/3753 [01:25<34:28,  1.74it/s]

?


Sentence 166:   4%|▍         | 165/3753 [01:35<26:15,  2.28it/s]

?


Sentence 175:   5%|▍         | 174/3753 [01:38<22:34,  2.64it/s]

?


Sentence 202:   5%|▌         | 201/3753 [01:50<29:54,  1.98it/s]

?


Sentence 212:   6%|▌         | 211/3753 [01:55<21:28,  2.75it/s]

?


Sentence 252:   7%|▋         | 251/3753 [02:11<19:36,  2.98it/s]

Loss: 0.1818116158246994


Sentence 262:   7%|▋         | 261/3753 [02:18<59:01,  1.01s/it]

?


Sentence 267:   7%|▋         | 266/3753 [02:22<51:04,  1.14it/s]

?


Sentence 271:   7%|▋         | 270/3753 [02:26<46:02,  1.26it/s]

?


Sentence 275:   7%|▋         | 274/3753 [02:29<43:07,  1.34it/s]

?


Sentence 276:   7%|▋         | 275/3753 [02:31<56:41,  1.02it/s]

?


Sentence 288:   8%|▊         | 287/3753 [02:39<36:39,  1.58it/s]

?


Sentence 332:   9%|▉         | 331/3753 [03:01<23:33,  2.42it/s]

?


Sentence 341:   9%|▉         | 340/3753 [03:05<38:12,  1.49it/s]

?


Sentence 360:  10%|▉         | 359/3753 [03:16<37:17,  1.52it/s]

?


Sentence 367:  10%|▉         | 366/3753 [03:19<28:46,  1.96it/s]

?


Sentence 379:  10%|█         | 378/3753 [03:24<22:09,  2.54it/s]

?


Sentence 383:  10%|█         | 382/3753 [03:26<26:05,  2.15it/s]

?


Sentence 393:  10%|█         | 392/3753 [03:32<41:48,  1.34it/s]

?


Sentence 398:  11%|█         | 397/3753 [03:34<27:36,  2.03it/s]

?


Sentence 404:  11%|█         | 403/3753 [03:38<34:03,  1.64it/s]

?


Sentence 405:  11%|█         | 404/3753 [03:39<42:34,  1.31it/s]

?


Sentence 407:  11%|█         | 406/3753 [03:41<39:19,  1.42it/s]

?


Sentence 441:  12%|█▏        | 440/3753 [03:59<30:25,  1.81it/s]

?


Sentence 449:  12%|█▏        | 448/3753 [04:03<27:27,  2.01it/s]

?


Sentence 461:  12%|█▏        | 460/3753 [04:11<44:11,  1.24it/s]

?


Sentence 475:  13%|█▎        | 474/3753 [04:18<24:31,  2.23it/s]

?


Sentence 489:  13%|█▎        | 488/3753 [04:23<15:21,  3.54it/s]

?


Sentence 496:  13%|█▎        | 495/3753 [04:25<15:27,  3.51it/s]

?


Sentence 502:  13%|█▎        | 501/3753 [04:29<27:28,  1.97it/s]

Loss: 0.3046056628227234


Sentence 522:  14%|█▍        | 521/3753 [04:38<24:27,  2.20it/s]

?


Sentence 523:  14%|█▍        | 522/3753 [04:39<34:27,  1.56it/s]

?


Sentence 558:  15%|█▍        | 557/3753 [04:49<17:03,  3.12it/s]

?


Sentence 582:  15%|█▌        | 581/3753 [04:59<17:13,  3.07it/s]

?


Sentence 613:  16%|█▋        | 612/3753 [05:11<17:17,  3.03it/s]

?


Sentence 638:  17%|█▋        | 637/3753 [05:25<31:19,  1.66it/s]

?


Sentence 646:  17%|█▋        | 645/3753 [05:29<19:28,  2.66it/s]

?


Sentence 648:  17%|█▋        | 647/3753 [05:30<24:36,  2.10it/s]

?


Sentence 662:  18%|█▊        | 661/3753 [05:36<23:43,  2.17it/s]

?


Sentence 673:  18%|█▊        | 672/3753 [05:41<33:14,  1.54it/s]

?


Sentence 678:  18%|█▊        | 677/3753 [05:44<23:26,  2.19it/s]

?


Sentence 679:  18%|█▊        | 678/3753 [05:45<23:35,  2.17it/s]

?


Sentence 682:  18%|█▊        | 681/3753 [05:45<19:13,  2.66it/s]

?


Sentence 684:  18%|█▊        | 683/3753 [05:46<17:10,  2.98it/s]

?


Sentence 685:  18%|█▊        | 684/3753 [05:46<16:40,  3.07it/s]

?


Sentence 714:  19%|█▉        | 713/3753 [06:02<24:55,  2.03it/s]

?


Sentence 720:  19%|█▉        | 719/3753 [06:05<24:59,  2.02it/s]

?


Sentence 723:  19%|█▉        | 722/3753 [06:07<30:12,  1.67it/s]

?


Sentence 752:  20%|██        | 751/3753 [06:15<18:36,  2.69it/s]

Loss: 0.41958051919937134


Sentence 827:  22%|██▏       | 826/3753 [06:39<19:32,  2.50it/s]

?


Sentence 836:  22%|██▏       | 835/3753 [06:45<30:51,  1.58it/s]

?


Sentence 852:  23%|██▎       | 851/3753 [06:53<23:01,  2.10it/s]

?


Sentence 861:  23%|██▎       | 860/3753 [06:57<22:05,  2.18it/s]

?


Sentence 863:  23%|██▎       | 862/3753 [06:59<25:40,  1.88it/s]

?


Sentence 873:  23%|██▎       | 872/3753 [07:03<16:03,  2.99it/s]

?


Sentence 911:  24%|██▍       | 910/3753 [07:16<15:02,  3.15it/s]

?


Sentence 921:  25%|██▍       | 920/3753 [07:19<12:29,  3.78it/s]

?


Sentence 935:  25%|██▍       | 934/3753 [07:22<12:40,  3.71it/s]

?


Sentence 957:  25%|██▌       | 956/3753 [07:31<16:46,  2.78it/s]

?


Sentence 974:  26%|██▌       | 973/3753 [07:35<10:47,  4.29it/s]

?


Sentence 981:  26%|██▌       | 980/3753 [07:39<28:09,  1.64it/s]

?


Sentence 1002:  27%|██▋       | 1001/3753 [07:45<12:43,  3.60it/s]

Loss: 0.299110472202301


Sentence 1005:  27%|██▋       | 1004/3753 [07:47<17:44,  2.58it/s]

?


Sentence 1007:  27%|██▋       | 1006/3753 [07:48<25:23,  1.80it/s]

?


Sentence 1059:  28%|██▊       | 1058/3753 [08:01<08:54,  5.05it/s]

?


Sentence 1133:  30%|███       | 1132/3753 [08:24<15:09,  2.88it/s]

?


Sentence 1158:  31%|███       | 1157/3753 [08:33<18:30,  2.34it/s]

?


Sentence 1176:  31%|███▏      | 1175/3753 [08:40<12:25,  3.46it/s]

?


Sentence 1253:  33%|███▎      | 1252/3753 [09:00<09:40,  4.31it/s]

Loss: 0.15255439281463623


Sentence 1256:  33%|███▎      | 1255/3753 [09:01<15:21,  2.71it/s]

?


Sentence 1288:  34%|███▍      | 1287/3753 [09:10<12:48,  3.21it/s]

?


Sentence 1348:  36%|███▌      | 1347/3753 [09:31<17:01,  2.35it/s]

?


Sentence 1374:  37%|███▋      | 1373/3753 [09:42<13:42,  2.89it/s]

?


Sentence 1389:  37%|███▋      | 1388/3753 [09:46<09:40,  4.07it/s]

?


Sentence 1394:  37%|███▋      | 1393/3753 [09:49<18:48,  2.09it/s]

?


Sentence 1397:  37%|███▋      | 1396/3753 [09:50<13:49,  2.84it/s]

?


Sentence 1411:  38%|███▊      | 1410/3753 [09:57<24:08,  1.62it/s]

?


Sentence 1412:  38%|███▊      | 1411/3753 [09:57<22:23,  1.74it/s]

?


Sentence 1417:  38%|███▊      | 1416/3753 [09:59<16:04,  2.42it/s]

?


Sentence 1419:  38%|███▊      | 1418/3753 [10:00<15:48,  2.46it/s]

?


Sentence 1428:  38%|███▊      | 1427/3753 [10:03<11:11,  3.47it/s]

?


Sentence 1449:  39%|███▊      | 1448/3753 [10:11<12:47,  3.00it/s]

?


Sentence 1450:  39%|███▊      | 1449/3753 [10:12<22:10,  1.73it/s]

?


Sentence 1466:  39%|███▉      | 1465/3753 [10:18<15:04,  2.53it/s]

?


Sentence 1484:  40%|███▉      | 1483/3753 [10:29<22:54,  1.65it/s]

?


Sentence 1495:  40%|███▉      | 1494/3753 [10:34<13:10,  2.86it/s]

?


Sentence 1502:  40%|███▉      | 1501/3753 [10:39<17:03,  2.20it/s]

Loss: 0.36831286549568176


Sentence 1516:  40%|████      | 1515/3753 [10:45<18:24,  2.03it/s]

?


Sentence 1518:  40%|████      | 1517/3753 [10:46<19:56,  1.87it/s]

?


Sentence 1521:  41%|████      | 1520/3753 [10:48<22:03,  1.69it/s]

?


Sentence 1523:  41%|████      | 1522/3753 [10:49<18:10,  2.05it/s]

?


Sentence 1551:  41%|████▏     | 1550/3753 [10:58<13:08,  2.79it/s]

?


Sentence 1554:  41%|████▏     | 1553/3753 [10:59<11:29,  3.19it/s]

?


Sentence 1564:  42%|████▏     | 1563/3753 [11:03<13:31,  2.70it/s]

?


Sentence 1575:  42%|████▏     | 1574/3753 [11:09<18:20,  1.98it/s]

?


Sentence 1577:  42%|████▏     | 1576/3753 [11:09<16:46,  2.16it/s]

?


Sentence 1592:  42%|████▏     | 1591/3753 [11:18<16:28,  2.19it/s]

?


Sentence 1597:  43%|████▎     | 1596/3753 [11:20<14:22,  2.50it/s]

?


Sentence 1603:  43%|████▎     | 1602/3753 [11:22<11:35,  3.09it/s]

?


Sentence 1632:  43%|████▎     | 1631/3753 [11:31<13:47,  2.56it/s]

?


Sentence 1642:  44%|████▎     | 1641/3753 [11:35<11:59,  2.93it/s]

?


Sentence 1670:  44%|████▍     | 1669/3753 [11:43<08:21,  4.15it/s]

?


Sentence 1689:  45%|████▍     | 1688/3753 [11:48<07:21,  4.68it/s]

?


Sentence 1699:  45%|████▌     | 1698/3753 [11:50<07:20,  4.67it/s]

?


Sentence 1732:  46%|████▌     | 1731/3753 [12:01<11:54,  2.83it/s]

?


Sentence 1752:  47%|████▋     | 1751/3753 [12:09<16:48,  1.99it/s]

Loss: 0.40944522619247437


Sentence 1757:  47%|████▋     | 1756/3753 [12:11<13:41,  2.43it/s]

?


Sentence 1770:  47%|████▋     | 1769/3753 [12:17<11:44,  2.81it/s]

?


Sentence 1791:  48%|████▊     | 1790/3753 [12:26<17:39,  1.85it/s]

?


Sentence 1797:  48%|████▊     | 1796/3753 [12:29<16:03,  2.03it/s]

?


Sentence 1799:  48%|████▊     | 1798/3753 [12:30<14:42,  2.21it/s]

?


Sentence 1812:  48%|████▊     | 1811/3753 [12:37<12:59,  2.49it/s]

?


Sentence 1817:  48%|████▊     | 1816/3753 [12:38<10:36,  3.04it/s]

?


Sentence 1818:  48%|████▊     | 1817/3753 [12:38<11:09,  2.89it/s]

?


Sentence 1820:  48%|████▊     | 1819/3753 [12:39<10:50,  2.97it/s]

?


Sentence 1842:  49%|████▉     | 1841/3753 [12:46<14:12,  2.24it/s]

?


Sentence 1867:  50%|████▉     | 1866/3753 [12:55<13:27,  2.34it/s]

?


Sentence 1879:  50%|█████     | 1878/3753 [12:59<09:37,  3.25it/s]

?


Sentence 1884:  50%|█████     | 1883/3753 [13:00<09:32,  3.27it/s]

?


Sentence 1892:  50%|█████     | 1891/3753 [13:04<10:40,  2.91it/s]

?


Sentence 1930:  51%|█████▏    | 1929/3753 [13:17<09:51,  3.09it/s]

?


Sentence 1934:  52%|█████▏    | 1933/3753 [13:20<15:39,  1.94it/s]

?


Sentence 1941:  52%|█████▏    | 1940/3753 [13:23<10:58,  2.75it/s]

?


Sentence 1963:  52%|█████▏    | 1962/3753 [13:31<11:09,  2.67it/s]

?


Sentence 1977:  53%|█████▎    | 1976/3753 [13:37<11:06,  2.67it/s]

?


Sentence 1995:  53%|█████▎    | 1994/3753 [13:43<07:15,  4.04it/s]

?


Sentence 2002:  53%|█████▎    | 2001/3753 [13:45<09:00,  3.24it/s]

Loss: 0.16384409368038177


Sentence 2025:  54%|█████▍    | 2024/3753 [13:53<13:36,  2.12it/s]

?


Sentence 2027:  54%|█████▍    | 2026/3753 [13:55<18:21,  1.57it/s]

?


Sentence 2034:  54%|█████▍    | 2033/3753 [13:57<09:15,  3.10it/s]

?


Sentence 2052:  55%|█████▍    | 2051/3753 [14:04<10:30,  2.70it/s]

?


Sentence 2069:  55%|█████▌    | 2068/3753 [14:12<09:02,  3.10it/s]

?


Sentence 2088:  56%|█████▌    | 2087/3753 [14:18<08:54,  3.12it/s]

?


Sentence 2113:  56%|█████▋    | 2112/3753 [14:29<09:29,  2.88it/s]

?


Sentence 2115:  56%|█████▋    | 2114/3753 [14:30<13:40,  2.00it/s]

?


Sentence 2126:  57%|█████▋    | 2125/3753 [14:35<14:25,  1.88it/s]

?


Sentence 2137:  57%|█████▋    | 2136/3753 [14:41<12:54,  2.09it/s]

?


Sentence 2138:  57%|█████▋    | 2137/3753 [14:42<13:19,  2.02it/s]

?


Sentence 2152:  57%|█████▋    | 2151/3753 [14:47<11:34,  2.31it/s]

?


Sentence 2179:  58%|█████▊    | 2178/3753 [14:58<12:15,  2.14it/s]

?


Sentence 2196:  58%|█████▊    | 2195/3753 [15:07<09:50,  2.64it/s]

?


Sentence 2210:  59%|█████▉    | 2209/3753 [15:13<07:40,  3.36it/s]

?


Sentence 2231:  59%|█████▉    | 2230/3753 [15:23<17:36,  1.44it/s]

?


Sentence 2237:  60%|█████▉    | 2236/3753 [15:25<10:00,  2.53it/s]

?


Sentence 2238:  60%|█████▉    | 2237/3753 [15:25<09:36,  2.63it/s]

?


Sentence 2249:  60%|█████▉    | 2248/3753 [15:31<13:46,  1.82it/s]

?


Sentence 2253:  60%|██████    | 2252/3753 [15:33<08:33,  2.93it/s]

Loss: 0.67448890209198


Sentence 2286:  61%|██████    | 2285/3753 [15:51<10:00,  2.45it/s]

?


Sentence 2287:  61%|██████    | 2286/3753 [15:51<11:02,  2.21it/s]

?


Sentence 2291:  61%|██████    | 2290/3753 [15:53<09:45,  2.50it/s]

?


Sentence 2295:  61%|██████    | 2294/3753 [15:56<13:05,  1.86it/s]

?


Sentence 2296:  61%|██████    | 2295/3753 [15:56<13:44,  1.77it/s]

?


Sentence 2297:  61%|██████    | 2296/3753 [15:57<11:42,  2.07it/s]

?


Sentence 2318:  62%|██████▏   | 2317/3753 [16:06<09:00,  2.66it/s]

?


Sentence 2371:  63%|██████▎   | 2370/3753 [16:31<10:06,  2.28it/s]

?


Sentence 2380:  63%|██████▎   | 2379/3753 [16:36<12:00,  1.91it/s]

?


Sentence 2383:  63%|██████▎   | 2382/3753 [16:38<13:44,  1.66it/s]

?


Sentence 2388:  64%|██████▎   | 2387/3753 [16:41<13:02,  1.75it/s]

?


Sentence 2389:  64%|██████▎   | 2388/3753 [16:41<12:33,  1.81it/s]

?


Sentence 2418:  64%|██████▍   | 2417/3753 [16:54<09:48,  2.27it/s]

?


Sentence 2420:  64%|██████▍   | 2419/3753 [16:55<10:06,  2.20it/s]

?


Sentence 2423:  65%|██████▍   | 2422/3753 [16:57<09:51,  2.25it/s]

?


Sentence 2442:  65%|██████▌   | 2441/3753 [17:06<11:55,  1.83it/s]

?


Sentence 2445:  65%|██████▌   | 2444/3753 [17:08<13:36,  1.60it/s]

?


Sentence 2449:  65%|██████▌   | 2448/3753 [17:10<11:00,  1.98it/s]

?


Sentence 2450:  65%|██████▌   | 2449/3753 [17:11<10:28,  2.07it/s]

?


Sentence 2453:  65%|██████▌   | 2452/3753 [17:12<10:08,  2.14it/s]

?


Sentence 2467:  66%|██████▌   | 2466/3753 [17:19<07:35,  2.82it/s]

?


Sentence 2497:  67%|██████▋   | 2496/3753 [17:32<09:24,  2.23it/s]

?


Sentence 2501:  67%|██████▋   | 2500/3753 [17:34<12:00,  1.74it/s]

?


Sentence 2502:  67%|██████▋   | 2501/3753 [17:35<12:35,  1.66it/s]

Loss: 0.3459390103816986
?


Sentence 2510:  67%|██████▋   | 2509/3753 [17:39<07:24,  2.80it/s]

?


Sentence 2513:  67%|██████▋   | 2512/3753 [17:40<09:42,  2.13it/s]

?


Sentence 2525:  67%|██████▋   | 2524/3753 [17:45<08:07,  2.52it/s]

?


Sentence 2527:  67%|██████▋   | 2526/3753 [17:46<08:17,  2.46it/s]

?


Sentence 2534:  67%|██████▋   | 2533/3753 [17:49<08:27,  2.40it/s]

?


Sentence 2539:  68%|██████▊   | 2538/3753 [17:51<06:23,  3.17it/s]

?


Sentence 2540:  68%|██████▊   | 2539/3753 [17:51<07:25,  2.72it/s]

?


Sentence 2541:  68%|██████▊   | 2540/3753 [17:52<08:28,  2.38it/s]

?


Sentence 2544:  68%|██████▊   | 2543/3753 [17:53<09:55,  2.03it/s]

?


Sentence 2545:  68%|██████▊   | 2544/3753 [17:54<09:11,  2.19it/s]

?


Sentence 2556:  68%|██████▊   | 2555/3753 [17:59<07:23,  2.70it/s]

?


Sentence 2569:  68%|██████▊   | 2568/3753 [18:02<05:26,  3.62it/s]

?


Sentence 2573:  69%|██████▊   | 2572/3753 [18:04<07:31,  2.62it/s]

?


Sentence 2574:  69%|██████▊   | 2573/3753 [18:05<09:22,  2.10it/s]

?


Sentence 2580:  69%|██████▊   | 2579/3753 [18:08<08:23,  2.33it/s]

?


Sentence 2587:  69%|██████▉   | 2586/3753 [18:10<08:42,  2.23it/s]

?


Sentence 2596:  69%|██████▉   | 2595/3753 [18:14<08:23,  2.30it/s]

?


Sentence 2599:  69%|██████▉   | 2598/3753 [18:16<10:33,  1.82it/s]

?


Sentence 2623:  70%|██████▉   | 2622/3753 [18:30<07:17,  2.58it/s]

?


Sentence 2647:  71%|███████   | 2646/3753 [18:42<05:56,  3.11it/s]

?


Sentence 2655:  71%|███████   | 2654/3753 [18:46<09:29,  1.93it/s]

?


Sentence 2661:  71%|███████   | 2660/3753 [18:48<05:02,  3.61it/s]

?


Sentence 2665:  71%|███████   | 2664/3753 [18:49<04:52,  3.73it/s]

?


Sentence 2667:  71%|███████   | 2666/3753 [18:50<05:01,  3.60it/s]

?


Sentence 2679:  71%|███████▏  | 2678/3753 [18:55<05:51,  3.06it/s]

?


Sentence 2680:  71%|███████▏  | 2679/3753 [18:56<09:20,  1.92it/s]

?


Sentence 2681:  71%|███████▏  | 2680/3753 [18:57<08:28,  2.11it/s]

?


Sentence 2684:  71%|███████▏  | 2683/3753 [18:58<07:50,  2.27it/s]

?


Sentence 2687:  72%|███████▏  | 2686/3753 [18:59<06:36,  2.69it/s]

?


Sentence 2697:  72%|███████▏  | 2696/3753 [19:03<06:55,  2.55it/s]

?


Sentence 2701:  72%|███████▏  | 2700/3753 [19:04<08:19,  2.11it/s]

?


Sentence 2706:  72%|███████▏  | 2705/3753 [19:07<07:23,  2.37it/s]

?


Sentence 2721:  72%|███████▏  | 2720/3753 [19:11<07:17,  2.36it/s]

?


Sentence 2723:  73%|███████▎  | 2722/3753 [19:12<08:54,  1.93it/s]

?


Sentence 2728:  73%|███████▎  | 2727/3753 [19:15<09:59,  1.71it/s]

?


Sentence 2749:  73%|███████▎  | 2748/3753 [19:25<09:50,  1.70it/s]

?


Sentence 2750:  73%|███████▎  | 2749/3753 [19:26<10:33,  1.59it/s]

?


Sentence 2753:  73%|███████▎  | 2752/3753 [19:27<08:07,  2.05it/s]

Loss: 0.23219415545463562


Sentence 2756:  73%|███████▎  | 2755/3753 [19:28<06:11,  2.69it/s]

?


Sentence 2759:  73%|███████▎  | 2758/3753 [19:30<09:31,  1.74it/s]

?


Sentence 2766:  74%|███████▎  | 2765/3753 [19:33<07:35,  2.17it/s]

?


Sentence 2776:  74%|███████▍  | 2775/3753 [19:37<04:06,  3.97it/s]

?


Sentence 2778:  74%|███████▍  | 2777/3753 [19:39<07:44,  2.10it/s]

?


Sentence 2798:  75%|███████▍  | 2797/3753 [19:46<07:45,  2.05it/s]

?


Sentence 2811:  75%|███████▍  | 2810/3753 [19:53<08:04,  1.94it/s]

?


Sentence 2814:  75%|███████▍  | 2813/3753 [19:54<07:49,  2.00it/s]

?


Sentence 2820:  75%|███████▌  | 2819/3753 [19:57<06:27,  2.41it/s]

?


Sentence 2861:  76%|███████▌  | 2860/3753 [20:12<05:09,  2.89it/s]

?


Sentence 2862:  76%|███████▌  | 2861/3753 [20:13<05:41,  2.61it/s]

?


Sentence 2894:  77%|███████▋  | 2893/3753 [20:27<03:24,  4.21it/s]

?


Sentence 2898:  77%|███████▋  | 2897/3753 [20:28<03:36,  3.95it/s]

?


Sentence 2900:  77%|███████▋  | 2899/3753 [20:28<03:05,  4.60it/s]

?


Sentence 2916:  78%|███████▊  | 2915/3753 [20:35<05:32,  2.52it/s]

?


Sentence 2927:  78%|███████▊  | 2926/3753 [20:39<05:28,  2.52it/s]

?


Sentence 2931:  78%|███████▊  | 2930/3753 [20:41<06:19,  2.17it/s]

?


Sentence 2950:  79%|███████▊  | 2949/3753 [20:47<04:42,  2.85it/s]

?


Sentence 2961:  79%|███████▉  | 2960/3753 [20:51<04:01,  3.28it/s]

?


Sentence 3002:  80%|███████▉  | 3001/3753 [21:07<03:45,  3.34it/s]

Loss: 5.888744999538176e-05


Sentence 3003:  80%|███████▉  | 3002/3753 [21:07<03:45,  3.33it/s]

?


Sentence 3017:  80%|████████  | 3016/3753 [21:14<04:20,  2.83it/s]

?


Sentence 3021:  80%|████████  | 3020/3753 [21:16<06:21,  1.92it/s]

?


Sentence 3032:  81%|████████  | 3031/3753 [21:22<05:02,  2.39it/s]

?


Sentence 3053:  81%|████████▏ | 3052/3753 [21:32<06:58,  1.67it/s]

?


Sentence 3072:  82%|████████▏ | 3071/3753 [21:39<03:15,  3.49it/s]

?


Sentence 3081:  82%|████████▏ | 3080/3753 [21:43<04:14,  2.65it/s]

?


Sentence 3138:  84%|████████▎ | 3137/3753 [22:06<04:02,  2.54it/s]

?


Sentence 3167:  84%|████████▍ | 3166/3753 [22:17<03:54,  2.50it/s]

?


Sentence 3197:  85%|████████▌ | 3196/3753 [22:27<02:42,  3.42it/s]

?


Sentence 3207:  85%|████████▌ | 3206/3753 [22:31<02:48,  3.24it/s]

?


Sentence 3213:  86%|████████▌ | 3212/3753 [22:33<02:29,  3.61it/s]

?


Sentence 3225:  86%|████████▌ | 3224/3753 [22:37<02:55,  3.02it/s]

?


Sentence 3235:  86%|████████▌ | 3234/3753 [22:40<03:32,  2.45it/s]

?


Sentence 3246:  86%|████████▋ | 3245/3753 [22:44<02:48,  3.01it/s]

?


Sentence 3252:  87%|████████▋ | 3251/3753 [22:46<03:54,  2.14it/s]

Loss: 0.6121928095817566


Sentence 3255:  87%|████████▋ | 3254/3753 [22:47<02:46,  3.00it/s]

?


Sentence 3263:  87%|████████▋ | 3262/3753 [22:51<03:43,  2.19it/s]

?


Sentence 3272:  87%|████████▋ | 3271/3753 [22:54<02:29,  3.22it/s]

?


Sentence 3304:  88%|████████▊ | 3303/3753 [23:08<03:42,  2.02it/s]

?


Sentence 3313:  88%|████████▊ | 3312/3753 [23:12<03:14,  2.26it/s]

?


Sentence 3318:  88%|████████▊ | 3317/3753 [23:15<03:34,  2.04it/s]

?


Sentence 3341:  89%|████████▉ | 3340/3753 [23:26<02:40,  2.58it/s]

?


Sentence 3365:  90%|████████▉ | 3364/3753 [23:34<02:51,  2.27it/s]

?


Sentence 3395:  90%|█████████ | 3394/3753 [23:44<02:08,  2.79it/s]

?


Sentence 3403:  91%|█████████ | 3402/3753 [23:48<02:27,  2.39it/s]

?


Sentence 3409:  91%|█████████ | 3408/3753 [23:49<01:52,  3.06it/s]

?


Sentence 3412:  91%|█████████ | 3411/3753 [23:51<02:18,  2.47it/s]

?


Sentence 3414:  91%|█████████ | 3413/3753 [23:52<02:10,  2.61it/s]

?


Sentence 3442:  92%|█████████▏| 3441/3753 [24:01<01:41,  3.09it/s]

?


Sentence 3443:  92%|█████████▏| 3442/3753 [24:02<01:54,  2.71it/s]

?


Sentence 3446:  92%|█████████▏| 3445/3753 [24:04<02:42,  1.90it/s]

?


Sentence 3476:  93%|█████████▎| 3475/3753 [24:14<01:17,  3.61it/s]

?


Sentence 3502:  93%|█████████▎| 3501/3753 [24:23<01:32,  2.72it/s]

Loss: 0.1740235686302185


Sentence 3519:  94%|█████████▎| 3518/3753 [24:29<01:15,  3.10it/s]

?


Sentence 3575:  95%|█████████▌| 3574/3753 [24:52<01:06,  2.70it/s]

?


Sentence 3587:  96%|█████████▌| 3586/3753 [24:57<01:13,  2.27it/s]

?


Sentence 3589:  96%|█████████▌| 3588/3753 [24:58<01:34,  1.75it/s]

?


Sentence 3665:  98%|█████████▊| 3664/3753 [25:28<00:30,  2.95it/s]

?


Sentence 3709:  99%|█████████▉| 3708/3753 [25:40<00:14,  3.10it/s]

?


Sentence 3750: 100%|█████████▉| 3749/3753 [25:52<00:01,  2.81it/s]

?


Sentence 3752: 100%|█████████▉| 3751/3753 [25:53<00:00,  2.42it/s]

Loss: 0.14921803772449493


Sentence 3753: 100%|██████████| 3753/3753 [25:54<00:00,  2.41it/s]


In [None]:
# Print the score returned by uas_tst for model_BB, formatted to 4 decimals
# (presumably evaluated on the English dev split, judging by the variable name — confirm).
# NOTE(review): uas_tst is defined in the cell that follows this one, so on a
# fresh kernel that cell has to be executed before this call can succeed.
print('{:.4f}'.format(uas_tst(model_BB, word_vocab, tag_vocab, dep_dev_english_data)))

0.6553


In [None]:
def uas_tst(modelo, vocab_words, vocab_tags, gold_data):
    """Compute the unlabelled attachment score of `modelo` on `gold_data`.

    Args:
        modelo: Model object, passed through unchanged to `predict_tst`.
        vocab_words: Word -> id mapping, passed through to `predict_tst`.
        vocab_tags: Tag -> id mapping, passed through to `predict_tst`.
        gold_data: Iterable of sentences, each an iterable of
            `(word, tag, head)` triples.

    Returns:
        The fraction of scored positions whose predicted head equals the gold
        head. Position 0 of every sentence is skipped (presumably the
        pseudo-root token — confirm against the data loader). When no position
        is scored at all (e.g. `gold_data` is empty) the result is 0.0 rather
        than a ZeroDivisionError.
    """
    correct, total = 0, 0
    for sentence in gold_data:
        # Fresh per-sentence lists: the gold structure itself is never handed
        # to the predictor.
        words, tags, heads = [], [], []
        for word, tag, head in sentence:
            words.append(word)
            tags.append(tag)
            heads.append(head)

        preds = predict_tst(modelo, vocab_words, vocab_tags, words, tags)

        # Start at 1 so that position 0 does not contribute to the score.
        for i in range(1, len(preds)):
            if heads[i] == preds[i]:
                correct += 1
            total += 1

    # Guard against an empty evaluation set instead of dividing by zero.
    if total == 0:
        return 0.0
    return correct / total

def predict_tst(modelo, vocab_words, vocab_tags, words, tags):
    """Greedily parse one sentence with the arc-standard parser.

    Args:
        modelo: Object exposing `forward(words, tags, idx_feats)`; the flat
            argmax of its output is taken as the predicted move id.
        vocab_words: Word -> id mapping. Words missing from it are looked up
            as '<unk>'.
        vocab_tags: Tag -> id mapping. Tags missing from it are looked up
            as '<pad>'.
        words: Sequence of word strings for one sentence. Not modified.
        tags: Sequence of tag strings for the same sentence. Not modified.

    Returns:
        Element 2 of the final parser configuration (presumably the list of
        predicted head indices — confirm against ArcStandardParser).
    """
    # Build new id lists rather than overwriting the caller's `words`/`tags`
    # in place, so the original strings remain available after the call.
    word_ids = [
        vocab_words[w] if w in vocab_words else vocab_words['<unk>']
        for w in words
    ]
    tag_ids = [
        vocab_tags[t] if t in vocab_tags else vocab_tags['<pad>']
        for t in tags
    ]

    # Add a leading batch dimension of size 1.
    word_tensor = torch.LongTensor(word_ids).unsqueeze(0)
    tag_tensor = torch.LongTensor(tag_ids).unsqueeze(0)

    parser = ArcStandardParser()
    config = parser.initial_config(word_tensor.shape[1])
    while not parser.is_final_config(config):
        idx_feats = get_feats(config, word_tensor).unsqueeze(0)
        valid_moves = parser.valid_moves(config)

        with torch.no_grad():
            scores = modelo.forward(word_tensor, tag_tensor, idx_feats)
        # Convert the 0-dim argmax tensor to a plain int so that the parser
        # only ever sees ordinary Python move ids.
        predicted_move = torch.argmax(scores).item()

        if predicted_move in valid_moves:
            config = parser.next_config(config, predicted_move)
        else:
            # The predicted move is not allowed in this configuration:
            # fall back to the last move listed as valid.
            config = parser.next_config(config, valid_moves[-1])

    return config[2]