In [None]:
!pip install conllu
!wget https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-3105/ud-treebanks-v2.5.tgz
!wget https://www.ida.liu.se/~TDDE09/commons/projectivize.py
!tar zxf ud-treebanks-v2.5.tgz

import os
import torch
import projectivize
import torch.nn as nn
from io import open
from conllu import parse_incr
import copy
import operator
import torch.nn.functional as F
import torch.optim as optim
import tqdm
import matplotlib.pyplot as plt
import numpy as np
import subprocess

Collecting conllu
  Downloading https://files.pythonhosted.org/packages/ae/be/be6959c3ff2dbfdd87de4be0ccdff577835b5d08b1d25bf7fd4aaf0d7add/conllu-4.4-py2.py3-none-any.whl
Installing collected packages: conllu
Successfully installed conllu-4.4
--2021-03-18 17:15:53--  https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-3105/ud-treebanks-v2.5.tgz
Resolving lindat.mff.cuni.cz (lindat.mff.cuni.cz)... 195.113.20.140
Connecting to lindat.mff.cuni.cz (lindat.mff.cuni.cz)|195.113.20.140|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 355216681 (339M) [application/x-gzip]
Saving to: ‘ud-treebanks-v2.5.tgz’


2021-03-18 17:16:15 (17.1 MB/s) - ‘ud-treebanks-v2.5.tgz’ saved [355216681/355216681]

--2021-03-18 17:16:15--  https://www.ida.liu.se/~TDDE09/commons/projectivize.py
Resolving www.ida.liu.se (www.ida.liu.se)... 130.236.57.103, 2001:6b0:17:2004::57:103
Connecting to www.ida.liu.se (www.ida.liu.se)|130.236.57.103|:443... connected.
HTTP request sent

In [None]:
 # Pick the GPU when CUDA is available, otherwise fall back to the CPU.
 # NOTE(review): the only other mention of `device` in this notebook is the
 # commented-out `model.to(device)` in train_model, so training runs on the CPU.
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 print(device)

cpu


In [None]:
#polish train data = UD_Polish-PDB/pl_pdb-ud-train.conllu
#english train data = UD_English-GUM/en_gum-ud-train.conllu

#arabic train data = UD_Arabic-PADT/ar_padt-ud-train.conllu
#arabic dev data = UD_Arabic-PADT/ar_padt-ud-dev.conllu

def datafiles(filename):
  """Read a CoNLL-U file into a list of POS-tagged sentences.

  Args:
    filename: path to a UTF-8 encoded CoNLL-U file.

  Returns:
    A list with one entry per sentence; each sentence is a list of
    (form, upostag) tuples in the order the tokens are produced by
    conllu.parse_incr.
  """
  # The context manager guarantees the file handle is closed even if parsing
  # fails part-way through (the handle used to be left open).
  with open(filename, "r", encoding="utf-8") as data_file:
    return [
      [(token['form'], token['upostag']) for token in sentence]
      for sentence in parse_incr(data_file)
    ]

# Load the (form, upostag) views of the train/dev splits.
# Arabic (PADT treebank)
arabic_train_data = datafiles("ud-treebanks-v2.5/UD_Arabic-PADT/ar_padt-ud-train.conllu")
arabic_dev_data = datafiles("ud-treebanks-v2.5/UD_Arabic-PADT/ar_padt-ud-dev.conllu")
# English (GUM treebank)
english_train_data = datafiles("ud-treebanks-v2.5/UD_English-GUM/en_gum-ud-train.conllu")
english_dev_data = datafiles("ud-treebanks-v2.5/UD_English-GUM/en_gum-ud-dev.conllu")

In [None]:
# Show the first sentence of each tagging dataset as a sanity check.
print("Data Examples")
print(english_train_data[0])
print(arabic_train_data[0])
# NOTE: the (form, upostag) lists above are not used by the parser below,
# which reads the treebanks through the Dataset class instead.

Data Examples
[('Aesthetic', 'ADJ'), ('Appreciation', 'NOUN'), ('and', 'CCONJ'), ('Spanish', 'ADJ'), ('Art', 'NOUN'), (':', 'PUNCT')]
[('برلين', 'X'), ('ترفض', 'VERB'), ('حصول', 'NOUN'), ('شركة', 'NOUN'), ('اميركية', 'ADJ'), ('على', 'ADP'), ('رخصة', 'NOUN'), ('تصنيع', 'NOUN'), ('دبابة', 'NOUN'), ('"', 'PUNCT'), ('ليوبارد', 'X'), ('"', 'PUNCT'), ('الالمانية', 'ADJ')]


In [None]:
#Dataset Class
class Dataset():
    """Re-iterable view over the sentences of a CoNLL-U treebank file.

    Every iteration re-opens the file and yields one list per sentence.
    Each list starts with the pseudo-root ROOT followed by one
    (form, upos, head) tuple per token, where head is the integer index of
    the token's head within the same list (0 refers to the pseudo-root).
    """

    ROOT = ('<root>', '<root>', 0)  # Pseudo-root

    def __init__(self, filename):
        """Remember the path; the file is only opened when iterating."""
        self.filename = filename

    def __iter__(self):
        """Yield the sentences of the file one at a time."""
        with open(self.filename, 'rt', encoding='utf-8') as lines:
            tmp = [Dataset.ROOT]
            for line in lines:
                if line.startswith('#'):  # Skip lines with comments
                    continue
                line = line.rstrip()
                if line:
                    columns = line.split('\t')
                    # Only plain integer ids are kept: multiword ranges
                    # ("1-2") and empty nodes ("1.1") are skipped.
                    if columns[0].isdigit():
                        tmp.append((columns[1], columns[3], int(columns[6])))
                elif len(tmp) > 1:
                    # A blank line closes the current sentence. Repeated
                    # blank lines must not produce root-only sentences.
                    yield tmp
                    tmp = [Dataset.ROOT]
            # Flush the last sentence when the file does not end with a
            # blank line; it used to be dropped silently.
            if len(tmp) > 1:
                yield tmp

In [None]:
# Dependency-parsing views (form, upos, head) of four treebanks. Each Dataset
# re-reads its file on every iteration, so list(...) below parses the whole
# training file just to show its first sentence.
dep_train_english_data = Dataset("ud-treebanks-v2.5/UD_English-GUM/en_gum-ud-train.conllu")
dep_dev_english_data = Dataset("ud-treebanks-v2.5/UD_English-GUM/en_gum-ud-dev.conllu")
# First sentence of the English training set
print(list(dep_train_english_data)[0])

# Arabic (PADT)
dep_train_arab_data = Dataset("ud-treebanks-v2.5/UD_Arabic-PADT/ar_padt-ud-train.conllu")
dep_dev_arab_data = Dataset("ud-treebanks-v2.5/UD_Arabic-PADT/ar_padt-ud-dev.conllu")
print(list(dep_train_arab_data)[0])

# Spanish (GSD)
dep_train_esp_data = Dataset("ud-treebanks-v2.5/UD_Spanish-GSD/es_gsd-ud-train.conllu")
dep_dev_esp_data = Dataset("ud-treebanks-v2.5/UD_Spanish-GSD/es_gsd-ud-dev.conllu")
print(list(dep_train_esp_data)[0])

# Swedish (LinES). NOTE(review): the variables are named "svk" although the
# files loaded are the Swedish LinES treebank.
dep_train_svk_data = Dataset("ud-treebanks-v2.5/UD_Swedish-LinES/sv_lines-ud-train.conllu")
dep_dev_svk_data = Dataset("ud-treebanks-v2.5/UD_Swedish-LinES/sv_lines-ud-dev.conllu")
print(list(dep_train_svk_data)[0])

[('<root>', '<root>', 0), ('Aesthetic', 'ADJ', 2), ('Appreciation', 'NOUN', 0), ('and', 'CCONJ', 5), ('Spanish', 'ADJ', 5), ('Art', 'NOUN', 2), (':', 'PUNCT', 2)]
[('<root>', '<root>', 0), ('برلين', 'X', 2), ('ترفض', 'VERB', 0), ('حصول', 'NOUN', 2), ('شركة', 'NOUN', 3), ('اميركية', 'ADJ', 4), ('على', 'ADP', 7), ('رخصة', 'NOUN', 3), ('تصنيع', 'NOUN', 7), ('دبابة', 'NOUN', 8), ('"', 'PUNCT', 11), ('ليوبارد', 'X', 9), ('"', 'PUNCT', 11), ('الالمانية', 'ADJ', 9)]
[('<root>', '<root>', 0), ('Además', 'ADV', 4), ('se', 'PRON', 4), ('le', 'PRON', 4), ('pediría', 'VERB', 0), ('a', 'ADP', 7), ('las', 'DET', 7), ('empresas', 'NOUN', 4), ('interesadas', 'ADJ', 7), ('en', 'ADP', 10), ('prestar', 'VERB', 8), ('el', 'DET', 12), ('servicio', 'NOUN', 10), ('que', 'SCONJ', 15), ('se', 'PRON', 15), ('hagan', 'VERB', 4), ('cargo', 'NOUN', 15), ('de', 'ADP', 19), ('la', 'DET', 19), ('señalización', 'NOUN', 16), ('y', 'CCONJ', 22), ('la', 'DET', 22), ('cartelería', 'NOUN', 19), ('que', 'SCONJ', 24), ('cont

In [None]:
PAD = '<pad>'
UNK = '<unk>'
def make_vocabs_head_tagger(gold_data):
    """Build the word and tag vocabularies from gold-standard sentences.

    Args:
        gold_data: iterable of sentences, each an iterable of
            (word, tag, head) triples; the head is ignored.

    Returns:
        A pair (words, tags) of dicts mapping strings to consecutive integer
        ids in order of first appearance. PAD has id 0 in both vocabularies
        and UNK has id 1 in the word vocabulary.
    """
    word_to_id = {PAD: 0, UNK: 1}
    tag_to_id = {PAD: 0}

    for sentence in gold_data:
        for form, pos, _head in sentence:
            # The next free id always equals the current vocabulary size;
            # setdefault leaves entries that are already present untouched.
            tag_to_id.setdefault(pos, len(tag_to_id))
            word_to_id.setdefault(form, len(word_to_id))

    return word_to_id, tag_to_id

In [None]:
# Vocabularies built from the English GUM training set; used by the demo cells below.
word_vocab, tag_vocab = make_vocabs_head_tagger(dep_train_english_data)

In [None]:
#Arc Parser and oracle move implementation
class Parser(object):
    """Base class for parsers; subclasses are expected to override predict."""

    def predict(self, words, tags):
        """Not implemented here; always raises NotImplementedError."""
        raise NotImplementedError


class ArcStandardParser(Parser):
    """Arc-standard transition system.

    A configuration is a tuple (buffer, stack, heads):
      * buffer - index of the next word to be shifted,
      * stack  - list of word indices, topmost element last,
      * heads  - list with one head index per word (0 until assigned).

    All methods are static, so they can be called on the class itself or on
    an instance.
    """

    MOVES = tuple(range(3))

    SH, LA, RA = MOVES  # Parser moves are specified as integers.

    @staticmethod
    def initial_config(num_words):
        """Return the start configuration for a sentence of num_words words."""
        # The heads list is always as long as the number of words.
        return (0, [], [0] * num_words)

    @staticmethod
    def valid_moves(config):
        """Return the list of moves that can be applied to config.

        SH is valid while unshifted words remain; LA and RA are valid when
        the stack holds at least two words. The list is in the order
        SH, LA, RA.
        """
        buffer, stack, heads = config
        valid = []
        if buffer < len(heads):
            valid.append(ArcStandardParser.SH)
        if len(stack) >= 2:
            valid.append(ArcStandardParser.LA)
            valid.append(ArcStandardParser.RA)
        return valid

    @staticmethod
    def next_config(config, move):
        """Return the configuration reached by applying move to config.

        The input configuration is never modified. A move that is none of
        SH/LA/RA yields an unchanged copy. LA/RA on a stack with fewer than
        two words raises IndexError.
        """
        buffer, stack, heads = config
        # Work on copies so the caller's configuration stays intact. The
        # lists only hold ints, so shallow copies are sufficient.
        stack = list(stack)
        heads = list(heads)

        if move == ArcStandardParser.SH:
            # Shift: push the next buffer word onto the stack.
            stack.append(buffer)
            buffer += 1

        elif move == ArcStandardParser.LA:
            # Left arc: the topmost word becomes the head of the second
            # topmost word, which is then removed from the stack. Removal is
            # by position, not by value.
            heads[stack[-2]] = stack[-1]
            del stack[-2]

        elif move == ArcStandardParser.RA:
            # Right arc: the second topmost word becomes the head of the
            # topmost word, which is then popped.
            heads[stack[-1]] = stack[-2]
            stack.pop()

        return (buffer, stack, heads)

    @staticmethod
    def is_final_config(config):
        """Return True when no move can be applied to config, else False."""
        # Always returns a real bool (it used to return None for non-final
        # configurations) and no longer creates a throwaway instance.
        return not ArcStandardParser.valid_moves(config)


def oracle_moves(gold_heads):
    """Generate the static-oracle transition sequence for a gold tree.

    Args:
        gold_heads: list with the gold head index of every word, including
            the pseudo-root at position 0.

    Yields:
        Pairs (config, move). config is the (buffer, stack, heads)
        configuration *before* the move is applied, and move is 0 (SH),
        1 (LA) or 2 (RA). Every yielded configuration is an independent
        snapshot, so it stays valid after the generator advances.

    For a projective tree of n words (root included) exactly 2n - 1 pairs
    are produced. If no arc can be built and the buffer is exhausted (for
    example on a non-projective tree), the generator stops early instead of
    emitting a shift from an empty buffer.
    """
    SH = 0
    LA = 1
    RA = 2

    num_words = len(gold_heads)
    stack = []
    buffer = 0
    heads = [0] * num_words

    # pending[h] = number of dependents of word h that are not attached yet.
    # A word may only be reduced once all its own dependents are attached.
    pending = {}
    for head in gold_heads[1:]:
        pending[head] = pending.get(head, 0) + 1

    def attach(head):
        # One dependent of `head` has just been attached.
        if pending[head] == 1:
            del pending[head]
        else:
            pending[head] -= 1

    for _ in range((num_words * 2) - 1):
        if len(stack) >= 2:
            top, second = stack[-1], stack[-2]
        else:
            top = second = None

        if top is not None and gold_heads[second] == top and second not in pending:
            yield (buffer, list(stack), list(heads)), LA
            attach(top)
            heads[second] = top
            del stack[-2]

        elif top is not None and gold_heads[top] == second and top not in pending:
            yield (buffer, list(stack), list(heads)), RA
            attach(second)
            heads[top] = second
            stack.pop()

        elif buffer < num_words:
            yield (buffer, list(stack), list(heads)), SH
            stack.append(buffer)
            buffer += 1

        else:
            # Nothing left to shift and no gold arc is reachable: the tree
            # cannot be derived with arc-standard moves. Stop explicitly
            # instead of relying on a swallowed IndexError.
            return

In [None]:
#biLSTM concatenate words and tags (xi=e(wi)◦e(pi)) so there is no need to add ps2,1,0 here
def get_feats(config, word_ids):
  """Extract the feature positions of a parser configuration.

  Args:
    config: (buffer, stack, heads) configuration.
    word_ids: accepted for interface compatibility; not used.

  Returns:
    A 1-D LongTensor [vs2, vs1, vs0, b0] holding the sentence positions of
    the three topmost stack words (vs0 is the top) and of the next buffer
    word. Missing stack slots and an exhausted buffer are encoded as 0.
  """
  buffer_pos, stack, heads = config

  # Left-pad the (up to) three topmost stack entries with zeros.
  top_three = list(stack[-3:])
  stack_feats = [0] * (3 - len(top_three)) + top_three

  next_word = buffer_pos if buffer_pos < len(heads) else 0

  return torch.LongTensor(stack_feats + [next_word])

In [None]:
def training_example(vocab_words, vocab_tags, gold_data):
    """Generate one training example per gold sentence.

    Args:
        vocab_words: dict mapping word forms to ids.
        vocab_tags: dict mapping tags to ids.
        gold_data: iterable of sentences, each a sequence of tuples whose
            first three items are (word, tag, gold head).

    Yields:
        For every sentence a 4-tuple of tensors:
          * word ids, shape (1, n_words)
          * tag ids, shape (1, n_words)
          * feature positions [vs2, vs1, vs0, b0], one row per oracle move
          * the oracle moves, shape (n_moves,)

    Raises:
        KeyError: if a word or tag is missing from its vocabulary.
    """
    for sentence in gold_data:
        # Translate each token in a single pass: word id, tag id, gold head.
        encoded = [
            (vocab_words[token[0]], vocab_tags[token[1]], token[2])
            for token in sentence
        ]
        word_ids = [w for w, _, _ in encoded]
        tag_ids = [t for _, t, _ in encoded]
        gold_heads = [h for _, _, h in encoded]

        # Features are extracted from every configuration as soon as the
        # oracle produces it, i.e. before the oracle advances.
        steps = [
            (get_feats(config, word_ids), move)
            for config, move in oracle_moves(gold_heads)
        ]
        idxs = [feats for feats, _ in steps]
        moves = [move for _, move in steps]

        yield (
            torch.LongTensor(word_ids).unsqueeze(0),
            torch.LongTensor(tag_ids).unsqueeze(0),
            torch.stack(idxs),
            torch.LongTensor(moves),
        )

In [None]:
# Example: the tensors produced by training_example for the first English sentence.
# The names w_ids and t_ids are reused by the demo cells further down.
w_ids, t_ids, f_idx, mov = next(training_example(word_vocab, tag_vocab, dep_train_english_data))
print("word ids: ",w_ids)
print("tag ids: ",t_ids)
print("mlp features indexs: " ,f_idx.shape)  # one row of 4 positions per oracle move
print("force trinning moves: ", mov.shape)  # one gold move per row above

word ids:  tensor([[2, 3, 4, 5, 6, 7, 8]])
tag ids:  tensor([[1, 2, 3, 4, 2, 3, 5]])
mlp features indexs:  torch.Size([13, 4])
force trinning moves:  torch.Size([13])


In [None]:
#"Encoder" Implementation
class BiLSTM(nn.Module):
    """Sentence encoder: word and POS embeddings fed to a bidirectional LSTM.

    Args:
        num_words: size of the word vocabulary.
        num_pos: size of the POS-tag vocabulary.
        word_emb_dim: dimension of the word embeddings.
        pos_emb_dim: dimension of the POS embeddings.
        hidden_dim: hidden size of the LSTM, per direction.
        n_layers: number of stacked LSTM layers.
    """

    def __init__(self, num_words, num_pos, word_emb_dim = 100, pos_emb_dim=25, hidden_dim = 125, n_layers=2):
        super().__init__()
        # Each token is represented as e(word) concatenated with e(pos).
        lstm_input_dim = word_emb_dim + pos_emb_dim
        self.ew = nn.Embedding(num_words, word_emb_dim)
        self.ep = nn.Embedding(num_pos, pos_emb_dim)
        self.bi_lstm = nn.LSTM(
            lstm_input_dim,
            hidden_dim,
            num_layers=n_layers,
            batch_first=True,
            bidirectional=True,
        )

    def forward(self, word, pos):
        """Encode a batch of sentences.

        Args:
            word: LongTensor of word ids, shape [batch_size, n_words].
            pos: LongTensor of tag ids, shape [batch_size, n_words].

        Returns:
            Tensor of shape [batch_size, n_words, 2 * hidden_dim].
        """
        # [batch_size, n_words, word_emb_dim + pos_emb_dim]
        token_repr = torch.cat((self.ew(word), self.ep(pos)), 2)
        # The LSTM also returns its final states; only the per-token outputs are kept.
        return self.bi_lstm(token_repr)[0]

In [None]:
# Encoder demo on the example sentence (w_ids, t_ids) from the cell above.
model_lstm = BiLSTM(len(word_vocab), len(tag_vocab))
test_output = model_lstm.forward(w_ids, t_ids)
print("Shape BiLSTM feats: ",test_output.shape)

Shape BiLSTM feats:  torch.Size([1, 7, 250])


In [None]:
class MLP(nn.Module):
    """One-hidden-layer classifier that scores the parser moves.

    The input is the concatenation of four encoder vectors, each assumed to
    have size 2 * (word_emb_dim + pos_emb_dim).
    NOTE(review): this matches the BiLSTM output only while the encoder's
    hidden_dim equals word_emb_dim + pos_emb_dim (125 with the defaults).

    Args:
        word_emb_dim: word embedding size used to derive the input size.
        pos_emb_dim: POS embedding size used to derive the input size.
        hidden_unit: number of hidden units.
        output_dim: number of output scores (one per move).
    """

    def __init__(self, word_emb_dim = 100, pos_emb_dim=25, hidden_unit = 100, output_dim = 3):
        super().__init__()
        input_dim = 4 * 2 * (word_emb_dim + pos_emb_dim)
        self.hidden = nn.Linear(input_dim, hidden_unit)
        self.relu = nn.ReLU()
        self.output = nn.Linear(hidden_unit, output_dim)

    def forward(self, features):
        """Return unnormalised move scores, shape [n, output_dim]."""
        activated = self.relu(self.hidden(features))
        return self.output(activated)

In [None]:
# Three ways of turning BiLSTM outputs into MLP inputs.
# Case 1: a single sentence (batch size 1) and a single configuration.
# This is the variant the model actually uses.
print("Example for no batch | biLSTM output shape: ", test_output.shape)
feats_from_config = [0, 3, 4, 5] # positions of vs2, vs1, vs0, b0 taken from a config
feats_torch = torch.LongTensor(feats_from_config) # 1-D tensor, torch.Size([4])
test_input_mlp = test_output.index_select(1, feats_torch)
print("Shape input for mlp: " ,test_input_mlp.shape)
# Flatten the four selected vectors into one row per configuration.
test_input_mlp = test_input_mlp.view(-1,test_input_mlp.size(1)*test_input_mlp.size(2))
print("Concat embeds shape: ", test_input_mlp.shape)

print("#################################")

# Case 2: batch size > 1, one configuration per sentence, selected with gather.
test_output2 = torch.cat((test_output,test_output), 0)
print("Example for batch feats | biLSTM output shape: ", test_output2.shape)
feats_from_config2 = [[0, 3, 4, 5], [0, 3, 4, 5]]
feats_torch2 = torch.LongTensor(feats_from_config2)
# Expand the positions over the feature dimension so gather picks whole vectors.
indexs = feats_torch2.unsqueeze(2).expand(feats_torch2.size(0), feats_torch2.size(1), test_output2.size(2))
test_input_mlp2 = torch.gather(test_output2, 1, indexs)
print("Shape input for mlp: " ,test_input_mlp2.shape)
test_input_mlp2 = test_input_mlp2.view(-1,test_input_mlp2.size(1)*test_input_mlp2.size(2))
print("Concat embeds shape: ",test_input_mlp2.shape)

print("################################")
# Case 3: one sentence (batch size 1) with several configurations.
# Each row of positions is the configuration before one move; here there are 2.
test_output3 = test_output
print("Example for no batch | biLSTM output shape: ", test_output3.shape)
feats_from_config3 = [[0, 3, 4, 5], [0, 3, 4, 5]]
feats_torch3 = torch.LongTensor(feats_from_config3) # torch.Size([2, 4])
test_input_mlp3 = [test_output3.index_select(1, idxs) for idxs in feats_torch3]
test_input_mlp3 = torch.stack(test_input_mlp3)
test_input_mlp3 = test_input_mlp3.squeeze(1)  # drop the batch dimension of size 1
print("Shape input for mlp: " ,test_input_mlp3.shape)
test_input_mlp3 = test_input_mlp3.view(-1,test_input_mlp3.size(1)*test_input_mlp3.size(2))
print("Concat embeds shape: ", test_input_mlp3.shape)

Example for no batch | biLSTM output shape:  torch.Size([1, 7, 250])
Shape input for mlp:  torch.Size([1, 4, 250])
Concat embeds shape:  torch.Size([1, 1000])
#################################
Example for batch feats | biLSTM output shape:  torch.Size([2, 7, 250])
Shape input for mlp:  torch.Size([2, 4, 250])
Concat embeds shape:  torch.Size([2, 1000])
################################
Example for no batch | biLSTM output shape:  torch.Size([1, 7, 250])
Shape input for mlp:  torch.Size([2, 4, 250])
Concat embeds shape:  torch.Size([2, 1000])


In [None]:
# Run the MLP on the flattened features built in the previous cell.
# NOTE(review): the printed label says "BiLSTM feats", but the shapes shown
# are those of the MLP output (one score per move).
model_MLP = MLP()
test_output_mlp = model_MLP.forward(test_input_mlp)
print("Shape BiLSTM feats: ",test_output_mlp.shape)
test_output_mlp2 = model_MLP.forward(test_input_mlp2)
print("Shape BiLSTM feats: ",test_output_mlp2.shape)

Shape BiLSTM feats:  torch.Size([1, 3])
Shape BiLSTM feats:  torch.Size([2, 3])


In [None]:
class Blackbox(nn.Module):
    """Transition-based dependency parser: BiLSTM encoder + MLP move scorer.

    Args:
        vocab_words: dict mapping word forms to ids; must contain '<unk>'.
        vocab_tags: dict mapping tags to ids; must contain '<pad>'.
        num_words: size of the word vocabulary.
        num_pos: size of the tag vocabulary.
    """

    def __init__(self, vocab_words, vocab_tags, num_words, num_pos):
        super().__init__()

        self.model_lstm = BiLSTM(num_words, num_pos)
        self.model_MLP = MLP()
        self.vocab_words = vocab_words
        self.vocab_tags = vocab_tags

    def _score_moves(self, lstm_output, idxs_feats):
        """Score the moves for each feature row, given the encoded sentence.

        Args:
            lstm_output: encoder output for one sentence, shape [1, n_words, d].
            idxs_feats: LongTensor of shape [n, 4]; each row holds the
                positions [vs2, vs1, vs0, b0] of one configuration.

        Returns:
            Tensor of shape [n, 3] with one score per move.
        """
        selected = torch.stack([lstm_output.index_select(1, idxs) for idxs in idxs_feats])
        selected = selected.squeeze(1)  # [n, 4, d]; only a batch dimension of size 1 is dropped
        flat = selected.view(-1, selected.size(1) * selected.size(2))  # [n, 4 * d]
        return self.model_MLP(flat)

    def forward(self, word_ids, pos_ids, idxs_feats):
        """Encode one sentence and score the moves of n configurations.

        Args:
            word_ids: LongTensor of shape [1, n_words].
            pos_ids: LongTensor of shape [1, n_words].
            idxs_feats: LongTensor of shape [n, 4] of feature positions.

        Returns:
            Tensor of shape [n, 3] with unnormalised move scores.
        """
        lstm_output = self.model_lstm(word_ids, pos_ids)
        return self._score_moves(lstm_output, idxs_feats)

    def predict(self, words, tags):
        """Greedily parse one sentence.

        Args:
            words: sequence of word forms (strings), pseudo-root included.
            tags: sequence of tags (strings), aligned with words.

        Returns:
            A list with the predicted head index of every word.

        Unknown words are mapped to '<unk>' and unknown tags to '<pad>'.
        The input sequences are not modified.
        """
        unk_word = self.vocab_words['<unk>']
        unk_tag = self.vocab_tags['<pad>']
        word_ids = [self.vocab_words.get(w, unk_word) for w in words]
        tag_ids = [self.vocab_tags.get(t, unk_tag) for t in tags]

        config = ArcStandardParser.initial_config(len(word_ids))
        if not word_ids:
            # Nothing to parse; the encoder cannot handle empty sequences.
            return config[2]

        # Build the inputs on the same device as the model parameters.
        device = next(self.parameters()).device
        word_tensor = torch.tensor([word_ids], dtype=torch.long, device=device)
        tag_tensor = torch.tensor([tag_ids], dtype=torch.long, device=device)

        with torch.no_grad():
            # The sentence encoding does not depend on the configuration, so
            # it is computed once instead of once per transition.
            lstm_output = self.model_lstm(word_tensor, tag_tensor)

            while not ArcStandardParser.is_final_config(config):
                # One configuration -> one row of feature positions.
                idx_feats = get_feats(config, word_ids).unsqueeze(0).to(device)
                valid_moves = ArcStandardParser.valid_moves(config)

                scores = self._score_moves(lstm_output, idx_feats)
                move = int(torch.argmax(scores))
                if move not in valid_moves:
                    # Fall back to the last valid move (RA when the stack
                    # holds at least two words, otherwise SH).
                    move = valid_moves[-1]
                config = ArcStandardParser.next_config(config, move)

        return config[2]

In [None]:
import torch
import torch.nn.functional as F
import torch.optim as optim
import tqdm
# NOTE(review): anomaly detection is a debugging aid that adds checks to every
# backward pass and slows training down; consider disabling it for full runs.
torch.autograd.set_detect_anomaly(True)

def train_model(train_data, dev_data, n_epochs = 5, lim_ep = 4000, batch_size = 0, lr=1e-3):
    """Train a Blackbox parser on oracle transitions, one sentence per step.

    Args:
        train_data: re-iterable collection of gold sentences. It is iterated
            once to build the vocabularies, once to count the sentences and
            once per epoch.
        dev_data: development sentences, passed to uas_tst after every epoch.
        n_epochs: number of passes over the training data.
        lim_ep: maximum number of sentences used per epoch.
        batch_size: unused; kept so existing calls keep working.
        lr: learning rate of the Adam optimizer.

    Returns:
        The trained model. The running mean loss after every step is
        available as model.train_losses.
    """
    word_vocab, tag_vocab = make_vocabs_head_tagger(train_data)
    model = Blackbox(word_vocab, tag_vocab, len(word_vocab), len(tag_vocab))
    #model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_losses = []

    # Size the progress bar by the number of steps that will actually run:
    # an epoch stops after lim_ep sentences when the data set is larger.
    n_sentences = sum(1 for _ in train_data)
    steps_per_epoch = min(n_sentences, lim_ep) if lim_ep > 0 else n_sentences

    with tqdm.tqdm(total=n_epochs * steps_per_epoch) as pbar:
        for t in range(n_epochs):
            model.train()
            running_loss = 0
            t_data = training_example(word_vocab, tag_vocab, train_data)

            # Each item is one sentence; `seen` counts sentences in this epoch.
            for seen, (w_ids, t_ids, f_idx, move) in enumerate(t_data, start=1):
                pbar.set_description(f'Sentence {seen}')
                optimizer.zero_grad()
                # Call the module itself rather than .forward() so that any
                # registered hooks run.
                output_mlp = model(w_ids, t_ids, f_idx)
                loss = F.cross_entropy(output_mlp, move)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if seen % 250 == 0:
                    print("Loss: ", running_loss / seen)

                pbar.update(1)

                train_losses.append(running_loss / seen)

                if seen == lim_ep:  # Train on at most lim_ep sentences
                    break

            # NOTE(review): uas_tst is not defined in the visible part of this
            # notebook; the cell defining it must have been run before this one.
            UAS_t = uas_tst(model, word_vocab, tag_vocab, dev_data)
            print("Epoch: %s | AUS: %.2f" %(t, UAS_t * 100), "%")

    # Expose the loss history without changing the return value.
    model.train_losses = train_losses
    return model

In [None]:
# Module-level parser instance. All ArcStandardParser methods are static, so
# this object carries no state; it exists for code that looks up the global name `parser`.
parser = ArcStandardParser()

In [None]:
# Compare the traditional (GSD) and simplified (GSDSimp) Chinese treebanks:
# print the number of training sentences and the first sentence of each.
# Each list(...) call re-reads the whole file.
dep_train_cn_data = Dataset("ud-treebanks-v2.5/UD_Chinese-GSD/zh_gsd-ud-train.conllu") # 3997 sentences
print(len(list(dep_train_cn_data)))
print(list(dep_train_cn_data)[0])
dep_train_cn_data = Dataset("ud-treebanks-v2.5/UD_Chinese-GSDSimp/zh_gsdsimp-ud-train.conllu") # 3997 sentences
print(len(list(dep_train_cn_data)))
print(list(dep_train_cn_data)[0])

3997
[('<root>', '<root>', 0), ('看似', 'AUX', 2), ('簡單', 'ADJ', 5), ('，', 'PUNCT', 5), ('只', 'ADV', 5), ('是', 'VERB', 0), ('二', 'NUM', 7), ('選', 'VERB', 9), ('一', 'NUM', 7), ('做', 'VERB', 5), ('決擇', 'NOUN', 9), ('，', 'PUNCT', 5), ('但', 'ADV', 22), ('其實', 'ADV', 22), ('他們', 'PRON', 15), ('代表', 'VERB', 22), ('的', 'PART', 15), ('是', 'AUX', 22), ('你', 'PRON', 19), ('周遭', 'NOUN', 22), ('的', 'PART', 19), ('親朋', 'NOUN', 22), ('好友', 'NOUN', 5), ('，', 'PUNCT', 5), ('試', 'VERB', 5), ('著', 'PART', 24), ('給', 'VERB', 24), ('你', 'PRON', 26), ('不同', 'ADJ', 30), ('的', 'PART', 28), ('意見', 'NOUN', 26), ('，', 'PUNCT', 5), ('但', 'ADV', 39), ('追根究底', 'VERB', 39), ('，', 'PUNCT', 39), ('最後', 'NOUN', 36), ('決定', 'VERB', 39), ('的', 'PART', 36), ('還是', 'AUX', 39), ('自己', 'PRON', 5), ('。', 'PUNCT', 5)]
3997
[('<root>', '<root>', 0), ('看似', 'AUX', 2), ('简单', 'ADJ', 5), ('，', 'PUNCT', 5), ('只', 'ADV', 5), ('是', 'VERB', 0), ('二', 'NUM', 7), ('选', 'VERB', 9), ('一', 'NUM', 7), ('做', 'VERB', 5), ('决择', 'NOUN', 9), ('，

In [None]:
# Train and evaluate on the simplified Chinese treebank (GSDSimp) with the
# default hyper-parameters of train_model.
dep_train_cn_data = Dataset("ud-treebanks-v2.5/UD_Chinese-GSDSimp/zh_gsdsimp-ud-train.conllu") # 3997 sentences
dep_dev_cn_data = Dataset("ud-treebanks-v2.5/UD_Chinese-GSDSimp/zh_gsdsimp-ud-dev.conllu")
model_BB = train_model(dep_train_cn_data, dep_dev_cn_data) # dev UAS is printed after every epoch
#word_vocab, tag_vocab = make_vocabs_head_tagger(dep_train_svk_data)
#print('{:.4f}'.format(uas_tst(model_BB, word_vocab, tag_vocab, dep_dev_svk_data))) #71% |(1e-3)78.82 (2000) |

Sentence 251:   1%|▏         | 250/19985 [01:59<1:57:16,  2.80it/s]

Loss:  0.5553316417336464


Sentence 501:   3%|▎         | 500/19985 [03:46<2:37:08,  2.07it/s]

Loss:  0.4307987672071904


Sentence 751:   4%|▍         | 750/19985 [05:46<2:49:16,  1.89it/s]

Loss:  0.38671767008677127


Sentence 1001:   5%|▌         | 1000/19985 [07:43<2:32:57,  2.07it/s]

Loss:  0.3558015421582386


Sentence 1251:   6%|▋         | 1250/19985 [09:44<2:46:36,  1.87it/s]

Loss:  0.33584221936240793


Sentence 1501:   8%|▊         | 1500/19985 [11:49<2:04:24,  2.48it/s]

Loss:  0.3187138373708973


Sentence 1751:   9%|▉         | 1750/19985 [13:50<2:38:01,  1.92it/s]

Loss:  0.304690277234252


Sentence 2001:  10%|█         | 2000/19985 [15:48<2:41:05,  1.86it/s]

Loss:  0.29455721158208326


Sentence 2251:  11%|█▏        | 2250/19985 [17:54<3:50:19,  1.28it/s]

Loss:  0.28410017978958785


Sentence 2501:  13%|█▎        | 2500/19985 [19:52<2:07:45,  2.28it/s]

Loss:  0.27528274569977074


Sentence 2751:  14%|█▍        | 2750/19985 [21:51<1:50:03,  2.61it/s]

Loss:  0.26908448738994245


Sentence 3001:  15%|█▌        | 3000/19985 [23:45<2:08:33,  2.20it/s]

Loss:  0.262743641245955


Sentence 3251:  16%|█▋        | 3250/19985 [25:44<2:44:30,  1.70it/s]

Loss:  0.2566874595301656


Sentence 3501:  18%|█▊        | 3500/19985 [27:43<2:29:13,  1.84it/s]

Loss:  0.251341219143942


Sentence 3751:  19%|█▉        | 3750/19985 [29:51<2:34:37,  1.75it/s]

Loss:  0.24669198332404096


Sentence 1:  20%|██        | 3997/19985 [33:59<2:34:49,  1.72it/s]   

Epoch: 0 | AUS: 72.64 %


Sentence 251:  21%|██▏       | 4247/19985 [36:01<1:35:06,  2.76it/s]

Loss:  0.1740423187315464


Sentence 501:  23%|██▎       | 4497/19985 [37:51<2:08:42,  2.01it/s]

Loss:  0.15747541515156627


Sentence 751:  24%|██▍       | 4747/19985 [39:54<2:15:54,  1.87it/s]

Loss:  0.16005256711598487


Sentence 1001:  25%|██▌       | 4997/19985 [41:53<2:03:06,  2.03it/s]

Loss:  0.1565685399924405


Sentence 1251:  26%|██▋       | 5247/19985 [43:55<2:12:39,  1.85it/s]

Loss:  0.15513406617250294


Sentence 1501:  28%|██▊       | 5497/19985 [46:02<1:38:55,  2.44it/s]

Loss:  0.15204200197496295


Sentence 1751:  29%|██▉       | 5747/19985 [48:05<2:04:49,  1.90it/s]

Loss:  0.1493483151722633


Sentence 2001:  30%|███       | 5997/19985 [50:04<2:04:40,  1.87it/s]

Loss:  0.148144825120311


Sentence 2251:  31%|███▏      | 6247/19985 [52:11<2:55:22,  1.31it/s]

Loss:  0.1460986081080563


Sentence 2501:  33%|███▎      | 6497/19985 [54:11<1:39:21,  2.26it/s]

Loss:  0.14398101673063357


Sentence 2751:  34%|███▍      | 6747/19985 [56:10<1:23:01,  2.66it/s]

Loss:  0.14358127325212328


Sentence 3001:  35%|███▌      | 6997/19985 [58:06<1:39:34,  2.17it/s]

Loss:  0.14186778302907865


Sentence 3251:  36%|███▋      | 7247/19985 [1:00:07<2:06:28,  1.68it/s]

Loss:  0.1398969835027372


Sentence 3501:  38%|███▊      | 7497/19985 [1:02:05<1:48:31,  1.92it/s]

Loss:  0.13822644753828978


Sentence 3751:  39%|███▉      | 7747/19985 [1:04:12<1:55:02,  1.77it/s]

Loss:  0.1369628474404104


Sentence 1:  40%|████      | 7994/19985 [1:08:19<1:56:58,  1.71it/s]   

Epoch: 1 | AUS: 74.63 %


Sentence 251:  41%|████▏     | 8244/19985 [1:10:21<1:13:50,  2.65it/s]

Loss:  0.11559729740116745


Sentence 501:  43%|████▎     | 8494/19985 [1:12:11<1:36:16,  1.99it/s]

Loss:  0.09890669417695608


Sentence 751:  44%|████▍     | 8744/19985 [1:14:13<1:39:23,  1.88it/s]

Loss:  0.10246289778522139


Sentence 1001:  45%|████▌     | 8994/19985 [1:16:12<1:31:59,  1.99it/s]

Loss:  0.09990316938744218


Sentence 1251:  46%|████▋     | 9244/19985 [1:18:14<1:36:52,  1.85it/s]

Loss:  0.09823464306249516


Sentence 1501:  48%|████▊     | 9494/19985 [1:20:19<1:09:42,  2.51it/s]

Loss:  0.09553783697572604


Sentence 1751:  49%|████▉     | 9744/19985 [1:22:22<1:28:26,  1.93it/s]

Loss:  0.09398123332393879


Sentence 2001:  50%|█████     | 9994/19985 [1:24:20<1:27:25,  1.90it/s]

Loss:  0.09289333879136939


Sentence 2251:  51%|█████▏    | 10244/19985 [1:26:26<2:04:40,  1.30it/s]

Loss:  0.09158980080072554


Sentence 2501:  53%|█████▎    | 10494/19985 [1:28:25<1:08:34,  2.31it/s]

Loss:  0.0904287521949489


Sentence 2751:  54%|█████▍    | 10744/19985 [1:30:24<58:31,  2.63it/s]

Loss:  0.09035865828233892


Sentence 3001:  55%|█████▌    | 10994/19985 [1:32:19<1:07:59,  2.20it/s]

Loss:  0.08879996183223436


Sentence 3251:  56%|█████▋    | 11244/19985 [1:34:19<1:25:37,  1.70it/s]

Loss:  0.08724035817452666


Sentence 3501:  58%|█████▊    | 11494/19985 [1:36:17<1:15:32,  1.87it/s]

Loss:  0.08597094648573278


Sentence 3751:  59%|█████▉    | 11744/19985 [1:38:25<1:16:22,  1.80it/s]

Loss:  0.08504406225406273


Sentence 1:  60%|██████    | 11991/19985 [1:42:32<1:15:40,  1.76it/s]   

Epoch: 2 | AUS: 76.89 %


Sentence 251:  61%|██████▏   | 12241/19985 [1:44:34<47:27,  2.72it/s]

Loss:  0.07329105077017448


Sentence 501:  63%|██████▎   | 12491/19985 [1:46:24<1:01:02,  2.05it/s]

Loss:  0.061166460182717854


Sentence 751:  64%|██████▍   | 12741/19985 [1:48:27<1:03:56,  1.89it/s]

Loss:  0.060617596162551


Sentence 1001:  65%|██████▌   | 12991/19985 [1:50:26<57:02,  2.04it/s]

Loss:  0.06038444536702082


Sentence 1251:  66%|██████▋   | 13241/19985 [1:52:27<1:00:08,  1.87it/s]

Loss:  0.060739420297194736


Sentence 1501:  68%|██████▊   | 13491/19985 [1:54:34<47:27,  2.28it/s]

Loss:  0.06062805778237816


Sentence 1751:  69%|██████▉   | 13741/19985 [1:56:37<54:08,  1.92it/s]

Loss:  0.05991397633160107


Sentence 2001:  70%|███████   | 13991/19985 [1:58:34<52:46,  1.89it/s]

Loss:  0.059439813832572325


Sentence 2251:  71%|███████▏  | 14241/19985 [2:00:40<1:13:30,  1.30it/s]

Loss:  0.0592017794390825


Sentence 2501:  73%|███████▎  | 14491/19985 [2:02:40<39:48,  2.30it/s]

Loss:  0.05884764594430016


Sentence 2751:  74%|███████▍  | 14741/19985 [2:04:39<33:40,  2.59it/s]

Loss:  0.05881335460367501


Sentence 3001:  75%|███████▌  | 14991/19985 [2:06:34<37:43,  2.21it/s]

Loss:  0.058003425056692016


Sentence 3251:  76%|███████▋  | 15241/19985 [2:08:34<46:50,  1.69it/s]

Loss:  0.05729255679311665


Sentence 3501:  78%|███████▊  | 15491/19985 [2:10:32<39:27,  1.90it/s]

Loss:  0.0565058097896269


Sentence 3751:  79%|███████▉  | 15741/19985 [2:12:40<41:23,  1.71it/s]

Loss:  0.055982586144047185


Sentence 1:  80%|████████  | 15988/19985 [2:16:47<37:51,  1.76it/s]   

Epoch: 3 | AUS: 76.60 %


Sentence 251:  81%|████████▏ | 16238/19985 [2:18:49<22:55,  2.72it/s]

Loss:  0.05487902237683738


Sentence 501:  83%|████████▎ | 16488/19985 [2:20:40<29:02,  2.01it/s]

Loss:  0.04517757301399615


Sentence 751:  84%|████████▍ | 16738/19985 [2:22:42<29:49,  1.81it/s]

Loss:  0.04609660120690629


Sentence 1001:  85%|████████▌ | 16988/19985 [2:24:42<24:45,  2.02it/s]

Loss:  0.04452973360406395


Sentence 1251:  86%|████████▋ | 17238/19985 [2:26:43<24:19,  1.88it/s]

Loss:  0.0449705250099687


Sentence 1501:  88%|████████▊ | 17488/19985 [2:28:50<16:47,  2.48it/s]

Loss:  0.044526616228390443


Sentence 1751:  89%|████████▉ | 17738/19985 [2:30:53<19:46,  1.89it/s]

Loss:  0.04317274301802341


Sentence 2001:  90%|█████████ | 17988/19985 [2:32:51<17:59,  1.85it/s]

Loss:  0.04344271285698244


Sentence 2251:  91%|█████████▏| 18238/19985 [2:34:58<22:17,  1.31it/s]

Loss:  0.04314271591526075


Sentence 2501:  93%|█████████▎| 18488/19985 [2:36:58<10:45,  2.32it/s]

Loss:  0.042372205706861586


Sentence 2751:  94%|█████████▍| 18738/19985 [2:38:56<07:56,  2.62it/s]

Loss:  0.042270012396517814


Sentence 3001:  95%|█████████▌| 18988/19985 [2:40:52<07:41,  2.16it/s]

Loss:  0.041835642813666786


Sentence 3251:  96%|█████████▋| 19238/19985 [2:42:52<07:28,  1.67it/s]

Loss:  0.04108536047067866


Sentence 3501:  98%|█████████▊| 19488/19985 [2:44:54<04:39,  1.78it/s]

Loss:  0.04040570567264831


Sentence 3751:  99%|█████████▉| 19738/19985 [2:47:02<02:17,  1.80it/s]

Loss:  0.040488850576418496


Sentence 3997: 100%|██████████| 19985/19985 [2:51:09<00:00,  1.95it/s]

Epoch: 4 | AUS: 76.86 %





In [None]:
# Train and evaluate on the traditional Chinese treebank (GSD) with the
# default hyper-parameters of train_model. This rebinds dep_train_cn_data,
# dep_dev_cn_data and model_BB from the previous run.
dep_train_cn_data = Dataset("ud-treebanks-v2.5/UD_Chinese-GSD/zh_gsd-ud-train.conllu") # 3997 sentences
dep_dev_cn_data = Dataset("ud-treebanks-v2.5/UD_Chinese-GSD/zh_gsd-ud-dev.conllu")
model_BB = train_model(dep_train_cn_data, dep_dev_cn_data) # dev UAS is printed after every epoch
#word_vocab, tag_vocab = make_vocabs_head_tagger(dep_train_svk_data)
#print('{:.4f}'.format(uas_tst(model_BB, word_vocab, tag_vocab, dep_dev_svk_data))) #71% |(1e-3)78.82 (2000) |

Sentence 251:   1%|▏         | 250/19985 [02:01<1:57:37,  2.80it/s]

Loss:  0.5547375439405441


Sentence 501:   3%|▎         | 500/19985 [03:49<2:36:23,  2.08it/s]

Loss:  0.4341847336702049


Sentence 751:   4%|▍         | 750/19985 [05:49<2:52:32,  1.86it/s]

Loss:  0.3901776408478618


Sentence 1001:   5%|▌         | 1000/19985 [07:45<2:30:46,  2.10it/s]

Loss:  0.3589687773287296


Sentence 1251:   6%|▋         | 1250/19985 [09:46<2:44:32,  1.90it/s]

Loss:  0.34058809508830307


Sentence 1501:   8%|▊         | 1500/19985 [11:53<2:02:43,  2.51it/s]

Loss:  0.3235423563197255


Sentence 1751:   9%|▉         | 1750/19985 [13:56<2:38:48,  1.91it/s]

Loss:  0.309563197478652


Sentence 2001:  10%|█         | 2000/19985 [15:53<2:38:56,  1.89it/s]

Loss:  0.29931217243522407


Sentence 2251:  11%|█▏        | 2250/19985 [17:59<3:51:30,  1.28it/s]

Loss:  0.2899353385178579


Sentence 2501:  13%|█▎        | 2500/19985 [19:59<2:07:08,  2.29it/s]

Loss:  0.28098566380888224


Sentence 2751:  14%|█▍        | 2750/19985 [21:59<1:47:13,  2.68it/s]

Loss:  0.27517710781910204


Sentence 3001:  15%|█▌        | 3000/19985 [23:54<2:13:53,  2.11it/s]

Loss:  0.2690540498327464


Sentence 3251:  16%|█▋        | 3250/19985 [25:54<2:50:55,  1.63it/s]

Loss:  0.26323647574134745


Sentence 3501:  18%|█▊        | 3500/19985 [27:52<2:26:05,  1.88it/s]

Loss:  0.25818228298744983


Sentence 3751:  19%|█▉        | 3750/19985 [30:00<2:32:45,  1.77it/s]

Loss:  0.2541632487838467


Sentence 1:  20%|██        | 3997/19985 [34:09<2:32:37,  1.75it/s]   

Epoch: 0 | AUS: 72.94 %


Sentence 251:  21%|██▏       | 4247/19985 [36:11<1:37:56,  2.68it/s]

Loss:  0.17603512149490416


Sentence 501:  23%|██▎       | 4497/19985 [38:02<2:08:38,  2.01it/s]

Loss:  0.16170094054937362


Sentence 751:  24%|██▍       | 4747/19985 [40:05<2:17:10,  1.85it/s]

Loss:  0.16445111471678442


Sentence 1001:  25%|██▌       | 4997/19985 [42:06<2:05:59,  1.98it/s]

Loss:  0.1615638423395576


Sentence 1251:  26%|██▋       | 5247/19985 [44:08<2:11:02,  1.87it/s]

Loss:  0.16086608552066609


Sentence 1501:  28%|██▊       | 5497/19985 [46:15<1:36:39,  2.50it/s]

Loss:  0.1577804372496127


Sentence 1751:  29%|██▉       | 5747/19985 [48:19<2:09:33,  1.83it/s]

Loss:  0.15522046115768276


Sentence 2001:  30%|███       | 5997/19985 [50:18<2:01:57,  1.91it/s]

Loss:  0.15371503091510386


Sentence 2251:  31%|███▏      | 6247/19985 [52:25<3:03:19,  1.25it/s]

Loss:  0.15113087318258153


Sentence 2501:  33%|███▎      | 6497/19985 [54:26<1:42:10,  2.20it/s]

Loss:  0.14931027829181404


Sentence 2751:  34%|███▍      | 6747/19985 [56:25<1:23:17,  2.65it/s]

Loss:  0.14874708154221827


Sentence 3001:  35%|███▌      | 6997/19985 [58:21<1:38:22,  2.20it/s]

Loss:  0.14731855720778306


Sentence 3251:  36%|███▋      | 7247/19985 [1:00:21<2:08:59,  1.65it/s]

Loss:  0.1455707276635445


Sentence 3501:  38%|███▊      | 7497/19985 [1:02:19<1:49:16,  1.90it/s]

Loss:  0.1441118576372641


Sentence 3751:  39%|███▉      | 7747/19985 [1:04:28<1:54:13,  1.79it/s]

Loss:  0.14338613886265084


Sentence 1:  40%|████      | 7994/19985 [1:08:38<1:55:07,  1.74it/s]   

Epoch: 1 | AUS: 75.56 %


Sentence 251:  41%|████▏     | 8244/19985 [1:10:40<1:11:55,  2.72it/s]

Loss:  0.11740857599908486


Sentence 501:  43%|████▎     | 8494/19985 [1:12:31<1:34:34,  2.03it/s]

Loss:  0.1035697184462333


Sentence 751:  44%|████▍     | 8744/19985 [1:14:34<1:39:52,  1.88it/s]

Loss:  0.10594639354604685


Sentence 1001:  45%|████▌     | 8994/19985 [1:16:35<1:30:28,  2.02it/s]

Loss:  0.10334246942088066


Sentence 1251:  46%|████▋     | 9244/19985 [1:18:38<1:35:43,  1.87it/s]

Loss:  0.10149251653217943


Sentence 1501:  48%|████▊     | 9494/19985 [1:20:45<1:10:41,  2.47it/s]

Loss:  0.09870693612609951


Sentence 1751:  49%|████▉     | 9744/19985 [1:22:48<1:30:03,  1.90it/s]

Loss:  0.09755468439870414


Sentence 2001:  50%|█████     | 9994/19985 [1:24:48<1:28:02,  1.89it/s]

Loss:  0.09650424780208414


Sentence 2251:  51%|█████▏    | 10244/19985 [1:26:55<2:05:39,  1.29it/s]

Loss:  0.09498384552991612


Sentence 2501:  53%|█████▎    | 10494/19985 [1:28:55<1:10:34,  2.24it/s]

Loss:  0.0941609203332162


Sentence 2751:  54%|█████▍    | 10744/19985 [1:30:55<59:03,  2.61it/s]

Loss:  0.09392569944064069


Sentence 3001:  55%|█████▌    | 10994/19985 [1:32:51<1:09:53,  2.14it/s]

Loss:  0.09245230504842766


Sentence 3251:  56%|█████▋    | 11244/19985 [1:34:52<1:26:35,  1.68it/s]

Loss:  0.0912427509850688


Sentence 3501:  58%|█████▊    | 11494/19985 [1:36:51<1:18:49,  1.80it/s]

Loss:  0.09000865046820088


Sentence 3751:  59%|█████▉    | 11744/19985 [1:38:59<1:16:38,  1.79it/s]

Loss:  0.08969224938963308


Sentence 1:  60%|██████    | 11991/19985 [1:43:08<1:16:21,  1.74it/s]   

Epoch: 2 | AUS: 76.33 %


Sentence 251:  61%|██████▏   | 12241/19985 [1:45:10<46:45,  2.76it/s]

Loss:  0.07474843831581529


Sentence 501:  63%|██████▎   | 12491/19985 [1:47:01<1:00:50,  2.05it/s]

Loss:  0.06254128598818352


Sentence 751:  64%|██████▍   | 12741/19985 [1:49:04<1:04:30,  1.87it/s]

Loss:  0.06357782893691426


Sentence 1001:  65%|██████▌   | 12991/19985 [1:51:05<57:56,  2.01it/s]

Loss:  0.062084426467135925


Sentence 1251:  66%|██████▋   | 13241/19985 [1:53:07<1:01:24,  1.83it/s]

Loss:  0.06118587235600862


Sentence 1501:  68%|██████▊   | 13491/19985 [1:55:15<48:16,  2.24it/s]

Loss:  0.060607244989851705


Sentence 1751:  69%|██████▉   | 13741/19985 [1:57:20<54:57,  1.89it/s]

Loss:  0.060144481808942926


Sentence 2001:  70%|███████   | 13991/19985 [1:59:20<53:24,  1.87it/s]

Loss:  0.06005730341955132


Sentence 2251:  71%|███████▏  | 14241/19985 [2:01:27<1:15:18,  1.27it/s]

Loss:  0.059402366873517166


Sentence 2501:  73%|███████▎  | 14491/19985 [2:03:29<41:34,  2.20it/s]

Loss:  0.059426523089551486


Sentence 2751:  74%|███████▍  | 14741/19985 [2:05:30<34:17,  2.55it/s]

Loss:  0.059579788974555084


Sentence 3001:  75%|███████▌  | 14991/19985 [2:07:26<37:37,  2.21it/s]

Loss:  0.05886525842798983


Sentence 3251:  76%|███████▋  | 15241/19985 [2:09:26<48:07,  1.64it/s]

Loss:  0.0582179109630776


Sentence 3501:  78%|███████▊  | 15491/19985 [2:11:25<40:20,  1.86it/s]

Loss:  0.0576488362393502


Sentence 3751:  79%|███████▉  | 15741/19985 [2:13:36<42:02,  1.68it/s]

Loss:  0.05761480840546453


Sentence 1:  80%|████████  | 15988/19985 [2:17:45<38:31,  1.73it/s]   

Epoch: 3 | AUS: 75.66 %


Sentence 251:  81%|████████▏ | 16238/19985 [2:19:48<22:44,  2.75it/s]

Loss:  0.05863592389363294


Sentence 501:  83%|████████▎ | 16488/19985 [2:21:40<28:47,  2.02it/s]

Loss:  0.04678292124208292


Sentence 751:  84%|████████▍ | 16738/19985 [2:23:44<30:02,  1.80it/s]

Loss:  0.04627394819437359


Sentence 1001:  85%|████████▌ | 16988/19985 [2:25:44<24:25,  2.05it/s]

Loss:  0.04606367525527594


Sentence 1251:  86%|████████▋ | 17238/19985 [2:27:47<25:17,  1.81it/s]

Loss:  0.0459936815258141


Sentence 1501:  88%|████████▊ | 17488/19985 [2:29:54<17:02,  2.44it/s]

Loss:  0.044834005486662436


Sentence 1751:  89%|████████▉ | 17738/19985 [2:31:58<19:37,  1.91it/s]

Loss:  0.04418237126825235


Sentence 2001:  90%|█████████ | 17988/19985 [2:33:57<17:50,  1.87it/s]

Loss:  0.04375300561868858


Sentence 2251:  91%|█████████▏| 18238/19985 [2:36:04<22:37,  1.29it/s]

Loss:  0.04339116001455012


Sentence 2501:  93%|█████████▎| 18488/19985 [2:38:05<11:12,  2.23it/s]

Loss:  0.043104757549316676


Sentence 2751:  94%|█████████▍| 18738/19985 [2:40:05<07:59,  2.60it/s]

Loss:  0.04372502814516983


Sentence 3001:  95%|█████████▌| 18988/19985 [2:42:01<07:45,  2.14it/s]

Loss:  0.04368892354011632


Sentence 3251:  96%|█████████▋| 19238/19985 [2:44:02<07:40,  1.62it/s]

Loss:  0.04298525741101115


Sentence 3501:  98%|█████████▊| 19488/19985 [2:46:01<04:27,  1.86it/s]

Loss:  0.042714012193468466


Sentence 3751:  99%|█████████▉| 19738/19985 [2:48:11<02:21,  1.75it/s]

Loss:  0.0425251116497219


Sentence 3997: 100%|██████████| 19985/19985 [2:52:21<00:00,  1.93it/s]

Epoch: 4 | AUS: 76.20 %





In [None]:
# English (UD_English-GUM): load the train/dev treebanks and train a parser with
# train_model's default settings. Dataset and train_model are defined outside
# this cell.
# Recorded run (saved output below): 5 passes over the 3753 training sentences;
# dev score after each pass: 80.23 / 81.59 / 80.36 / 81.86 / 82.66 %.
# The log labels that score "AUS" -- presumably UAS (unlabeled attachment
# score); TODO confirm against the print statement in train_model.
# NOTE(review): this cell rebinds model_BB, so whatever model was stored under
# that name before is replaced.
dep_train_english_data = Dataset("ud-treebanks-v2.5/UD_English-GUM/en_gum-ud-train.conllu")  # 3753 training sentences
dep_dev_english_data = Dataset("ud-treebanks-v2.5/UD_English-GUM/en_gum-ud-dev.conllu")
# Scores noted from earlier experiments (author's shorthand, settings in parentheses):
model_BB = train_model(dep_train_english_data, dep_dev_english_data) #UAS: 0.71 | now: 72.95 | 76.66 (lr -3) |81.50% (2x2000)
# Disabled stand-alone evaluation of the trained model on the dev set, kept for reference.
#word_vocab, tag_vocab = make_vocabs_head_tagger(dep_train_english_data)
#print('{:.4f}'.format(uas_tst(model_BB, word_vocab, tag_vocab, dep_dev_english_data)))

Sentence 251:   1%|▏         | 250/18765 [02:07<1:47:02,  2.88it/s]

Loss:  0.47540055483579635


Sentence 501:   3%|▎         | 500/18765 [04:22<2:18:45,  2.19it/s]

Loss:  0.37458727664966135


Sentence 751:   4%|▍         | 750/18765 [06:05<1:49:15,  2.75it/s]

Loss:  0.32802767768575963


Sentence 1002:   5%|▌         | 1001/18765 [07:32<1:18:03,  3.79it/s]

Loss:  0.287286238661065


Sentence 1251:   7%|▋         | 1250/18765 [08:42<1:20:34,  3.62it/s]

Loss:  0.2581784121117322


Sentence 1501:   8%|▊         | 1500/18765 [10:17<2:10:24,  2.21it/s]

Loss:  0.24856770227686503


Sentence 1751:   9%|▉         | 1750/18765 [11:45<1:53:35,  2.50it/s]

Loss:  0.2381202198035649


Sentence 2001:  11%|█         | 2000/18765 [13:19<1:04:46,  4.31it/s]

Loss:  0.2316162664155563


Sentence 2252:  12%|█▏        | 2251/18765 [15:03<1:48:29,  2.54it/s]

Loss:  0.22797479871405651


Sentence 2501:  13%|█▎        | 2500/18765 [17:03<2:32:21,  1.78it/s]

Loss:  0.22467290074779886


Sentence 2751:  15%|█▍        | 2750/18765 [18:51<2:56:10,  1.52it/s]

Loss:  0.2250631017584449


Sentence 3002:  16%|█▌        | 3001/18765 [20:29<1:16:30,  3.43it/s]

Loss:  0.2212558494562448


Sentence 3251:  17%|█▋        | 3250/18765 [22:04<1:39:44,  2.59it/s]

Loss:  0.2171173227444345


Sentence 3501:  19%|█▊        | 3500/18765 [23:38<1:26:26,  2.94it/s]

Loss:  0.21208625533893272


Sentence 3751:  20%|█▉        | 3750/18765 [25:04<1:32:33,  2.70it/s]

Loss:  0.2050746192370231


Sentence 2:  20%|██        | 3754/18765 [27:42<197:33:12, 47.38s/it]

Epoch: 0 | AUS: 80.23 %


Sentence 251:  21%|██▏       | 4003/18765 [29:52<1:26:10,  2.85it/s]

Loss:  0.15709746051271214


Sentence 501:  23%|██▎       | 4253/18765 [32:09<1:55:54,  2.09it/s]

Loss:  0.1516620014691871


Sentence 751:  24%|██▍       | 4503/18765 [33:53<1:23:56,  2.83it/s]

Loss:  0.146919145592304


Sentence 1001:  25%|██▌       | 4753/18765 [35:21<1:09:10,  3.38it/s]

Loss:  0.13313138713952502


Sentence 1251:  27%|██▋       | 5003/18765 [36:33<1:04:43,  3.54it/s]

Loss:  0.12158709066389711


Sentence 1501:  28%|██▊       | 5253/18765 [38:10<1:41:58,  2.21it/s]

Loss:  0.12160910210601408


Sentence 1751:  29%|██▉       | 5503/18765 [39:38<1:31:24,  2.42it/s]

Loss:  0.11929685030706605


Sentence 2001:  31%|███       | 5753/18765 [41:12<51:52,  4.18it/s]

Loss:  0.11970433350283838


Sentence 2252:  32%|███▏      | 6004/18765 [42:58<1:23:27,  2.55it/s]

Loss:  0.12093014350833174


Sentence 2501:  33%|███▎      | 6253/18765 [44:58<1:58:01,  1.77it/s]

Loss:  0.12172744016367579


Sentence 2751:  35%|███▍      | 6503/18765 [46:49<2:22:23,  1.44it/s]

Loss:  0.12387983348616928


Sentence 3002:  36%|███▌      | 6754/18765 [48:26<57:01,  3.51it/s]

Loss:  0.12306395215216147


Sentence 3251:  37%|███▋      | 7003/18765 [50:00<1:13:29,  2.67it/s]

Loss:  0.1216516215945559


Sentence 3501:  39%|███▊      | 7253/18765 [51:33<1:05:28,  2.93it/s]

Loss:  0.11940589567998508


Sentence 3751:  40%|███▉      | 7503/18765 [52:59<1:11:54,  2.61it/s]

Loss:  0.11608725516774646


Sentence 2:  40%|████      | 7507/18765 [55:35<146:46:20, 46.93s/it]

Epoch: 1 | AUS: 81.59 %


Sentence 251:  41%|████▏     | 7756/18765 [57:46<1:03:03,  2.91it/s]

Loss:  0.10176421561700226


Sentence 501:  43%|████▎     | 8006/18765 [1:00:03<1:22:27,  2.17it/s]

Loss:  0.09866449984749591


Sentence 751:  44%|████▍     | 8256/18765 [1:01:48<1:03:18,  2.77it/s]

Loss:  0.09365582275881457


Sentence 1002:  45%|████▌     | 8507/18765 [1:03:19<47:43,  3.58it/s]

Loss:  0.08434849263349332


Sentence 1251:  47%|████▋     | 8756/18765 [1:04:32<49:35,  3.36it/s]

Loss:  0.0768138122959182


Sentence 1501:  48%|████▊     | 9006/18765 [1:06:10<1:11:56,  2.26it/s]

Loss:  0.07639718668108461


Sentence 1751:  49%|████▉     | 9256/18765 [1:07:38<1:02:10,  2.55it/s]

Loss:  0.07554086597237233


Sentence 2001:  51%|█████     | 9506/18765 [1:09:13<36:16,  4.25it/s]

Loss:  0.0757319251295381


Sentence 2252:  52%|█████▏    | 9757/18765 [1:10:57<58:33,  2.56it/s]

Loss:  0.07725322050202243


Sentence 2501:  53%|█████▎    | 10006/18765 [1:12:56<1:21:01,  1.80it/s]

Loss:  0.07833568684331862


Sentence 2751:  55%|█████▍    | 10256/18765 [1:14:45<1:33:54,  1.51it/s]

Loss:  0.07946697562767055


Sentence 3002:  56%|█████▌    | 10507/18765 [1:16:24<39:41,  3.47it/s]

Loss:  0.0787133163295422


Sentence 3251:  57%|█████▋    | 10756/18765 [1:18:00<52:08,  2.56it/s]

Loss:  0.07788453895385275


Sentence 3501:  59%|█████▊    | 11006/18765 [1:19:36<44:31,  2.90it/s]

Loss:  0.07613504864215993


Sentence 3751:  60%|█████▉    | 11256/18765 [1:21:03<46:33,  2.69it/s]

Loss:  0.0740065568126802


Sentence 2:  60%|██████    | 11260/18765 [1:23:42<99:15:01, 47.61s/it]

Epoch: 2 | AUS: 80.36 %


Sentence 251:  61%|██████▏   | 11509/18765 [1:25:54<42:48,  2.82it/s]

Loss:  0.07085582476904938


Sentence 501:  63%|██████▎   | 11759/18765 [1:28:13<56:02,  2.08it/s]

Loss:  0.06754750394266808


Sentence 751:  64%|██████▍   | 12009/18765 [1:30:00<40:44,  2.76it/s]

Loss:  0.06118689433973896


Sentence 1002:  65%|██████▌   | 12260/18765 [1:31:31<30:09,  3.59it/s]

Loss:  0.054213222448917464


Sentence 1251:  67%|██████▋   | 12509/18765 [1:32:46<31:35,  3.30it/s]

Loss:  0.050407887470587595


Sentence 1501:  68%|██████▊   | 12759/18765 [1:34:25<45:46,  2.19it/s]

Loss:  0.05013426886064355


Sentence 1751:  69%|██████▉   | 13009/18765 [1:35:55<38:03,  2.52it/s]

Loss:  0.049109211124474354


Sentence 2001:  71%|███████   | 13259/18765 [1:37:30<21:35,  4.25it/s]

Loss:  0.04947246637172193


Sentence 2252:  72%|███████▏  | 13510/18765 [1:39:16<35:18,  2.48it/s]

Loss:  0.050516228144665366


Sentence 2501:  73%|███████▎  | 13759/18765 [1:41:17<51:08,  1.63it/s]

Loss:  0.051701283529555324


Sentence 2751:  75%|███████▍  | 14009/18765 [1:43:10<52:59,  1.50it/s]

Loss:  0.052704263776522875


Sentence 3002:  76%|███████▌  | 14260/18765 [1:44:49<22:03,  3.40it/s]

Loss:  0.052413112888722364


Sentence 3251:  77%|███████▋  | 14509/18765 [1:46:26<26:49,  2.65it/s]

Loss:  0.05174249782336173


Sentence 3501:  79%|███████▊  | 14759/18765 [1:48:00<22:46,  2.93it/s]

Loss:  0.05091616306441735


Sentence 3751:  80%|███████▉  | 15009/18765 [1:49:27<23:16,  2.69it/s]

Loss:  0.049738523742088335


Sentence 2:  80%|████████  | 15013/18765 [1:52:08<50:08:06, 48.10s/it]

Epoch: 3 | AUS: 81.86 %


Sentence 251:  81%|████████▏ | 15262/18765 [1:54:19<20:36,  2.83it/s]

Loss:  0.049708128386011595


Sentence 501:  83%|████████▎ | 15512/18765 [1:56:38<25:41,  2.11it/s]

Loss:  0.04764850045937283


Sentence 751:  84%|████████▍ | 15762/18765 [1:58:25<18:17,  2.74it/s]

Loss:  0.043252595207777376


Sentence 1002:  85%|████████▌ | 16013/18765 [1:59:56<12:35,  3.64it/s]

Loss:  0.03785480542451825


Sentence 1251:  87%|████████▋ | 16262/18765 [2:01:10<12:10,  3.42it/s]

Loss:  0.03527276894540917


Sentence 1501:  88%|████████▊ | 16512/18765 [2:02:48<17:13,  2.18it/s]

Loss:  0.035537506574063564


Sentence 1751:  89%|████████▉ | 16762/18765 [2:04:16<13:01,  2.56it/s]

Loss:  0.03470848441783843


Sentence 2001:  91%|█████████ | 17012/18765 [2:05:51<06:54,  4.23it/s]

Loss:  0.03492248279257559


Sentence 2252:  92%|█████████▏| 17263/18765 [2:07:37<09:57,  2.51it/s]

Loss:  0.03627175835434063


Sentence 2501:  93%|█████████▎| 17512/18765 [2:09:37<12:43,  1.64it/s]

Loss:  0.03715523957722058


Sentence 2751:  95%|█████████▍| 17762/18765 [2:11:28<11:19,  1.48it/s]

Loss:  0.037166944194619446


Sentence 3002:  96%|█████████▌| 18013/18765 [2:13:07<02:57,  4.24it/s]

Loss:  0.03715865582486641


Sentence 3251:  97%|█████████▋| 18262/18765 [2:14:43<03:11,  2.62it/s]

Loss:  0.037161517033872396


Sentence 3501:  99%|█████████▊| 18512/18765 [2:16:19<01:27,  2.89it/s]

Loss:  0.036555856259566206


Sentence 3751: 100%|█████████▉| 18762/18765 [2:17:45<00:01,  2.67it/s]

Loss:  0.03566186669803062


Sentence 3753: 100%|██████████| 18765/18765 [2:20:22<00:00,  2.23it/s]

Epoch: 4 | AUS: 82.66 %





In [None]:
# Arabic (UD_Arabic-PADT): load the train/dev treebanks and train a parser with
# train_model's default settings. Dataset and train_model are defined outside
# this cell.
# NOTE(review): the saved output below does not match this cell as written. It
# shows a progress bar with total 4000 that runs on to 16150 iterations, no
# per-epoch score lines, and a final printed value 0.7936 (the print below is
# now commented out). It appears to come from an earlier version of the
# training loop -- re-run the cell to refresh it.
# NOTE(review): this cell rebinds model_BB, so whatever model was stored under
# that name before is replaced.
dep_train_arab_data = Dataset("ud-treebanks-v2.5/UD_Arabic-PADT/ar_padt-ud-train.conllu")  # 6075 sentences
dep_dev_arab_data = Dataset("ud-treebanks-v2.5/UD_Arabic-PADT/ar_padt-ud-dev.conllu")
model_BB = train_model(dep_train_arab_data, dep_dev_arab_data)
# Disabled stand-alone evaluation, kept for reference. The trailing note lists
# scores from earlier experiments (author's shorthand); the last entry,
# 79.36% (16150), matches the 0.7936 in the saved output.
#word_vocab, tag_vocab = make_vocabs_head_tagger(dep_train_arab_data)
#print('{:.4f}'.format(uas_tst(model_BB, word_vocab, tag_vocab, dep_dev_arab_data))) #70.62% | now: 73.58% |(1e-3) 74.35 (2000) | 79.36%(16150)

Sentence 2:   0%|          | 1/4000 [00:00<21:02,  3.17it/s]

Loss: 1.0983740091323853


Sentence 252:   6%|▋         | 251/4000 [02:51<35:39,  1.75it/s]

Loss: 0.25778454542160034


Sentence 502:  13%|█▎        | 501/4000 [05:42<41:46,  1.40it/s]

Loss: 0.19700010120868683


Sentence 752:  19%|█▉        | 751/4000 [08:22<31:06,  1.74it/s]

Loss: 0.3189098834991455


Sentence 1002:  25%|██▌       | 1001/4000 [11:08<30:44,  1.63it/s]

Loss: 0.16002917289733887


Sentence 1252:  31%|███▏      | 1251/4000 [13:19<31:01,  1.48it/s]

Loss: 0.3687783479690552


Sentence 1503:  38%|███▊      | 1502/4000 [15:42<08:48,  4.73it/s]

Loss: 0.045794229954481125


Sentence 1752:  44%|████▍     | 1751/4000 [19:11<45:15,  1.21s/it]

Loss: 0.4014382064342499


Sentence 2002:  50%|█████     | 2001/4000 [22:46<18:13,  1.83it/s]

Loss: 0.22065836191177368


Sentence 2252:  56%|█████▋    | 2251/4000 [27:06<38:22,  1.32s/it]

Loss: 0.34691348671913147


Sentence 2502:  63%|██████▎   | 2501/4000 [31:29<17:27,  1.43it/s]

Loss: 0.1975989043712616


Sentence 2752:  69%|██████▉   | 2751/4000 [35:00<16:48,  1.24it/s]

Loss: 0.2710994482040405


Sentence 3002:  75%|███████▌  | 3001/4000 [37:53<19:23,  1.17s/it]

Loss: 0.34668880701065063


Sentence 3252:  81%|████████▏ | 3251/4000 [41:14<12:19,  1.01it/s]

Loss: 0.2672382593154907


Sentence 3502:  88%|████████▊ | 3501/4000 [44:01<02:44,  3.03it/s]

Loss: 0.007796199060976505


Sentence 3752:  94%|█████████▍| 3751/4000 [46:51<02:01,  2.05it/s]

Loss: 0.09407507628202438


Sentence 4002: : 4001it [50:20,  1.76it/s]

Loss: 0.2603321969509125


Sentence 4252: : 4251it [53:15,  1.71it/s]

Loss: 0.1254045069217682


Sentence 4502: : 4501it [56:07,  1.37it/s]

Loss: 0.1334618330001831


Sentence 4752: : 4751it [58:51,  1.66it/s]

Loss: 0.3083362281322479


Sentence 5002: : 5001it [1:01:42,  1.56it/s]

Loss: 0.12268555164337158


Sentence 5252: : 5251it [1:03:57,  1.45it/s]

Loss: 0.2366122305393219


Sentence 5503: : 5502it [1:06:22,  4.53it/s]

Loss: 0.009067932143807411


Sentence 5752: : 5751it [1:09:53,  1.20s/it]

Loss: 0.2850192189216614


Sentence 6002: : 6001it [1:13:29,  1.78it/s]

Loss: 0.1630689799785614


Sentence 6252: : 6251it [1:17:51,  1.33s/it]

Loss: 0.2294647991657257


Sentence 6502: : 6501it [1:22:11,  1.44it/s]

Loss: 0.12771251797676086


Sentence 6752: : 6751it [1:25:42,  1.25it/s]

Loss: 0.20736552774906158


Sentence 7002: : 7001it [1:28:34,  1.15s/it]

Loss: 0.3046208322048187


Sentence 7252: : 7251it [1:31:55,  1.01s/it]

Loss: 0.20371520519256592


Sentence 7502: : 7501it [1:34:42,  3.00it/s]

Loss: 0.0036650430411100388


Sentence 7752: : 7751it [1:37:33,  2.05it/s]

Loss: 0.07097461074590683


Sentence 8002: : 8001it [1:41:02,  1.45it/s]

Loss: 0.2096313089132309


Sentence 8252: : 8251it [1:44:20,  1.33it/s]

Loss: 1.047959327697754


Sentence 8502: : 8501it [1:47:20,  2.29it/s]

Loss: 0.00570244574919343


Sentence 8752: : 8751it [1:50:54,  2.06it/s]

Loss: 0.06990441679954529


Sentence 9002: : 9001it [1:53:39,  1.72it/s]

Loss: 0.14258192479610443


Sentence 9252: : 9251it [1:56:06,  1.52it/s]

Loss: 0.06731906533241272


Sentence 9502: : 9501it [1:58:34,  1.89it/s]

Loss: 0.20173297822475433


Sentence 9752: : 9751it [2:01:08,  1.99it/s]

Loss: 0.14360758662223816


Sentence 10002: : 10001it [2:03:47,  1.52it/s]

Loss: 0.29624491930007935


Sentence 10252: : 10251it [2:06:32,  1.54it/s]

Loss: 0.21832315623760223


Sentence 10502: : 10501it [2:09:29,  1.67it/s]

Loss: 0.08445069938898087


Sentence 10752: : 10751it [2:12:16,  1.48it/s]

Loss: 0.13525211811065674


Sentence 11002: : 11001it [2:15:00,  1.49it/s]

Loss: 0.01509829517453909


Sentence 11252: : 11251it [2:17:35,  3.82it/s]

Loss: 0.08670779317617416


Sentence 11502: : 11501it [2:20:03,  1.58it/s]

Loss: 0.09865832328796387


Sentence 11752: : 11751it [2:22:50,  1.02s/it]

Loss: 0.1141655296087265


Sentence 12002: : 12001it [2:26:26,  1.82it/s]

Loss: 0.10075794160366058


Sentence 12252: : 12251it [2:30:31,  1.29it/s]

Loss: 0.1975765973329544


Sentence 12502: : 12501it [2:35:07,  1.31s/it]

Loss: 0.2763630747795105


Sentence 12752: : 12751it [2:38:41,  2.33it/s]

Loss: 0.013890402391552925


Sentence 13002: : 13001it [2:41:31,  1.12it/s]

Loss: 0.28770866990089417


Sentence 13252: : 13251it [2:44:48,  1.30it/s]

Loss: 0.114558145403862


Sentence 13502: : 13501it [2:47:45,  2.01it/s]

Loss: 0.04166341572999954


Sentence 13752: : 13751it [2:50:24,  1.82it/s]

Loss: 0.18012374639511108


Sentence 14002: : 14001it [2:53:55,  1.17it/s]

Loss: 0.03327888250350952


Sentence 14252: : 14251it [2:56:59,  2.13it/s]

Loss: 0.0061399745754897594


Sentence 14502: : 14501it [3:00:11,  1.62it/s]

Loss: 0.3188972473144531


Sentence 14752: : 14751it [3:03:29,  1.34it/s]

Loss: 0.13297486305236816


Sentence 15002: : 15001it [3:06:35,  1.70it/s]

Loss: 0.17330019176006317


Sentence 15252: : 15251it [3:09:03,  2.10it/s]

Loss: 0.13577710092067719


Sentence 15502: : 15501it [3:11:27,  1.54it/s]

Loss: 0.18889813125133514


Sentence 15752: : 15751it [3:13:49,  1.67it/s]

Loss: 9.466408664593473e-05


Sentence 16002: : 16001it [3:16:35,  1.84it/s]

Loss: 0.06886310130357742


Sentence 16150: : 16150it [3:18:04,  1.36it/s]


0.7936


In [None]:
# Spanish (UD_Spanish-GSD): load the train/dev treebanks and train a parser with
# train_model's default settings. Dataset and train_model are defined outside
# this cell.
# Recorded run (saved output below): 5 passes, dev score after each pass:
# 85.31 / 86.37 / 86.62 / 86.59 / 86.57 %. The log labels that score "AUS" --
# presumably UAS (unlabeled attachment score); TODO confirm in train_model.
# NOTE(review): in that output every pass stops at sentence 4000 while the
# progress bar's total is 70935 (= 5 x 14187), so the bar ends at 28 % and its
# ETA is meaningless. Presumably train_model caps each pass at a default limit
# but sizes the bar from the full dataset -- verify in train_model.
# NOTE(review): this cell rebinds model_BB, so whatever model was stored under
# that name before is replaced.
dep_train_esp_data = Dataset("ud-treebanks-v2.5/UD_Spanish-GSD/es_gsd-ud-train.conllu")  # 14187 training sentences
dep_dev_esp_data = Dataset("ud-treebanks-v2.5/UD_Spanish-GSD/es_gsd-ud-dev.conllu")
model_BB = train_model(dep_train_esp_data, dep_dev_esp_data)
# Disabled stand-alone evaluation, kept for reference. The trailing note lists
# scores from earlier experiments (author's shorthand).
#word_vocab, tag_vocab = make_vocabs_head_tagger(dep_train_esp_data)
#print('{:.4f}'.format(uas_tst(model_BB, word_vocab, tag_vocab, dep_dev_esp_data))) #72% | now: 80.11% | 81.26% (2000) |

Sentence 251:   0%|          | 250/70935 [02:46<15:16:00,  1.29it/s]

Loss:  0.44064745987951753


Sentence 501:   1%|          | 500/70935 [05:27<10:59:14,  1.78it/s]

Loss:  0.3461833526622504


Sentence 751:   1%|          | 750/70935 [07:56<13:32:08,  1.44it/s]

Loss:  0.3094776276126504


Sentence 1001:   1%|▏         | 1000/70935 [10:34<12:48:01,  1.52it/s]

Loss:  0.28476249831635503


Sentence 1251:   2%|▏         | 1250/70935 [12:55<10:32:28,  1.84it/s]

Loss:  0.2662609063392505


Sentence 1501:   2%|▏         | 1500/70935 [15:21<12:14:59,  1.57it/s]

Loss:  0.2534913334483281


Sentence 1751:   2%|▏         | 1750/70935 [17:52<12:01:14,  1.60it/s]

Loss:  0.24111390392445692


Sentence 2001:   3%|▎         | 2000/70935 [20:28<13:42:33,  1.40it/s]

Loss:  0.23368667572483537


Sentence 2251:   3%|▎         | 2250/70935 [23:08<13:23:33,  1.42it/s]

Loss:  0.2267612222225871


Sentence 2501:   4%|▎         | 2500/70935 [25:43<15:54:06,  1.20it/s]

Loss:  0.21954199851057493


Sentence 2751:   4%|▍         | 2750/70935 [28:19<15:03:41,  1.26it/s]

Loss:  0.21529146622184833


Sentence 3001:   4%|▍         | 3000/70935 [30:55<20:26:12,  1.08s/it]

Loss:  0.2111773417649092


Sentence 3251:   5%|▍         | 3250/70935 [33:30<10:27:06,  1.80it/s]

Loss:  0.2070773440726603


Sentence 3501:   5%|▍         | 3500/70935 [36:01<9:41:12,  1.93it/s]

Loss:  0.203292993252598


Sentence 3751:   5%|▌         | 3750/70935 [38:30<11:05:29,  1.68it/s]

Loss:  0.19942159639812695


Sentence 4000:   6%|▌         | 4000/70935 [41:03<13:31:56,  1.37it/s]

Loss:  0.19568865952981288


Sentence 1:   6%|▌         | 4000/70935 [48:35<13:31:56,  1.37it/s]   

Epoch: 0 | AUS: 85.31 %


Sentence 251:   6%|▌         | 4250/70935 [51:24<14:58:50,  1.24it/s]

Loss:  0.1400055589065887


Sentence 501:   6%|▋         | 4500/70935 [54:11<10:42:30,  1.72it/s]

Loss:  0.13466524661914445


Sentence 751:   7%|▋         | 4750/70935 [56:48<13:32:16,  1.36it/s]

Loss:  0.13415898921635622


Sentence 1001:   7%|▋         | 5000/70935 [59:30<12:30:49,  1.46it/s]

Loss:  0.1331137862491887


Sentence 1251:   7%|▋         | 5250/70935 [1:01:56<10:39:04,  1.71it/s]

Loss:  0.1299653384723235


Sentence 1501:   8%|▊         | 5500/70935 [1:04:26<11:46:54,  1.54it/s]

Loss:  0.12934279271131768


Sentence 1751:   8%|▊         | 5750/70935 [1:07:03<11:56:46,  1.52it/s]

Loss:  0.12643438440089813


Sentence 2001:   8%|▊         | 6000/70935 [1:09:43<13:25:14,  1.34it/s]

Loss:  0.12508508476935823


Sentence 2251:   9%|▉         | 6250/70935 [1:12:29<12:45:09,  1.41it/s]

Loss:  0.12328317095713767


Sentence 2501:   9%|▉         | 6500/70935 [1:15:07<15:23:37,  1.16it/s]

Loss:  0.12140651107646408


Sentence 2751:  10%|▉         | 6750/70935 [1:17:45<14:12:02,  1.26it/s]

Loss:  0.12138288990354737


Sentence 3001:  10%|▉         | 7000/70935 [1:20:26<19:27:56,  1.10s/it]

Loss:  0.12094098367448411


Sentence 3251:  10%|█         | 7250/70935 [1:23:04<9:52:04,  1.79it/s]

Loss:  0.12000487840821286


Sentence 3501:  11%|█         | 7500/70935 [1:25:36<9:03:25,  1.95it/s]

Loss:  0.11904298630643877


Sentence 3751:  11%|█         | 7750/70935 [1:28:07<10:44:15,  1.63it/s]

Loss:  0.11744552020790094


Sentence 4000:  11%|█▏        | 8000/70935 [1:30:41<12:54:25,  1.35it/s]

Loss:  0.11615707620497778


Sentence 1:  11%|█▏        | 8000/70935 [1:38:19<12:54:25,  1.35it/s]   

Epoch: 1 | AUS: 86.37 %


Sentence 251:  12%|█▏        | 8250/70935 [1:41:11<14:08:54,  1.23it/s]

Loss:  0.09924722832860425


Sentence 501:  12%|█▏        | 8500/70935 [1:44:00<10:12:57,  1.70it/s]

Loss:  0.09425126143579837


Sentence 751:  12%|█▏        | 8750/70935 [1:46:38<12:39:56,  1.36it/s]

Loss:  0.09215421291281624


Sentence 1001:  13%|█▎        | 9000/70935 [1:49:22<11:40:31,  1.47it/s]

Loss:  0.09102222284061282


Sentence 1251:  13%|█▎        | 9250/70935 [1:51:50<9:39:05,  1.78it/s]

Loss:  0.08875048885350116


Sentence 1501:  13%|█▎        | 9500/70935 [1:54:22<11:24:48,  1.50it/s]

Loss:  0.08871037450785904


Sentence 1751:  14%|█▎        | 9750/70935 [1:56:58<11:08:03,  1.53it/s]

Loss:  0.08730281743598088


Sentence 2001:  14%|█▍        | 10000/70935 [1:59:40<12:34:21,  1.35it/s]

Loss:  0.08595301665057377


Sentence 2251:  14%|█▍        | 10250/70935 [2:02:24<12:16:01,  1.37it/s]

Loss:  0.08489778159508811


Sentence 2501:  15%|█▍        | 10500/70935 [2:05:03<14:21:51,  1.17it/s]

Loss:  0.0833101606441156


Sentence 2751:  15%|█▌        | 10750/70935 [2:07:43<13:56:59,  1.20it/s]

Loss:  0.08310307130958806


Sentence 3001:  16%|█▌        | 11000/70935 [2:10:24<18:09:16,  1.09s/it]

Loss:  0.08269801751013346


Sentence 3251:  16%|█▌        | 11250/70935 [2:13:03<9:16:49,  1.79it/s]

Loss:  0.08204415847037727


Sentence 3501:  16%|█▌        | 11500/70935 [2:15:36<8:30:12,  1.94it/s]

Loss:  0.08093859905355723


Sentence 3751:  17%|█▋        | 11750/70935 [2:18:07<9:49:42,  1.67it/s]

Loss:  0.07953276397288306


Sentence 4000:  17%|█▋        | 12000/70935 [2:20:41<12:06:57,  1.35it/s]

Loss:  0.07823679658843019


Sentence 1:  17%|█▋        | 12000/70935 [2:28:19<12:06:57,  1.35it/s]   

Epoch: 2 | AUS: 86.62 %


Sentence 251:  17%|█▋        | 12250/70935 [2:31:11<13:11:32,  1.24it/s]

Loss:  0.06950070715570837


Sentence 501:  18%|█▊        | 12500/70935 [2:34:00<9:35:28,  1.69it/s]

Loss:  0.06501913187877108


Sentence 751:  18%|█▊        | 12750/70935 [2:36:38<11:48:19,  1.37it/s]

Loss:  0.06325819075363082


Sentence 1001:  18%|█▊        | 13000/70935 [2:39:22<10:55:31,  1.47it/s]

Loss:  0.06304673480665861


Sentence 1251:  19%|█▊        | 13250/70935 [2:41:51<8:57:21,  1.79it/s]

Loss:  0.060398830916768204


Sentence 1501:  19%|█▉        | 13500/70935 [2:44:23<10:35:16,  1.51it/s]

Loss:  0.06017972213365586


Sentence 1751:  19%|█▉        | 13750/70935 [2:46:59<10:28:43,  1.52it/s]

Loss:  0.05877992250645184


Sentence 2001:  20%|█▉        | 14000/70935 [2:49:42<11:39:12,  1.36it/s]

Loss:  0.058238961404915246


Sentence 2251:  20%|██        | 14250/70935 [2:52:27<11:23:34,  1.38it/s]

Loss:  0.05811067282988713


Sentence 2501:  20%|██        | 14500/70935 [2:55:07<13:43:55,  1.14it/s]

Loss:  0.05681818245354116


Sentence 2751:  21%|██        | 14750/70935 [2:57:45<12:36:46,  1.24it/s]

Loss:  0.05736318063791276


Sentence 3001:  21%|██        | 15000/70935 [3:00:24<16:51:07,  1.08s/it]

Loss:  0.05710575821871726


Sentence 3251:  21%|██▏       | 15250/70935 [3:03:02<8:43:20,  1.77it/s]

Loss:  0.056592902052782494


Sentence 3501:  22%|██▏       | 15500/70935 [3:05:35<7:52:08,  1.96it/s]

Loss:  0.055862649690281874


Sentence 3751:  22%|██▏       | 15750/70935 [3:08:05<9:02:24,  1.70it/s]

Loss:  0.05515813922485495


Sentence 4000:  23%|██▎       | 16000/70935 [3:10:39<11:31:53,  1.32it/s]

Loss:  0.0544082315858673


Sentence 1:  23%|██▎       | 16000/70935 [3:18:12<11:31:53,  1.32it/s]   

Epoch: 3 | AUS: 86.59 %


Sentence 251:  23%|██▎       | 16250/70935 [3:21:01<12:14:52,  1.24it/s]

Loss:  0.0507926118483374


Sentence 501:  23%|██▎       | 16500/70935 [3:23:48<9:04:22,  1.67it/s]

Loss:  0.04744966349970855


Sentence 751:  24%|██▎       | 16750/70935 [3:26:25<10:55:43,  1.38it/s]

Loss:  0.04544328224888644


Sentence 1001:  24%|██▍       | 17000/70935 [3:29:09<10:05:19,  1.49it/s]

Loss:  0.04405457433571064


Sentence 1251:  24%|██▍       | 17250/70935 [3:31:36<8:24:50,  1.77it/s]

Loss:  0.04278020623651828


Sentence 1501:  25%|██▍       | 17500/70935 [3:34:09<9:58:48,  1.49it/s]

Loss:  0.04201104780514591


Sentence 1751:  25%|██▌       | 17750/70935 [3:36:44<9:36:01,  1.54it/s]

Loss:  0.04140945579165522


Sentence 2001:  25%|██▌       | 18000/70935 [3:39:24<10:45:53,  1.37it/s]

Loss:  0.04187138696429918


Sentence 2251:  26%|██▌       | 18250/70935 [3:42:09<10:24:44,  1.41it/s]

Loss:  0.04136347522945405


Sentence 2501:  26%|██▌       | 18500/70935 [3:44:49<12:30:33,  1.16it/s]

Loss:  0.04045863235616521


Sentence 2751:  26%|██▋       | 18750/70935 [3:47:27<11:26:35,  1.27it/s]

Loss:  0.040988579559181865


Sentence 3001:  27%|██▋       | 19000/70935 [3:50:06<15:40:02,  1.09s/it]

Loss:  0.040863402619228925


Sentence 3251:  27%|██▋       | 19250/70935 [3:52:44<8:13:08,  1.75it/s]

Loss:  0.040503605966180974


Sentence 3501:  27%|██▋       | 19500/70935 [3:55:16<7:14:41,  1.97it/s]

Loss:  0.03998453444461695


Sentence 3751:  28%|██▊       | 19750/70935 [3:57:47<8:25:55,  1.69it/s]

Loss:  0.039293514705632575


Sentence 4000:  28%|██▊       | 20000/70935 [4:00:21<10:45:47,  1.31it/s]

Loss:  0.03866852368081392


Sentence 4000:  28%|██▊       | 20000/70935 [4:07:53<10:31:20,  1.34it/s]

Epoch: 4 | AUS: 86.57 %





In [None]:
# Spanish (UD_Spanish-GSD) again, this time overriding two train_model
# settings: n_epochs = 2 and lim_ep = 10000. Dataset and train_model are
# defined outside this cell.
# lim_ep is presumably the maximum number of sentences used per pass: the saved
# output of this run continues past sentence 4000 within the first pass --
# TODO confirm in train_model.
# The progress-bar total in the saved output is 28374 (= 2 x 14187), i.e. it
# looks sized from the full dataset rather than from lim_ep.
# NOTE(review): this cell re-parses the same two files and rebinds
# dep_train_esp_data, dep_dev_esp_data and model_BB; any objects already stored
# under those names are replaced.
dep_train_esp_data = Dataset("ud-treebanks-v2.5/UD_Spanish-GSD/es_gsd-ud-train.conllu")  # 14187 training sentences
dep_dev_esp_data = Dataset("ud-treebanks-v2.5/UD_Spanish-GSD/es_gsd-ud-dev.conllu")
model_BB = train_model(dep_train_esp_data, dep_dev_esp_data, n_epochs = 2, lim_ep=10000)
# Disabled stand-alone evaluation, kept for reference. The trailing note lists
# scores from earlier experiments (author's shorthand).
#word_vocab, tag_vocab = make_vocabs_head_tagger(dep_train_esp_data)
#print('{:.4f}'.format(uas_tst(model_BB, word_vocab, tag_vocab, dep_dev_esp_data))) #72% | now: 80.11% | 81.26% (2000) |

Sentence 251:   1%|          | 250/28374 [02:22<5:17:36,  1.48it/s]

Loss:  0.442774000748992


Sentence 501:   2%|▏         | 500/28374 [04:41<3:39:17,  2.12it/s]

Loss:  0.3450293645914644


Sentence 751:   3%|▎         | 750/28374 [06:52<4:37:14,  1.66it/s]

Loss:  0.3076120958638688


Sentence 1001:   4%|▎         | 1000/28374 [09:08<4:25:17,  1.72it/s]

Loss:  0.2830589287281036


Sentence 1251:   4%|▍         | 1250/28374 [11:11<3:35:55,  2.09it/s]

Loss:  0.2641209123412147


Sentence 1501:   5%|▌         | 1500/28374 [13:19<4:08:36,  1.80it/s]

Loss:  0.2518787119492578


Sentence 1751:   6%|▌         | 1750/28374 [15:31<4:15:08,  1.74it/s]

Loss:  0.23924868045561015


Sentence 2001:   7%|▋         | 2000/28374 [17:47<4:35:11,  1.60it/s]

Loss:  0.23195190665405244


Sentence 2251:   8%|▊         | 2250/28374 [20:07<4:34:06,  1.59it/s]

Loss:  0.22535707576624636


Sentence 2501:   9%|▉         | 2500/28374 [22:23<5:21:01,  1.34it/s]

Loss:  0.21805310424226337


Sentence 2751:  10%|▉         | 2750/28374 [24:39<4:54:48,  1.45it/s]

Loss:  0.21376968321542847


Sentence 3001:  11%|█         | 3000/28374 [26:56<6:39:43,  1.06it/s]

Loss:  0.21018992563709615


Sentence 3251:  11%|█▏        | 3250/28374 [29:11<3:24:36,  2.05it/s]

Loss:  0.2061611773784165


Sentence 3501:  12%|█▏        | 3500/28374 [31:22<3:02:14,  2.27it/s]

Loss:  0.20242139169153026


Sentence 3751:  13%|█▎        | 3750/28374 [33:31<3:31:50,  1.94it/s]

Loss:  0.19852234106583833


Sentence 4001:  14%|█▍        | 4000/28374 [35:45<4:22:34,  1.55it/s]

Loss:  0.19492465196772538


Sentence 4251:  15%|█▍        | 4250/28374 [38:02<3:27:27,  1.94it/s]

Loss:  0.1918219711428152


Sentence 4501:  16%|█▌        | 4500/28374 [40:13<3:55:58,  1.69it/s]

Loss:  0.1890580937319901


Sentence 4751:  17%|█▋        | 4750/28374 [42:33<4:48:03,  1.37it/s]

Loss:  0.18731712245235319


Sentence 5001:  18%|█▊        | 5000/28374 [44:48<3:50:10,  1.69it/s]

Loss:  0.1843751890195068


Sentence 5251:  19%|█▊        | 5250/28374 [47:04<4:05:53,  1.57it/s]

Loss:  0.1824454978044433


Sentence 5501:  19%|█▉        | 5500/28374 [49:25<3:57:33,  1.60it/s]

Loss:  0.18073741634718185


Sentence 5751:  20%|██        | 5750/28374 [51:41<3:12:18,  1.96it/s]

Loss:  0.17935403627706895


Sentence 6001:  21%|██        | 6000/28374 [54:00<3:57:05,  1.57it/s]

Loss:  0.17766558903827293


Sentence 6251:  22%|██▏       | 6250/28374 [56:15<3:00:06,  2.05it/s]

Loss:  0.17611927102995106


Sentence 6501:  23%|██▎       | 6500/28374 [58:38<3:55:09,  1.55it/s]

Loss:  0.1745571931926847


Sentence 6751:  24%|██▍       | 6750/28374 [1:00:59<3:30:21,  1.71it/s]

Loss:  0.17302230482348843


Sentence 7001:  25%|██▍       | 7000/28374 [1:03:17<2:32:26,  2.34it/s]

Loss:  0.17146929318769252


Sentence 7251:  26%|██▌       | 7250/28374 [1:05:34<4:11:27,  1.40it/s]

Loss:  0.16938077091778517


Sentence 7501:  26%|██▋       | 7500/28374 [1:07:50<3:39:03,  1.59it/s]

Loss:  0.16844441881348612


Sentence 7751:  27%|██▋       | 7750/28374 [1:10:08<3:39:33,  1.57it/s]

Loss:  0.16740729531242077


Sentence 8001:  28%|██▊       | 8000/28374 [1:12:26<3:24:17,  1.66it/s]

Loss:  0.16619276650830217


Sentence 8251:  29%|██▉       | 8250/28374 [1:14:43<2:38:15,  2.12it/s]

Loss:  0.16454896561125165


Sentence 8501:  30%|██▉       | 8500/28374 [1:17:04<4:33:53,  1.21it/s]

Loss:  0.1632904659226393


Sentence 8751:  31%|███       | 8750/28374 [1:19:21<3:45:44,  1.45it/s]

Loss:  0.1621181957295996


Sentence 9001:  32%|███▏      | 9000/28374 [1:21:41<3:43:54,  1.44it/s]

Loss:  0.16108148133729183


Sentence 9251:  33%|███▎      | 9250/28374 [1:24:01<2:41:18,  1.98it/s]

Loss:  0.15995441118487458


Sentence 9501:  33%|███▎      | 9500/28374 [1:26:24<3:04:46,  1.70it/s]

Loss:  0.1591295043920216


Sentence 9751:  34%|███▍      | 9750/28374 [1:28:43<2:42:28,  1.91it/s]

Loss:  0.15836145971418145


Sentence 10000:  35%|███▌      | 10000/28374 [1:30:59<3:06:30,  1.64it/s]

Loss:  0.15772670656634727


Sentence 1:  35%|███▌      | 10000/28374 [1:37:21<3:06:30,  1.64it/s]    

Epoch: 0 | AUS: 87.64 %


Sentence 251:  36%|███▌      | 10250/28374 [1:39:55<3:41:41,  1.36it/s]

Loss:  0.12348066278873011


Sentence 501:  37%|███▋      | 10500/28374 [1:42:26<2:35:23,  1.92it/s]

Loss:  0.11999063602671958


Sentence 751:  38%|███▊      | 10750/28374 [1:44:48<3:15:57,  1.50it/s]

Loss:  0.11926186558565435


Sentence 1001:  39%|███▉      | 11000/28374 [1:47:20<3:03:02,  1.58it/s]

Loss:  0.11985298787729698


Sentence 1251:  40%|███▉      | 11250/28374 [1:49:37<2:29:09,  1.91it/s]

Loss:  0.11715369958040538


Sentence 1501:  41%|████      | 11500/28374 [1:51:57<2:50:17,  1.65it/s]

Loss:  0.11690929251202518


Sentence 1751:  41%|████▏     | 11750/28374 [1:54:19<2:43:57,  1.69it/s]

Loss:  0.11442836448109925


Sentence 2001:  42%|████▏     | 12000/28374 [1:56:47<3:04:45,  1.48it/s]

Loss:  0.11410449370836795


Sentence 2251:  43%|████▎     | 12250/28374 [1:59:18<2:57:16,  1.52it/s]

Loss:  0.11391920410983665


Sentence 2501:  44%|████▍     | 12500/28374 [2:01:45<3:28:23,  1.27it/s]

Loss:  0.11220444627561083


Sentence 2751:  45%|████▍     | 12750/28374 [2:04:11<3:08:19,  1.38it/s]

Loss:  0.11260500743435643


Sentence 3001:  46%|████▌     | 13000/28374 [2:06:37<4:16:27,  1.00s/it]

Loss:  0.11272131560743583


Sentence 3251:  47%|████▋     | 13250/28374 [2:09:01<2:11:50,  1.91it/s]

Loss:  0.11234641232468018


Sentence 3501:  48%|████▊     | 13500/28374 [2:11:23<1:59:49,  2.07it/s]

Loss:  0.11211782489874375


Sentence 3751:  48%|████▊     | 13750/28374 [2:13:42<2:15:09,  1.80it/s]

Loss:  0.11126251055625616


Sentence 4001:  49%|████▉     | 14000/28374 [2:16:05<2:43:48,  1.46it/s]

Loss:  0.11033659584518682


Sentence 4251:  50%|█████     | 14250/28374 [2:18:30<2:08:49,  1.83it/s]

Loss:  0.10980705148635281


Sentence 4501:  51%|█████     | 14500/28374 [2:20:49<2:25:31,  1.59it/s]

Loss:  0.1089775865477083


Sentence 4751:  52%|█████▏    | 14750/28374 [2:23:19<2:57:51,  1.28it/s]

Loss:  0.1092214984420785


Sentence 5001:  53%|█████▎    | 15000/28374 [2:25:44<2:17:33,  1.62it/s]

Loss:  0.1084593718473232


Sentence 5251:  54%|█████▎    | 15250/28374 [2:28:12<2:31:18,  1.45it/s]

Loss:  0.10826825842980456


Sentence 5501:  55%|█████▍    | 15500/28374 [2:30:44<2:24:25,  1.49it/s]

Loss:  0.10790738246640301


Sentence 5751:  56%|█████▌    | 15750/28374 [2:33:11<1:56:14,  1.81it/s]

Loss:  0.10795382311645992


Sentence 6001:  56%|█████▋    | 16000/28374 [2:35:40<2:16:36,  1.51it/s]

Loss:  0.10780595077577285


Sentence 6251:  57%|█████▋    | 16250/28374 [2:38:08<1:54:42,  1.76it/s]

Loss:  0.10741583194482082


Sentence 6501:  58%|█████▊    | 16500/28374 [2:40:53<2:28:30,  1.33it/s]

Loss:  0.10703517916131386


Sentence 6751:  59%|█████▉    | 16750/28374 [2:43:48<2:19:59,  1.38it/s]

Loss:  0.10647209392060877


Sentence 7001:  60%|█████▉    | 17000/28374 [2:46:34<1:41:42,  1.86it/s]

Loss:  0.10582758773115503


Sentence 7251:  61%|██████    | 17250/28374 [2:49:19<2:33:05,  1.21it/s]

Loss:  0.10489942090107572


Sentence 7501:  62%|██████▏   | 17500/28374 [2:52:06<2:06:31,  1.43it/s]

Loss:  0.10444825529979086


Sentence 7751:  63%|██████▎   | 17750/28374 [2:54:37<1:59:27,  1.48it/s]

Loss:  0.10433521184218238


Sentence 8001:  63%|██████▎   | 18000/28374 [2:57:08<1:54:24,  1.51it/s]

Loss:  0.10392188508783011


Sentence 8251:  64%|██████▍   | 18250/28374 [2:59:36<1:26:36,  1.95it/s]

Loss:  0.10314192902412182


Sentence 8501:  65%|██████▌   | 18500/28374 [3:02:10<2:27:56,  1.11it/s]

Loss:  0.10265923061026479


Sentence 8751:  66%|██████▌   | 18750/28374 [3:04:40<2:01:12,  1.32it/s]

Loss:  0.10232065025828965


Sentence 9001:  67%|██████▋   | 19000/28374 [3:07:13<1:56:39,  1.34it/s]

Loss:  0.10208962202176573


Sentence 9251:  68%|██████▊   | 19250/28374 [3:09:46<1:23:29,  1.82it/s]

Loss:  0.10173702673234888


Sentence 9501:  69%|██████▊   | 19500/28374 [3:12:24<1:33:33,  1.58it/s]

Loss:  0.10151474636402992


Sentence 9751:  70%|██████▉   | 19750/28374 [3:14:56<1:22:50,  1.74it/s]

Loss:  0.1012933226662773


Sentence 10000:  70%|███████   | 20000/28374 [3:17:23<1:33:16,  1.50it/s]

Loss:  0.10114526392016596


Sentence 10000:  70%|███████   | 20000/28374 [3:24:13<1:25:30,  1.63it/s]

Epoch: 1 | AUS: 88.18 %





In [None]:
# Swedish (UD_Swedish-LinES): load the train/dev treebanks and train with train_model's defaults.
# NOTE(review): the `svk` suffix in these variable names refers to the Swedish LinES data
# loaded here; the names are kept because other cells may reference them.
# The recorded tqdm total for this cell is 15880 = 5 epochs x 3176, so the number
# noted on the next line appears to be the count of training sentences.
dep_train_svk_data = Dataset("ud-treebanks-v2.5/UD_Swedish-LinES/sv_lines-ud-train.conllu") # 3176 (presumably training sentences)
dep_dev_svk_data = Dataset("ud-treebanks-v2.5/UD_Swedish-LinES/sv_lines-ud-dev.conllu")
model_BB = train_model(dep_train_svk_data, dep_dev_svk_data) # recorded run printed 81.37 / 82.34 / 83.52 / 82.43 / 82.74 % for epochs 0-4 (label "AUS" in the log, presumably dev UAS)
# Earlier stand-alone evaluation, kept for reference together with the scores noted at the time:
#word_vocab, tag_vocab = make_vocabs_head_tagger(dep_train_svk_data)
#print('{:.4f}'.format(uas_tst(model_BB, word_vocab, tag_vocab, dep_dev_svk_data))) #71% |(1e-3)78.82 (2000) |

Sentence 252:   2%|▏         | 251/15880 [01:19<1:32:41,  2.81it/s]

Loss:  0.44293348531611265


Sentence 501:   3%|▎         | 500/15880 [02:52<1:31:26,  2.80it/s]

Loss:  0.3616340642780997


Sentence 751:   5%|▍         | 750/15880 [04:26<1:09:14,  3.64it/s]

Loss:  0.31967914740958564


Sentence 1001:   6%|▋         | 1000/15880 [05:50<1:47:33,  2.31it/s]

Loss:  0.29003396558761596


Sentence 1251:   8%|▊         | 1250/15880 [07:31<1:27:12,  2.80it/s]

Loss:  0.2750796355754137


Sentence 1501:   9%|▉         | 1500/15880 [09:15<1:29:08,  2.69it/s]

Loss:  0.26388299365504647


Sentence 1751:  11%|█         | 1750/15880 [10:38<1:14:44,  3.15it/s]

Loss:  0.25486685922961416


Sentence 2001:  13%|█▎        | 2000/15880 [12:10<1:25:32,  2.70it/s]

Loss:  0.2461477169576683


Sentence 2251:  14%|█▍        | 2250/15880 [14:05<1:01:09,  3.71it/s]

Loss:  0.24168097872944136


Sentence 2501:  16%|█▌        | 2500/15880 [15:50<55:44,  4.00it/s]

Loss:  0.23490137545322068


Sentence 2752:  17%|█▋        | 2751/15880 [17:24<1:08:03,  3.22it/s]

Loss:  0.22818113344797694


Sentence 3001:  19%|█▉        | 3000/15880 [18:55<1:18:40,  2.73it/s]

Loss:  0.22254259942012142


Sentence 2:  20%|██        | 3177/15880 [23:00<193:56:57, 54.96s/it]

Epoch: 0 | AUS: 81.37 %


Sentence 252:  22%|██▏       | 3427/15880 [24:23<1:15:19,  2.76it/s]

Loss:  0.13359529445995577


Sentence 501:  23%|██▎       | 3676/15880 [25:58<1:12:17,  2.81it/s]

Loss:  0.13771883465338033


Sentence 751:  25%|██▍       | 3926/15880 [27:34<56:40,  3.52it/s]

Loss:  0.13704481442358035


Sentence 1001:  26%|██▋       | 4176/15880 [28:58<1:28:09,  2.21it/s]

Loss:  0.13109733252361183


Sentence 1251:  28%|██▊       | 4426/15880 [30:41<1:09:31,  2.75it/s]

Loss:  0.13148116322085263


Sentence 1501:  29%|██▉       | 4676/15880 [32:26<1:11:15,  2.62it/s]

Loss:  0.13232134591340824


Sentence 1751:  31%|███       | 4926/15880 [33:51<59:52,  3.05it/s]

Loss:  0.1316687198674772


Sentence 2001:  33%|███▎      | 5176/15880 [35:24<1:07:28,  2.64it/s]

Loss:  0.12975507680376178


Sentence 2251:  34%|███▍      | 5426/15880 [37:20<45:22,  3.84it/s]

Loss:  0.13048995553174367


Sentence 2501:  36%|███▌      | 5676/15880 [39:01<42:48,  3.97it/s]

Loss:  0.12873335019149235


Sentence 2752:  37%|███▋      | 5927/15880 [40:36<51:40,  3.21it/s]

Loss:  0.12639906165292697


Sentence 3001:  39%|███▉      | 6176/15880 [42:06<58:02,  2.79it/s]

Loss:  0.12503379857249214


Sentence 2:  40%|████      | 6353/15880 [46:13<146:07:24, 55.22s/it]

Epoch: 1 | AUS: 82.34 %


Sentence 252:  42%|████▏     | 6603/15880 [47:36<55:02,  2.81it/s]

Loss:  0.09184216059109894


Sentence 501:  43%|████▎     | 6852/15880 [49:11<52:39,  2.86it/s]

Loss:  0.09238439479159388


Sentence 751:  45%|████▍     | 7102/15880 [50:47<40:58,  3.57it/s]

Loss:  0.09246993599572427


Sentence 1001:  46%|████▋     | 7352/15880 [52:10<1:03:04,  2.25it/s]

Loss:  0.08814419156161148


Sentence 1251:  48%|████▊     | 7602/15880 [53:53<50:58,  2.71it/s]

Loss:  0.08926348490111631


Sentence 1501:  49%|████▉     | 7852/15880 [55:39<50:27,  2.65it/s]

Loss:  0.09112605213456906


Sentence 1751:  51%|█████     | 8102/15880 [57:05<42:35,  3.04it/s]

Loss:  0.08953609690974917


Sentence 2001:  53%|█████▎    | 8352/15880 [58:38<46:47,  2.68it/s]

Loss:  0.08709731583200528


Sentence 2251:  54%|█████▍    | 8602/15880 [1:00:35<31:46,  3.82it/s]

Loss:  0.08782687904784083


Sentence 2501:  56%|█████▌    | 8852/15880 [1:02:17<29:36,  3.96it/s]

Loss:  0.08598855508868983


Sentence 2752:  57%|█████▋    | 9103/15880 [1:03:54<35:26,  3.19it/s]

Loss:  0.08430433727906249


Sentence 3001:  59%|█████▉    | 9352/15880 [1:05:25<39:36,  2.75it/s]

Loss:  0.08330313575918656


Sentence 2:  60%|██████    | 9529/15880 [1:09:32<97:56:51, 55.52s/it]

Epoch: 2 | AUS: 83.52 %


Sentence 252:  62%|██████▏   | 9779/15880 [1:10:56<37:24,  2.72it/s]

Loss:  0.06176574573410835


Sentence 501:  63%|██████▎   | 10028/15880 [1:12:32<33:50,  2.88it/s]

Loss:  0.060666474108887994


Sentence 751:  65%|██████▍   | 10278/15880 [1:14:08<26:34,  3.51it/s]

Loss:  0.06123594763973839


Sentence 1001:  66%|██████▋   | 10528/15880 [1:15:32<40:03,  2.23it/s]

Loss:  0.05765118216425617


Sentence 1251:  68%|██████▊   | 10778/15880 [1:17:15<30:48,  2.76it/s]

Loss:  0.05880543287540859


Sentence 1501:  69%|██████▉   | 11028/15880 [1:19:02<31:31,  2.57it/s]

Loss:  0.05990714608810413


Sentence 1751:  71%|███████   | 11278/15880 [1:20:27<24:50,  3.09it/s]

Loss:  0.0585949213297802


Sentence 2001:  73%|███████▎  | 11528/15880 [1:22:01<27:03,  2.68it/s]

Loss:  0.05683845582072684


Sentence 2251:  74%|███████▍  | 11778/15880 [1:23:58<17:42,  3.86it/s]

Loss:  0.0577751350760317


Sentence 2501:  76%|███████▌  | 12028/15880 [1:25:41<16:14,  3.95it/s]

Loss:  0.05713222113871052


Sentence 2752:  77%|███████▋  | 12279/15880 [1:27:17<19:03,  3.15it/s]

Loss:  0.056157241825900865


Sentence 3001:  79%|███████▉  | 12528/15880 [1:28:48<20:30,  2.72it/s]

Loss:  0.055949749604740494


Sentence 2:  80%|████████  | 12705/15880 [1:32:56<48:58:40, 55.53s/it]

Epoch: 3 | AUS: 82.43 %


Sentence 252:  82%|████████▏ | 12955/15880 [1:34:19<17:25,  2.80it/s]

Loss:  0.03775701379596535


Sentence 501:  83%|████████▎ | 13204/15880 [1:35:54<15:27,  2.89it/s]

Loss:  0.03924631226632493


Sentence 751:  85%|████████▍ | 13454/15880 [1:37:31<11:38,  3.47it/s]

Loss:  0.039514728863978915


Sentence 1001:  86%|████████▋ | 13704/15880 [1:38:55<16:10,  2.24it/s]

Loss:  0.03788620384968095


Sentence 1251:  88%|████████▊ | 13954/15880 [1:40:38<11:48,  2.72it/s]

Loss:  0.03916560549250503


Sentence 1501:  89%|████████▉ | 14204/15880 [1:42:24<10:59,  2.54it/s]

Loss:  0.040507239891720816


Sentence 1751:  91%|█████████ | 14454/15880 [1:43:49<07:46,  3.06it/s]

Loss:  0.03946404412016575


Sentence 2001:  93%|█████████▎| 14704/15880 [1:45:24<07:25,  2.64it/s]

Loss:  0.038176389757868065


Sentence 2251:  94%|█████████▍| 14954/15880 [1:47:20<04:04,  3.79it/s]

Loss:  0.03991948931671323


Sentence 2501:  96%|█████████▌| 15204/15880 [1:49:02<02:50,  3.96it/s]

Loss:  0.03937142055706881


Sentence 2752:  97%|█████████▋| 15455/15880 [1:50:39<02:12,  3.20it/s]

Loss:  0.03917843671839811


Sentence 3001:  99%|█████████▉| 15704/15880 [1:52:10<01:03,  2.76it/s]

Loss:  0.03915995189891199


Sentence 3176: 100%|██████████| 15880/15880 [1:56:16<00:00,  2.28it/s]

Epoch: 4 | AUS: 82.74 %





In [None]:
def uas_tst(modelo, vocab_words, vocab_tags, gold_data):
    """Return the fraction of tokens whose predicted head equals the gold head.

    Args:
        modelo: model handed through unchanged to `predict_tst`.
        vocab_words: word -> id mapping handed through to `predict_tst`.
        vocab_tags: tag -> id mapping handed through to `predict_tst`.
        gold_data: iterable of sentences; each sentence is an iterable of
            (word, tag, head) triples.

    Returns:
        correct / total as a float in [0, 1]. Returns 0.0 when no token was
        scored (empty `gold_data`, or only one-token sentences) instead of
        raising ZeroDivisionError.
    """
    correct, total = 0, 0
    for sentence in gold_data:
        words, tags, heads = [], [], []
        for word, tag, head in sentence:
            words.append(word)
            tags.append(tag)
            heads.append(head)

        preds = predict_tst(modelo, vocab_words, vocab_tags, words, tags)

        # Position 0 is excluded from scoring (presumably the artificial root
        # token added by Dataset -- confirm against the data loader).
        for i in range(1, len(preds)):
            if heads[i] == preds[i]:
                correct += 1
            total += 1

    if total == 0:
        # Nothing to score: report 0.0 rather than dividing by zero.
        return 0.0
    return correct / total

def predict_tst(modelo, vocab_words, vocab_tags, words, tags):
    """Greedily parse one sentence with ArcStandardParser and return its result.

    Args:
        modelo: object whose `forward(words, tags, feats)` returns the scores
            over parser moves; the arg-max over the whole output is taken.
        vocab_words: word -> id mapping; '<unk>' is looked up for unknown words.
        vocab_tags: tag -> id mapping; '<pad>' is looked up for unknown tags.
        words: sequence of word strings. Left unmodified (the previous version
            overwrote the caller's list in place with ids).
        tags: sequence of tag strings. Left unmodified, as above.

    Returns:
        `config[2]` of the final parser configuration. `uas_tst` compares it
        position by position with the gold heads, so it is presumably the list
        of predicted head indices -- confirm against ArcStandardParser.
    """
    # Map strings to ids in fresh lists so the caller's sequences stay intact.
    # The fallback keys are only looked up when actually needed, as before.
    word_ids = [vocab_words[w] if w in vocab_words else vocab_words['<unk>'] for w in words]
    tag_ids = [vocab_tags[t] if t in vocab_tags else vocab_tags['<pad>'] for t in tags]

    # Add a leading batch dimension of size 1.
    word_tensor = torch.LongTensor(word_ids).unsqueeze(0)
    tag_tensor = torch.LongTensor(tag_ids).unsqueeze(0)

    parser = ArcStandardParser()
    config = parser.initial_config(word_tensor.shape[1])
    # NOTE(review): the model's train/eval mode is not touched here; if the model
    # uses dropout or similar, callers may want to call modelo.eval() first.
    while not parser.is_final_config(config):
        feats = get_feats(config, word_tensor).unsqueeze(0)
        valid_moves = parser.valid_moves(config)

        with torch.no_grad():
            scores = modelo.forward(word_tensor, tag_tensor, feats)
        # Convert the 0-d tensor to a plain int so the membership test and
        # next_config receive the same type as the fallback move below.
        move = torch.argmax(scores).item()

        if move not in valid_moves:
            # The model chose a move that is not allowed in this configuration:
            # fall back to the last entry of valid_moves.
            move = valid_moves[-1]
        config = parser.next_config(config, move)

    return config[2]

In [None]:
# Arabic (projectivized treebank files): train the parser, then score it on the dev split.
dep_train_arabic_data = Dataset("projectivized_arabic_train.conllu")
dep_dev_arabic_data = Dataset("projectivized_arabic_dev.conllu")
word_vocab, tag_vocab = make_vocabs_head_tagger(dep_train_arabic_data)
# Pass the dev set as the second positional argument, as the Spanish and Swedish cells do;
# the previous one-argument call did not match how train_model is invoked elsewhere.
model_BB = train_model(dep_train_arabic_data, dep_dev_arabic_data)
# Stand-alone dev evaluation; an earlier run was noted as scoring 71%.
print('{:.4f}'.format(uas_tst(model_BB, word_vocab, tag_vocab, dep_dev_arabic_data)))