In [1]:
from functools import partial
from datasets import load_dataset
import torch
import torch.nn as nn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class ArcStandard:
    def __init__(self, sentence):

        # sentence is the input for which we want to build our Arc-Standard
        self.sentence = sentence

        # here we create the buffer having an array of indexes with the same length as the sentence
        # basically, each word has its own index in this buffer
        # we have initialized the buffer having all the words in the sentence

        self.buffer = [i for i in range(len(self.sentence))] 

        # initialize the stack empty 
        
        self.stack = []

        # representation of the tree
        # every word will have a -1 assigned -> no father has been assigned yet

        self.arcs = [-1 for _ in range(len(self.sentence))]

        # three shift moves to initialize the stack

        # means that in the stack now is the ROOT
        # self.shift() it calls a method that implements this operation; we will look at it after 

        self.shift() 

        # means that in the stack now is the ROOT and He; sigma1 is He and sigma2 is the ROOT

        self.shift() 

        # until now we cannnot attach He to ROOT, if we cannot do that we add another element to the stack;
        # so, perform another shift
        # now, in the stack we have ROOT, He, began
        if len(self.sentence) > 2:
          self.shift()

    def shift(self):
       
     b1 = self.buffer[0]
     self.buffer = self.buffer[1:]
     self.stack.append(b1)
    
    def left_arc(self): 

     o1 = self.stack.pop()
     o2 = self.stack.pop()
     self.arcs[o2] = o1
     self.stack.append(o1)

     if len(self.stack) < 2 and len(self.buffer) > 0:
        self.shift()
    

    def right_arc(self):
     o1 = self.stack.pop()
     o2 = self.stack.pop()
     self.arcs[o1] = o2
     self.stack.append(o2)

     if len(self.stack) <2 and len(self.buffer) > 0:
        self.shift()
    
    def is_tree_final(self):
     return len(self.stack) == 1 and len(self.buffer) == 0
    

    def print_configuration(self):

      s = [self.sentence[i] for i in self.stack]
      b = [self.sentence[i] for i in self.buffer]
      print(s,b)
      print(self.arcs)
      

In [3]:
sentence = ["<ROOT>", "He","began","to","write","again","."]
gold = [-1, 2, 0, 4, 2, 4, 2]

parser = ArcStandard(sentence)
parser.print_configuration()

['<ROOT>', 'He', 'began'] ['to', 'write', 'again', '.']
[-1, -1, -1, -1, -1, -1, -1]


In [4]:
parser.left_arc()
parser.print_configuration()

['<ROOT>', 'began'] ['to', 'write', 'again', '.']
[-1, 2, -1, -1, -1, -1, -1]


In [5]:
parser.shift()
parser.print_configuration()

['<ROOT>', 'began', 'to'] ['write', 'again', '.']
[-1, 2, -1, -1, -1, -1, -1]


In [6]:
parser.right_arc()
parser.print_configuration()

# we are asking the parser to do this action of the right_arc; "to" has as a paretn "write" and "began"

['<ROOT>', 'began'] ['write', 'again', '.']
[-1, 2, -1, 2, -1, -1, -1]


In [7]:
class Oracle:
 def __init__(self, parser, gold_tree):
  self.parser = parser
  self.gold = gold_tree

 def is_left_arc_gold(self):
  
  # we can do the left arc if sigma2 is the children of the sigma1
  # this means that u have assigned to sigma2 all of its children and is ok to assing its parent -> static oracle
  
  # here we get our sigma1 and sigma2

  o1 = self.parser.stack[len(self.parser.stack)-1]
  o2 = self.parser.stack[len(self.parser.stack)-2]


  if self.gold[o2] == o1:
   return True
  return False

 def is_shift_gold(self):
 
  if len(self.parser.buffer) == 0:
   return False
 
  if (self.is_left_arc_gold() or self.is_right_arc_gold()):
   return False
 
  return True

 def is_right_arc_gold(self):
   o1 = self.parser.stack[len(self.parser.stack)-1]
   o2 = self.parser.stack[len(self.parser.stack)-2]

   if self.gold[o1] != o2:
    return False
   
   # we need to check that no children of sigma1 are in the rest of the buffer
   
   for i in self.parser.buffer:
    if self.gold[i] == o1:
     return False
   return True

In [8]:
sentence = ["<ROOT>", "He","began","to","write","again","."]
gold = [-1, 2, 0, 4, 2, 4, 2]

parser = ArcStandard(sentence)
oracle = Oracle(parser, gold)

parser.print_configuration()

['<ROOT>', 'He', 'began'] ['to', 'write', 'again', '.']
[-1, -1, -1, -1, -1, -1, -1]


In [9]:
print("Left Arc: " ,oracle.is_left_arc_gold())
print("Right Arc: ",oracle.is_right_arc_gold())
print("Shift: ",oracle.is_shift_gold())


Left Arc:  True
Right Arc:  False
Shift:  False


In [10]:
# oracle tells us that hte next move is the left_arc wqe do it and ask again the Oracle
parser.left_arc()
parser.print_configuration()

['<ROOT>', 'began'] ['to', 'write', 'again', '.']
[-1, 2, -1, -1, -1, -1, -1]


In [11]:
print("Left Arc: " ,oracle.is_left_arc_gold())
print("Right Arc: ",oracle.is_right_arc_gold())
print("Shift: ",oracle.is_shift_gold())

Left Arc:  False
Right Arc:  False
Shift:  True


In [12]:
while not parser.is_tree_final():
    if oracle.is_shift_gold():
        parser.shift()
    elif oracle.is_left_arc_gold():
        parser.left_arc()
    elif oracle.is_right_arc_gold():
        parser.right_arc()
        
print(parser.arcs)
print(gold)
    

[-1, 2, 0, 4, 2, 4, 2]
[-1, 2, 0, 4, 2, 4, 2]


As for now we have build the parser and Oracle, we need to use them to guide a neural network model. We will give the sentence to BiLSTM to obtain some contextual representation and then use a Feed Forward to score the parser configuration and what is the best transition to do as a next step.

Dataset 

In [13]:
dataset = load_dataset('universal_dependencies', 'en_lines', split = 'train')

Found cached dataset universal_dependencies (C:/Users/roven/.cache/huggingface/datasets/universal_dependencies/en_lines/2.7.0/1ac001f0e8a0021f19388e810c94599f3ac13cc45d6b5b8c69f7847b2188bdf7)


In [14]:
# info about the length of dataset
print(len(dataset))

# what kind of information is stored in this dataset

print(dataset[1].keys())

# print a sentence and its tokens and gold dependency tree

print(dataset[1]["tokens"])
print(dataset[1]["head"])



3176
dict_keys(['idx', 'text', 'tokens', 'lemmas', 'upos', 'xpos', 'feats', 'head', 'deprel', 'deps', 'misc'])
['About', 'ANSI', 'SQL', 'query', 'mode']
['5', '5', '2', '5', '0']


In [15]:
# returns whether a tree is projective or not

def is_projective(tree):
    for i in range(len(tree)):
        if tree[i]==-1:
            continue
        left = min(i, tree[i])
        right = max(i, tree[i])

        for j in range(0, left):
            if tree[j] > left and tree[j] < right:
                return False
        for j in range(left+1, right):
            if tree[j] < left or tree[j] > right:
                return False
        for j in range(right+1, len(tree)):
            if tree[j] > left  and tree[j] < right:
                return False
    return True


In [16]:
def create_dict(dataset, threshold = 3):
    dic = {}

    for sample in dataset:
        for word in sample['tokens']:
            if word in dic:
                dic[word] += 1
            else:
                dic[word] = 1
    
    map = {}
    map["<pad>"] = 0
    map["<ROOT>"] = 1
    map["<unk>"] = 2

    next_indx = 3
    for word in dic.keys():
        if dic[word] >= threshold:
            map[word] = next_indx
            next_indx += 1
    return map

In [17]:
train_dataset = load_dataset('universal_dependencies', 'en_lines', split = 'train')
dev_dataset = load_dataset('universal_dependencies', 'en_lines', split = 'validation')
test_dataset = load_dataset('universal_dependencies', 'en_lines', split = 'test')

# remove non-projective sentences: heads in the gold 
# tree are strings, we convert them to int

train_dataset =[sample for sample in train_dataset if is_projective([-1] + [int(head) for head in sample['head']])]

# create embedding dictionary

emb_dictionary = create_dict(train_dataset)


print("Number of samples:")
print("Train:\t", len(train_dataset))
print("Dev:\t", len(dev_dataset))
print("Test:\t", len(test_dataset))

Found cached dataset universal_dependencies (C:/Users/roven/.cache/huggingface/datasets/universal_dependencies/en_lines/2.7.0/1ac001f0e8a0021f19388e810c94599f3ac13cc45d6b5b8c69f7847b2188bdf7)
Found cached dataset universal_dependencies (C:/Users/roven/.cache/huggingface/datasets/universal_dependencies/en_lines/2.7.0/1ac001f0e8a0021f19388e810c94599f3ac13cc45d6b5b8c69f7847b2188bdf7)
Found cached dataset universal_dependencies (C:/Users/roven/.cache/huggingface/datasets/universal_dependencies/en_lines/2.7.0/1ac001f0e8a0021f19388e810c94599f3ac13cc45d6b5b8c69f7847b2188bdf7)


Number of samples:
Train:	 2922
Dev:	 1032
Test:	 1035


In [18]:
def process_sample(sample, get_gold_path = False):

  # put sentence and gold tree in our format
  sentence = ["<ROOT>"] + sample["tokens"]
  gold = [-1] + [int(i) for i in sample["head"]]  #heads in the gold tree are strings, we convert them to int
  
  # embedding ids of sentence words
  enc_sentence = [emb_dictionary[word] if word in emb_dictionary else emb_dictionary["<unk>"] for word in sentence]

  # gold_path and gold_moves are parallel arrays whose elements refer to parsing steps
  gold_path = []   # record two topmost stack tokens and first buffer token for current step
  gold_moves = []  # contains oracle (canonical) move for current step: 0 is left, 1 right, 2 shift

  if get_gold_path:  # only for training
    parser = ArcStandard(sentence)
    oracle = Oracle(parser, gold)

    while not parser.is_tree_final():
      
      # save configuration
      configuration = [parser.stack[len(parser.stack)-2], parser.stack[len(parser.stack)-1]]
      if len(parser.buffer) == 0:
        configuration.append(-1)
      else:
        configuration.append(parser.buffer[0])  
      gold_path.append(configuration)

      # save gold move
      if oracle.is_left_arc_gold():  
        gold_moves.append(0)
        parser.left_arc()
      elif oracle.is_right_arc_gold():
        parser.right_arc()
        gold_moves.append(1)
      elif oracle.is_shift_gold():
        parser.shift()
        gold_moves.append(2)

  return enc_sentence, gold_path, gold_moves, gold

    # gold_path stores the configurations of the stack and the buffer
    # gold_moves stores the correct gold move at each configuration

    


In [19]:
def prepare_batch(batch_data, get_gold_path = False):
    data = [process_sample(s, get_gold_path = get_gold_path) for s in batch_data]

    sentences = [s[0] for s in data]
    paths = [s[1] for s in data]
    moves = [s[2] for s in data]
    trees = [s[3] for s in data]

    return sentences, paths, moves, trees

In [20]:
BATCH_SIZE = 32

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size= BATCH_SIZE, shuffle = True, collate_fn = partial(prepare_batch, get_gold_path = True))
dev_dataloader = torch.utils.data.DataLoader(dev_dataset, batch_size= BATCH_SIZE, shuffle = True, collate_fn = partial(prepare_batch))
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size= BATCH_SIZE, shuffle = True, collate_fn = partial(prepare_batch))

In [21]:
# define hyperparameters of NN
EMBEDDING_SIZE = 200
LSTM_SIZE = 200
LSTM_LAYERS = 1
MLP_SIZE = 200
DROPOUT = 0.2
EPOCHS = 15
LR = 0.001


In [None]:
"""
class Net(nn.Module):

    def __init__(self, device):
        super(Net, self).__init__()
        self.device = device
        self.embeddings = nn.Embedding(len(emb_dictionary), EMBEDDING_SIZE, padding_idx = emb_dictionary["<pad>"])

        # initialize bi-LSTM

        self.lstm = nn.LSTM(EMBEDDING_SIZE, LSTM_SIZE, num_layers = LSTM_LAYERS, bidirectional = True, dropout = DROPOUT)

        # initialize feedforward

        self.w1 = torch.nn.Linear(6*LSTM_SIZE, MLP_SIZE, bias=True)
        self.activation = torch.nn.Tanh()
        self.w2 = torch.nn.Linear(MLP_SIZE, 3, bias = True)
        self.softmax = torch.nn. Softmax(dim = -1)

        self.dropout = torch.nn.Dropout(DROPOUT)


    def forward(self, x, paths):
            # get the embeddings
            # x - sentence, index of the embeddings (batch of sentences in this case)
            # paths - parser configuration

        x = [self.dropout(self.embeddings(torch.tensor(i).to(self.device))) for i in x]

            # run bi-lstm
            # gets as input the embeddings and then as output we will have some embeddings that are contextualized

        h = self.lstm_pass(x)

            # for each parser configuration that we need to score we arrange from the
            # output of the bi - lstm the correct input for the feedforward
            # we have the sentence in contextualized representation and we want to get all the parser configuration;
            # 2 elemetns of the stack and 1 of the buffer, we will get these 3 embeddings, concatenate them together and then 
            #  pass them into feed forward (multilayer perceptron)

        mlp_input = self.get_mlp_input(paths, h)

            # run the feedforward and get the scores for each possible action
            # returns 3 float numbers 
        out = self.mlp(mlp_input)

        return out 
        

    def lstm_pass(self, x):
        x = torch.nn.utils.rnn.pack_sequence(x, enforce_sorted = False)
        h, (h_0, c_0) = self.lstm(x)
        h, h_sizes = torch.nn.utils.rnn.pad_packed_sequence(h)
        return h
        

    def get_mlp_input(self, configurations, h):
        mlp_input = []
        zero_tensor = torch.zeros(2*LSTM_SIZE, requires_grad=False).to(self.device)
        for i in range(len(configurations)):
            for j in configurations[i]:
                mlp_input.append(torch.cat([zero_tensor if j[0]==-1 else h[j[0]][i], zero_tensor if j[1]==-1 else h[j[1]][i], zero_tensor if j[2]==-1 else h[j[2]][i]]))
        mlp_input = torch.stack(mlp_input).to(self.device)
            
        return mlp_input
        
    def mlp(self, x):
        return self.softmax(self.w2(self.dropout(self.activation(self.w1(self.dropout(x))))))
        

        # we use this function at inference time. We run the parser and at each step 
        # we pick as next move the one with the highest score assigned by  the model
        # do a move, get a parser configuration and predict the next

    def infere(self, x):

        parsers = [ArcStandard(i) for i in x]

        x = [self.embeddings(torch.tensor(i).to(self.device)) for i in x]
            
        h = self.lstm_pass(x)

        while not self.parsed_all(parsers):

                # get the current configuration and score next move

            configurations = self.get_configurations(parsers)
            mlp_input = self.get_mlp_input(configurations, h)
            mlp_out = self.mlp(mlp_input)

                # take the next parsing step
                # this method will chose which one of the moves to do next; 
                # here also we add contraints in order to not take wrong actions

            self.parse_step(parsers, mlp_out)

                # return the predicted dependency tree

        return [parser.arcs for parser in parsers]
            
    def get_configurations(self, parsers):
        configurations = []

        for parser in parsers:
            if parser.is_tree_final():
                conf = [-1, -1, -1]
            else:
                conf = [parser.stack[len(parser.stack)-2], parser.stack[len(parser.stack)-1]]
                if len(parser.buffer) == 0:
                        conf.append(-1)
                else:
                    conf.append(parser.buffer[0])
            configurations.append([conf])
        return configurations

    def parsed_all(self, parsers):
        for parser in parsers:
            if not parser.is_tree_final():
                return False
        return True

              # In this function we select and perform the next move according to the scores obtained.
              # We need to be careful and select correct moves, e.g. don't do a shift if the buffer
              # is empty or a left arc if σ2 is the ROOT. For clarity sake we didn't implement
              # these checks in the parser so we must do them here. This renders the function quite ugly

    def parse_step(self, parsers, moves):

        moves_argm = moves.argmax(-1)
        for i in range(len(parsers)):
            if parsers[i].is_tree_final():
                continue
            else:
                if moves_argm[i] == 0:
                    if parsers[i].stack[len(parsers[i].stack)-2] != 0:
                        parsers[i].left_arc()
                    else:
                        if len(parsers[i].buffer) > 0:
                            parsers[i].shift()
                        else:
                            parsers[i].right_arc()
                elif moves_argm[i] ==1:
                    if parsers[i].stack[len(parsers[i].stack)-2] ==0 and len(parsers[i].buffer) >0:
                        parsers[i].shift()
                    else:
                        parsers[i].right_arc()
                elif moves_argm[i] == 2:
                    if len(parsers[i].buffer) > 0:
                        parsers[i].shift()
                    else:
                        if moves[i][0] > moves[i][1]:
                            if parsers[i].stack[len(parsers[i].stack)-2] != 0:

                                parsers[i].left_arc()
                            else:
                                parsers[i].right_arc()
                        else:
                            parsers[i].right_arc()

                                    
                

"""                            


In [23]:
class Net(nn.Module):

  def __init__(self, device):
    super(Net, self).__init__()
    self.device = device
    self.embeddings = nn.Embedding(len(emb_dictionary), EMBEDDING_SIZE, padding_idx=emb_dictionary["<pad>"])
    
    # initialize bi-LSTM
    self.lstm = nn.LSTM(EMBEDDING_SIZE, LSTM_SIZE, num_layers = LSTM_LAYERS, bidirectional=True, dropout=DROPOUT)

    # initialize feedforward
    self.w1 = torch.nn.Linear(6*LSTM_SIZE, MLP_SIZE, bias=True)
    self.activation = torch.nn.Tanh()
    self.w2 = torch.nn.Linear(MLP_SIZE, 3, bias=True)
    self.softmax = torch.nn.Softmax(dim=-1)

    self.dropout = torch.nn.Dropout(DROPOUT)
  
  
  def forward(self, x, paths):
    # get the embeddings 
    x = [self.dropout(self.embeddings(torch.tensor(i).to(self.device))) for i in x]

    # run the bi-lstm
    h = self.lstm_pass(x)

    # for each parser configuration that we need to score we arrange from the
    # output of the bi-lstm the correct input for the feedforward
    mlp_input = self.get_mlp_input(paths, h)

    # run the feedforward and get the scores for each possible action
    out = self.mlp(mlp_input)

    return out

  def lstm_pass(self, x):
    x = torch.nn.utils.rnn.pack_sequence(x, enforce_sorted=False)
    h, (h_0, c_0) = self.lstm(x)
    h, h_sizes = torch.nn.utils.rnn.pad_packed_sequence(h) # size h: (length_sentences, batch, output_hidden_units)
    return h

  def get_mlp_input(self, configurations, h):
    mlp_input = []
    zero_tensor = torch.zeros(2*LSTM_SIZE, requires_grad=False).to(self.device)
    for i in range(len(configurations)): # for every sentence in the batch
      for j in configurations[i]: # for each configuration of a sentence 
        mlp_input.append(torch.cat([zero_tensor if j[0]==-1 else h[j[0]][i], zero_tensor if j[1]==-1 else h[j[1]][i], zero_tensor if j[2]==-1 else h[j[2]][i]]))
    mlp_input = torch.stack(mlp_input).to(self.device)
    return mlp_input

  def mlp(self, x):
    return self.softmax(self.w2(self.dropout(self.activation(self.w1(self.dropout(x))))))

  # we use this function at inference time. We run the parser and at each step 
  # we pick as next move the one with the highest score assigned by the model
  def infere(self, x):

    parsers = [ArcStandard(i) for i in x]

    x = [self.embeddings(torch.tensor(i).to(self.device)) for i in x]

    h = self.lstm_pass(x)

    while not self.parsed_all(parsers):
      # get the current configuration and score next moves
      configurations = self.get_configurations(parsers)
      mlp_input = self.get_mlp_input(configurations, h)
      mlp_out = self.mlp(mlp_input)
      # take the next parsing step
      self.parse_step(parsers, mlp_out)

    # return the predicted dependency tree
    return [parser.arcs for parser in parsers]

  def get_configurations(self, parsers):
    configurations = []

    for parser in parsers:
      if parser.is_tree_final():
        conf = [-1, -1, -1]
      else:
        conf = [parser.stack[len(parser.stack)-2], parser.stack[len(parser.stack)-1]]
        if len(parser.buffer) == 0:
          conf.append(-1)
        else:
          conf.append(parser.buffer[0])  
      configurations.append([conf])

    return configurations

  def parsed_all(self, parsers):
    for parser in parsers:
      if not parser.is_tree_final():
        return False
    return True

  # In this function we select and perform the next move according to the scores obtained.
  # We need to be careful and select correct moves, e.g. don't do a shift if the buffer
  # is empty or a left arc if σ2 is the ROOT. For clarity sake we didn't implement
  # these checks in the parser so we must do them here. This renders the function quite ugly
  def parse_step(self, parsers, moves):
    moves_argm = moves.argmax(-1)
    for i in range(len(parsers)):
      if parsers[i].is_tree_final():
        continue
      else:
        if moves_argm[i] == 0:
          if parsers[i].stack[len(parsers[i].stack)-2] != 0:
            parsers[i].left_arc()
          else:
            if len(parsers[i].buffer) > 0:
              parsers[i].shift()
            else:
              parsers[i].right_arc()
        elif moves_argm[i] == 1:
          if parsers[i].stack[len(parsers[i].stack)-2] == 0 and len(parsers[i].buffer)>0:
            parsers[i].shift()
          else:
            parsers[i].right_arc()
        elif moves_argm[i] == 2:
          if len(parsers[i].buffer) > 0:
            parsers[i].shift()
          else:
            if moves[i][0] > moves[i][1]:
              if parsers[i].stack[len(parsers[i].stack)-2] != 0:
                parsers[i].left_arc()
              else:
                parsers[i].right_arc()
            else:
              parsers[i].right_arc()

In [28]:
def evaluate(gold, preds): 
  total = 0
  correct = 0

  for g, p in zip(gold, preds):
    for i in range(1,len(g)):
      total += 1
      if g[i] == p[i]:
        correct += 1

  return correct/total

In [29]:
def train(model, dataloader, criterion, optimizer):
  model.train()
  total_loss = 0
  count = 0

  for batch in dataloader:
    optimizer.zero_grad()
    sentences, paths, moves, trees = batch

    out = model(sentences, paths)
    labels = torch.tensor(sum(moves, [])).to(device) #sum(moves, []) flatten the array
    loss = criterion(out, labels)

    count +=1
    total_loss += loss.item()

    loss.backward()
    optimizer.step()
  
  return total_loss/count

def test(model, dataloader):
  model.eval()

  gold = []
  preds = []

  for batch in dataloader:
    sentences, paths, moves, trees = batch
    with torch.no_grad():
      pred = model.infere(sentences)

      gold += trees
      preds += pred
  
  return evaluate(gold, preds)

In [31]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)
model = Net(device)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)


for epoch in range(EPOCHS):
  avg_train_loss = train(model, train_dataloader, criterion, optimizer)
  val_uas = test(model, dev_dataloader)

  print("Epoch: {:3d} | avg_train_loss: {:5.3f} | dev_uas: {:5.3f} |".format( epoch, avg_train_loss, val_uas))

Device: cpu
Epoch:   0 | avg_train_loss: 0.828 | dev_uas: 0.590 |
Epoch:   1 | avg_train_loss: 0.738 | dev_uas: 0.646 |
Epoch:   2 | avg_train_loss: 0.716 | dev_uas: 0.672 |
Epoch:   3 | avg_train_loss: 0.704 | dev_uas: 0.685 |
Epoch:   4 | avg_train_loss: 0.693 | dev_uas: 0.693 |
Epoch:   5 | avg_train_loss: 0.685 | dev_uas: 0.703 |
Epoch:   6 | avg_train_loss: 0.678 | dev_uas: 0.709 |
Epoch:   7 | avg_train_loss: 0.673 | dev_uas: 0.708 |
Epoch:   8 | avg_train_loss: 0.669 | dev_uas: 0.715 |
Epoch:   9 | avg_train_loss: 0.665 | dev_uas: 0.719 |
Epoch:  10 | avg_train_loss: 0.662 | dev_uas: 0.718 |
Epoch:  11 | avg_train_loss: 0.658 | dev_uas: 0.722 |
Epoch:  12 | avg_train_loss: 0.653 | dev_uas: 0.730 |
Epoch:  13 | avg_train_loss: 0.649 | dev_uas: 0.731 |
Epoch:  14 | avg_train_loss: 0.646 | dev_uas: 0.733 |


In [32]:
test_uas = test(model, test_dataloader)
print("test_uas: {:5.3f}".format(test_uas))

test_uas: 0.736
