# Script Setup

In [None]:
# Mount drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install necessary packages
!pip install -r drive/MyDrive/nlp_sp/env/requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Import block
import torch
import torch.nn as nn
from transformers import BertModel
from transformers import AutoTokenizer
from typing import Dict, List
import random
from tqdm import tqdm
import numpy as np
from numpy import logical_and, sum as t_sum
import pandas as pd
from typing import Dict, List
from sklearn.model_selection import train_test_split


In [None]:
# Device setup for CUDA

'''

Important: Every tensor, layer, and model needs to be sent to the same device using to()
Ex: 
  ten = torch.ones(4,5).to(device)

'''

# Get the best device to run on
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)


cuda


# Hyperparameters

## Define hyparameters near top to make changing them easier

In [None]:
# Number of training loops
epochs = 500

# Learning rate - should be very small when using Adam
LR = .0001

# Dropout probability
dropout_prob = 0.2

# Batch size
batch_size = 128

# Data Preprocessing

## Data Format

**Data:** `arguments-training/validation/testing.tsv`
(5220 arguments)
- Argument ID
- Conclusion 
- Stance (e.g., in favor, against)
- Premise (justification for conclusion)

**Labels:** `labels-training/validation/testing.tsv` 
(20 binary value labels per argument)
- Argument ID
- Self-direction: thought
- Self-direction: action
- Stimulation
- Hedonism
- Achievement
- Power: dominance
- Power: resources
- Face
- Security: personal
- Security: societal
- Tradition
- Conformity: rules
- Conformity: interpersonal
- Humility
- Benevolence: caring
- Benevolence: dependability
- Universalism: concern
- Universalism: nature
- Universalism: tolerance
- Universalism: objectivity

**Access:** https://doi.org/10.5281/zenodo.6814563

## Load Data

In [None]:
# training arguments
train_args_df = pd.read_csv('/content/drive/MyDrive/nlp_sp/data/arguments-training.tsv', sep='\t')
# view structure
train_args_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5220 entries, 0 to 5219
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Argument ID  5220 non-null   object
 1   Conclusion   5220 non-null   object
 2   Stance       5220 non-null   object
 3   Premise      5220 non-null   object
dtypes: object(4)
memory usage: 163.2+ KB


In [None]:
# training labels
train_labs_df = pd.read_csv('/content/drive/MyDrive/nlp_sp/data/labels-training.tsv', sep='\t')
# view structure
train_labs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5220 entries, 0 to 5219
Data columns (total 21 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   Argument ID                 5220 non-null   object
 1   Self-direction: thought     5220 non-null   int64 
 2   Self-direction: action      5220 non-null   int64 
 3   Stimulation                 5220 non-null   int64 
 4   Hedonism                    5220 non-null   int64 
 5   Achievement                 5220 non-null   int64 
 6   Power: dominance            5220 non-null   int64 
 7   Power: resources            5220 non-null   int64 
 8   Face                        5220 non-null   int64 
 9   Security: personal          5220 non-null   int64 
 10  Security: societal          5220 non-null   int64 
 11  Tradition                   5220 non-null   int64 
 12  Conformity: rules           5220 non-null   int64 
 13  Conformity: interpersonal   5220 non-null   int6

## Data Prep

In [None]:
# convert multiple label columns to one label list column
train_labs_df['labels'] = train_labs_df.loc[:, 'Self-direction: thought':'Universalism: objectivity'].values.tolist()

In [None]:
# label distribution for full training data
print('Self-direction: thought =', sum(train_labs_df['Self-direction: thought']))
print('Self-direction: action =', sum(train_labs_df['Self-direction: action']))
print('Stimulation =', sum(train_labs_df['Stimulation']))
print('Hedonism =', sum(train_labs_df['Hedonism']))
print('Achievement = ', sum(train_labs_df['Achievement']))
print('Power: dominance =', sum(train_labs_df['Power: dominance']))
print('Power: resources =', sum(train_labs_df['Power: resources']))
print('Face =', sum(train_labs_df['Face']))
print('Security: personal =', sum(train_labs_df['Security: personal']))
print('Security: societal =', sum(train_labs_df['Security: societal']))
print('Tradition =', sum(train_labs_df['Tradition']))
print('Conformity: rules =', sum(train_labs_df['Conformity: rules']))
print('Conformity: interpersonal =', sum(train_labs_df['Conformity: interpersonal']))
print('Humility =', sum(train_labs_df['Humility']))
print('Benevolence: caring =', sum(train_labs_df['Benevolence: caring']))
print('Benevolence: dependability =', sum(train_labs_df['Benevolence: dependability']))
print('Universalism: concern =', sum(train_labs_df['Universalism: concern']))
print('Universalism: nature =', sum(train_labs_df['Universalism: nature']))
print('Universalism: tolerance =', sum(train_labs_df['Universalism: tolerance']))
print('Universalism: objectivity =', sum(train_labs_df['Universalism: objectivity']))

print('\nTotal number of samples = ', len(train_labs_df))

Self-direction: thought = 913
Self-direction: action = 1332
Stimulation = 312
Hedonism = 202
Achievement =  1400
Power: dominance = 461
Power: resources = 566
Face = 374
Security: personal = 1961
Security: societal = 1627
Tradition = 598
Conformity: rules = 1222
Conformity: interpersonal = 217
Humility = 438
Benevolence: caring = 1500
Benevolence: dependability = 766
Universalism: concern = 1992
Universalism: nature = 358
Universalism: tolerance = 709
Universalism: objectivity = 937

Total number of samples =  5220


In [None]:
# combine dfs to add label list to data dictionary
train_merged_df = pd.merge(train_args_df, train_labs_df, on='Argument ID')
train_merged_df = train_merged_df.drop(columns=['Self-direction: thought',
                                                'Self-direction: action',
                                                'Stimulation',
                                                'Hedonism',
                                                'Achievement',
                                                'Power: dominance',
                                                'Power: resources',
                                                'Face',
                                                'Security: personal',
                                                'Security: societal',
                                                'Tradition',
                                                'Conformity: rules',
                                                'Conformity: interpersonal',
                                                'Humility',
                                                'Benevolence: caring',
                                                'Benevolence: dependability',
                                                'Universalism: concern',
                                                'Universalism: nature',
                                                'Universalism: tolerance',
                                                'Universalism: objectivity'])

In [None]:
# view structure
train_merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5220 entries, 0 to 5219
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Argument ID  5220 non-null   object
 1   Conclusion   5220 non-null   object
 2   Stance       5220 non-null   object
 3   Premise      5220 non-null   object
 4   labels       5220 non-null   object
dtypes: object(5)
memory usage: 244.7+ KB


## Train/Val Split

In [None]:
# split train data into 80/20 train/val
train_data, val_data = train_test_split(train_merged_df, test_size=0.2, random_state=4)

In [None]:
# convert each row to a dictionary -> List[Dict]
train_data = train_data.to_dict(orient='records')
val_data = val_data.to_dict(orient='records')
full_data = train_merged_df.to_dict(orient='records')
# print examples
print('training example:\n', train_data[0])
print('validation example:\n', val_data[0])
print('full example:\n', full_data[0])

## Tokenization

In [None]:
# function to load samples from HuggingFace dataset to be batched and encoded

class BatchTokenizer:
    """Tokenizes and pads a batch of input sentences."""
    """HuggingFace docs: https://huggingface.co/transformers/v3.0.2/preprocessing.html"""

    def __init__(self):
        """Initializes the tokenizer

        Args:
            pad_symbol (Optional[str], optional): The symbol for a pad. Defaults to "<P>".
        """
        self.hf_tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-small")
    
    # HuggingFace tokenizer will join data with sentence separator token
    # and match batches of tokenized and encoded sentences
    def get_sep_token(self,):
        return self.hf_tokenizer.sep_token

    # call method can only take a pair of inputs, but we have three
    # conclusion batch, stance batch, and premise batch
    # so we create a hack
    #def __call__(self, con_batch: List[str], stan_batch: List[str], prem_batch: List[str]) -> List[List[str]]:

    def __call__(self, con_stan_batch: List[str], prem_batch: List[str]) -> List[List[str]]:  
        """Uses the huggingface tokenizer to tokenize and pad a batch.

        We return a dictionary of tensors per the huggingface model specification.

        Args:
            batch (List[str]): A List of sentence strings

        Returns:
            Dict: The dictionary of token specifications provided by HuggingFace
        """
        # The HF tokenizer will PAD for us, and additionally combine 
        # the two sentences deimited by the [SEP] token.
        enc = self.hf_tokenizer(
            con_stan_batch,
            prem_batch,
            #stan_batch,
            #prem_batch,
            padding=True,
            return_token_type_ids=False, # ignore with hack
            return_tensors='pt'
        )

        return enc

In [None]:
# define tokenizer
tokenizer = BatchTokenizer()

In [None]:
# example of use case for batch tokenizer without triplet hack (only two input types acceptable)
token_ex = tokenizer(*[['this is the conclusion with more words', 'this is also a conclusion'], ['this is the premise', 'this is the second premise']])
print(f"{token_ex}\n")
tokenizer.hf_tokenizer.batch_decode(token_ex['input_ids'])

{'input_ids': tensor([[  101,  2023,  2003,  1996,  7091,  2007,  2062,  2616,   102,  2023,
          2003,  1996, 18458,   102],
        [  101,  2023,  2003,  2036,  1037,  7091,   102,  2023,  2003,  1996,
          2117, 18458,   102,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]])}



['[CLS] this is the conclusion with more words [SEP] this is the premise [SEP]',
 '[CLS] this is also a conclusion [SEP] this is the second premise [SEP] [PAD]']

In [None]:
# example of use case for batch tokenizer with triplet hack
token_ex2 = tokenizer(*[['this is the conclusion with more words [SEP] and a stance against', 'this is also a conclusion [SEP] with another stance that is in favor of'], ['this is the premise', 'this is the second premise']])
print(f"{token_ex2}\n")
tokenizer.hf_tokenizer.batch_decode(token_ex2['input_ids'])

{'input_ids': tensor([[  101,  2023,  2003,  1996,  7091,  2007,  2062,  2616,   102,  1998,
          1037, 11032,  2114,   102,  2023,  2003,  1996, 18458,   102,     0,
             0,     0],
        [  101,  2023,  2003,  2036,  1037,  7091,   102,  2007,  2178, 11032,
          2008,  2003,  1999,  5684,  1997,   102,  2023,  2003,  1996,  2117,
         18458,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}



['[CLS] this is the conclusion with more words [SEP] and a stance against [SEP] this is the premise [SEP] [PAD] [PAD] [PAD]',
 '[CLS] this is also a conclusion [SEP] with another stance that is in favor of [SEP] this is the second premise [SEP]']

## Batch

In [None]:
# function to generate triple-wise inputs

def generate_triplewise_input(dataset: List[Dict]) -> (List[str], List[str], List[str], List[str], List[List[int]]):
    """
    group all argument components and corresponding labels of the datapoints
    a datapoint is now a dictionary of 
    argument id, conclusion, stance, premise, and label list
    """

    # extract each observation from dictionary; save to list
    d_vals = []
    for i in range(len(dataset)):
        d_vals.append(list(dataset[i].values()))

    # store data items in lists by three categories by id
    id_lst = []    
    conclusion_lst = []
    stance_lst = []
    premise_lst = []

    # store labels in list of lists of 20 labels
    label_lst = []

    # generate separate lists from each observation
    for i in range(len(d_vals)):
        id_lst.append(d_vals[i][0])
        conclusion_lst.append(d_vals[i][1])
        stance_lst.append(d_vals[i][2])
        premise_lst.append(d_vals[i][3])
        label_lst.append(d_vals[i][4])

    # add [SEP] token before every stance in list
    stance_lst = [' [SEP] ' + s for s in stance_lst]

    return id_lst, conclusion_lst, stance_lst, premise_lst, label_lst

In [None]:
# apply function to generate triple-wise inputs and labels for batching

# training data
train_ids, train_conclusions, train_stances, train_premises, train_labels = generate_triplewise_input(train_data)

# validation data
val_ids, val_conclusions, val_stances, val_premises, val_labels = generate_triplewise_input(val_data)

# full data
full_ids, full_conclusions, full_stances, full_premises, full_labels = generate_triplewise_input(full_data)

In [None]:
# temporarily combine conclusions and stances separate with [SEP]
# use hack to merge tokenized conclusion batch, stance batch, and premise batch

# training data
train_conclusions_stances = []
for i in range(len(train_conclusions)):
  train_conclusions_stances.append(train_conclusions[i] + train_stances[i])

# validation data
val_conclusions_stances = []
for i in range(len(val_conclusions)):
  val_conclusions_stances.append(val_conclusions[i] + val_stances[i])  

In [None]:
# define functions to chunk data for batches

# for train labels
def chunk(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i: i+n]

# for train features
def chunk_multi(lst1, lst2, n):
    for i in range(0, len(lst1), n):
        yield lst1[i: i+n], lst2[i: i+n]

In [None]:
# apply function to batch input data 
# tokenize and encode simultaneously since we are using HuggingFace

# batch
train_input_batches = [b for b in chunk_multi(train_conclusions_stances, train_premises, batch_size)]
val_size = 1
val_input_batches = [b for b in chunk_multi(val_conclusions_stances, val_premises, val_size)]

# tokenize + encode
train_input_batches = [tokenizer(*batch).to(device) for batch in train_input_batches]
val_input_batches = [tokenizer(*batch).to(device) for batch in val_input_batches]

In [None]:
# check training data example
print(train_input_batches[0])
encoded_tst = tokenizer.hf_tokenizer.batch_decode(train_input_batches[0]['input_ids'])
encoded_tst[0]

{'input_ids': tensor([[  101,  2188, 29477,  ...,     0,     0,     0],
        [  101,  2057,  2323,  ...,     0,     0,     0],
        [  101,  2057,  2323,  ...,     0,     0,     0],
        ...,
        [  101,  2057,  2323,  ...,     0,     0,     0],
        [  101,  2057,  2323,  ...,     0,     0,     0],
        [  101,  2057,  2323,  ...,     0,     0,     0]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]], device='cuda:0')}


'[CLS] homeopathy brings more harm than good [SEP] against [SEP] homeopathy uses natural remedies that have little to no side affects on the body. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

In [None]:
# define function to batch class labels
# a single observation's label is a list of 20 labels

def encode_labels(labels: List[List[int]]) -> torch.FloatTensor:
    """Turns the batch of labels into a tensor

    Args:
        labels (List[List[int]]): List of all lists of labels in batch

    Returns:
        torch.FloatTensor: Tensor of all lists of labels in batch
    """
    
    return torch.LongTensor(labels)


In [None]:
# apply function to batch labels in same order as inputs
# batch
train_label_batches = [b for b in chunk(train_labels, batch_size)]
val_label_batches = [b for b in chunk(val_labels, val_size)]
# tokenize + encode
train_label_batches = [encode_labels(batch).to(device) for batch in train_label_batches]
val_label_batches = [encode_labels(batch).to(device) for batch in val_label_batches]

# Model

Below is the code to define our model as well as the training loop.

## Functions to Make Predictions

In [None]:
def make_prediction(logits: torch.Tensor) -> torch.Tensor:
  # This is equivalent to a threshold of 0.5
  return torch.round(logits)

def predict(model: torch.nn.Module, sents: torch.Tensor) -> List:
    logits = model(sents)
    return make_prediction(logits.cpu())

## Model Definition

In [None]:

# Function to initialize weights for the chain classifiers
def init_weights(layer):
    if isinstance(layer, nn.Linear):
        torch.nn.init.xavier_normal_(layer.weight)

class NLIClassifier(torch.nn.Module):
    def __init__(self, output_size: int, hidden_size: int, dropout_prob: float):
      
      # Basic initialization
      super().__init__()
      self.output_size = output_size
      self.hidden_size = hidden_size

      # Additional args
      self.dropout_prob = dropout_prob

      # Initialize BERT, which we use instead of a single embedding layer.
      self.bert = BertModel.from_pretrained("prajjwal1/bert-small").to(device)
      
      # Comment out these lines to unfreeze BERT params
      for param in self.bert.parameters():
          param.requires_grad = False
          
      # Get BERT's hiddem dim
      self.bert_hidden_dimension = self.bert.config.hidden_size
      
      
      # Single linear layer to project to hidden size
      self.hidden_layer = torch.nn.Linear(self.bert_hidden_dimension, self.hidden_size).to(device)
      
      # Use RELU regularization
      # TODO: Could try others
      self.relu = torch.nn.ReLU()

      '''

      We are doing multi-label classification using a chain classifier.
      For details, see: https://en.wikipedia.org/wiki/Multi-label_classification

      Setup a classifier chain for the 20 labels.
      To simplify code, just store them in a list and run through them sequentially.
      They will be interpreted in the same order as the training data:

      Self-direction: thought
      Self-direction: action
      Stimulation
      Hedonism
      Achievement
      Power: dominance
      Power: resources
      Face
      Security: personal
      Security: societal
      Tradition
      Conformity: rules
      Conformity: interpersonal
      Humility
      Benevolence: caring
      Benevolence: dependability
      Universalism: concern
      Universalism: nature
      Universalism: tolerance
      Universalism: objectivity

      '''

      self.chain = []
      for i in range(self.output_size):

        # To make it a chain, the prediction from the previous classifier is 
        # appended to the input and used as the input for the next classifier

        # Initialize each chain classifier
        # TODO: Try more layers per classifier
        # TODO: Could try bigger BERT model, but that would require more changes
        # TODO: Could unfreeze BERT weights
        
        # TODO: Hyperparameter tunings


        t = nn.Sequential(
            nn.Dropout(p=self.dropout_prob),
            nn.Linear(in_features=self.hidden_size + i, out_features = 1),
            #nn.LogSoftmax(dim=2)
            nn.Sigmoid()
        )
        self.chain.append(t.to(device))
        # Initialize the weights
        for c in self.chain:
          c.apply(init_weights)

    def encode_text(
        self,
        symbols: Dict
    ) -> torch.Tensor:
        """Use BERT to create contextulized embeddings and get the output 
            from the pooling layer (i.e. embedding for CLR)

        Args:
            symbols (Dict): The Dict of token specifications provided by the HuggingFace tokenizer

        Returns:
            torch.Tensor: Encoding of CLR for the given input
        """

        # Run through BERT for contextualized embeddings
        encoded_sequence = self.bert(**symbols)
        # TODO: Get the [CLS] token using the `pooler_output` from 
        #      The BertModel output. See here: https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertModel
        #      and check the returns for the forward method.
        # We want to return a tensor of the form batch_size x 1 x bert_hidden_dimension
        
        # Pooler output is initially (batch_size, bert_hidden_dimension)
        pool_out = torch.unsqueeze(encoded_sequence['pooler_output'], dim=1)
        return pool_out

    def forward(
        self,
        symbols: Dict,
    ) -> torch.Tensor:
        """_summary_

        Args:
            symbols (Dict): The Dict of token specifications provided by the HuggingFace tokenizer

        Returns:
            torch.Tensor: _description_
        """
        encoded_sents = self.encode_text(symbols)
        output = self.hidden_layer(encoded_sents)
        output = self.relu(output)
        
        # output is of size (batch_size, hidden_layer)

        # Run through the classifier chain

        cur_input = output
        logits = []

        for classifier in self.chain:

          # Get output of next in chain
          o = classifier(cur_input)

          # Save the logits for training
          logits.append(o)

          # Make a prediction so we can append it to the next input
          # TODO: Could also append raw logits, potentially
          pred = make_prediction(o)

          # Append the previous prediction to the input for the next classifier
          cur_input = torch.cat([cur_input, pred], dim=2)

        # Preds contains 20 tensors, each batch_size x 1 x 1
        # We need to return one tensor that is 128 x 20
        stack = logits[0].squeeze(dim=1)
        for logit in logits[1:]:
          stack = torch.cat([stack, logit.squeeze(dim=1)], dim=-1)
        
        return stack

## Evaluation

### Metric Functions

In [None]:
def precision(predicted_labels, true_labels):
    """
    Precision is True Positives / All Positives Predictions
    """

    # Each pred/true pair is a list of 20 values, so need to go one level deeper

    all_pos = 0
    true_pos = 0
    for i in range(len(predicted_labels)):
      cur_pred = predicted_labels[i]
      cur_true = true_labels[i]

      # Count both true_pos and false_pos
      all_pos += sum(cur_pred)

      # Get true_pos only
      for j in range(len(cur_pred)):
        if (cur_pred[j] == 1 and cur_pred[j] == cur_true[j]):
          true_pos += 1

    if all_pos:
        return true_pos/all_pos   
    else:
        return 0.


def recall(predicted_labels, true_labels, which_label=1):
    """
    Recall is True Positives / All Positive Labels
    """

    false_neg = 0
    true_pos = 0
    for i in range(len(predicted_labels)):
      cur_pred = predicted_labels[i]
      cur_true = true_labels[i]
    
      for j in range(len(cur_pred)):
        # Get true_pos
        if (cur_pred[j] == 1 and cur_pred[j] == cur_true[j]):
          true_pos += 1

        # Get false_neg
        if (cur_pred[j] == 0 and cur_true[j] == 1):
          false_neg += 1
      
    denom = false_neg + true_pos
    if denom:
        return true_pos/denom
    else:
        return 0.

def f1_score(
    predicted_labels: List[int],
    true_labels: List[int]
):
    """
    F1 score is the harmonic mean of precision and recall
    """
    P = precision(predicted_labels, true_labels)
    R = recall(predicted_labels, true_labels)
    if P and R:
        return 2*P*R/(P+R)
    else:
        return 0.


## Training Loop

In [None]:
def training_loop(
    num_epochs,
    train_features,
    train_labels,
    dev_features,
    dev_labels,
    optimizer,
    model,
    possible_labels
):
    print("Training...")
    dev_f1_scores = []
    #loss_func = torch.nn.BCEWithLogitsLoss()
    loss_func = torch.nn.BCELoss()

    # Send the data to the device first
    #train_features = train_features.to(device)
    #train_labels = train_labels.to(device)
    #dev_features = dev_features.to(device)
    #dev_labels = dev_labels.to(device)

    batches = list(zip(train_features, train_labels))
    random.shuffle(batches)
    for i in range(num_epochs):
        losses = []
        for features, labels in tqdm(batches):
            # Empty the dynamic computation graph
            optimizer.zero_grad()
            preds = model(features)
            loss = loss_func(preds, labels.float())
            
            # Backpropogate the loss through our model
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        
        print(f"epoch {i}, loss: {sum(losses)/len(losses)}")
        # Estimate the f1 score for the development set
        print("Evaluating dev...")
        all_preds = []
        all_labels = []
        for sents, labels in tqdm(zip(dev_features, dev_labels), total=len(dev_features)):
            pred = predict(model, sents)
            all_preds.extend(pred.cpu().detach().numpy())
            all_labels.extend(list(labels.cpu().numpy()))
        dev_f1 = f1_score(all_preds, all_labels)
        print(f"Dev F1 {dev_f1}")
        dev_f1_scores.append(dev_f1)

    # Print the best dev_f1 score for result reporting
    print(f"Best dev F1 score: {np.max(dev_f1_scores)}")
    # Return the trained model
    return model

# Training Phase

## Setup

In [None]:
# Number of labels (should be 20)
possible_labels = len(train_labels[0])
if possible_labels != 20:
  raise RuntimeError(f"Instead of 20 possible labels, we found {possible_labels}.")

# Intialize model
model = NLIClassifier(output_size=possible_labels, hidden_size = 512, dropout_prob=dropout_prob)

# Optimizer
optimizer = torch.optim.AdamW(model.parameters(), LR)

Some weights of the model checkpoint at prajjwal1/bert-small were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Train the Model

In [None]:
# Start the training
trained_model = training_loop(
    epochs,
    train_input_batches,
    train_label_batches,
    val_input_batches,
    val_label_batches,
    optimizer,
    model,
    list(range(possible_labels))
)


Training...


100%|██████████| 33/33 [00:03<00:00,  8.75it/s]


epoch 0, loss: 0.4985069587375178
Evaluating dev...


100%|██████████| 1044/1044 [00:15<00:00, 67.44it/s]


Dev F1 0.046758767268862904


100%|██████████| 33/33 [00:02<00:00, 12.41it/s]


epoch 1, loss: 0.4214862539912715
Evaluating dev...


100%|██████████| 1044/1044 [00:07<00:00, 131.21it/s]


Dev F1 0.06366047745358089


100%|██████████| 33/33 [00:02<00:00, 12.45it/s]


epoch 2, loss: 0.41345627651070105
Evaluating dev...


100%|██████████| 1044/1044 [00:07<00:00, 131.01it/s]


Dev F1 0.08558091286307054


100%|██████████| 33/33 [00:02<00:00, 12.29it/s]


epoch 3, loss: 0.4079286546418161
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.75it/s]


Dev F1 0.10805300713557593


100%|██████████| 33/33 [00:02<00:00, 12.39it/s]


epoch 4, loss: 0.4029262454220743
Evaluating dev...


100%|██████████| 1044/1044 [00:07<00:00, 130.69it/s]


Dev F1 0.14128256513026052


100%|██████████| 33/33 [00:02<00:00, 12.32it/s]


epoch 5, loss: 0.39754320816560224
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 129.19it/s]


Dev F1 0.1665847665847666


100%|██████████| 33/33 [00:02<00:00, 12.31it/s]


epoch 6, loss: 0.39438114563624066
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 108.52it/s]


Dev F1 0.1810719459198455


100%|██████████| 33/33 [00:02<00:00, 12.26it/s]


epoch 7, loss: 0.39152943997672107
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.63it/s]


Dev F1 0.2031063321385902


100%|██████████| 33/33 [00:02<00:00, 12.29it/s]


epoch 8, loss: 0.3876104842532765
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.33it/s]


Dev F1 0.22519352568613651


100%|██████████| 33/33 [00:02<00:00, 12.18it/s]


epoch 9, loss: 0.38560413501479407
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.43it/s]


Dev F1 0.23035921205098497


100%|██████████| 33/33 [00:02<00:00, 12.28it/s]


epoch 10, loss: 0.38200563102057483
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.86it/s]


Dev F1 0.2413078517154041


100%|██████████| 33/33 [00:02<00:00, 12.26it/s]


epoch 11, loss: 0.38048371040459833
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.17it/s]


Dev F1 0.2632532850022655


100%|██████████| 33/33 [00:02<00:00, 12.13it/s]


epoch 12, loss: 0.37790492899490125
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 130.33it/s]


Dev F1 0.26642582975841045


100%|██████████| 33/33 [00:02<00:00, 12.09it/s]


epoch 13, loss: 0.37702171549652563
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.28it/s]


Dev F1 0.2692220969560316


100%|██████████| 33/33 [00:02<00:00, 12.15it/s]


epoch 14, loss: 0.3745832551609386
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 129.20it/s]


Dev F1 0.2721760610824164


100%|██████████| 33/33 [00:02<00:00, 12.06it/s]


epoch 15, loss: 0.37356207316572015
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 129.88it/s]


Dev F1 0.2896703296703297


100%|██████████| 33/33 [00:02<00:00, 12.06it/s]


epoch 16, loss: 0.37126524791573035
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 130.10it/s]


Dev F1 0.296602787456446


100%|██████████| 33/33 [00:02<00:00, 12.12it/s]


epoch 17, loss: 0.3701308478008617
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 130.38it/s]


Dev F1 0.303968600087222


100%|██████████| 33/33 [00:02<00:00, 12.13it/s]


epoch 18, loss: 0.3690056800842285
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 129.46it/s]


Dev F1 0.30504036657211436


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 19, loss: 0.36741204604958044
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 108.50it/s]


Dev F1 0.3184549356223176


100%|██████████| 33/33 [00:02<00:00, 12.06it/s]


epoch 20, loss: 0.3661130495143659
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.98it/s]


Dev F1 0.3168998923573735


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 21, loss: 0.3655563141360427
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.95it/s]


Dev F1 0.3220878421387651


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 22, loss: 0.3644835271618583
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.19it/s]


Dev F1 0.32427807486631016


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 23, loss: 0.363489135648265
Evaluating dev...


100%|██████████| 1044/1044 [00:07<00:00, 131.66it/s]


Dev F1 0.33163913595933925


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 24, loss: 0.3624410873109644
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.73it/s]


Dev F1 0.3357848287455348


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 25, loss: 0.3617501276912111
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 129.10it/s]


Dev F1 0.3397489539748954


100%|██████████| 33/33 [00:02<00:00, 12.07it/s]


epoch 26, loss: 0.36099656603553076
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 130.45it/s]


Dev F1 0.3494252873563218


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 27, loss: 0.3603721423582597
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.35it/s]


Dev F1 0.3439197659841204


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 28, loss: 0.3589041874264226
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.72it/s]


Dev F1 0.3416579223504722


100%|██████████| 33/33 [00:02<00:00, 12.06it/s]


epoch 29, loss: 0.35888070771188446
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.31it/s]


Dev F1 0.3622112211221122


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 30, loss: 0.35707135092128406
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.76it/s]


Dev F1 0.35362559734053606


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 31, loss: 0.35651467514760565
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.27it/s]


Dev F1 0.3653885585400862


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 32, loss: 0.35585467381910846
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 103.39it/s]


Dev F1 0.36236647493837304


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 33, loss: 0.3550825353824731
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.23it/s]


Dev F1 0.3623721881390593


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 34, loss: 0.3548500926205606
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.98it/s]


Dev F1 0.36928104575163395


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 35, loss: 0.3543651040756341
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 129.57it/s]


Dev F1 0.37410659587502554


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 36, loss: 0.35417783982826
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 129.98it/s]


Dev F1 0.36911102007374025


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 37, loss: 0.35298238107652374
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 130.35it/s]


Dev F1 0.3828396322778345


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 38, loss: 0.35195429848902154
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.60it/s]


Dev F1 0.3723101908241982


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 39, loss: 0.352446765610666
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 130.02it/s]


Dev F1 0.3782467532467533


100%|██████████| 33/33 [00:02<00:00, 12.05it/s]


epoch 40, loss: 0.3500712044311292
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.28it/s]


Dev F1 0.37872944996955554


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 41, loss: 0.3511228281440157
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.78it/s]


Dev F1 0.38307349665924273


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 42, loss: 0.3499434870300871
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.09it/s]


Dev F1 0.3834737264340152


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 43, loss: 0.34996917934128735
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.83it/s]


Dev F1 0.3800282999797857


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 44, loss: 0.34975688836791297
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.16it/s]


Dev F1 0.3813338674143801


100%|██████████| 33/33 [00:02<00:00, 11.83it/s]


epoch 45, loss: 0.34925304849942523
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 117.48it/s]


Dev F1 0.3828502415458937


100%|██████████| 33/33 [00:02<00:00, 12.09it/s]


epoch 46, loss: 0.3491837201696454
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.18it/s]


Dev F1 0.39233685891039716


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 47, loss: 0.34790261496197095
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.93it/s]


Dev F1 0.39992041384799043


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 48, loss: 0.3477435933821129
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.54it/s]


Dev F1 0.39242273180458626


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 49, loss: 0.3474328996557178
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.22it/s]


Dev F1 0.38740725887307


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 50, loss: 0.34663076382694824
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.23it/s]


Dev F1 0.39205561072492556


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 51, loss: 0.3470975467652986
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.89it/s]


Dev F1 0.3879824910465579


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 52, loss: 0.3461264922763362
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.31it/s]


Dev F1 0.4034844585230647


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 53, loss: 0.34596794933983777
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.32it/s]


Dev F1 0.3940906368536634


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 54, loss: 0.345256715109854
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.77it/s]


Dev F1 0.39999999999999997


100%|██████████| 33/33 [00:02<00:00, 11.83it/s]


epoch 55, loss: 0.3448001742362976
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.91it/s]


Dev F1 0.3978643464504647


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 56, loss: 0.34527880856485077
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.29it/s]


Dev F1 0.40229885057471265


100%|██████████| 33/33 [00:02<00:00, 11.83it/s]


epoch 57, loss: 0.34390345938277966
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 105.83it/s]


Dev F1 0.41243604879968515


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 58, loss: 0.34414211096185626
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.88it/s]


Dev F1 0.4053469628464715


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 59, loss: 0.34346476468172943
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.46it/s]


Dev F1 0.41231855629658687


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 60, loss: 0.3431023052244475
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.26it/s]


Dev F1 0.40464475496949426


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 61, loss: 0.34298154892343463
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.90it/s]


Dev F1 0.41302216120808


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 62, loss: 0.34304608991651825
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.37it/s]


Dev F1 0.4191780821917808


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 63, loss: 0.34173302126653265
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.45it/s]


Dev F1 0.4098842910374583


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 64, loss: 0.3416642284754551
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.36it/s]


Dev F1 0.410487184504011


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 65, loss: 0.3414507715991049
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.53it/s]


Dev F1 0.4088836010311323


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 66, loss: 0.34053115591858374
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.71it/s]


Dev F1 0.41611263199061405


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 67, loss: 0.34034002007860126
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.36it/s]


Dev F1 0.42493688094775683


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 68, loss: 0.34032065760005603
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.33it/s]


Dev F1 0.4162077104642014


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 69, loss: 0.34002093474070233
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.68it/s]


Dev F1 0.4053469628464715


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 70, loss: 0.34020863699190546
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 109.30it/s]


Dev F1 0.42025019546520714


100%|██████████| 33/33 [00:02<00:00, 12.09it/s]


epoch 71, loss: 0.3402295211950938
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.05it/s]


Dev F1 0.41441441441441446


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 72, loss: 0.3389114827820749
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.29it/s]


Dev F1 0.4208471598672653


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 73, loss: 0.33945228746443085
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.75it/s]


Dev F1 0.41934854690852347


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 74, loss: 0.33822477586341626
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.34it/s]


Dev F1 0.4141572154868987


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 75, loss: 0.33809518452846643
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.05it/s]


Dev F1 0.42686221536086455


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 76, loss: 0.3379388243863077
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.53it/s]


Dev F1 0.41483198146002315


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 77, loss: 0.3378073690515576
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.33it/s]


Dev F1 0.42341993020550606


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 78, loss: 0.33841071887449786
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.55it/s]


Dev F1 0.4264905806952806


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 79, loss: 0.33610608090053906
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.58it/s]


Dev F1 0.41452991452991456


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 80, loss: 0.33740703626112506
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.80it/s]


Dev F1 0.42318059299191374


100%|██████████| 33/33 [00:02<00:00, 12.07it/s]


epoch 81, loss: 0.33719829176411487
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.87it/s]


Dev F1 0.4226545944904643


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 82, loss: 0.33641459905739984
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 102.18it/s]


Dev F1 0.4206656346749226


100%|██████████| 33/33 [00:02<00:00, 12.11it/s]


epoch 83, loss: 0.3366579521786083
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.29it/s]


Dev F1 0.4118558698179


100%|██████████| 33/33 [00:02<00:00, 11.86it/s]


epoch 84, loss: 0.3358940662759723
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.00it/s]


Dev F1 0.41842461776659573


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 85, loss: 0.33548175204883923
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.32it/s]


Dev F1 0.4276875483372003


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 86, loss: 0.33499375888795563
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.33it/s]


Dev F1 0.4256717572008506


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 87, loss: 0.33611855543021
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.69it/s]


Dev F1 0.427057236461746


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 88, loss: 0.33494845123002026
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.71it/s]


Dev F1 0.41867704280155643


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 89, loss: 0.33440013094381854
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.05it/s]


Dev F1 0.43268302105466483


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 90, loss: 0.3351319233576457
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.98it/s]


Dev F1 0.42958150200649725


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 91, loss: 0.3342065594413064
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.99it/s]


Dev F1 0.4257120862201694


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 92, loss: 0.33517803387208417
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.29it/s]


Dev F1 0.43031123139377536


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 93, loss: 0.33384846196030127
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.77it/s]


Dev F1 0.4279123414071511


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 94, loss: 0.33428375377799524
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.76it/s]


Dev F1 0.42178447276940906


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 95, loss: 0.3338188375487472
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.90it/s]


Dev F1 0.431144683323649


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 96, loss: 0.3330563514521628
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.29it/s]


Dev F1 0.4256548536209553


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 97, loss: 0.3333391122745745
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.78it/s]


Dev F1 0.42826718296224586


100%|██████████| 33/33 [00:02<00:00, 11.88it/s]


epoch 98, loss: 0.33288035609505395
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.53it/s]


Dev F1 0.4279946164199192


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 99, loss: 0.33173786329500604
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.05it/s]


Dev F1 0.4344184275652009


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 100, loss: 0.33271431110121985
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.86it/s]


Dev F1 0.4244631901840491


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 101, loss: 0.3319327641617168
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.48it/s]


Dev F1 0.42730085073472546


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 102, loss: 0.33162727229522937
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.53it/s]


Dev F1 0.4336283185840708


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 103, loss: 0.33197676954847394
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.22it/s]


Dev F1 0.43289877300613494


100%|██████████| 33/33 [00:02<00:00, 12.08it/s]


epoch 104, loss: 0.3318538214221145
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.90it/s]


Dev F1 0.4282968089196463


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 105, loss: 0.33062772768916504
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.05it/s]


Dev F1 0.43533002670736365


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 106, loss: 0.3316392076737953
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.48it/s]


Dev F1 0.4345999618101967


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 107, loss: 0.3319193323453267
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 104.53it/s]


Dev F1 0.4320610687022901


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 108, loss: 0.3321977167418509
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.82it/s]


Dev F1 0.43957300800609994


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 109, loss: 0.3304568953586347
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.39it/s]


Dev F1 0.4289596273291925


100%|██████████| 33/33 [00:02<00:00, 11.88it/s]


epoch 110, loss: 0.3307604636206771
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.22it/s]


Dev F1 0.4400381315538608


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 111, loss: 0.33055316047234967
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.88it/s]


Dev F1 0.43532763532763535


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 112, loss: 0.3295923083117514
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.98it/s]


Dev F1 0.4375


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 113, loss: 0.33018348433754663
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.75it/s]


Dev F1 0.4375


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 114, loss: 0.3301721955790664
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.91it/s]


Dev F1 0.43999236786872736


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 115, loss: 0.3297214833172885
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 129.49it/s]


Dev F1 0.43712918660287087


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 116, loss: 0.33005099314631836
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.95it/s]


Dev F1 0.43757115749525616


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 117, loss: 0.32902855945355963
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 129.06it/s]


Dev F1 0.44154370034052215


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 118, loss: 0.3286743462085724
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.89it/s]


Dev F1 0.435658620034214


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 119, loss: 0.32893378174666205
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.36it/s]


Dev F1 0.4408764639214205


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 120, loss: 0.3285727220954317
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 109.78it/s]


Dev F1 0.4392364392364393


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 121, loss: 0.3283942224401416
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.89it/s]


Dev F1 0.43614182234083115


100%|██████████| 33/33 [00:02<00:00, 11.88it/s]


epoch 122, loss: 0.32843012791691406
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.46it/s]


Dev F1 0.4414843006660324


100%|██████████| 33/33 [00:02<00:00, 12.06it/s]


epoch 123, loss: 0.32813692634755914
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.20it/s]


Dev F1 0.4396437369717643


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 124, loss: 0.3276678838513114
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.82it/s]


Dev F1 0.4292237442922374


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 125, loss: 0.32820091644922894
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.38it/s]


Dev F1 0.4444861919969942


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 126, loss: 0.327138268586361
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 129.82it/s]


Dev F1 0.4468447981807845


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 127, loss: 0.3272845040668141
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.49it/s]


Dev F1 0.44214955058328553


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 128, loss: 0.3272854911558556
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.01it/s]


Dev F1 0.43678598629093685


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 129, loss: 0.32679922291726776
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.45it/s]


Dev F1 0.44224924012158057


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 130, loss: 0.32745480266484345
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.31it/s]


Dev F1 0.44776119402985076


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 131, loss: 0.3265242576599121
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.24it/s]


Dev F1 0.43829868395956517


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 132, loss: 0.3273255806980711
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 106.37it/s]


Dev F1 0.4446546830652791


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 133, loss: 0.3264305194218953
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.79it/s]


Dev F1 0.4395979518300777


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 134, loss: 0.32624893929019116
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.32it/s]


Dev F1 0.44516498188060266


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 135, loss: 0.32623274759812787
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.25it/s]


Dev F1 0.44177761110069375


100%|██████████| 33/33 [00:02<00:00, 12.05it/s]


epoch 136, loss: 0.32565236723784247
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.06it/s]


Dev F1 0.448340874811463


100%|██████████| 33/33 [00:02<00:00, 12.12it/s]


epoch 137, loss: 0.3251833166136886
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.50it/s]


Dev F1 0.4389782403027436


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 138, loss: 0.32570733536373486
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.75it/s]


Dev F1 0.45214770158251694


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 139, loss: 0.3245590925216675
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.42it/s]


Dev F1 0.4403634986747444


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 140, loss: 0.3255855432062438
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.97it/s]


Dev F1 0.4433422357878672


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 141, loss: 0.32542876492847095
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.02it/s]


Dev F1 0.4429454749859471


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 142, loss: 0.32478845300096454
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 129.36it/s]


Dev F1 0.4493109307154994


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 143, loss: 0.32484091411937366
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.09it/s]


Dev F1 0.4536004536004537


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 144, loss: 0.3239058418707414
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 117.23it/s]


Dev F1 0.45217064461567374


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 145, loss: 0.3240972703153437
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.25it/s]


Dev F1 0.45406626506024095


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 146, loss: 0.3251915989500104
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.35it/s]


Dev F1 0.4517709118311982


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 147, loss: 0.3243917127450307
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 117.82it/s]


Dev F1 0.4542193694544082


100%|██████████| 33/33 [00:02<00:00, 11.82it/s]


epoch 148, loss: 0.32340939478440717
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.43it/s]


Dev F1 0.4493957703927493


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 149, loss: 0.32358029213818634
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.25it/s]


Dev F1 0.4540520306943665


100%|██████████| 33/33 [00:02<00:00, 12.07it/s]


epoch 150, loss: 0.3235279493259661
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 117.34it/s]


Dev F1 0.45514141224948496


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 151, loss: 0.32357147426316235
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.17it/s]


Dev F1 0.452837279218339


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 152, loss: 0.3237964050336318
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.64it/s]


Dev F1 0.4572713643178411


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 153, loss: 0.3235437075297038
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 116.34it/s]


Dev F1 0.4483864880166069


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 154, loss: 0.3229430209506642
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 117.26it/s]


Dev F1 0.448971892095831


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 155, loss: 0.32252041589130054
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.57it/s]


Dev F1 0.44591278081933167


100%|██████████| 33/33 [00:02<00:00, 11.78it/s]


epoch 156, loss: 0.3223165436224504
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 99.65it/s]


Dev F1 0.4550065752395266


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 157, loss: 0.32228348020351294
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 117.02it/s]


Dev F1 0.452000751455946


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 158, loss: 0.3224896156426632
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.85it/s]


Dev F1 0.44993399962285496


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 159, loss: 0.3223616300207196
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.43it/s]


Dev F1 0.46044244469441314


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 160, loss: 0.3224610993356416
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.00it/s]


Dev F1 0.45522667665792427


100%|██████████| 33/33 [00:02<00:00, 11.83it/s]


epoch 161, loss: 0.3217494614196546
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 116.84it/s]


Dev F1 0.4529499626587005


100%|██████████| 33/33 [00:02<00:00, 11.84it/s]


epoch 162, loss: 0.32228508681961987
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 116.21it/s]


Dev F1 0.45399924896733007


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 163, loss: 0.32154271548444574
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.35it/s]


Dev F1 0.46156695684386


100%|██████████| 33/33 [00:02<00:00, 12.08it/s]


epoch 164, loss: 0.3212822404774753
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.83it/s]


Dev F1 0.451867142052918


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 165, loss: 0.32159154704122833
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.60it/s]


Dev F1 0.44985029940119764


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 166, loss: 0.3206418852011363
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.57it/s]


Dev F1 0.4487107095802748


100%|██████████| 33/33 [00:02<00:00, 12.10it/s]


epoch 167, loss: 0.32187306429400586
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.43it/s]


Dev F1 0.4487658937920719


100%|██████████| 33/33 [00:02<00:00, 12.10it/s]


epoch 168, loss: 0.321374877835765
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.36it/s]


Dev F1 0.4630213160333642


100%|██████████| 33/33 [00:02<00:00, 12.13it/s]


epoch 169, loss: 0.32055299300135986
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.16it/s]


Dev F1 0.45328849028400603


100%|██████████| 33/33 [00:02<00:00, 12.05it/s]


epoch 170, loss: 0.32080305525750824
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.10it/s]


Dev F1 0.4503532911863146


100%|██████████| 33/33 [00:02<00:00, 12.05it/s]


epoch 171, loss: 0.32053841605330957
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.68it/s]


Dev F1 0.4498412105361479


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 172, loss: 0.3202318016326789
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.48it/s]


Dev F1 0.4512514008218155


100%|██████████| 33/33 [00:02<00:00, 11.84it/s]


epoch 173, loss: 0.32030676440759137
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.85it/s]


Dev F1 0.4568565596585637


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 174, loss: 0.32026927669843036
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.49it/s]


Dev F1 0.44310247216455934


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 175, loss: 0.3202290986523484
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.91it/s]


Dev F1 0.45134730538922163


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 176, loss: 0.31971569675387757
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.17it/s]


Dev F1 0.4557291666666667


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 177, loss: 0.31976072174130066
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.77it/s]


Dev F1 0.4492780798799925


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 178, loss: 0.31994860370953876
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.83it/s]


Dev F1 0.4549034314644665


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 179, loss: 0.3196916390549053
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.34it/s]


Dev F1 0.45731365589389117


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 180, loss: 0.32007401730075025
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.17it/s]


Dev F1 0.45561139028475706


100%|██████████| 33/33 [00:02<00:00, 11.86it/s]


epoch 181, loss: 0.3189333334113612
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.83it/s]


Dev F1 0.45857275254865615


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 182, loss: 0.31997954845428467
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.44it/s]


Dev F1 0.4619402985074627


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 183, loss: 0.3196895185745124
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.87it/s]


Dev F1 0.4630560208449656


100%|██████████| 33/33 [00:02<00:00, 11.80it/s]


epoch 184, loss: 0.3188174646912199
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.79it/s]


Dev F1 0.46010439970171507


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 185, loss: 0.3196441305406166
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.97it/s]


Dev F1 0.45796667290769516


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 186, loss: 0.31837654294389667
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.03it/s]


Dev F1 0.46017372019959346


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 187, loss: 0.3185749433257363
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.21it/s]


Dev F1 0.4601604777010636


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 188, loss: 0.31797437144048285
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.63it/s]


Dev F1 0.46173848439821685


100%|██████████| 33/33 [00:02<00:00, 12.05it/s]


epoch 189, loss: 0.31776937029578467
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.75it/s]


Dev F1 0.45172155688622756


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 190, loss: 0.3184802098707719
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.14it/s]


Dev F1 0.45707873573966706


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 191, loss: 0.3179428279399872
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.96it/s]


Dev F1 0.46128195628010377


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 192, loss: 0.31921834205136157
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.90it/s]


Dev F1 0.4614243323442137


100%|██████████| 33/33 [00:02<00:00, 12.05it/s]


epoch 193, loss: 0.31765287843617523
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 104.90it/s]


Dev F1 0.45800671892497197


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 194, loss: 0.3173502069531065
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.60it/s]


Dev F1 0.46362286562732


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 195, loss: 0.31798617586945044
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.97it/s]


Dev F1 0.4635687732342007


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 196, loss: 0.3171639144420624
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 128.23it/s]


Dev F1 0.46416002963511765


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 197, loss: 0.31724098324775696
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.15it/s]


Dev F1 0.457921253965292


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 198, loss: 0.31684290820902045
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.59it/s]


Dev F1 0.45845697329376844


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 199, loss: 0.31662185896526684
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.11it/s]


Dev F1 0.46050919903363685


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 200, loss: 0.316792604598132
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.88it/s]


Dev F1 0.46331471135940416


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 201, loss: 0.3173447413878007
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.65it/s]


Dev F1 0.4582020389249304


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 202, loss: 0.3171371618906657
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.08it/s]


Dev F1 0.4650304484222182


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 203, loss: 0.316567549199769
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.43it/s]


Dev F1 0.46281296023564067


100%|██████████| 33/33 [00:02<00:00, 12.05it/s]


epoch 204, loss: 0.3158595869035432
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.33it/s]


Dev F1 0.46042899408284027


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 205, loss: 0.3166579378373695
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 104.95it/s]


Dev F1 0.4576239476145931


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 206, loss: 0.3160427375273271
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 117.77it/s]


Dev F1 0.4592730661696179


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 207, loss: 0.31607488520217664
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.38it/s]


Dev F1 0.46190651453809345


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 208, loss: 0.31567532365972345
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.43it/s]


Dev F1 0.4675516224188791


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 209, loss: 0.31579297600370465
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.87it/s]


Dev F1 0.45628364581399666


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 210, loss: 0.3161553451509187
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 127.67it/s]


Dev F1 0.46781905112173594


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 211, loss: 0.31474897265434265
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.28it/s]


Dev F1 0.46486686390532544


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 212, loss: 0.31475000670461944
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 126.04it/s]


Dev F1 0.4689223979404193


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 213, loss: 0.3163280613494642
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.64it/s]


Dev F1 0.4585185185185185


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 214, loss: 0.3157447459119739
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.65it/s]


Dev F1 0.4630650496141125


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 215, loss: 0.3153669825105956
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.52it/s]


Dev F1 0.4621380846325167


100%|██████████| 33/33 [00:02<00:00, 12.09it/s]


epoch 216, loss: 0.3150852882500851
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.62it/s]


Dev F1 0.4562638991845811


100%|██████████| 33/33 [00:02<00:00, 11.88it/s]


epoch 217, loss: 0.31493927944790234
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 102.10it/s]


Dev F1 0.4643712023568404


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 218, loss: 0.3148750355749419
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.68it/s]


Dev F1 0.4587973273942093


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 219, loss: 0.31467444427085645
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.84it/s]


Dev F1 0.4615384615384616


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 220, loss: 0.31398901072415436
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.20it/s]


Dev F1 0.46978879706152427


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 221, loss: 0.31469610965613165
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.77it/s]


Dev F1 0.4640354112873478


100%|██████████| 33/33 [00:02<00:00, 11.81it/s]


epoch 222, loss: 0.31431120814699115
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.88it/s]


Dev F1 0.46550777676120764


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 223, loss: 0.31462550163269043
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.40it/s]


Dev F1 0.462819089900111


100%|██████████| 33/33 [00:02<00:00, 11.84it/s]


epoch 224, loss: 0.31424747362281336
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.28it/s]


Dev F1 0.4645494830132939


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 225, loss: 0.31372156377994653
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.59it/s]


Dev F1 0.4633920296570899


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 226, loss: 0.31436512686989526
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.31it/s]


Dev F1 0.4639764142251704


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 227, loss: 0.3136114315553145
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.92it/s]


Dev F1 0.46299037749814953


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 228, loss: 0.3134249241063089
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.41it/s]


Dev F1 0.46448288553551714


100%|██████████| 33/33 [00:02<00:00, 12.11it/s]


epoch 229, loss: 0.3139420675508904
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 101.58it/s]


Dev F1 0.4619354838709677


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 230, loss: 0.31302931904792786
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.60it/s]


Dev F1 0.474663718444813


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 231, loss: 0.31397057302070386
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.50it/s]


Dev F1 0.4699372925119882


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 232, loss: 0.31238873438401654
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.52it/s]


Dev F1 0.47278785634950016


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 233, loss: 0.3131673444401134
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.56it/s]


Dev F1 0.4595744680851063


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 234, loss: 0.31305635156053485
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.89it/s]


Dev F1 0.47329650092081027


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 235, loss: 0.31287204045237915
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.30it/s]


Dev F1 0.469481836621796


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 236, loss: 0.3137800133589542
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.57it/s]


Dev F1 0.46719645167251894


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 237, loss: 0.3131573127977776
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.68it/s]


Dev F1 0.4801907556859868


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 238, loss: 0.31286767666990106
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.53it/s]


Dev F1 0.4679675994108984


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 239, loss: 0.31240388931650104
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.62it/s]


Dev F1 0.46321224414530704


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 240, loss: 0.3126806652907169
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.85it/s]


Dev F1 0.46476540938362465


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 241, loss: 0.3128959672017531
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 100.71it/s]


Dev F1 0.47329919531821507


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 242, loss: 0.3118066065239184
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.59it/s]


Dev F1 0.46622123236822566


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 243, loss: 0.3115957361279112
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.67it/s]


Dev F1 0.4743330266789328


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 244, loss: 0.31158301053625165
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.56it/s]


Dev F1 0.4725073313782991


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 245, loss: 0.3115179213610562
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.80it/s]


Dev F1 0.4704810495626822


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 246, loss: 0.3113424787015626
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.57it/s]


Dev F1 0.463559008509064


100%|██████████| 33/33 [00:02<00:00, 11.81it/s]


epoch 247, loss: 0.31208020900235034
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.28it/s]


Dev F1 0.4707834101382488


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 248, loss: 0.3115587261590091
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.33it/s]


Dev F1 0.4771929824561404


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 249, loss: 0.3109033089695555
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.07it/s]


Dev F1 0.4741552511415525


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 250, loss: 0.31045644933527167
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.07it/s]


Dev F1 0.4722575950518464


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 251, loss: 0.3110494568492427
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.00it/s]


Dev F1 0.4639326254119371


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 252, loss: 0.31162941455841064
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.93it/s]


Dev F1 0.4653301453007173


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 253, loss: 0.31068868799643085
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 100.20it/s]


Dev F1 0.4704370179948586


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 254, loss: 0.31096404700568225
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.20it/s]


Dev F1 0.46581905288372955


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 255, loss: 0.31130536036057904
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.54it/s]


Dev F1 0.47330185252451873


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 256, loss: 0.3105715341640241
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.26it/s]


Dev F1 0.46955245781364635


100%|██████████| 33/33 [00:02<00:00, 11.84it/s]


epoch 257, loss: 0.31068343285358313
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.19it/s]


Dev F1 0.46620132255694346


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 258, loss: 0.30945742852760083
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.20it/s]


Dev F1 0.4697166141878126


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 259, loss: 0.3114446076479825
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.44it/s]


Dev F1 0.46607994132746605


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 260, loss: 0.310232448758501
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.29it/s]


Dev F1 0.46961932650073207


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 261, loss: 0.30909408583785547
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.23it/s]


Dev F1 0.46500549651887146


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 262, loss: 0.3106887439886729
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.41it/s]


Dev F1 0.474090407938258


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 263, loss: 0.30960299932595453
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.91it/s]


Dev F1 0.47591133907309036


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 264, loss: 0.3102182727871519
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.82it/s]


Dev F1 0.47283300018171903


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 265, loss: 0.3097716055133126
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 105.01it/s]


Dev F1 0.4719224437534297


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 266, loss: 0.30995293458302814
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.96it/s]


Dev F1 0.4670218629432299


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 267, loss: 0.31025200720989343
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.29it/s]


Dev F1 0.468892537858055


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 268, loss: 0.3093428566600337
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.17it/s]


Dev F1 0.46303788157410813


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 269, loss: 0.3100936512152354
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.02it/s]


Dev F1 0.470609711573567


100%|██████████| 33/33 [00:02<00:00, 11.86it/s]


epoch 270, loss: 0.3097130764614452
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.93it/s]


Dev F1 0.47222222222222215


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 271, loss: 0.3088805747754646
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.88it/s]


Dev F1 0.4696608615948671


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 272, loss: 0.30975379546483356
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.24it/s]


Dev F1 0.464167585446527


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 273, loss: 0.30885985222729767
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.74it/s]


Dev F1 0.46795224977043154


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 274, loss: 0.3084008034431573
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.65it/s]


Dev F1 0.47451411807847443


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 275, loss: 0.30863970969662524
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.62it/s]


Dev F1 0.46698459280997795


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 276, loss: 0.3089233329801848
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 114.13it/s]


Dev F1 0.4706959706959707


100%|██████████| 33/33 [00:02<00:00, 11.78it/s]


epoch 277, loss: 0.30855881987196027
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 95.59it/s] 


Dev F1 0.4751280175566935


100%|██████████| 33/33 [00:02<00:00, 11.79it/s]


epoch 278, loss: 0.3093204380887927
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.97it/s]


Dev F1 0.46912222833058453


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 279, loss: 0.30932697924700653
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.44it/s]


Dev F1 0.47206551410373065


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 280, loss: 0.30857006618470856
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.19it/s]


Dev F1 0.46961932650073207


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 281, loss: 0.30913112019047595
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 111.27it/s]


Dev F1 0.47166028795334425


100%|██████████| 33/33 [00:02<00:00, 11.86it/s]


epoch 282, loss: 0.30867917158386926
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 111.33it/s]


Dev F1 0.4697495887406324


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 283, loss: 0.30754213802742236
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.85it/s]


Dev F1 0.47549644744033526


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 284, loss: 0.30784461444074457
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.07it/s]


Dev F1 0.48121387283237


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 285, loss: 0.30807155099782074
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 114.48it/s]


Dev F1 0.47389412617839016


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 286, loss: 0.30770142963438324
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.88it/s]


Dev F1 0.46487679293858036


100%|██████████| 33/33 [00:02<00:00, 11.75it/s]


epoch 287, loss: 0.30843252214518463
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.12it/s]


Dev F1 0.47220190197512796


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 288, loss: 0.30695270227663446
Evaluating dev...


100%|██████████| 1044/1044 [00:11<00:00, 93.87it/s] 


Dev F1 0.4783788673783245


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 289, loss: 0.30727261304855347
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 111.07it/s]


Dev F1 0.46628697409516817


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 290, loss: 0.308094937692989
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.34it/s]


Dev F1 0.4791096515234447


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 291, loss: 0.30635396007335547
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.60it/s]


Dev F1 0.4688070047427946


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 292, loss: 0.3076594219063268
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 114.97it/s]


Dev F1 0.4689203925845147


100%|██████████| 33/33 [00:02<00:00, 11.83it/s]


epoch 293, loss: 0.307592485890244
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 114.11it/s]


Dev F1 0.4699433583044034


100%|██████████| 33/33 [00:02<00:00, 11.86it/s]


epoch 294, loss: 0.3079592630718694
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.30it/s]


Dev F1 0.46902332361516036


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 295, loss: 0.3072267093441703
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.70it/s]


Dev F1 0.4716221164408641


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 296, loss: 0.3066342987797477
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.83it/s]


Dev F1 0.4763636363636364


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 297, loss: 0.30772611227902497
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.58it/s]


Dev F1 0.4772519641878312


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 298, loss: 0.30679363102623913
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.43it/s]


Dev F1 0.4723746798390048


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 299, loss: 0.30663454713243427
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.29it/s]


Dev F1 0.46994535519125674


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 300, loss: 0.3070494406151049
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.62it/s]


Dev F1 0.4752475247524752


100%|██████████| 33/33 [00:02<00:00, 12.07it/s]


epoch 301, loss: 0.3068511260278297
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.73it/s]


Dev F1 0.47209047792776354


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 302, loss: 0.3064288677591266
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.78it/s]


Dev F1 0.472420743998534


100%|██████████| 33/33 [00:02<00:00, 12.06it/s]


epoch 303, loss: 0.30609146844257007
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.05it/s]


Dev F1 0.4774217042971595


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 304, loss: 0.30678431193033856
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.83it/s]


Dev F1 0.474950262253572


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 305, loss: 0.3061187980753003
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.27it/s]


Dev F1 0.4670680532749499


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 306, loss: 0.3060105190132604
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.89it/s]


Dev F1 0.4727537816657554


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 307, loss: 0.30662797346259607
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.61it/s]


Dev F1 0.4715888526963446


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 308, loss: 0.3052127785754926
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.97it/s]


Dev F1 0.470030971032975


100%|██████████| 33/33 [00:02<00:00, 11.84it/s]


epoch 309, loss: 0.30538875406438654
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.29it/s]


Dev F1 0.4728739002932551


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 310, loss: 0.30531749671155756
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.29it/s]


Dev F1 0.4775956284153006


100%|██████████| 33/33 [00:02<00:00, 11.82it/s]


epoch 311, loss: 0.3054332859588392
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.81it/s]


Dev F1 0.474668603595424


100%|██████████| 33/33 [00:02<00:00, 12.07it/s]


epoch 312, loss: 0.3061535159746806
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 103.77it/s]


Dev F1 0.47078099399235385


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 313, loss: 0.30558098175308923
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.41it/s]


Dev F1 0.47246111619396153


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 314, loss: 0.3055098806366776
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.93it/s]


Dev F1 0.4803192454199166


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 315, loss: 0.30627228726040234
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.13it/s]


Dev F1 0.4783956244302644


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 316, loss: 0.30513110756874084
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.63it/s]


Dev F1 0.47242948134667884


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 317, loss: 0.3054074256709128
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.58it/s]


Dev F1 0.4775413711583925


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 318, loss: 0.3055660950415062
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.98it/s]


Dev F1 0.47629388346000723


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 319, loss: 0.3048432568709056
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.15it/s]


Dev F1 0.47770355659866404


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 320, loss: 0.3048815140218446
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.58it/s]


Dev F1 0.4743682310469314


100%|██████████| 33/33 [00:02<00:00, 12.08it/s]


epoch 321, loss: 0.30525623848944
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.71it/s]


Dev F1 0.4749041795948166


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 322, loss: 0.30432413112033496
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.20it/s]


Dev F1 0.4738955823293172


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 323, loss: 0.30526746944947675
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.48it/s]


Dev F1 0.4801017626749046


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 324, loss: 0.30454100623275293
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 101.23it/s]


Dev F1 0.47177050977526036


100%|██████████| 33/33 [00:02<00:00, 11.86it/s]


epoch 325, loss: 0.3055251394257401
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.25it/s]


Dev F1 0.47386507505878095


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 326, loss: 0.3043397339907559
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 116.70it/s]


Dev F1 0.4715743440233236


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 327, loss: 0.30457927602710144
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.15it/s]


Dev F1 0.47793717087343385


100%|██████████| 33/33 [00:02<00:00, 11.83it/s]


epoch 328, loss: 0.30429485891804553
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.07it/s]


Dev F1 0.47242948134667884


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 329, loss: 0.30457411661292566
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.28it/s]


Dev F1 0.47003440159333687


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 330, loss: 0.30405352512995404
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.62it/s]


Dev F1 0.47829232995658466


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 331, loss: 0.3035468242385171
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.42it/s]


Dev F1 0.4804772234273319


100%|██████████| 33/33 [00:02<00:00, 12.08it/s]


epoch 332, loss: 0.3041600991379131
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.98it/s]


Dev F1 0.4794470716624228


100%|██████████| 33/33 [00:02<00:00, 11.86it/s]


epoch 333, loss: 0.30402749596220074
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.16it/s]


Dev F1 0.4747419880499728


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 334, loss: 0.30326547225316364
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.79it/s]


Dev F1 0.47956600361663654


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 335, loss: 0.30409980181491736
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.52it/s]


Dev F1 0.48161630870040034


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 336, loss: 0.3028857617667227
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 108.16it/s]


Dev F1 0.4732272069464544


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 337, loss: 0.3042851243958329
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.19it/s]


Dev F1 0.47269439421338166


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 338, loss: 0.30378945487918274
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.22it/s]


Dev F1 0.47785039941902685


100%|██████████| 33/33 [00:02<00:00, 11.86it/s]


epoch 339, loss: 0.30318100795601355
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.55it/s]


Dev F1 0.4713399050748448


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 340, loss: 0.3038318997079676
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.06it/s]


Dev F1 0.4739507959479016


100%|██████████| 33/33 [00:02<00:00, 11.88it/s]


epoch 341, loss: 0.30287121642719617
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.98it/s]


Dev F1 0.47731397459165154


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 342, loss: 0.30377695777199487
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.69it/s]


Dev F1 0.47541576283441794


100%|██████████| 33/33 [00:02<00:00, 11.84it/s]


epoch 343, loss: 0.30326646747011127
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.96it/s]


Dev F1 0.476535604276137


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 344, loss: 0.3027743823600538
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.74it/s]


Dev F1 0.4814814814814814


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 345, loss: 0.3025868634382884
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.39it/s]


Dev F1 0.47786647314949204


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 346, loss: 0.3035535965905045
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.29it/s]


Dev F1 0.4708658558722091


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 347, loss: 0.30358884822238574
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.91it/s]


Dev F1 0.47989812625068223


100%|██████████| 33/33 [00:02<00:00, 11.72it/s]


epoch 348, loss: 0.30238882700602215
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 109.40it/s]


Dev F1 0.4800876872488126


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 349, loss: 0.30287277698516846
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.80it/s]


Dev F1 0.47681159420289854


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 350, loss: 0.30295435226324835
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.17it/s]


Dev F1 0.4756894049346879


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 351, loss: 0.30292765751029505
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.43it/s]


Dev F1 0.47073837739288965


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 352, loss: 0.3018663263682163
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.79it/s]


Dev F1 0.4801738185768605


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 353, loss: 0.3029215660962192
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.00it/s]


Dev F1 0.4711678832116788


100%|██████████| 33/33 [00:02<00:00, 11.88it/s]


epoch 354, loss: 0.3024729472218138
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.87it/s]


Dev F1 0.47430542945342297


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 355, loss: 0.3026015984289574
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.67it/s]


Dev F1 0.4807239819004525


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 356, loss: 0.30199217706015613
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.81it/s]


Dev F1 0.4756697648988518


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 357, loss: 0.3023976349469387
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.85it/s]


Dev F1 0.4763636363636364


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 358, loss: 0.30167708523345715
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.31it/s]


Dev F1 0.4800580130529369


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 359, loss: 0.302796971617323
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 116.31it/s]


Dev F1 0.4747053490480508


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 360, loss: 0.3016887867089474
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 116.99it/s]


Dev F1 0.48141465175027065


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 361, loss: 0.3018531654820298
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.05it/s]


Dev F1 0.48268398268398255


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 362, loss: 0.3016550829916289
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.09it/s]


Dev F1 0.47593390804597707


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 363, loss: 0.3014639751477675
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.01it/s]


Dev F1 0.473967684021544


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 364, loss: 0.30233797972852533
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.25it/s]


Dev F1 0.48122743682310465


100%|██████████| 33/33 [00:02<00:00, 11.86it/s]


epoch 365, loss: 0.3010410287163474
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.33it/s]


Dev F1 0.49045045045045055


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 366, loss: 0.3024333038113334
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.57it/s]


Dev F1 0.4803761982275276


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 367, loss: 0.30172899094494904
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.87it/s]


Dev F1 0.4877785765636233


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 368, loss: 0.30115782672708685
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.82it/s]


Dev F1 0.4813008130081301


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 369, loss: 0.3010909512187495
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.79it/s]


Dev F1 0.4776931447225245


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 370, loss: 0.30182901295748626
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.27it/s]


Dev F1 0.4839115585115944


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 371, loss: 0.3007178776191943
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 104.34it/s]


Dev F1 0.47606961566352435


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 372, loss: 0.3014057937896613
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.55it/s]


Dev F1 0.4854054054054054


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 373, loss: 0.2997034688790639
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.98it/s]


Dev F1 0.4745517116464409


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 374, loss: 0.3006159428394202
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.10it/s]


Dev F1 0.48428237830070053


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 375, loss: 0.3004235742670117
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.06it/s]


Dev F1 0.47854309412188956


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 376, loss: 0.30016757863940613
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.82it/s]


Dev F1 0.4747474747474748


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 377, loss: 0.3003292815251784
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.08it/s]


Dev F1 0.48399348887683113


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 378, loss: 0.3003807248491229
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.56it/s]


Dev F1 0.47380822911002357


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 379, loss: 0.30105925057873584
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.78it/s]


Dev F1 0.4777576853526221


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 380, loss: 0.30044583389253326
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.05it/s]


Dev F1 0.4836789900811542


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 381, loss: 0.3010334444768501
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.37it/s]


Dev F1 0.482610254571531


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 382, loss: 0.3001248466246056
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.24it/s]


Dev F1 0.4756894049346879


100%|██████████| 33/33 [00:02<00:00, 12.06it/s]


epoch 383, loss: 0.30013419823213056
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 100.94it/s]


Dev F1 0.4777093149691917


100%|██████████| 33/33 [00:02<00:00, 12.05it/s]


epoch 384, loss: 0.2998793134183595
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.87it/s]


Dev F1 0.4830079537237889


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 385, loss: 0.29986806800871185
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.25it/s]


Dev F1 0.48267148014440436


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 386, loss: 0.30010195211930707
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.04it/s]


Dev F1 0.48314606741573035


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 387, loss: 0.29949653961441736
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.39it/s]


Dev F1 0.48211396728383965


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 388, loss: 0.29924676815668744
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.62it/s]


Dev F1 0.48326736236056134


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 389, loss: 0.30001978892268555
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.24it/s]


Dev F1 0.48275862068965514


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 390, loss: 0.30004962556289905
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.65it/s]


Dev F1 0.48461816865725665


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 391, loss: 0.2990092508720629
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.21it/s]


Dev F1 0.482956584140653


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 392, loss: 0.2991700298858411
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.84it/s]


Dev F1 0.4801457194899818


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 393, loss: 0.2990103056936553
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.64it/s]


Dev F1 0.4818279956819


100%|██████████| 33/33 [00:02<00:00, 12.06it/s]


epoch 394, loss: 0.2991357517964912
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.98it/s]


Dev F1 0.4827958926319581


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 395, loss: 0.29895891565265076
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 100.86it/s]


Dev F1 0.49161406672678093


100%|██████████| 33/33 [00:02<00:00, 12.07it/s]


epoch 396, loss: 0.2989708826397405
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.72it/s]


Dev F1 0.4876565295169947


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 397, loss: 0.2986068138570497
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.28it/s]


Dev F1 0.48273368287832213


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 398, loss: 0.2995179096857707
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.85it/s]


Dev F1 0.47917421224194123


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 399, loss: 0.2991073348305442
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.90it/s]


Dev F1 0.4797418429544639


100%|██████████| 33/33 [00:02<00:00, 11.82it/s]


epoch 400, loss: 0.298854519923528
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 117.95it/s]


Dev F1 0.48831355627472134


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 401, loss: 0.29868900414669153
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 111.42it/s]


Dev F1 0.47805585781646714


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 402, loss: 0.298426833116647
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 111.60it/s]


Dev F1 0.48129496402877703


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 403, loss: 0.29822857452161383
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 111.66it/s]


Dev F1 0.4852411807055435


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 404, loss: 0.29908103653878876
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.45it/s]


Dev F1 0.48059216465065885


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 405, loss: 0.2988112081180919
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.70it/s]


Dev F1 0.4809037116729424


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 406, loss: 0.29870588580767315
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 110.28it/s]


Dev F1 0.4955148905633297


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 407, loss: 0.2985824743906657
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 103.06it/s]


Dev F1 0.48250991705733853


100%|██████████| 33/33 [00:02<00:00, 12.06it/s]


epoch 408, loss: 0.2981290437958457
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.50it/s]


Dev F1 0.48030221262817047


100%|██████████| 33/33 [00:02<00:00, 12.07it/s]


epoch 409, loss: 0.2976944816834999
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 111.07it/s]


Dev F1 0.485054591014856


100%|██████████| 33/33 [00:02<00:00, 11.88it/s]


epoch 410, loss: 0.2982686758041382
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.23it/s]


Dev F1 0.4782843755631646


100%|██████████| 33/33 [00:02<00:00, 11.88it/s]


epoch 411, loss: 0.29822869553710474
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.86it/s]


Dev F1 0.48126232741617353


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 412, loss: 0.29831992225213483
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.54it/s]


Dev F1 0.48564722874165


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 413, loss: 0.2976191955985445
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 113.52it/s]


Dev F1 0.4815415090941833


100%|██████████| 33/33 [00:02<00:00, 12.05it/s]


epoch 414, loss: 0.29766935471332434
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 114.55it/s]


Dev F1 0.47935368043087967


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 415, loss: 0.2980257046945167
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 116.59it/s]


Dev F1 0.4848158131176999


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 416, loss: 0.29748386596188403
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 115.71it/s]


Dev F1 0.48506657070888815


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 417, loss: 0.29788251266335
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 116.56it/s]


Dev F1 0.48661275831087153


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 418, loss: 0.2971879072261579
Evaluating dev...


100%|██████████| 1044/1044 [00:10<00:00, 96.43it/s] 


Dev F1 0.4830813534917206


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 419, loss: 0.29829534075476904
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.29it/s]


Dev F1 0.4857908847184987


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 420, loss: 0.29712873065110407
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.08it/s]


Dev F1 0.4856988667026443


100%|██████████| 33/33 [00:02<00:00, 11.86it/s]


epoch 421, loss: 0.2973596652348836
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.94it/s]


Dev F1 0.4834377797672336


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 422, loss: 0.29682858062512946
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 112.32it/s]


Dev F1 0.48293208767517065


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 423, loss: 0.29838034781542694
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 111.96it/s]


Dev F1 0.4843438914027149


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 424, loss: 0.2968236921411572
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 115.94it/s]


Dev F1 0.48475062791532114


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 425, loss: 0.2969247436884678
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.46it/s]


Dev F1 0.4871194379391101


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 426, loss: 0.2973731078884818
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.29it/s]


Dev F1 0.4849684968496849


100%|██████████| 33/33 [00:02<00:00, 11.84it/s]


epoch 427, loss: 0.29779715249032684
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.82it/s]


Dev F1 0.486437937848033


100%|██████████| 33/33 [00:02<00:00, 12.06it/s]


epoch 428, loss: 0.2963564630710717
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.59it/s]


Dev F1 0.48267193391991375


100%|██████████| 33/33 [00:02<00:00, 12.10it/s]


epoch 429, loss: 0.2967840947888114
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 116.68it/s]


Dev F1 0.47877274348911886


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 430, loss: 0.29654414003545587
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 118.36it/s]


Dev F1 0.48108980103961285


100%|██████████| 33/33 [00:02<00:00, 12.05it/s]


epoch 431, loss: 0.29739585518836975
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.87it/s]


Dev F1 0.48637015781922527


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 432, loss: 0.2972537965485544
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.43it/s]


Dev F1 0.4836694627877923


100%|██████████| 33/33 [00:02<00:00, 12.07it/s]


epoch 433, loss: 0.29678670926527545
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.81it/s]


Dev F1 0.47749369823550586


100%|██████████| 33/33 [00:02<00:00, 11.88it/s]


epoch 434, loss: 0.2966893667524511
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.13it/s]


Dev F1 0.48122743682310465


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 435, loss: 0.2960495190186934
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.78it/s]


Dev F1 0.4815547957531043


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 436, loss: 0.2969022239699508
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.07it/s]


Dev F1 0.48501070663811563


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 437, loss: 0.2959396414684527
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.12it/s]


Dev F1 0.48704103671706256


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 438, loss: 0.29618022748918243
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.08it/s]


Dev F1 0.47862939585211906


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 439, loss: 0.29638034014990833
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.57it/s]


Dev F1 0.48060616994407357


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 440, loss: 0.2960452058098533
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.13it/s]


Dev F1 0.48981779206859594


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 441, loss: 0.2966432715907241
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 111.54it/s]


Dev F1 0.4896564130239251


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 442, loss: 0.2960784001783891
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.57it/s]


Dev F1 0.4853420195439739


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 443, loss: 0.2963845341494589
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.98it/s]


Dev F1 0.48512173128944985


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 444, loss: 0.29695700966950617
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.39it/s]


Dev F1 0.48260947918543884


100%|██████████| 33/33 [00:02<00:00, 11.88it/s]


epoch 445, loss: 0.2953539888064067
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.89it/s]


Dev F1 0.4879971336438553


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 446, loss: 0.29587470311107056
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.73it/s]


Dev F1 0.4875157176216994


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 447, loss: 0.29588849526463135
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.67it/s]


Dev F1 0.4815874150875938


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 448, loss: 0.295431499228333
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.83it/s]


Dev F1 0.4883720930232559


100%|██████████| 33/33 [00:02<00:00, 11.89it/s]


epoch 449, loss: 0.295308813904271
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.56it/s]


Dev F1 0.4867877044760021


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 450, loss: 0.2959705618294803
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.60it/s]


Dev F1 0.4746006103033566


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 451, loss: 0.29505693912506104
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.11it/s]


Dev F1 0.47737515774292416


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 452, loss: 0.29518079938310565
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.30it/s]


Dev F1 0.4852914958103049


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 453, loss: 0.2942403834877592
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 106.06it/s]


Dev F1 0.48428237830070053


100%|██████████| 33/33 [00:02<00:00, 11.94it/s]


epoch 454, loss: 0.29495349887645606
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.51it/s]


Dev F1 0.47972730534625035


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 455, loss: 0.2955058208017638
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.92it/s]


Dev F1 0.48827292110874193


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 456, loss: 0.29415241154757416
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.71it/s]


Dev F1 0.4824246771879483


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 457, loss: 0.2953261705962094
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.40it/s]


Dev F1 0.4932444604575752


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 458, loss: 0.29547481193686975
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.49it/s]


Dev F1 0.48670977011494254


100%|██████████| 33/33 [00:02<00:00, 11.88it/s]


epoch 459, loss: 0.29529540647159924
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.85it/s]


Dev F1 0.4914407988587732


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 460, loss: 0.2950786695335851
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.36it/s]


Dev F1 0.48886494252873564


100%|██████████| 33/33 [00:02<00:00, 12.06it/s]


epoch 461, loss: 0.29518481547182257
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.95it/s]


Dev F1 0.486603128933645


100%|██████████| 33/33 [00:02<00:00, 12.00it/s]


epoch 462, loss: 0.29446994445540686
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.02it/s]


Dev F1 0.48656395557147974


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 463, loss: 0.29471756382422015
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.76it/s]


Dev F1 0.48644767546221507


100%|██████████| 33/33 [00:02<00:00, 12.03it/s]


epoch 464, loss: 0.2954660816626115
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.45it/s]


Dev F1 0.4809669853869746


100%|██████████| 33/33 [00:02<00:00, 11.86it/s]


epoch 465, loss: 0.29468478578509705
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 108.54it/s]


Dev F1 0.4841766493831575


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 466, loss: 0.29409613302259735
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.60it/s]


Dev F1 0.4831902718168812


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 467, loss: 0.2946847008936333
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.84it/s]


Dev F1 0.48489182907205436


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 468, loss: 0.29445844256516657
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.68it/s]


Dev F1 0.4843722093230935


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 469, loss: 0.29375995379505737
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.80it/s]


Dev F1 0.4890720171981368


100%|██████████| 33/33 [00:02<00:00, 11.88it/s]


epoch 470, loss: 0.2939361171288924
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.71it/s]


Dev F1 0.47969816744520305


100%|██████████| 33/33 [00:02<00:00, 11.79it/s]


epoch 471, loss: 0.29355300827459857
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.25it/s]


Dev F1 0.48800426515016876


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 472, loss: 0.29406893163016345
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.69it/s]


Dev F1 0.4856685063200997


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 473, loss: 0.2934943374359246
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.23it/s]


Dev F1 0.4857091497393492


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 474, loss: 0.29330577182047296
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.72it/s]


Dev F1 0.48732044688774606


100%|██████████| 33/33 [00:02<00:00, 12.04it/s]


epoch 475, loss: 0.2935405563224446
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.11it/s]


Dev F1 0.4902487028090893


100%|██████████| 33/33 [00:02<00:00, 12.06it/s]


epoch 476, loss: 0.29404448198549676
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.64it/s]


Dev F1 0.48160173160173164


100%|██████████| 33/33 [00:02<00:00, 12.01it/s]


epoch 477, loss: 0.2940969747124296
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 109.16it/s]


Dev F1 0.49181507465371466


100%|██████████| 33/33 [00:02<00:00, 11.90it/s]


epoch 478, loss: 0.29364053227684717
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.11it/s]


Dev F1 0.48951048951048953


100%|██████████| 33/33 [00:02<00:00, 12.02it/s]


epoch 479, loss: 0.294716940684752
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.38it/s]


Dev F1 0.4843161856963613


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 480, loss: 0.29364074540860724
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.65it/s]


Dev F1 0.48612851261857887


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 481, loss: 0.2937303393176108
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.53it/s]


Dev F1 0.48654428800570304


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 482, loss: 0.29330000732884265
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.27it/s]


Dev F1 0.48428237830070053


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 483, loss: 0.2938927339785027
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 120.90it/s]


Dev F1 0.4915344858314027


100%|██████████| 33/33 [00:02<00:00, 11.86it/s]


epoch 484, loss: 0.2947198757619569
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 121.67it/s]


Dev F1 0.4865058087578195


100%|██████████| 33/33 [00:02<00:00, 11.92it/s]


epoch 485, loss: 0.29306782736922754
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.02it/s]


Dev F1 0.489229125867901


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 486, loss: 0.2929506970174385
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.73it/s]


Dev F1 0.4909966125869139


100%|██████████| 33/33 [00:02<00:00, 12.07it/s]


epoch 487, loss: 0.2933749601696477
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.49it/s]


Dev F1 0.4895703333927616


100%|██████████| 33/33 [00:02<00:00, 12.07it/s]


epoch 488, loss: 0.29282035339962353
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 125.76it/s]


Dev F1 0.4911274421939416


100%|██████████| 33/33 [00:02<00:00, 11.95it/s]


epoch 489, loss: 0.29179884177265747
Evaluating dev...


100%|██████████| 1044/1044 [00:09<00:00, 111.13it/s]


Dev F1 0.4936911320419407


100%|██████████| 33/33 [00:02<00:00, 11.85it/s]


epoch 490, loss: 0.29272355635960895
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.38it/s]


Dev F1 0.4879442757635292


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 491, loss: 0.29238055149714154
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.70it/s]


Dev F1 0.48411986362820736


100%|██████████| 33/33 [00:02<00:00, 11.91it/s]


epoch 492, loss: 0.29258225271196076
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.69it/s]


Dev F1 0.48999285203716947


100%|██████████| 33/33 [00:02<00:00, 11.99it/s]


epoch 493, loss: 0.29250859491752856
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.99it/s]


Dev F1 0.4930332261521972


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 494, loss: 0.2926342378963124
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 123.83it/s]


Dev F1 0.48086848193628756


100%|██████████| 33/33 [00:02<00:00, 11.98it/s]


epoch 495, loss: 0.29289342869411816
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 119.43it/s]


Dev F1 0.48485928037050235


100%|██████████| 33/33 [00:02<00:00, 11.93it/s]


epoch 496, loss: 0.2928060573158842
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 122.64it/s]


Dev F1 0.4939587775408671


100%|██████████| 33/33 [00:02<00:00, 11.87it/s]


epoch 497, loss: 0.2929271531827522
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.88it/s]


Dev F1 0.49230224738984246


100%|██████████| 33/33 [00:02<00:00, 11.96it/s]


epoch 498, loss: 0.29324777469490515
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.18it/s]


Dev F1 0.4868726558313985


100%|██████████| 33/33 [00:02<00:00, 11.97it/s]


epoch 499, loss: 0.2922657746257204
Evaluating dev...


100%|██████████| 1044/1044 [00:08<00:00, 124.05it/s]


Dev F1 0.48986183384173687
Best dev F1 score: 0.4955148905633297
