In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

# import os
# os.chdir("drive/MyDrive/Colab Notebooks/Authorship Identification/")

import torch
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn import preprocessing
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import DistilBertTokenizer, BertTokenizer, DebertaTokenizer

random_seed = 42
np.random.seed = 42
torch.random.seed = 42

  from .autonotebook import tqdm as notebook_tqdm


### Load C50 Dataset (if already loaded)

- this (original dataset) is 50 train - 50 test split

In [23]:
train_df = pd.read_csv('C50/train.csv')
test_df = pd.read_csv('C50/test.csv')

- this dataset is the original one with entity names removed

In [2]:
train_df = pd.read_csv('C50/train_noents.csv')
test_df = pd.read_csv('C50/test_noents.csv')
train_df.rename(columns={'text': 'content', 'label': 'author'}, inplace=True)
test_df.rename(columns={'text': 'content', 'label': 'author'}, inplace=True)

- this is 90 train - 10 test split

In [17]:
test_frac = 0.1
train_df = pd.concat([train_df, test_df])
test_df = train_df.sample(frac=test_frac, random_state=random_seed)
train_df = train_df.drop(test_df.index)

In [3]:
train_df.head()

Unnamed: 0,content,author
0,"The capital market, roundly criticised for a l...",1
1,beat the Republic 3-0 in a ice hockey game on ...,1
2,The fall in the trade deficit to crowns in fro...,1
3,paper concern a.s. on said its net profit fell...,1
4,shares rallied on following the coalition gove...,1


### Load "All the News" 1 Dataset

In [2]:
train_df = pd.read_csv('All the News/all_the_news1_noents.csv')
train_df = train_df[(train_df.publication == 'Breitbart')]
top10_authors = train_df[['author','id']].groupby('author').count().sort_values(by='id', ascending=False).head(10).reset_index() # this == what the paper proposed
print('The authors are', top10_authors.author.values)
train_df = pd.merge(train_df, top10_authors[['author']], on='author', how='right')
train_df = train_df.groupby(by='author').sample(500, random_state=random_seed)
train_df.shape

The authors are ['Breitbart News' 'Pam Key' 'Charlie Spiering' 'Jerome Hudson'
 'John Hayward' 'Daniel Nussbaum' 'AWR Hawkins' 'Ian Hanchett'
 'Joel B. Pollak' 'Alex Swoyer']


(5000, 10)

### Preprocessing

In [3]:
# if C50 split 50-50
test_frac = 0

# if NOT C50 split 50-50
test_frac = 0.15
test_df = train_df.sample(frac=test_frac, random_state=random_seed)
train_df = train_df.drop(test_df.index).reset_index()
test_df = test_df.reset_index()

le = preprocessing.LabelEncoder()
le.fit(train_df['author'])
train_df['author_id'] = le.transform(train_df['author']).astype(int)
test_df['author_id'] = le.transform(test_df['author']).astype(int)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [4]:
print('The average length of text of an article in training set is', np.mean([len(content.split()) for content in train_df['content'].values]), 
      'and in test set is', np.mean([len(content.split()) for content in test_df['content'].values]))

The average length of text of an article in training set is 365.8708235294118 and in test set is 381.47866666666664


In [5]:
valid_frac = 0.1 / (1 - test_frac)
valid_df = train_df.sample(frac=valid_frac, random_state=random_seed)
train_df = train_df.drop(valid_df.index).reset_index()
valid_df = valid_df.reset_index()

In [6]:
def debug():
    # text = tokenizer("Commonwealth _ of Australia (_) managing", padding='max_length', max_length = 32, truncation=True, return_tensors="pt")
    # print(text)
    # mask_val = tokenizer.vocab.get('_')
    # text['attention_mask'] = torch.where(text['input_ids'] == mask_val, 0, text['attention_mask'])
    # print(text)
    text = "Westpac Banking Corp Ltd is expected to report on Tuesday that its net profit growth was reined"
    sub = "Banking"
    i = text.find(sub)
    print(i, text[i-1], text[i+len(sub)])
    if i == -1 or (i > 0 and text[i-1].isalpha()) or (i+len(sub) < len(text) and text[i+len(sub)].isalpha()):
        print('no')
    else:
        print('yes')

debug()

8    
yes


In [7]:
def mask_contents(threshold):
    vectorizer = TfidfVectorizer(ngram_range=(1, 2), lowercase=False) #, stop_words='english')
    vectorizer.fit(pd.concat([train_df['content'], valid_df['content']]))
    X_train = vectorizer.transform(train_df['content'])
    X_valid = vectorizer.transform(valid_df['content'])
    X_test = vectorizer.transform(test_df['content'])
    feat_names = vectorizer.get_feature_names_out()
    feat_idf = vectorizer.idf_

    def mask_content(X, df):
        mask_words = set(feat_names[feat_idf >= threshold])
        ret = []
        for i in range(df['content'].shape[0]):
            text = df['content'][i]
            text = ' '.join(['_' if word in mask_words else word for word in text.split()])
          # for mask_word in mask_words:
          #   while True:
          #     i = text.find(mask_word)
          #     if i == -1 or (i > 0 and text[i-1].isalpha()) or (i+len(mask_word) < len(text) and text[i+len(mask_word)].isalpha()):
          #       break
          #     text = text[:i] + '_' + text[i+len(mask_word):]
            ret.append(text)
        return ret

    train_df['masked_content'] = mask_content(X_train, train_df)
    valid_df['masked_content'] = mask_content(X_valid, valid_df)
    test_df['masked_content'] = mask_content(X_test, test_df)

### Dataset Class

In [8]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self, df):

        self.labels = df['author_id'].values
        self.texts = [tokenizer(text, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for text in df['masked_content']]
        # add attention mask
        mask_val = tokenizer.vocab.get('_')
        for text in self.texts:
            text['attention_mask'] = torch.where(text['input_ids'] == mask_val, 0, text['attention_mask'])

    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        # Fetch a batch of labels
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        # Fetch a batch of inputs
        return self.texts[idx]

    def __getitem__(self, idx):

        batch_texts = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_texts, batch_y

### Model Class

In [9]:
from torch import nn
from transformers import DistilBertForSequenceClassification, BertForSequenceClassification, DebertaForSequenceClassification, get_linear_schedule_with_warmup

class BertClassifier(nn.Module):

    def __init__(self, dropout=0.5):

        super().__init__()

        self.bert = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(le.classes_))
        # self.dropout = nn.Dropout(dropout)
        # self.linear = nn.Linear(768, 50)
        # self.relu = nn.ReLU()

    def forward(self, input_id, labels=None):

        # pooled_output, = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
        # pooled_output = pooled_output[:,0,:]
        # dropout_output = self.dropout(pooled_output)
        # linear_output = self.linear(dropout_output)
        # final_layer = self.relu(linear_output)
        # print(input_id.shape, labels.shape)
        final_layer = self.bert(input_id, labels=labels)

        return final_layer

### Training

In [11]:
# !git clone https://gist.github.com/NTT123/4596e5533e573c8ceab2f319ab5d36a2 jslog
# import random
# import math
# import time
# from jslog.jslogger import JSLogger

# logger = JSLogger('train/valid loss', ['train', 'valid'])
# logger_ = JSLogger('train/valid accuracy', ['train', 'valid'])

In [10]:
from torch.optim import AdamW
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

def train(model, train_data, val_data, learning_rate, epochs, batch_size):

    train, val = Dataset(train_data), Dataset(val_data)

    train_dataloader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=batch_size)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    lr_scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=100,
        num_training_steps=len(train_dataloader) * epochs,
    )

    if use_cuda:
        
        print('cuda version', torch.__version__)
        model = model.cuda()
        criterion = criterion.cuda()

    for epoch_num in range(epochs):
        
        train_loss = 0
        train_pred = []
        train_truth = []
        model.train()

        for train_input, train_label in tqdm(train_dataloader):

            train_label = train_label.to(device)
            # mask = train_input['attention_mask'].to(device)
            input_id = train_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, train_label)
                
            # batch_loss = criterion(output, train_label.long())
            batch_loss = output.loss
            train_loss += batch_loss.item()
            train_pred.append(output.logits.argmax(dim=1))
            train_truth.append(train_label)

            model.zero_grad()
            batch_loss.backward()
            optimizer.step()
            lr_scheduler.step()
                
        train_truth = torch.cat(train_truth).detach().cpu().numpy()
        train_pred = torch.cat(train_pred).detach().cpu().numpy()
        train_acc = accuracy_score(train_truth, train_pred)
        train_f1 = f1_score(train_truth, train_pred, average='macro')
            
        val_loss = 0
        val_pred = []
        val_truth = []
        model.eval()

        with torch.no_grad():

            for val_input, val_label in val_dataloader:

                val_label = val_label.to(device)
                # mask = val_input['attention_mask'].to(device)
                input_id = val_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, val_label)

                # batch_loss = criterion(output, val_label.long())
                batch_loss = output.loss
                val_loss += batch_loss.item()
                val_pred.append(output.logits.argmax(dim=1))
                val_truth.append(val_label)
            
        val_truth = torch.cat(val_truth).detach().cpu().numpy()
        val_pred = torch.cat(val_pred).detach().cpu().numpy()
        val_acc = accuracy_score(val_truth, val_pred)
        val_f1 = f1_score(val_truth, val_pred, average='macro')
            
        print('Epochs: {} | Train Loss: {:.4f} | Train Accuracy: {:.4f} | Train F1: {:.4f}'\
                .format(epoch_num + 1, train_loss, train_acc, train_f1))
        print('           | Val Loss:   {:.4f} | Val Accuracy:   {:.4f} | Val F1:   {:.4f}'\
                .format(val_loss, val_acc, val_f1))
            
        # logger.log(epoch_num + 1, {'train': total_loss_train / len(train_data), 'valid': total_loss_val / len(val_data)})
        # logger_.log(epoch_num + 1, {'train': total_acc_train / len(train_data), 'valid': total_acc_val / len(val_data)})
    return train_acc, train_f1, val_acc, val_f1

# logger.show()
# logger_.show()
pd.options.display.max_colwidth = 100       
EPOCHS = 10
LR = 2e-5
BATCH_SIZE = 16
model = BertClassifier()
print('There are', sum(p.numel() for p in model.parameters()), 'parameters')

mask_contents(threshold=10.)
print(train_df['masked_content'])
              
# train(model, train_df, valid_df, LR, EPOCHS, BATCH_SIZE)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

There are 109489930 parameters
0       released a statement via email in which she said the decision is a clear demonstration of the im...
1       Former and current congressional candidate believes Lt. Governor gun control ballot initiative w...
2       On was suspended for after bringing an empty . shell casing to school to show his friends. [’s s...
4       The ’s and , held in , , produced a “economic impact. ”[According to The this made the conventio...
                                                       ...                                                 
3745    In a preview clip of an interview set to air on broadcast of “,” President said Presidents and w...
3746    at the League Conference, Director said law enforcement “officers and deputies and agents” need ...
3747    at a press gathering on the campaign trail in , , Sen. ( ) responded to the controversy involvin...
3748    on ’s “,” columnist for said some are at stage of panic over presumptive presidential nominee . .

### Evaluate

In [11]:
def evaluate(model, test_data):

    test = Dataset(test_data)

    test_dataloader = torch.utils.data.DataLoader(test, batch_size=32)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    if use_cuda:

        model = model.cuda()

    test_pred = []
    test_truth = []
    model.eval()
    
    with torch.no_grad():

        for test_input, test_label in test_dataloader:
            
            test_label = test_label.to(device)
            # mask = test_input['attention_mask'].to(device)
            input_id = test_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, test_label)

            test_pred.append(output.logits.argmax(dim=1))
            test_truth.append(test_label)
        
        test_truth = torch.cat(test_truth).detach().cpu().numpy()
        test_pred = torch.cat(test_pred).detach().cpu().numpy()
        test_acc = accuracy_score(test_truth, test_pred)
        test_f1 = f1_score(test_truth, test_pred, average='macro')
    
    print('Test Accuracy:  {:.4f} | Test F1:  {:.4f}'.format(test_acc, test_f1))
    return test_acc, test_f1
    
# print(test_df['masked_content'])
# evaluate(model, test_df)

### Fine-Tuning

In [14]:
def fine_tuning(lr_list, epoch_list, batch_list, checkpoint_path='tuning2'):
    
    index_list, index2_list, index3_list = [], [], []
    train_accs, train_f1s = [], []
    test_accs, test_f1s = [], []
    
    for lr in lr_list:
        for epoch in epoch_list:
            for batch_size in batch_list:
            
                model = BertClassifier()
                # mask_contents(threshold=threshold)
                print('lr at {}, epoch at {}, batch_size at {}'.format(lr, epoch, batch_size))
                train_acc, train_f1, val_acc, val_f1 = train(model, train_df, valid_df, lr, epoch, batch_size)
                test_acc, test_f1 = evaluate(model, test_df)

                train_accs.append(train_acc)
                train_f1s.append(train_f1)
                test_accs.append(test_acc)
                test_f1s.append(test_f1)
                index_list.append(lr)
                index2_list.append(epoch)
                index3_list.append(batch_size)

                result_df = pd.DataFrame(index_list, columns=['lr'])
                result_df['epoch'] = index2_list
                result_df['batch_size'] = index3_list
                result_df['train_accs'] = train_accs
                result_df['train_f1s'] = train_f1s
                result_df['test_accs'] = test_accs
                result_df['test_f1s'] = test_f1s
                result_df['model_name'] = 'bert_uncased'
                result_df.to_csv(checkpoint_path + '.csv', index=False)
                # break
    return index_list, index2_list, index3_list, train_accs, train_f1s, test_accs, test_f1s

In [15]:
# C50 [5e-5 + 10 + 16, 2e-5 + 15 + 16, 1e-5 + 15 + 8]
# All The News []
lr_list = [2e-5] #[5e-5, 2e-5, 1e-5]
epoch_list = [15] #[10, 15]
batch_list = [8, 16, 32] #[8, 16, 32]
fine_tuning(lr_list=lr_list, epoch_list=epoch_list, batch_list=batch_list)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

lr at 2e-05, epoch at 15, batch_size at 8
cuda version 1.13.0


100%|█████████████████████████████████████████| 469/469 [01:41<00:00,  4.60it/s]


Epochs: 1 | Train Loss: 595.1981 | Train Accuracy: 0.6075 | Train F1: 0.6097
           | Val Loss:   29.6077 | Val Accuracy:   0.8580 | Val F1:   0.8653


100%|█████████████████████████████████████████| 469/469 [01:42<00:00,  4.57it/s]


Epochs: 2 | Train Loss: 177.7380 | Train Accuracy: 0.8931 | Train F1: 0.8932
           | Val Loss:   26.3167 | Val Accuracy:   0.8680 | Val F1:   0.8701


100%|█████████████████████████████████████████| 469/469 [01:42<00:00,  4.57it/s]


Epochs: 3 | Train Loss: 84.0845 | Train Accuracy: 0.9528 | Train F1: 0.9527
           | Val Loss:   16.1461 | Val Accuracy:   0.9220 | Val F1:   0.9261


 30%|████████████▌                            | 143/469 [00:31<01:11,  4.55it/s]


KeyboardInterrupt: 

### Grid Search IDF

In [14]:
import warnings

def grid_search_idf(threshold_list, checkpoint_path='checkpoint'):
    
    index_list = []
    train_accs, train_f1s = [], []
    test_accs, test_f1s = [], []
    
    for threshold in threshold_list:
        
        # warnings.filterwarnings(action='once')
        model = BertClassifier()
        mask_contents(threshold=threshold)
        print('idf removal threshold at', threshold)
        print(train_df['masked_content'])
        train_acc, train_f1, val_acc, val_f1 = train(model, train_df, valid_df, LR, EPOCHS, BATCH_SIZE)
        test_acc, test_f1 = evaluate(model, test_df)
        
        train_accs.append(train_acc)
        train_f1s.append(train_f1)
        test_accs.append(test_acc)
        test_f1s.append(test_f1)
        index_list.append(threshold)
        
        result_df = pd.DataFrame(index_list, columns=['idf_threshold'])
        result_df['train_accs'] = train_accs
        result_df['train_f1s'] = train_f1s
        result_df['test_accs'] = test_accs
        result_df['test_f1s'] = test_f1s
        result_df['model_name'] = 'bert_uncased'
        result_df.to_csv(checkpoint_path + '.csv', index=False)
        # break
    return index_list, train_accs, train_f1s, test_accs, test_f1s

In [15]:
pd.options.display.max_colwidth = 100            
EPOCHS = 15
LR = 2e-5
BATCH_SIZE = 16
grid_search_idf(threshold_list=np.arange(10., 1., -0.5))

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 10.0
0       The capital market, roundly criticised for a lack of transparency, needs an independent regulato...
1       beat the Republic 3-0 in a ice hockey game on , setting up a showdown for group supremacy agains...
2       The fall in the trade deficit to crowns in from in buoyed market sentiment, and the goods import...
3       paper concern a.s. on said its net profit fell as a shutdown of its main paper mill, coupled wit...
4       shares rallied on following the coalition government's win in elections, but analysts said the r...
                                                       ...                                                 
2245    "" -- counter-culture anthem of the 's -- is being used in a commericial for a bank and some fan...
2246    , the flamboyant stock picker hired recently to spearhead drive to dominate the market, has been...
2247    , 's biggest city and financial capital, is bracing for a near shutdown when protesters hit the ..

100%|█████████████████████████████████████████| 141/141 [01:38<00:00,  1.43it/s]


Epochs: 1 | Train Loss: 550.2232 | Train Accuracy: 0.0351 | Train F1: 0.0176
           | Val Loss:   59.8704 | Val Accuracy:   0.0680 | Val F1:   0.0356


100%|█████████████████████████████████████████| 141/141 [01:23<00:00,  1.69it/s]


Epochs: 2 | Train Loss: 461.5917 | Train Accuracy: 0.2862 | Train F1: 0.2549
           | Val Loss:   45.1188 | Val Accuracy:   0.4280 | Val F1:   0.3670


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 3 | Train Loss: 346.1387 | Train Accuracy: 0.5351 | Train F1: 0.4899
           | Val Loss:   34.3918 | Val Accuracy:   0.5320 | Val F1:   0.4807


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 4 | Train Loss: 268.3808 | Train Accuracy: 0.6373 | Train F1: 0.6070
           | Val Loss:   28.9515 | Val Accuracy:   0.5960 | Val F1:   0.5594


100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.66it/s]


Epochs: 5 | Train Loss: 212.7974 | Train Accuracy: 0.7253 | Train F1: 0.7050
           | Val Loss:   24.0788 | Val Accuracy:   0.6760 | Val F1:   0.6336


100%|█████████████████████████████████████████| 141/141 [01:23<00:00,  1.68it/s]


Epochs: 6 | Train Loss: 169.2888 | Train Accuracy: 0.8062 | Train F1: 0.7973
           | Val Loss:   20.8430 | Val Accuracy:   0.6640 | Val F1:   0.6451


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 7 | Train Loss: 138.5270 | Train Accuracy: 0.8542 | Train F1: 0.8501
           | Val Loss:   19.0347 | Val Accuracy:   0.7240 | Val F1:   0.7085


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 8 | Train Loss: 113.5811 | Train Accuracy: 0.8991 | Train F1: 0.8951
           | Val Loss:   17.5416 | Val Accuracy:   0.7160 | Val F1:   0.6985


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 9 | Train Loss: 93.7077 | Train Accuracy: 0.9302 | Train F1: 0.9283
           | Val Loss:   15.2412 | Val Accuracy:   0.7760 | Val F1:   0.7644


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 10 | Train Loss: 78.8180 | Train Accuracy: 0.9538 | Train F1: 0.9528
           | Val Loss:   14.7126 | Val Accuracy:   0.7840 | Val F1:   0.7818


100%|█████████████████████████████████████████| 141/141 [01:22<00:00,  1.70it/s]


Epochs: 11 | Train Loss: 66.5752 | Train Accuracy: 0.9747 | Train F1: 0.9741
           | Val Loss:   13.6979 | Val Accuracy:   0.7960 | Val F1:   0.7883


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.64it/s]


Epochs: 12 | Train Loss: 58.5916 | Train Accuracy: 0.9809 | Train F1: 0.9809
           | Val Loss:   13.4515 | Val Accuracy:   0.8000 | Val F1:   0.7965


100%|█████████████████████████████████████████| 141/141 [01:23<00:00,  1.68it/s]


Epochs: 13 | Train Loss: 52.2945 | Train Accuracy: 0.9898 | Train F1: 0.9897
           | Val Loss:   12.9643 | Val Accuracy:   0.7920 | Val F1:   0.7854


100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.67it/s]


Epochs: 14 | Train Loss: 48.9512 | Train Accuracy: 0.9902 | Train F1: 0.9903
           | Val Loss:   12.6345 | Val Accuracy:   0.8000 | Val F1:   0.7874


100%|█████████████████████████████████████████| 141/141 [01:23<00:00,  1.68it/s]


Epochs: 15 | Train Loss: 46.5174 | Train Accuracy: 0.9960 | Train F1: 0.9960
           | Val Loss:   12.6025 | Val Accuracy:   0.8080 | Val F1:   0.7992
Test Accuracy:  0.6092 | Test F1:  0.6055


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 9.5
0       The capital market, roundly criticised for a lack of transparency, needs an independent regulato...
1       beat the Republic 3-0 in a ice hockey game on , setting up a showdown for group supremacy agains...
2       The fall in the trade deficit to crowns in from in buoyed market sentiment, and the goods import...
3       paper concern a.s. on said its net profit fell as a shutdown of its main paper mill, coupled wit...
4       shares rallied on following the coalition government's win in elections, but analysts said the r...
                                                       ...                                                 
2245    "" -- counter-culture anthem of the 's -- is being used in a commericial for a bank and some fan...
2246    , the flamboyant stock picker hired recently to spearhead drive to dominate the market, has been...
2247    , 's biggest city and financial capital, is bracing for a near shutdown when protesters hit the ...

100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.67it/s]


Epochs: 1 | Train Loss: 552.1946 | Train Accuracy: 0.0351 | Train F1: 0.0230
           | Val Loss:   61.1238 | Val Accuracy:   0.0600 | Val F1:   0.0276


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.64it/s]


Epochs: 2 | Train Loss: 483.0313 | Train Accuracy: 0.2396 | Train F1: 0.2033
           | Val Loss:   47.6166 | Val Accuracy:   0.3920 | Val F1:   0.3216


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 3 | Train Loss: 366.6828 | Train Accuracy: 0.4747 | Train F1: 0.4135
           | Val Loss:   37.3574 | Val Accuracy:   0.4800 | Val F1:   0.4276


100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.66it/s]


Epochs: 4 | Train Loss: 280.5803 | Train Accuracy: 0.6067 | Train F1: 0.5629
           | Val Loss:   29.8069 | Val Accuracy:   0.5680 | Val F1:   0.5052


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 5 | Train Loss: 218.6320 | Train Accuracy: 0.7164 | Train F1: 0.6902
           | Val Loss:   25.0693 | Val Accuracy:   0.6080 | Val F1:   0.5702


100%|█████████████████████████████████████████| 141/141 [01:23<00:00,  1.70it/s]


Epochs: 6 | Train Loss: 169.4768 | Train Accuracy: 0.8111 | Train F1: 0.7971
           | Val Loss:   20.9934 | Val Accuracy:   0.6720 | Val F1:   0.6296


100%|█████████████████████████████████████████| 141/141 [01:22<00:00,  1.71it/s]


Epochs: 7 | Train Loss: 132.2210 | Train Accuracy: 0.8711 | Train F1: 0.8616
           | Val Loss:   18.5356 | Val Accuracy:   0.7080 | Val F1:   0.6887


100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.67it/s]


Epochs: 8 | Train Loss: 105.2219 | Train Accuracy: 0.9222 | Train F1: 0.9201
           | Val Loss:   16.8369 | Val Accuracy:   0.7360 | Val F1:   0.7207


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 9 | Train Loss: 84.6603 | Train Accuracy: 0.9498 | Train F1: 0.9483
           | Val Loss:   15.1876 | Val Accuracy:   0.7560 | Val F1:   0.7329


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 10 | Train Loss: 68.9760 | Train Accuracy: 0.9689 | Train F1: 0.9679
           | Val Loss:   13.9397 | Val Accuracy:   0.7680 | Val F1:   0.7431


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 11 | Train Loss: 57.9840 | Train Accuracy: 0.9822 | Train F1: 0.9820
           | Val Loss:   13.2110 | Val Accuracy:   0.7880 | Val F1:   0.7740


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 12 | Train Loss: 49.9220 | Train Accuracy: 0.9898 | Train F1: 0.9896
           | Val Loss:   12.6932 | Val Accuracy:   0.7960 | Val F1:   0.7727


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 13 | Train Loss: 44.2721 | Train Accuracy: 0.9933 | Train F1: 0.9932
           | Val Loss:   12.4584 | Val Accuracy:   0.8040 | Val F1:   0.7881


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 14 | Train Loss: 41.0203 | Train Accuracy: 0.9964 | Train F1: 0.9964
           | Val Loss:   12.1899 | Val Accuracy:   0.8000 | Val F1:   0.7788


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 15 | Train Loss: 39.3353 | Train Accuracy: 0.9978 | Train F1: 0.9977
           | Val Loss:   12.0659 | Val Accuracy:   0.8000 | Val F1:   0.7821
Test Accuracy:  0.6280 | Test F1:  0.6259


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 9.0
0       The capital market, roundly criticised for a lack of transparency, needs an independent regulato...
1       beat the Republic 3-0 in a ice hockey game on , setting up a showdown for group supremacy agains...
2       The fall in the trade deficit to crowns in from in buoyed market sentiment, and the goods import...
3       paper concern a.s. on said its net profit fell as a shutdown of its main paper mill, coupled wit...
4       shares rallied on following the coalition government's win in elections, but analysts said the r...
                                                       ...                                                 
2245    "" -- counter-culture anthem of the 's -- is being used in a commericial for a bank and some fan...
2246    , the flamboyant stock picker hired recently to spearhead drive to dominate the market, has been...
2247    , 's biggest city and financial capital, is bracing for a near shutdown when protesters hit the ...

100%|█████████████████████████████████████████| 141/141 [01:22<00:00,  1.72it/s]


Epochs: 1 | Train Loss: 545.6690 | Train Accuracy: 0.0391 | Train F1: 0.0310
           | Val Loss:   56.8319 | Val Accuracy:   0.2280 | Val F1:   0.1398


100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.66it/s]


Epochs: 2 | Train Loss: 445.5411 | Train Accuracy: 0.3191 | Train F1: 0.2811
           | Val Loss:   42.7870 | Val Accuracy:   0.5240 | Val F1:   0.4227


100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.67it/s]


Epochs: 3 | Train Loss: 338.6640 | Train Accuracy: 0.5360 | Train F1: 0.4960
           | Val Loss:   34.1993 | Val Accuracy:   0.5560 | Val F1:   0.4679


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 4 | Train Loss: 260.6475 | Train Accuracy: 0.6564 | Train F1: 0.6315
           | Val Loss:   27.6729 | Val Accuracy:   0.6160 | Val F1:   0.5299


100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.66it/s]


Epochs: 5 | Train Loss: 205.7296 | Train Accuracy: 0.7311 | Train F1: 0.7138
           | Val Loss:   23.1741 | Val Accuracy:   0.6680 | Val F1:   0.6216


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.66it/s]


Epochs: 6 | Train Loss: 162.0240 | Train Accuracy: 0.8222 | Train F1: 0.8126
           | Val Loss:   19.9237 | Val Accuracy:   0.7280 | Val F1:   0.6754


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 7 | Train Loss: 128.2476 | Train Accuracy: 0.8738 | Train F1: 0.8686
           | Val Loss:   17.7218 | Val Accuracy:   0.7520 | Val F1:   0.7175


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 8 | Train Loss: 101.2882 | Train Accuracy: 0.9276 | Train F1: 0.9249
           | Val Loss:   16.8412 | Val Accuracy:   0.7600 | Val F1:   0.7211


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 9 | Train Loss: 82.1711 | Train Accuracy: 0.9569 | Train F1: 0.9564
           | Val Loss:   14.7878 | Val Accuracy:   0.7800 | Val F1:   0.7648


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.60it/s]


Epochs: 10 | Train Loss: 67.3053 | Train Accuracy: 0.9773 | Train F1: 0.9773
           | Val Loss:   14.2406 | Val Accuracy:   0.7680 | Val F1:   0.7535


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 11 | Train Loss: 57.0654 | Train Accuracy: 0.9876 | Train F1: 0.9875
           | Val Loss:   13.7323 | Val Accuracy:   0.7720 | Val F1:   0.7563


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 12 | Train Loss: 49.9826 | Train Accuracy: 0.9907 | Train F1: 0.9906
           | Val Loss:   12.9256 | Val Accuracy:   0.7800 | Val F1:   0.7685


100%|█████████████████████████████████████████| 141/141 [01:29<00:00,  1.58it/s]


Epochs: 13 | Train Loss: 44.1601 | Train Accuracy: 0.9956 | Train F1: 0.9956
           | Val Loss:   12.8948 | Val Accuracy:   0.7880 | Val F1:   0.7756


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 14 | Train Loss: 40.8485 | Train Accuracy: 0.9964 | Train F1: 0.9964
           | Val Loss:   12.6227 | Val Accuracy:   0.7760 | Val F1:   0.7606


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 15 | Train Loss: 39.2317 | Train Accuracy: 0.9969 | Train F1: 0.9969
           | Val Loss:   12.6787 | Val Accuracy:   0.7800 | Val F1:   0.7675
Test Accuracy:  0.6204 | Test F1:  0.6162


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 8.5
0       The capital market, roundly criticised for a lack of transparency, needs an independent regulato...
1       beat the Republic 3-0 in a ice hockey game on , setting up a showdown for group supremacy agains...
2       The fall in the trade deficit to crowns in from in buoyed market sentiment, and the goods import...
3       paper concern a.s. on said its net profit fell as a shutdown of its main paper mill, coupled wit...
4       shares rallied on following the coalition government's win in elections, but analysts said the r...
                                                       ...                                                 
2245    "" -- counter-culture anthem of the 's -- is being used in a commericial for a bank and some fan...
2246    , the flamboyant stock picker hired recently to spearhead drive to dominate the market, has been...
2247    , 's biggest city and financial capital, is bracing for a near shutdown when protesters hit the ...

100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 1 | Train Loss: 547.8119 | Train Accuracy: 0.0356 | Train F1: 0.0247
           | Val Loss:   59.9728 | Val Accuracy:   0.0680 | Val F1:   0.0340


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 2 | Train Loss: 469.1858 | Train Accuracy: 0.2507 | Train F1: 0.2128
           | Val Loss:   45.0238 | Val Accuracy:   0.4600 | Val F1:   0.3663


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 3 | Train Loss: 357.3934 | Train Accuracy: 0.5049 | Train F1: 0.4607
           | Val Loss:   35.5113 | Val Accuracy:   0.5240 | Val F1:   0.4499


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 4 | Train Loss: 277.5387 | Train Accuracy: 0.6307 | Train F1: 0.6063
           | Val Loss:   28.7479 | Val Accuracy:   0.6040 | Val F1:   0.5383


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 5 | Train Loss: 217.5627 | Train Accuracy: 0.7324 | Train F1: 0.7196
           | Val Loss:   24.2615 | Val Accuracy:   0.6320 | Val F1:   0.5946


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 6 | Train Loss: 173.2185 | Train Accuracy: 0.7978 | Train F1: 0.7897
           | Val Loss:   20.7701 | Val Accuracy:   0.7080 | Val F1:   0.6823


100%|█████████████████████████████████████████| 141/141 [01:29<00:00,  1.58it/s]


Epochs: 7 | Train Loss: 138.7811 | Train Accuracy: 0.8644 | Train F1: 0.8616
           | Val Loss:   18.6614 | Val Accuracy:   0.7080 | Val F1:   0.6703


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 8 | Train Loss: 112.7359 | Train Accuracy: 0.9000 | Train F1: 0.8975
           | Val Loss:   16.4997 | Val Accuracy:   0.7400 | Val F1:   0.6881


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 9 | Train Loss: 91.4834 | Train Accuracy: 0.9391 | Train F1: 0.9378
           | Val Loss:   15.1998 | Val Accuracy:   0.7640 | Val F1:   0.7382


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 10 | Train Loss: 75.3522 | Train Accuracy: 0.9644 | Train F1: 0.9639
           | Val Loss:   14.7451 | Val Accuracy:   0.7640 | Val F1:   0.7372


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 11 | Train Loss: 63.8301 | Train Accuracy: 0.9778 | Train F1: 0.9775
           | Val Loss:   13.5316 | Val Accuracy:   0.7920 | Val F1:   0.7710


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 12 | Train Loss: 55.7171 | Train Accuracy: 0.9836 | Train F1: 0.9831
           | Val Loss:   12.9863 | Val Accuracy:   0.8080 | Val F1:   0.7838


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 13 | Train Loss: 48.8430 | Train Accuracy: 0.9911 | Train F1: 0.9910
           | Val Loss:   12.7448 | Val Accuracy:   0.7880 | Val F1:   0.7706


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 14 | Train Loss: 45.3138 | Train Accuracy: 0.9960 | Train F1: 0.9960
           | Val Loss:   12.3038 | Val Accuracy:   0.7960 | Val F1:   0.7769


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 15 | Train Loss: 43.1101 | Train Accuracy: 0.9960 | Train F1: 0.9960
           | Val Loss:   12.3001 | Val Accuracy:   0.8120 | Val F1:   0.7915
Test Accuracy:  0.6108 | Test F1:  0.6039


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 8.0
0       The capital market, roundly criticised for a lack of transparency, needs an independent regulato...
1       beat the Republic 3-0 in a ice hockey game on , setting up a showdown for group supremacy agains...
2       The fall in the trade deficit to crowns in from in buoyed market sentiment, and the goods import...
3       paper concern a.s. on said its net profit fell as a shutdown of its main paper mill, coupled wit...
4       shares rallied on following the coalition government's win in elections, but analysts said the r...
                                                       ...                                                 
2245    "" -- counter-culture anthem of the 's -- is being used in a _ for a bank and some fans are up i...
2246    , the flamboyant stock picker hired recently to spearhead drive to dominate the market, has been...
2247    , 's biggest city and financial capital, is _ for a near shutdown when protesters hit the street...

100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.68it/s]


Epochs: 1 | Train Loss: 544.3266 | Train Accuracy: 0.0547 | Train F1: 0.0354
           | Val Loss:   56.4350 | Val Accuracy:   0.1480 | Val F1:   0.0763


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 2 | Train Loss: 441.3210 | Train Accuracy: 0.3013 | Train F1: 0.2578
           | Val Loss:   42.6535 | Val Accuracy:   0.4320 | Val F1:   0.3305


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 3 | Train Loss: 336.2253 | Train Accuracy: 0.5209 | Train F1: 0.4698
           | Val Loss:   33.2633 | Val Accuracy:   0.6000 | Val F1:   0.5168


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 4 | Train Loss: 262.3604 | Train Accuracy: 0.6533 | Train F1: 0.6263
           | Val Loss:   27.5371 | Val Accuracy:   0.6000 | Val F1:   0.5385


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 5 | Train Loss: 207.3152 | Train Accuracy: 0.7218 | Train F1: 0.7054
           | Val Loss:   23.9238 | Val Accuracy:   0.6480 | Val F1:   0.5958


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 6 | Train Loss: 166.5604 | Train Accuracy: 0.7969 | Train F1: 0.7855
           | Val Loss:   19.9664 | Val Accuracy:   0.7160 | Val F1:   0.6778


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 7 | Train Loss: 135.9366 | Train Accuracy: 0.8453 | Train F1: 0.8370
           | Val Loss:   18.0559 | Val Accuracy:   0.6920 | Val F1:   0.6624


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 8 | Train Loss: 110.9053 | Train Accuracy: 0.8938 | Train F1: 0.8848
           | Val Loss:   16.2063 | Val Accuracy:   0.7280 | Val F1:   0.7063


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 9 | Train Loss: 91.8830 | Train Accuracy: 0.9271 | Train F1: 0.9205
           | Val Loss:   15.0072 | Val Accuracy:   0.7640 | Val F1:   0.7476


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 10 | Train Loss: 78.0685 | Train Accuracy: 0.9480 | Train F1: 0.9422
           | Val Loss:   14.0629 | Val Accuracy:   0.7640 | Val F1:   0.7502


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 11 | Train Loss: 65.7859 | Train Accuracy: 0.9636 | Train F1: 0.9605
           | Val Loss:   13.8327 | Val Accuracy:   0.7560 | Val F1:   0.7408


100%|█████████████████████████████████████████| 141/141 [01:29<00:00,  1.57it/s]


Epochs: 12 | Train Loss: 57.4366 | Train Accuracy: 0.9769 | Train F1: 0.9736
           | Val Loss:   13.1504 | Val Accuracy:   0.7720 | Val F1:   0.7573


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 13 | Train Loss: 51.8157 | Train Accuracy: 0.9813 | Train F1: 0.9797
           | Val Loss:   12.8255 | Val Accuracy:   0.7720 | Val F1:   0.7588


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 14 | Train Loss: 47.8295 | Train Accuracy: 0.9871 | Train F1: 0.9862
           | Val Loss:   12.5425 | Val Accuracy:   0.7680 | Val F1:   0.7587


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 15 | Train Loss: 45.8359 | Train Accuracy: 0.9884 | Train F1: 0.9877
           | Val Loss:   12.6230 | Val Accuracy:   0.7640 | Val F1:   0.7489
Test Accuracy:  0.6092 | Test F1:  0.6035


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 7.5
0       The capital market, _ criticised for a lack of transparency, needs an independent regulator, but...
1       beat the Republic 3-0 in a ice hockey game on , setting up a showdown for group _ against and le...
2       The fall in the trade deficit to crowns in from in buoyed market sentiment, and the goods import...
3       paper concern a.s. on said its net profit fell as a shutdown of its main paper mill, coupled wit...
4       shares rallied on following the coalition government's win in elections, but analysts said the r...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is being used in a _ for a bank and some fans are up in arm...
2246    , the flamboyant stock picker hired recently to spearhead drive to dominate the market, has been...
2247    , 's biggest city and financial capital, is _ for a near shutdown when protesters hit the street...

100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.67it/s]


Epochs: 1 | Train Loss: 543.1617 | Train Accuracy: 0.0444 | Train F1: 0.0378
           | Val Loss:   56.8691 | Val Accuracy:   0.1320 | Val F1:   0.1075


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 2 | Train Loss: 438.6305 | Train Accuracy: 0.3196 | Train F1: 0.2841
           | Val Loss:   42.7082 | Val Accuracy:   0.4200 | Val F1:   0.3502


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 3 | Train Loss: 337.5981 | Train Accuracy: 0.5333 | Train F1: 0.4953
           | Val Loss:   33.9039 | Val Accuracy:   0.5360 | Val F1:   0.4971


100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.66it/s]


Epochs: 4 | Train Loss: 264.8288 | Train Accuracy: 0.6449 | Train F1: 0.6258
           | Val Loss:   27.7451 | Val Accuracy:   0.6400 | Val F1:   0.6101


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.60it/s]


Epochs: 5 | Train Loss: 209.1207 | Train Accuracy: 0.7440 | Train F1: 0.7360
           | Val Loss:   23.9122 | Val Accuracy:   0.6800 | Val F1:   0.6453


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.60it/s]


Epochs: 6 | Train Loss: 165.5356 | Train Accuracy: 0.8116 | Train F1: 0.8064
           | Val Loss:   20.2934 | Val Accuracy:   0.7120 | Val F1:   0.7023


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 7 | Train Loss: 133.6930 | Train Accuracy: 0.8560 | Train F1: 0.8522
           | Val Loss:   17.8711 | Val Accuracy:   0.7400 | Val F1:   0.7301


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 8 | Train Loss: 108.4332 | Train Accuracy: 0.9093 | Train F1: 0.9077
           | Val Loss:   16.6376 | Val Accuracy:   0.7440 | Val F1:   0.7214


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 9 | Train Loss: 90.6728 | Train Accuracy: 0.9316 | Train F1: 0.9300
           | Val Loss:   15.4686 | Val Accuracy:   0.7480 | Val F1:   0.7381


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 10 | Train Loss: 75.0130 | Train Accuracy: 0.9618 | Train F1: 0.9612
           | Val Loss:   13.9986 | Val Accuracy:   0.7760 | Val F1:   0.7622


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 11 | Train Loss: 64.2943 | Train Accuracy: 0.9733 | Train F1: 0.9730
           | Val Loss:   13.6550 | Val Accuracy:   0.7920 | Val F1:   0.7839


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 12 | Train Loss: 56.1876 | Train Accuracy: 0.9862 | Train F1: 0.9860
           | Val Loss:   13.3161 | Val Accuracy:   0.8000 | Val F1:   0.7846


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 13 | Train Loss: 50.2843 | Train Accuracy: 0.9907 | Train F1: 0.9906
           | Val Loss:   13.0983 | Val Accuracy:   0.8200 | Val F1:   0.8009


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.64it/s]


Epochs: 14 | Train Loss: 46.1909 | Train Accuracy: 0.9947 | Train F1: 0.9946
           | Val Loss:   12.7936 | Val Accuracy:   0.8080 | Val F1:   0.7918


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 15 | Train Loss: 44.1613 | Train Accuracy: 0.9960 | Train F1: 0.9959
           | Val Loss:   12.6964 | Val Accuracy:   0.8280 | Val F1:   0.8131
Test Accuracy:  0.6056 | Test F1:  0.6007


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 7.0
0       The capital market, _ criticised for a lack of transparency, needs an independent regulator, but...
1       beat the Republic 3-0 in a ice hockey game on , setting up a showdown for group _ against and le...
2       The fall in the trade deficit to crowns in from in buoyed market sentiment, and the goods import...
3       paper concern a.s. on said its net profit fell as a shutdown of its main paper mill, coupled wit...
4       shares rallied on following the coalition government's win in elections, but analysts said the r...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is being used in a _ for a bank and some fans are up in arm...
2246    , the flamboyant stock picker hired recently to _ drive to dominate the market, has been removed...
2247    , 's biggest city and financial capital, is _ for a near shutdown when protesters hit the street...

100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 1 | Train Loss: 547.8157 | Train Accuracy: 0.0396 | Train F1: 0.0266
           | Val Loss:   57.8942 | Val Accuracy:   0.0880 | Val F1:   0.0659


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 2 | Train Loss: 450.0931 | Train Accuracy: 0.3040 | Train F1: 0.2617
           | Val Loss:   43.7971 | Val Accuracy:   0.4840 | Val F1:   0.3806


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 3 | Train Loss: 340.5478 | Train Accuracy: 0.5284 | Train F1: 0.4859
           | Val Loss:   34.1065 | Val Accuracy:   0.5560 | Val F1:   0.4521


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 4 | Train Loss: 264.7741 | Train Accuracy: 0.6387 | Train F1: 0.6156
           | Val Loss:   27.3134 | Val Accuracy:   0.6680 | Val F1:   0.5954


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 5 | Train Loss: 208.0517 | Train Accuracy: 0.7387 | Train F1: 0.7242
           | Val Loss:   22.8972 | Val Accuracy:   0.7240 | Val F1:   0.6730


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 6 | Train Loss: 164.8351 | Train Accuracy: 0.8102 | Train F1: 0.7984
           | Val Loss:   19.6179 | Val Accuracy:   0.7400 | Val F1:   0.7013


100%|█████████████████████████████████████████| 141/141 [01:30<00:00,  1.56it/s]


Epochs: 7 | Train Loss: 131.5745 | Train Accuracy: 0.8702 | Train F1: 0.8637
           | Val Loss:   17.1826 | Val Accuracy:   0.7840 | Val F1:   0.7414


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 8 | Train Loss: 105.1313 | Train Accuracy: 0.9107 | Train F1: 0.9076
           | Val Loss:   15.6629 | Val Accuracy:   0.7920 | Val F1:   0.7763


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 9 | Train Loss: 85.1930 | Train Accuracy: 0.9480 | Train F1: 0.9469
           | Val Loss:   14.2413 | Val Accuracy:   0.7880 | Val F1:   0.7809


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 10 | Train Loss: 71.0432 | Train Accuracy: 0.9627 | Train F1: 0.9620
           | Val Loss:   13.6141 | Val Accuracy:   0.7760 | Val F1:   0.7528


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 11 | Train Loss: 59.9374 | Train Accuracy: 0.9791 | Train F1: 0.9788
           | Val Loss:   13.1307 | Val Accuracy:   0.7920 | Val F1:   0.7858


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 12 | Train Loss: 51.3739 | Train Accuracy: 0.9853 | Train F1: 0.9852
           | Val Loss:   12.6772 | Val Accuracy:   0.7880 | Val F1:   0.7633


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 13 | Train Loss: 46.2469 | Train Accuracy: 0.9876 | Train F1: 0.9873
           | Val Loss:   12.4155 | Val Accuracy:   0.7960 | Val F1:   0.7842


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 14 | Train Loss: 42.6094 | Train Accuracy: 0.9951 | Train F1: 0.9951
           | Val Loss:   12.3786 | Val Accuracy:   0.7920 | Val F1:   0.7665


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 15 | Train Loss: 40.4833 | Train Accuracy: 0.9982 | Train F1: 0.9982
           | Val Loss:   12.2665 | Val Accuracy:   0.8080 | Val F1:   0.7996
Test Accuracy:  0.6056 | Test F1:  0.6017


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 6.5
0       The capital market, _ criticised for a lack of transparency, needs an independent regulator, but...
1       beat the _ 3-0 in a ice _ game on , setting up a _ for group _ against and leaving the _ world _...
2       The fall in the trade deficit to crowns in from in buoyed market sentiment, and the goods import...
3       paper concern a.s. on said its net profit fell as a shutdown of its main paper mill, coupled wit...
4       shares rallied on following the coalition government's win in elections, but analysts said the r...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is being used in a _ for a bank and some fans are up in arm...
2246    , the _ stock _ hired recently to _ drive to dominate the market, has been removed from her fund...
2247    , 's biggest city and financial capital, is _ for a near shutdown when protesters hit the street...

100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 1 | Train Loss: 546.8009 | Train Accuracy: 0.0440 | Train F1: 0.0368
           | Val Loss:   58.4389 | Val Accuracy:   0.1120 | Val F1:   0.0686


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.64it/s]


Epochs: 2 | Train Loss: 441.4921 | Train Accuracy: 0.3218 | Train F1: 0.2748
           | Val Loss:   42.3960 | Val Accuracy:   0.4360 | Val F1:   0.3528


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 3 | Train Loss: 338.4493 | Train Accuracy: 0.5120 | Train F1: 0.4668
           | Val Loss:   34.4107 | Val Accuracy:   0.5520 | Val F1:   0.4547


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 4 | Train Loss: 263.2964 | Train Accuracy: 0.6480 | Train F1: 0.6260
           | Val Loss:   28.0984 | Val Accuracy:   0.5880 | Val F1:   0.5281


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 5 | Train Loss: 208.2546 | Train Accuracy: 0.7391 | Train F1: 0.7267
           | Val Loss:   24.2735 | Val Accuracy:   0.6800 | Val F1:   0.6449


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.60it/s]


Epochs: 6 | Train Loss: 165.7873 | Train Accuracy: 0.8164 | Train F1: 0.8107
           | Val Loss:   20.9784 | Val Accuracy:   0.6920 | Val F1:   0.6655


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 7 | Train Loss: 132.1105 | Train Accuracy: 0.8804 | Train F1: 0.8755
           | Val Loss:   19.2166 | Val Accuracy:   0.7240 | Val F1:   0.7042


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 8 | Train Loss: 106.6034 | Train Accuracy: 0.9187 | Train F1: 0.9155
           | Val Loss:   16.8579 | Val Accuracy:   0.7280 | Val F1:   0.7241


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 9 | Train Loss: 87.0886 | Train Accuracy: 0.9480 | Train F1: 0.9469
           | Val Loss:   15.4139 | Val Accuracy:   0.7720 | Val F1:   0.7585


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 10 | Train Loss: 71.5476 | Train Accuracy: 0.9640 | Train F1: 0.9633
           | Val Loss:   14.4888 | Val Accuracy:   0.7640 | Val F1:   0.7589


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 11 | Train Loss: 60.7118 | Train Accuracy: 0.9778 | Train F1: 0.9774
           | Val Loss:   13.8176 | Val Accuracy:   0.7920 | Val F1:   0.7843


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 12 | Train Loss: 53.4849 | Train Accuracy: 0.9836 | Train F1: 0.9832
           | Val Loss:   13.7821 | Val Accuracy:   0.7960 | Val F1:   0.7899


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 13 | Train Loss: 47.4676 | Train Accuracy: 0.9911 | Train F1: 0.9910
           | Val Loss:   13.3590 | Val Accuracy:   0.7920 | Val F1:   0.7889


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 14 | Train Loss: 44.6493 | Train Accuracy: 0.9911 | Train F1: 0.9910
           | Val Loss:   13.1845 | Val Accuracy:   0.7920 | Val F1:   0.7878


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 15 | Train Loss: 42.8698 | Train Accuracy: 0.9933 | Train F1: 0.9932
           | Val Loss:   13.1776 | Val Accuracy:   0.7880 | Val F1:   0.7838
Test Accuracy:  0.5932 | Test F1:  0.5884


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 6.0
0       The capital market, _ criticised for a lack of transparency, needs an independent regulator, but...
1       beat the _ 3-0 in a ice _ game on , setting up a _ for group _ against and leaving the _ world _...
2       The fall in the trade deficit to crowns in from in buoyed market sentiment, and the goods import...
3       paper concern a.s. on said its net profit fell as a shutdown of its main paper mill, coupled wit...
4       shares rallied on following the coalition government's win in elections, but analysts said the r...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is being used in a _ for a bank and some fans are up in arm...
2246    , the _ stock _ hired recently to _ drive to dominate the market, has been removed from her fund...
2247    , 's biggest city and financial capital, is _ for a near shutdown when protesters hit the street...

100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 1 | Train Loss: 545.1122 | Train Accuracy: 0.0333 | Train F1: 0.0201
           | Val Loss:   57.4729 | Val Accuracy:   0.1280 | Val F1:   0.0619


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 2 | Train Loss: 456.4119 | Train Accuracy: 0.2480 | Train F1: 0.2137
           | Val Loss:   45.4192 | Val Accuracy:   0.4120 | Val F1:   0.3374


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.64it/s]


Epochs: 3 | Train Loss: 356.5252 | Train Accuracy: 0.4724 | Train F1: 0.4270
           | Val Loss:   36.0088 | Val Accuracy:   0.5000 | Val F1:   0.4420


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 4 | Train Loss: 283.2964 | Train Accuracy: 0.5951 | Train F1: 0.5638
           | Val Loss:   29.6648 | Val Accuracy:   0.6320 | Val F1:   0.5780


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 5 | Train Loss: 224.9873 | Train Accuracy: 0.7107 | Train F1: 0.6942
           | Val Loss:   24.9175 | Val Accuracy:   0.7000 | Val F1:   0.6498


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 6 | Train Loss: 180.0145 | Train Accuracy: 0.7787 | Train F1: 0.7713
           | Val Loss:   21.9798 | Val Accuracy:   0.7040 | Val F1:   0.6677


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 7 | Train Loss: 144.0817 | Train Accuracy: 0.8596 | Train F1: 0.8547
           | Val Loss:   19.4636 | Val Accuracy:   0.7200 | Val F1:   0.6837


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 8 | Train Loss: 116.6679 | Train Accuracy: 0.9049 | Train F1: 0.9032
           | Val Loss:   16.6694 | Val Accuracy:   0.7600 | Val F1:   0.7283


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 9 | Train Loss: 94.3236 | Train Accuracy: 0.9418 | Train F1: 0.9410
           | Val Loss:   16.1349 | Val Accuracy:   0.7400 | Val F1:   0.7089


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 10 | Train Loss: 79.0521 | Train Accuracy: 0.9622 | Train F1: 0.9615
           | Val Loss:   14.3184 | Val Accuracy:   0.7880 | Val F1:   0.7628


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 11 | Train Loss: 67.2600 | Train Accuracy: 0.9756 | Train F1: 0.9751
           | Val Loss:   13.6149 | Val Accuracy:   0.8000 | Val F1:   0.7602


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 12 | Train Loss: 58.6056 | Train Accuracy: 0.9849 | Train F1: 0.9846
           | Val Loss:   13.1373 | Val Accuracy:   0.7960 | Val F1:   0.7705


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 13 | Train Loss: 52.9590 | Train Accuracy: 0.9871 | Train F1: 0.9868
           | Val Loss:   12.7683 | Val Accuracy:   0.8040 | Val F1:   0.7825


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 14 | Train Loss: 49.0651 | Train Accuracy: 0.9907 | Train F1: 0.9906
           | Val Loss:   12.4441 | Val Accuracy:   0.8000 | Val F1:   0.7803


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 15 | Train Loss: 46.6515 | Train Accuracy: 0.9942 | Train F1: 0.9940
           | Val Loss:   12.4656 | Val Accuracy:   0.8040 | Val F1:   0.7810
Test Accuracy:  0.6100 | Test F1:  0.6090


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 5.5
0       The capital market, _ criticised for a lack of transparency, needs an independent regulator, but...
1       beat the _ 3-0 in a _ _ game on , setting up a _ for group _ against and leaving the _ world _ _...
2       The fall in the trade deficit to crowns in from in _ market sentiment, and the goods imported sh...
3       paper concern a.s. on said its net profit fell as a _ of its main paper mill, _ with a downturn ...
4       shares _ on following the coalition government's win in elections, but analysts said the rise wa...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is being used in a _ for a bank and some _ are up in arms. ...
2246    , the _ stock _ hired recently to _ drive to _ the market, has been removed from her fund as she...
2247    , 's biggest city and financial capital, is _ for a near _ when _ hit the _ against deep budget ...

100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.66it/s]


Epochs: 1 | Train Loss: 544.9005 | Train Accuracy: 0.0444 | Train F1: 0.0339
           | Val Loss:   56.8193 | Val Accuracy:   0.1720 | Val F1:   0.0863


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.66it/s]


Epochs: 2 | Train Loss: 453.4688 | Train Accuracy: 0.2796 | Train F1: 0.2359
           | Val Loss:   44.7312 | Val Accuracy:   0.3840 | Val F1:   0.2965


100%|█████████████████████████████████████████| 141/141 [01:29<00:00,  1.57it/s]


Epochs: 3 | Train Loss: 354.8201 | Train Accuracy: 0.4871 | Train F1: 0.4526
           | Val Loss:   35.4897 | Val Accuracy:   0.5560 | Val F1:   0.4753


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 4 | Train Loss: 280.3967 | Train Accuracy: 0.6204 | Train F1: 0.5963
           | Val Loss:   29.4893 | Val Accuracy:   0.6120 | Val F1:   0.5405


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 5 | Train Loss: 224.3960 | Train Accuracy: 0.7236 | Train F1: 0.7125
           | Val Loss:   25.1749 | Val Accuracy:   0.6240 | Val F1:   0.5628


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 6 | Train Loss: 180.1351 | Train Accuracy: 0.7822 | Train F1: 0.7726
           | Val Loss:   21.7336 | Val Accuracy:   0.6760 | Val F1:   0.6276


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 7 | Train Loss: 144.4155 | Train Accuracy: 0.8436 | Train F1: 0.8374
           | Val Loss:   19.7515 | Val Accuracy:   0.6920 | Val F1:   0.6550


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 8 | Train Loss: 118.1760 | Train Accuracy: 0.8996 | Train F1: 0.8963
           | Val Loss:   17.2988 | Val Accuracy:   0.7320 | Val F1:   0.6938


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 9 | Train Loss: 97.1874 | Train Accuracy: 0.9307 | Train F1: 0.9291
           | Val Loss:   16.0664 | Val Accuracy:   0.7320 | Val F1:   0.6946


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 10 | Train Loss: 81.6710 | Train Accuracy: 0.9533 | Train F1: 0.9521
           | Val Loss:   15.1580 | Val Accuracy:   0.7560 | Val F1:   0.7296


100%|█████████████████████████████████████████| 141/141 [01:29<00:00,  1.57it/s]


Epochs: 11 | Train Loss: 70.6490 | Train Accuracy: 0.9644 | Train F1: 0.9641
           | Val Loss:   14.6623 | Val Accuracy:   0.7360 | Val F1:   0.6957


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.60it/s]


Epochs: 12 | Train Loss: 61.6188 | Train Accuracy: 0.9787 | Train F1: 0.9783
           | Val Loss:   13.9272 | Val Accuracy:   0.7560 | Val F1:   0.7308


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 13 | Train Loss: 55.1617 | Train Accuracy: 0.9898 | Train F1: 0.9897
           | Val Loss:   13.5057 | Val Accuracy:   0.7560 | Val F1:   0.7407


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 14 | Train Loss: 51.2025 | Train Accuracy: 0.9893 | Train F1: 0.9892
           | Val Loss:   13.2934 | Val Accuracy:   0.7600 | Val F1:   0.7359


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 15 | Train Loss: 48.9218 | Train Accuracy: 0.9920 | Train F1: 0.9919
           | Val Loss:   13.1335 | Val Accuracy:   0.7600 | Val F1:   0.7365
Test Accuracy:  0.6020 | Test F1:  0.6020


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 5.0
0       The capital market, _ criticised for a lack of transparency, needs an independent regulator, but...
1       beat the _ 3-0 in a _ _ game on , setting up a _ for group _ against and leaving the _ world _ _...
2       The fall in the trade deficit to crowns in from in _ market sentiment, and the goods _ show indu...
3       paper concern a.s. on said its net profit fell as a _ of its main paper mill, _ with a _ in the ...
4       shares _ on following the coalition government's win in elections, but analysts said the rise wa...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is being used in a _ for a bank and some _ are up in arms. ...
2246    , the _ stock _ _ recently to _ drive to _ the market, has been _ from her fund as she is _ by r...
2247    , 's biggest city and financial capital, is _ for a near _ when _ hit the _ against deep budget ...

100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 1 | Train Loss: 550.7766 | Train Accuracy: 0.0320 | Train F1: 0.0183
           | Val Loss:   61.3259 | Val Accuracy:   0.0400 | Val F1:   0.0115


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 2 | Train Loss: 491.5761 | Train Accuracy: 0.1969 | Train F1: 0.1516
           | Val Loss:   48.5824 | Val Accuracy:   0.3320 | Val F1:   0.2672


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 3 | Train Loss: 383.6277 | Train Accuracy: 0.4356 | Train F1: 0.3934
           | Val Loss:   38.3968 | Val Accuracy:   0.5120 | Val F1:   0.4369


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 4 | Train Loss: 301.8775 | Train Accuracy: 0.5827 | Train F1: 0.5527
           | Val Loss:   31.2303 | Val Accuracy:   0.6080 | Val F1:   0.5513


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 5 | Train Loss: 237.2310 | Train Accuracy: 0.6947 | Train F1: 0.6770
           | Val Loss:   26.2269 | Val Accuracy:   0.6280 | Val F1:   0.5682


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 6 | Train Loss: 188.2190 | Train Accuracy: 0.7773 | Train F1: 0.7693
           | Val Loss:   22.9438 | Val Accuracy:   0.6760 | Val F1:   0.6281


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 7 | Train Loss: 149.6868 | Train Accuracy: 0.8422 | Train F1: 0.8351
           | Val Loss:   19.7616 | Val Accuracy:   0.7120 | Val F1:   0.6881


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 8 | Train Loss: 120.0112 | Train Accuracy: 0.9009 | Train F1: 0.8961
           | Val Loss:   17.9785 | Val Accuracy:   0.7200 | Val F1:   0.6878


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 9 | Train Loss: 97.0877 | Train Accuracy: 0.9329 | Train F1: 0.9306
           | Val Loss:   15.7845 | Val Accuracy:   0.7600 | Val F1:   0.7231


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 10 | Train Loss: 80.0206 | Train Accuracy: 0.9533 | Train F1: 0.9519
           | Val Loss:   15.1702 | Val Accuracy:   0.7800 | Val F1:   0.7661


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 11 | Train Loss: 67.8785 | Train Accuracy: 0.9733 | Train F1: 0.9726
           | Val Loss:   14.0598 | Val Accuracy:   0.7880 | Val F1:   0.7579


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 12 | Train Loss: 58.6677 | Train Accuracy: 0.9809 | Train F1: 0.9804
           | Val Loss:   13.6865 | Val Accuracy:   0.8160 | Val F1:   0.7979


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 13 | Train Loss: 52.1952 | Train Accuracy: 0.9853 | Train F1: 0.9850
           | Val Loss:   13.2268 | Val Accuracy:   0.8080 | Val F1:   0.7949


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 14 | Train Loss: 48.4050 | Train Accuracy: 0.9867 | Train F1: 0.9863
           | Val Loss:   13.2762 | Val Accuracy:   0.7920 | Val F1:   0.7761


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.60it/s]


Epochs: 15 | Train Loss: 45.7474 | Train Accuracy: 0.9911 | Train F1: 0.9909
           | Val Loss:   13.1331 | Val Accuracy:   0.8000 | Val F1:   0.7904
Test Accuracy:  0.5864 | Test F1:  0.5816


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 4.5
0       The capital market, _ _ for a lack of transparency, needs an independent regulator, but it is no...
1       _ the _ 3-0 in a _ _ _ on , setting up a _ for group _ against and leaving the _ world _ _ for a...
2       The fall in the trade _ to _ in from in _ market sentiment, and the goods _ show industrial rest...
3       _ concern a.s. on said its net profit fell as a _ of its main _ mill, _ with a _ in the _ _ bit ...
4       shares _ on following the _ government's win in elections, but analysts said the rise was due mo...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is being used in a _ for a bank and some _ are up in arms. ...
2246    , the _ stock _ _ recently to _ drive to _ the market, has been _ from her fund as she is _ by r...
2247    , 's biggest city and financial capital, is _ for a near _ when _ hit the _ against _ budget cut...

100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 1 | Train Loss: 547.8288 | Train Accuracy: 0.0356 | Train F1: 0.0194
           | Val Loss:   59.4684 | Val Accuracy:   0.1080 | Val F1:   0.0517


100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.67it/s]


Epochs: 2 | Train Loss: 468.8100 | Train Accuracy: 0.2111 | Train F1: 0.1741
           | Val Loss:   47.1702 | Val Accuracy:   0.3560 | Val F1:   0.2870


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.60it/s]


Epochs: 3 | Train Loss: 376.6755 | Train Accuracy: 0.4178 | Train F1: 0.3756
           | Val Loss:   38.5178 | Val Accuracy:   0.4880 | Val F1:   0.4254


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 4 | Train Loss: 301.3985 | Train Accuracy: 0.5662 | Train F1: 0.5401
           | Val Loss:   32.1363 | Val Accuracy:   0.5760 | Val F1:   0.5161


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 5 | Train Loss: 241.7832 | Train Accuracy: 0.6729 | Train F1: 0.6585
           | Val Loss:   27.2420 | Val Accuracy:   0.6200 | Val F1:   0.5590


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 6 | Train Loss: 194.0568 | Train Accuracy: 0.7671 | Train F1: 0.7583
           | Val Loss:   24.0822 | Val Accuracy:   0.6880 | Val F1:   0.6454


100%|█████████████████████████████████████████| 141/141 [01:29<00:00,  1.58it/s]


Epochs: 7 | Train Loss: 157.5379 | Train Accuracy: 0.8427 | Train F1: 0.8373
           | Val Loss:   21.1774 | Val Accuracy:   0.7040 | Val F1:   0.6667


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 8 | Train Loss: 128.8493 | Train Accuracy: 0.8880 | Train F1: 0.8838
           | Val Loss:   19.3029 | Val Accuracy:   0.7400 | Val F1:   0.7072


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 9 | Train Loss: 105.5590 | Train Accuracy: 0.9218 | Train F1: 0.9189
           | Val Loss:   17.9119 | Val Accuracy:   0.7440 | Val F1:   0.7283


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 10 | Train Loss: 87.5294 | Train Accuracy: 0.9520 | Train F1: 0.9508
           | Val Loss:   16.7353 | Val Accuracy:   0.7640 | Val F1:   0.7398


100%|█████████████████████████████████████████| 141/141 [01:29<00:00,  1.58it/s]


Epochs: 11 | Train Loss: 74.4285 | Train Accuracy: 0.9649 | Train F1: 0.9630
           | Val Loss:   15.7356 | Val Accuracy:   0.7720 | Val F1:   0.7550


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 12 | Train Loss: 64.8127 | Train Accuracy: 0.9751 | Train F1: 0.9733
           | Val Loss:   15.2340 | Val Accuracy:   0.7720 | Val F1:   0.7484


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 13 | Train Loss: 58.2425 | Train Accuracy: 0.9831 | Train F1: 0.9818
           | Val Loss:   14.6709 | Val Accuracy:   0.7680 | Val F1:   0.7476


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 14 | Train Loss: 54.0913 | Train Accuracy: 0.9867 | Train F1: 0.9863
           | Val Loss:   14.4824 | Val Accuracy:   0.7880 | Val F1:   0.7667


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 15 | Train Loss: 51.4708 | Train Accuracy: 0.9889 | Train F1: 0.9882
           | Val Loss:   14.5266 | Val Accuracy:   0.7800 | Val F1:   0.7614
Test Accuracy:  0.5684 | Test F1:  0.5635


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 4.0
0       The capital market, _ _ for a _ of transparency, needs an independent regulator, but it is not a...
1       _ the _ 3-0 in a _ _ _ on , _ up a _ for group _ against and _ the _ world _ _ for answers. The ...
2       The fall in the trade _ to _ in from in _ market sentiment, and the _ _ show _ restructuring is ...
3       _ concern a.s. on said its net profit fell as a _ of its main _ mill, _ with a _ in the _ _ bit ...
4       shares _ on following the _ government's win in elections, but analysts said the rise was due mo...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is being used in a _ for a bank and some _ are up in arms. ...
2246    , the _ stock _ _ recently to _ _ to _ the market, has been _ from _ fund as she is _ by regulat...
2247    , 's biggest city and financial capital, is _ for a near _ when _ hit the _ against _ _ _ by 's ...

100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.67it/s]


Epochs: 1 | Train Loss: 551.5193 | Train Accuracy: 0.0298 | Train F1: 0.0132
           | Val Loss:   61.3760 | Val Accuracy:   0.0440 | Val F1:   0.0139


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 2 | Train Loss: 489.8229 | Train Accuracy: 0.1693 | Train F1: 0.1375
           | Val Loss:   49.1745 | Val Accuracy:   0.3040 | Val F1:   0.2248


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 3 | Train Loss: 389.6359 | Train Accuracy: 0.4102 | Train F1: 0.3690
           | Val Loss:   39.4996 | Val Accuracy:   0.4840 | Val F1:   0.4318


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 4 | Train Loss: 311.7821 | Train Accuracy: 0.5596 | Train F1: 0.5356
           | Val Loss:   32.7198 | Val Accuracy:   0.5600 | Val F1:   0.5261


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 5 | Train Loss: 247.1725 | Train Accuracy: 0.6787 | Train F1: 0.6592
           | Val Loss:   28.4111 | Val Accuracy:   0.6120 | Val F1:   0.5593


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.64it/s]


Epochs: 6 | Train Loss: 197.6550 | Train Accuracy: 0.7671 | Train F1: 0.7551
           | Val Loss:   25.8538 | Val Accuracy:   0.6400 | Val F1:   0.6105


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 7 | Train Loss: 158.9596 | Train Accuracy: 0.8338 | Train F1: 0.8288
           | Val Loss:   21.8438 | Val Accuracy:   0.6640 | Val F1:   0.6374


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 8 | Train Loss: 126.4021 | Train Accuracy: 0.9004 | Train F1: 0.8988
           | Val Loss:   20.0939 | Val Accuracy:   0.7000 | Val F1:   0.6734


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 9 | Train Loss: 103.6440 | Train Accuracy: 0.9329 | Train F1: 0.9329
           | Val Loss:   18.7424 | Val Accuracy:   0.7160 | Val F1:   0.7063


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 10 | Train Loss: 86.3517 | Train Accuracy: 0.9627 | Train F1: 0.9627
           | Val Loss:   17.1529 | Val Accuracy:   0.7320 | Val F1:   0.7113


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 11 | Train Loss: 73.1675 | Train Accuracy: 0.9742 | Train F1: 0.9741
           | Val Loss:   16.3157 | Val Accuracy:   0.7560 | Val F1:   0.7391


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 12 | Train Loss: 63.5640 | Train Accuracy: 0.9844 | Train F1: 0.9846
           | Val Loss:   16.2153 | Val Accuracy:   0.7200 | Val F1:   0.7101


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 13 | Train Loss: 56.6161 | Train Accuracy: 0.9880 | Train F1: 0.9880
           | Val Loss:   15.6043 | Val Accuracy:   0.7560 | Val F1:   0.7402


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 14 | Train Loss: 52.8627 | Train Accuracy: 0.9929 | Train F1: 0.9930
           | Val Loss:   15.4458 | Val Accuracy:   0.7640 | Val F1:   0.7531


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 15 | Train Loss: 50.1660 | Train Accuracy: 0.9929 | Train F1: 0.9930
           | Val Loss:   15.2222 | Val Accuracy:   0.7520 | Val F1:   0.7385
Test Accuracy:  0.5544 | Test F1:  0.5495


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 3.5
0       The capital market, _ _ for a _ of transparency, _ an _ regulator, but it is not a _ for all of ...
1       _ the _ 3-0 in a _ _ _ on , _ up a _ for group _ against and _ the _ world _ _ for answers. The ...
2       The _ in the trade _ to _ in from in _ market sentiment, and the _ _ show _ _ is on track, analy...
3       _ _ a.s. on said its net profit fell as a _ of its main _ mill, _ with a _ in the _ _ _ into its...
4       shares _ on _ the _ government's _ in elections, but analysts said the rise was due more to a _ ...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is being used in a _ for a bank and some _ are up in arms. ...
2246    , the _ stock _ _ recently to _ _ to _ the market, has been _ from _ _ as she is _ by regulators...
2247    , 's biggest _ and financial capital, is _ for a _ _ when _ hit the _ against _ _ _ by 's _ gove...

100%|█████████████████████████████████████████| 141/141 [01:22<00:00,  1.70it/s]


Epochs: 1 | Train Loss: 552.8613 | Train Accuracy: 0.0284 | Train F1: 0.0207
           | Val Loss:   61.8464 | Val Accuracy:   0.0280 | Val F1:   0.0045


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 2 | Train Loss: 498.1234 | Train Accuracy: 0.1653 | Train F1: 0.1298
           | Val Loss:   50.1995 | Val Accuracy:   0.2720 | Val F1:   0.2279


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 3 | Train Loss: 393.8210 | Train Accuracy: 0.3898 | Train F1: 0.3426
           | Val Loss:   41.1593 | Val Accuracy:   0.3880 | Val F1:   0.3299


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 4 | Train Loss: 312.0034 | Train Accuracy: 0.5444 | Train F1: 0.5114
           | Val Loss:   34.0840 | Val Accuracy:   0.5000 | Val F1:   0.4445


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 5 | Train Loss: 250.0152 | Train Accuracy: 0.6716 | Train F1: 0.6546
           | Val Loss:   29.2978 | Val Accuracy:   0.6000 | Val F1:   0.5571


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 6 | Train Loss: 201.4806 | Train Accuracy: 0.7653 | Train F1: 0.7580
           | Val Loss:   25.9606 | Val Accuracy:   0.6280 | Val F1:   0.5894


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 7 | Train Loss: 160.1251 | Train Accuracy: 0.8498 | Train F1: 0.8464
           | Val Loss:   23.5986 | Val Accuracy:   0.6760 | Val F1:   0.6471


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 8 | Train Loss: 131.6670 | Train Accuracy: 0.8836 | Train F1: 0.8813
           | Val Loss:   21.6865 | Val Accuracy:   0.6760 | Val F1:   0.6559


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 9 | Train Loss: 106.5076 | Train Accuracy: 0.9258 | Train F1: 0.9249
           | Val Loss:   20.1706 | Val Accuracy:   0.6880 | Val F1:   0.6705


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 10 | Train Loss: 88.6508 | Train Accuracy: 0.9498 | Train F1: 0.9493
           | Val Loss:   19.8015 | Val Accuracy:   0.7000 | Val F1:   0.6935


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 11 | Train Loss: 75.6089 | Train Accuracy: 0.9622 | Train F1: 0.9613
           | Val Loss:   18.3309 | Val Accuracy:   0.7040 | Val F1:   0.6880


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 12 | Train Loss: 66.5425 | Train Accuracy: 0.9698 | Train F1: 0.9691
           | Val Loss:   17.7959 | Val Accuracy:   0.7320 | Val F1:   0.7225


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 13 | Train Loss: 59.7308 | Train Accuracy: 0.9800 | Train F1: 0.9798
           | Val Loss:   17.7316 | Val Accuracy:   0.7160 | Val F1:   0.6978


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 14 | Train Loss: 54.6335 | Train Accuracy: 0.9898 | Train F1: 0.9897
           | Val Loss:   17.4800 | Val Accuracy:   0.7200 | Val F1:   0.7032


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 15 | Train Loss: 52.4767 | Train Accuracy: 0.9880 | Train F1: 0.9879
           | Val Loss:   17.3101 | Val Accuracy:   0.7280 | Val F1:   0.7096
Test Accuracy:  0.5360 | Test F1:  0.5328


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 3.0
0       The _ market, _ _ for a _ of transparency, _ an _ regulator, but it is not a _ for all of the ma...
1       _ the _ 3-0 in a _ _ _ on , _ up a _ for group _ against and _ the _ world _ _ for answers. The ...
2       The _ in the _ _ to _ in from in _ market sentiment, and the _ _ _ _ _ is on track, analysts sai...
3       _ _ a.s. on said its _ profit _ as a _ of its _ _ mill, _ with a _ in the _ _ _ into its _ line....
4       shares _ on _ the _ government's _ in elections, but analysts said the rise was due more to a _ ...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is being used in a _ for a bank and some _ are up in arms. ...
2246    , the _ stock _ _ _ to _ _ to _ the market, has been _ from _ _ as _ is _ by regulators. The _ m...
2247    , 's biggest _ and financial capital, is _ for a _ _ when _ _ the _ against _ _ _ by 's _ govern...

100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 1 | Train Loss: 553.0537 | Train Accuracy: 0.0276 | Train F1: 0.0137
           | Val Loss:   62.7742 | Val Accuracy:   0.0200 | Val F1:   0.0017


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 2 | Train Loss: 518.7290 | Train Accuracy: 0.0938 | Train F1: 0.0630
           | Val Loss:   52.6656 | Val Accuracy:   0.2720 | Val F1:   0.2082


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 3 | Train Loss: 422.9285 | Train Accuracy: 0.3164 | Train F1: 0.2774
           | Val Loss:   43.9672 | Val Accuracy:   0.3600 | Val F1:   0.2913


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 4 | Train Loss: 338.8816 | Train Accuracy: 0.4964 | Train F1: 0.4574
           | Val Loss:   37.1672 | Val Accuracy:   0.4360 | Val F1:   0.3753


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 5 | Train Loss: 272.2118 | Train Accuracy: 0.6182 | Train F1: 0.5943
           | Val Loss:   32.3687 | Val Accuracy:   0.5040 | Val F1:   0.4523


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 6 | Train Loss: 220.0234 | Train Accuracy: 0.7276 | Train F1: 0.7136
           | Val Loss:   28.5648 | Val Accuracy:   0.5760 | Val F1:   0.5441


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 7 | Train Loss: 178.9745 | Train Accuracy: 0.8067 | Train F1: 0.7986
           | Val Loss:   25.8990 | Val Accuracy:   0.6120 | Val F1:   0.5756


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 8 | Train Loss: 146.2738 | Train Accuracy: 0.8604 | Train F1: 0.8548
           | Val Loss:   23.1552 | Val Accuracy:   0.6600 | Val F1:   0.6623


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.64it/s]


Epochs: 9 | Train Loss: 118.9081 | Train Accuracy: 0.9107 | Train F1: 0.9071
           | Val Loss:   21.6975 | Val Accuracy:   0.6800 | Val F1:   0.6521


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 10 | Train Loss: 99.4621 | Train Accuracy: 0.9444 | Train F1: 0.9439
           | Val Loss:   20.5191 | Val Accuracy:   0.7040 | Val F1:   0.6818


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 11 | Train Loss: 85.0400 | Train Accuracy: 0.9591 | Train F1: 0.9581
           | Val Loss:   19.5501 | Val Accuracy:   0.7200 | Val F1:   0.6981


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 12 | Train Loss: 73.3597 | Train Accuracy: 0.9724 | Train F1: 0.9721
           | Val Loss:   19.0741 | Val Accuracy:   0.7160 | Val F1:   0.6973


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 13 | Train Loss: 66.9646 | Train Accuracy: 0.9791 | Train F1: 0.9788
           | Val Loss:   18.7739 | Val Accuracy:   0.7320 | Val F1:   0.7126


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 14 | Train Loss: 61.3157 | Train Accuracy: 0.9862 | Train F1: 0.9862
           | Val Loss:   18.5065 | Val Accuracy:   0.7280 | Val F1:   0.7188


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 15 | Train Loss: 58.8804 | Train Accuracy: 0.9858 | Train F1: 0.9855
           | Val Loss:   18.4804 | Val Accuracy:   0.7360 | Val F1:   0.7297
Test Accuracy:  0.5328 | Test F1:  0.5286


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 2.5
0       The _ market, _ _ for a _ of transparency, _ an _ regulator, but it is not a _ for all of the ma...
1       _ the _ 3-0 in a _ _ _ on , _ up a _ for group _ _ and _ the _ world _ _ for answers. The fast-s...
2       The _ in the _ _ to _ in from in _ market sentiment, and the _ _ _ _ _ is on track, analysts sai...
3       _ _ a.s. on said its _ _ _ as a _ of its _ _ mill, _ with a _ in the _ _ _ into its _ line. said...
4       shares _ on _ the _ government's _ in elections, but analysts said the _ was _ more to a _ _ tha...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is _ _ in a _ for a _ and some _ are up in arms. is _ a _ _...
2246    , the _ stock _ _ _ to _ _ to _ the market, has been _ from _ _ as _ is _ by regulators. The _ _...
2247    , 's _ _ and _ capital, is _ for a _ _ when _ _ the _ _ _ _ _ by 's _ government. The so-called ...

100%|█████████████████████████████████████████| 141/141 [01:24<00:00,  1.66it/s]


Epochs: 1 | Train Loss: 552.5349 | Train Accuracy: 0.0333 | Train F1: 0.0162
           | Val Loss:   61.0401 | Val Accuracy:   0.0520 | Val F1:   0.0158


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 2 | Train Loss: 493.2901 | Train Accuracy: 0.1782 | Train F1: 0.1372
           | Val Loss:   50.3002 | Val Accuracy:   0.2640 | Val F1:   0.2031


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 3 | Train Loss: 397.8694 | Train Accuracy: 0.3711 | Train F1: 0.3040
           | Val Loss:   42.0384 | Val Accuracy:   0.3960 | Val F1:   0.3324


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 4 | Train Loss: 325.7269 | Train Accuracy: 0.4951 | Train F1: 0.4460
           | Val Loss:   37.0926 | Val Accuracy:   0.4600 | Val F1:   0.3829


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 5 | Train Loss: 266.6901 | Train Accuracy: 0.6222 | Train F1: 0.5894
           | Val Loss:   31.7486 | Val Accuracy:   0.5280 | Val F1:   0.4909


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 6 | Train Loss: 217.7919 | Train Accuracy: 0.7396 | Train F1: 0.7291
           | Val Loss:   28.7533 | Val Accuracy:   0.5240 | Val F1:   0.4844


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 7 | Train Loss: 178.1440 | Train Accuracy: 0.8018 | Train F1: 0.7935
           | Val Loss:   25.9977 | Val Accuracy:   0.5880 | Val F1:   0.5581


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 8 | Train Loss: 144.1378 | Train Accuracy: 0.8787 | Train F1: 0.8771
           | Val Loss:   23.9603 | Val Accuracy:   0.6440 | Val F1:   0.6392


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 9 | Train Loss: 118.7984 | Train Accuracy: 0.9129 | Train F1: 0.9113
           | Val Loss:   22.1325 | Val Accuracy:   0.6840 | Val F1:   0.6460


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 10 | Train Loss: 98.7959 | Train Accuracy: 0.9480 | Train F1: 0.9477
           | Val Loss:   21.4711 | Val Accuracy:   0.6560 | Val F1:   0.6438


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 11 | Train Loss: 84.5318 | Train Accuracy: 0.9613 | Train F1: 0.9610
           | Val Loss:   20.5657 | Val Accuracy:   0.6880 | Val F1:   0.6649


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 12 | Train Loss: 74.4660 | Train Accuracy: 0.9747 | Train F1: 0.9746
           | Val Loss:   19.9539 | Val Accuracy:   0.6960 | Val F1:   0.6769


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 13 | Train Loss: 66.9575 | Train Accuracy: 0.9791 | Train F1: 0.9791
           | Val Loss:   19.7189 | Val Accuracy:   0.6840 | Val F1:   0.6669


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 14 | Train Loss: 61.6201 | Train Accuracy: 0.9827 | Train F1: 0.9828
           | Val Loss:   19.6343 | Val Accuracy:   0.6880 | Val F1:   0.6731


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 15 | Train Loss: 58.7805 | Train Accuracy: 0.9876 | Train F1: 0.9877
           | Val Loss:   19.4601 | Val Accuracy:   0.6920 | Val F1:   0.6767
Test Accuracy:  0.5100 | Test F1:  0.5051


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 2.0
0       The _ market, _ _ for a _ of transparency, _ an _ regulator, but it is not a _ for all of the ma...
1       _ the _ 3-0 in a _ _ _ on , _ up a _ for _ _ _ and _ the _ _ _ _ for answers. The fast-skating _...
2       The _ in the _ _ to _ in from in _ market sentiment, and the _ _ _ _ _ is on track, analysts sai...
3       _ _ a.s. on said its _ _ _ as a _ of its _ _ mill, _ with a _ in the _ _ _ into its _ line. said...
4       _ _ on _ the _ government's _ in elections, but analysts said the _ was _ more to a _ _ than to ...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is _ _ in a _ for a _ and some _ are up in arms. is _ a _ _...
2246    , the _ _ _ _ _ to _ _ to _ the market, has been _ from _ _ as _ is _ by regulators. The _ _ -- ...
2247    , 's _ _ and _ capital, is _ for a _ _ when _ _ the _ _ _ _ _ by 's _ government. The so-called ...

100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 1 | Train Loss: 551.9670 | Train Accuracy: 0.0289 | Train F1: 0.0133
           | Val Loss:   62.1177 | Val Accuracy:   0.0440 | Val F1:   0.0095


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 2 | Train Loss: 531.4052 | Train Accuracy: 0.0769 | Train F1: 0.0509
           | Val Loss:   56.2084 | Val Accuracy:   0.1960 | Val F1:   0.1092


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 3 | Train Loss: 450.6425 | Train Accuracy: 0.2573 | Train F1: 0.2140
           | Val Loss:   46.2836 | Val Accuracy:   0.3120 | Val F1:   0.2560


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 4 | Train Loss: 369.9585 | Train Accuracy: 0.4022 | Train F1: 0.3501
           | Val Loss:   39.6706 | Val Accuracy:   0.4160 | Val F1:   0.3502


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 5 | Train Loss: 304.1834 | Train Accuracy: 0.5209 | Train F1: 0.4848
           | Val Loss:   34.8263 | Val Accuracy:   0.4560 | Val F1:   0.4118


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 6 | Train Loss: 251.5308 | Train Accuracy: 0.6360 | Train F1: 0.6147
           | Val Loss:   30.9540 | Val Accuracy:   0.4920 | Val F1:   0.4425


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 7 | Train Loss: 207.2312 | Train Accuracy: 0.7498 | Train F1: 0.7380
           | Val Loss:   28.2238 | Val Accuracy:   0.5560 | Val F1:   0.5148


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.60it/s]


Epochs: 8 | Train Loss: 171.8763 | Train Accuracy: 0.8209 | Train F1: 0.8149
           | Val Loss:   26.5921 | Val Accuracy:   0.5720 | Val F1:   0.5354


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 9 | Train Loss: 143.8023 | Train Accuracy: 0.8724 | Train F1: 0.8662
           | Val Loss:   25.1096 | Val Accuracy:   0.6040 | Val F1:   0.5634


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 10 | Train Loss: 122.1309 | Train Accuracy: 0.9040 | Train F1: 0.9013
           | Val Loss:   23.6817 | Val Accuracy:   0.6400 | Val F1:   0.5989


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 11 | Train Loss: 104.8496 | Train Accuracy: 0.9280 | Train F1: 0.9259
           | Val Loss:   22.6105 | Val Accuracy:   0.6440 | Val F1:   0.5959


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 12 | Train Loss: 91.7896 | Train Accuracy: 0.9493 | Train F1: 0.9478
           | Val Loss:   22.0769 | Val Accuracy:   0.6520 | Val F1:   0.6110


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 13 | Train Loss: 82.6556 | Train Accuracy: 0.9622 | Train F1: 0.9612
           | Val Loss:   21.7916 | Val Accuracy:   0.6440 | Val F1:   0.6057


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 14 | Train Loss: 76.8944 | Train Accuracy: 0.9671 | Train F1: 0.9662
           | Val Loss:   21.8100 | Val Accuracy:   0.6440 | Val F1:   0.6062


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 15 | Train Loss: 73.7173 | Train Accuracy: 0.9747 | Train F1: 0.9742
           | Val Loss:   21.6463 | Val Accuracy:   0.6400 | Val F1:   0.6028
Test Accuracy:  0.4832 | Test F1:  0.4778


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

idf removal threshold at 1.5
0       The _ market, _ _ for a _ of transparency, _ an _ regulator, but it is not a _ for _ of the mark...
1       _ the _ 3-0 in a _ _ _ on , _ up a _ for _ _ _ and _ the _ _ _ _ for answers. The fast-skating _...
2       The _ in the _ _ to _ in from in _ _ sentiment, and the _ _ _ _ _ is on track, _ said on . () _ ...
3       _ _ a.s. on said its _ _ _ as a _ of its _ _ mill, _ with a _ in the _ _ _ _ its _ line. said it...
4       _ _ on _ the _ government's _ in elections, but _ said the _ was _ _ to a _ _ _ to a _ in _ sent...
                                                       ...                                                 
2245    "" -- counter-culture _ of the 's -- is _ _ in a _ for a _ and _ _ are up in arms. is _ a _ _ _ ...
2246    , the _ _ _ _ _ to _ _ to _ the market, has _ _ from _ _ as _ is _ by regulators. The _ _ -- _ '...
2247    , 's _ _ and _ capital, is _ for a _ _ _ _ _ the _ _ _ _ _ by 's _ government. The so-called "" ...

100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.65it/s]


Epochs: 1 | Train Loss: 552.1703 | Train Accuracy: 0.0249 | Train F1: 0.0133
           | Val Loss:   62.3298 | Val Accuracy:   0.0280 | Val F1:   0.0104


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 2 | Train Loss: 529.5559 | Train Accuracy: 0.0702 | Train F1: 0.0511
           | Val Loss:   57.0716 | Val Accuracy:   0.1320 | Val F1:   0.0730


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.61it/s]


Epochs: 3 | Train Loss: 451.6420 | Train Accuracy: 0.2538 | Train F1: 0.2068
           | Val Loss:   47.3256 | Val Accuracy:   0.2920 | Val F1:   0.2355


100%|█████████████████████████████████████████| 141/141 [01:28<00:00,  1.59it/s]


Epochs: 4 | Train Loss: 372.6802 | Train Accuracy: 0.4160 | Train F1: 0.3587
           | Val Loss:   41.2559 | Val Accuracy:   0.4120 | Val F1:   0.3610


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 5 | Train Loss: 307.7450 | Train Accuracy: 0.5476 | Train F1: 0.5045
           | Val Loss:   35.9772 | Val Accuracy:   0.4680 | Val F1:   0.4112


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.60it/s]


Epochs: 6 | Train Loss: 252.5391 | Train Accuracy: 0.6556 | Train F1: 0.6328
           | Val Loss:   31.3227 | Val Accuracy:   0.5480 | Val F1:   0.4996


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 7 | Train Loss: 207.4441 | Train Accuracy: 0.7627 | Train F1: 0.7510
           | Val Loss:   28.3566 | Val Accuracy:   0.5600 | Val F1:   0.5086


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 8 | Train Loss: 171.1749 | Train Accuracy: 0.8240 | Train F1: 0.8144
           | Val Loss:   26.7205 | Val Accuracy:   0.6160 | Val F1:   0.5668


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.62it/s]


Epochs: 9 | Train Loss: 141.1489 | Train Accuracy: 0.8902 | Train F1: 0.8861
           | Val Loss:   24.3947 | Val Accuracy:   0.6360 | Val F1:   0.6066


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 10 | Train Loss: 117.9541 | Train Accuracy: 0.9249 | Train F1: 0.9233
           | Val Loss:   22.9139 | Val Accuracy:   0.6480 | Val F1:   0.6100


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 11 | Train Loss: 101.4925 | Train Accuracy: 0.9404 | Train F1: 0.9386
           | Val Loss:   22.0814 | Val Accuracy:   0.6720 | Val F1:   0.6351


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.64it/s]


Epochs: 12 | Train Loss: 88.2166 | Train Accuracy: 0.9609 | Train F1: 0.9603
           | Val Loss:   21.2171 | Val Accuracy:   0.6600 | Val F1:   0.6239


100%|█████████████████████████████████████████| 141/141 [01:27<00:00,  1.62it/s]


Epochs: 13 | Train Loss: 79.7207 | Train Accuracy: 0.9707 | Train F1: 0.9704
           | Val Loss:   20.9621 | Val Accuracy:   0.6680 | Val F1:   0.6439


100%|█████████████████████████████████████████| 141/141 [01:25<00:00,  1.64it/s]


Epochs: 14 | Train Loss: 72.6325 | Train Accuracy: 0.9809 | Train F1: 0.9807
           | Val Loss:   20.8237 | Val Accuracy:   0.6640 | Val F1:   0.6322


100%|█████████████████████████████████████████| 141/141 [01:26<00:00,  1.63it/s]


Epochs: 15 | Train Loss: 70.2369 | Train Accuracy: 0.9804 | Train F1: 0.9803
           | Val Loss:   20.6775 | Val Accuracy:   0.6680 | Val F1:   0.6385
Test Accuracy:  0.5080 | Test F1:  0.5028


([10.0,
  9.5,
  9.0,
  8.5,
  8.0,
  7.5,
  7.0,
  6.5,
  6.0,
  5.5,
  5.0,
  4.5,
  4.0,
  3.5,
  3.0,
  2.5,
  2.0,
  1.5],
 [0.996,
  0.9977777777777778,
  0.9968888888888889,
  0.996,
  0.9884444444444445,
  0.996,
  0.9982222222222222,
  0.9933333333333333,
  0.9942222222222222,
  0.992,
  0.9911111111111112,
  0.9888888888888889,
  0.9928888888888889,
  0.988,
  0.9857777777777778,
  0.9875555555555555,
  0.9746666666666667,
  0.9804444444444445],
 [0.9959642800473014,
  0.9977092598261774,
  0.9969006964151248,
  0.995976240283706,
  0.9877127538679704,
  0.9958688123532511,
  0.9981964351557647,
  0.9932460689747131,
  0.994009167454589,
  0.9919352127819567,
  0.9909322250261116,
  0.9881834563664834,
  0.9929590924891909,
  0.987932655916726,
  0.9854612925034312,
  0.9876574827279331,
  0.9742435640531251,
  0.9802586740963428],
 [0.6092,
  0.628,
  0.6204,
  0.6108,
  0.6092,
  0.6056,
  0.6056,
  0.5932,
  0.61,
  0.602,
  0.5864,
  0.5684,
  0.5544,
  0.536,
  0.5328,
 

### Saliency Map

In [None]:
def get_saliency_map(model, df):
    input_text = [tokenizer(text, padding='max_length', max_length = 512, truncation=True, return_tensors="pt") for text in df['content']]
    input = input_text[0]
    print(input.shape)
    for param in model.parameters():
        param.requires_grad = False
    model.eval()
    # input.unsqueeze_(0)
    input.requires_grad = True
    preds = model(input)
    score, indices = torch.max(preds, 1)
    #backward pass to get gradients of score predicted class w.r.t. input image
    score.backward()
    #get max along channel axis
    slc, _ = torch.max(torch.abs(input.grad[0]), dim=0)
    #normalize to [0..1]
    slc = (slc - slc.min())/(slc.max()-slc.min())
    return slc

input_slc = get_saliency_map(model, train_df)
print(input_slc.shape)
input_slc

### SHAP

In [None]:
import shap
import scipy as sp

def f(x):
    tv = torch.tensor([tokenizer.encode(v, padding='max_length', \
        max_length=512, truncation=True) for v in x]).cuda()
    outputs = model(tv)[0].detach().cpu().numpy()
    scores = (np.exp(outputs).T / np.exp(outputs).sum(-1)).T
    val = sp.special.logit(scores[:,1]) # use one vs rest logit units
    return val

explainer = shap.Explainer(f, tokenizer)
shap_values = explainer(train_df['masked_content'][:3], fixed_context=1, batch_size=32)

In [None]:
shap.plots.bar(shap_values.abs.mean(0)) # default

In [None]:
shap.plots.bar(shap_values.abs.sum(0))

In [None]:
shap.plots.bar(shap_values.abs.max(0))

In [None]:
shap.plots.text(shap_values)