<a href="https://colab.research.google.com/github/skyshine460/Binary-Classification-with-a-Bank-Churn-Dataset/blob/main/BERT2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [54]:
!nvidia-smi

Sun Feb  9 01:50:41 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   40C    P0             58W /  400W |    1345MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [55]:
import os
import sys

In [56]:
!cp /content/drive/MyDrive/Datasets/signate-471/train.csv .
!cp /content/drive/MyDrive/Datasets/signate-471/test.csv .
!cp /content/drive/MyDrive/Datasets/signate-471/sample_submit.csv .

In [57]:
!pip install -q transformers

In [58]:
import math
import random
import time
import warnings

import numpy as np
import pandas as pd
import re

import torch
import torch.nn as nn
import transformers as T
from sklearn.metrics import fbeta_score
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import DataLoader, Dataset
from tqdm.notebook import tqdm

warnings.filterwarnings("ignore")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

DATA_DIR = "./"
OUTPUT_DIR = './t'

In [59]:
# Loggerの定義
def init_logger(log_file=OUTPUT_DIR + "train.log"):
    from logging import INFO, FileHandler, Formatter, StreamHandler, getLogger

    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()

In [60]:
# 乱数の固定

def seed_torch(seed=42):
    # python の組み込み関数の seed を固定
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    # numpy の seed を固定
    np.random.seed(seed)
    # torch の seed を固定
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # 決定論的アルゴリズムを使用する
    torch.backends.cudnn.deterministic = True

seed = 471
seed_torch(seed)

In [61]:
train = pd.read_csv(DATA_DIR + 'train.csv')
test = pd.read_csv(DATA_DIR + 'test.csv')
sub = pd.read_csv(DATA_DIR + 'sample_submit.csv', header=None)
sub.columns = ['id', 'judgement']

In [62]:
# # 誤植部分の訂正
# train['judgement'][2288] = 0
# train['judgement'][7708] = 0

In [63]:
border = len(train[train['judgement'] == 1]) / len(train['judgement'])
print(border)

0.023282372444280715


# 前処理

In [64]:
Fold = 10

def get_train_data(train):
    fold = StratifiedKFold(n_splits=Fold, shuffle=True, random_state=seed)
    for n, (train_index, val_index) in enumerate(fold.split(train, train["judgement"])):
        train.loc[val_index, "fold"] = int(n)
    train["fold"] = train["fold"].astype(np.uint8)

    return train

def get_test_data(test):
  return test

train = get_train_data(train)

# データセットの定義

In [65]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

class BaseDataset(Dataset):
    def __init__(self, df, model_name, include_labels=True):
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        self.df = df
        self.include_labels = include_labels

        df["title_abstract"] = df["title"] + " " + df["abstract"].fillna("")
        sentences = df["title_abstract"].tolist()

        max_length = 512
        self.encoded = tokenizer.batch_encode_plus(
            sentences,
            padding = 'max_length',
            max_length = max_length,
            truncation = True,
            return_attention_mask=True
        )

        if self.include_labels:
            self.labels = df["judgement"].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        input_ids = torch.tensor(self.encoded['input_ids'][idx])
        attention_mask = torch.tensor(self.encoded['attention_mask'][idx])

        if self.include_labels:
            label = torch.tensor(self.labels[idx]).float()
            return input_ids, attention_mask, label

        return input_ids, attention_mask


# モデルの定義

In [66]:
model_name = 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract'

In [67]:

class BaseModel(nn.Module):
    def __init__(self, model_name):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_ids, attention_mask):
        out = self.model(input_ids=input_ids, attention_mask=attention_mask)
        out = self.sigmoid(out.logits).squeeze()

        return out

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        BCE_loss = nn.BCEWithLogitsLoss(reduction='none')(inputs, targets)
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss

        if self.reduction == 'mean':
            return F_loss.mean()
        elif self.reduction == 'sum':
            return F_loss.sum()
        else:
            return F_loss


# 実行時間計測ツール

In [68]:

class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return "%dm %ds" % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return "%s (remain %s)" % (asMinutes(s), asMinutes(rs))

# 補助関数

In [69]:
def train_fn(train_loader, model, criterion, optimizer, epoch, device):
    start = end = time.time()
    losses = AverageMeter()

    # switch to train mode
    model.train()

    for step, (input_ids, attention_mask, labels) in enumerate(train_loader):
        optimizer.zero_grad()

        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        y_preds = model(input_ids, attention_mask)

        loss = criterion(y_preds, labels)

        # record loss
        losses.update(loss.item(), batch_size)
        loss.backward()

        optimizer.step()

        if step % 100 == 0 or step == (len(train_loader) - 1):
            print(
                f"Epoch: [{epoch + 1}][{step}/{len(train_loader)}] "
                f"Elapsed {timeSince(start, float(step + 1) / len(train_loader)):s} "
                f"Loss: {losses.avg:.4f} "
            )

    return losses.avg


In [70]:
def valid_fn(valid_loader, model, criterion, device):
    start = end = time.time()
    losses = AverageMeter()

    # switch to evaluation mode
    model.eval()
    preds = []

    for step, (input_ids, attention_mask, labels) in enumerate(valid_loader):
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        # compute loss
        with torch.no_grad():
            y_preds = model(input_ids, attention_mask)

        loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)

        # record score
        preds.append(y_preds.to("cpu").numpy())

        if step % 100 == 0 or step == (len(valid_loader) - 1):
            print(
                f"EVAL: [{step}/{len(valid_loader)}] "
                f"Elapsed {timeSince(start, float(step + 1) / len(valid_loader)):s} "
                f"Loss: {losses.avg:.4f} "
            )

    predictions = np.concatenate(preds)
    return losses.avg, predictions

# 推論関数

In [71]:
def inference():
    predictions = []

    test_dataset = BaseDataset(test, model_name, include_labels=False)
    test_loader = DataLoader(
        test_dataset, batch_size=16, shuffle=False, pin_memory=True
    )

    for fold in range(Fold):
        LOGGER.info(f"========== model: bert-base-uncased fold: {fold} inference ==========")
        model = BaseModel(model_name)
        model.to(device)
        model.load_state_dict(torch.load(OUTPUT_DIR + f"bert-base-uncased_fold{fold}_best.pth")["model"])
        model.eval()
        preds = []
        for i, (input_ids, attention_mask) in tqdm(enumerate(test_loader), total=len(test_loader)):
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            with torch.no_grad():
                y_preds = model(input_ids, attention_mask)
            preds.append(y_preds.to("cpu").numpy())
        preds = np.concatenate(preds)
        predictions.append(preds)
    predictions = np.mean(predictions, axis=0)

    return predictions

# 学習

In [72]:
def train_loop(train, fold):

    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # Data Loader
    # ====================================================
    trn_idx = train[train["fold"] != fold].index
    val_idx = train[train["fold"] == fold].index

    train_folds = train.loc[trn_idx].reset_index(drop=True)
    valid_folds = train.loc[val_idx].reset_index(drop=True)

    train_dataset = BaseDataset(train_folds, model_name)
    valid_dataset = BaseDataset(valid_folds, model_name)

    train_loader = DataLoader(
        train_dataset,
        batch_size=16,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
        drop_last=True,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=16,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
        drop_last=False,
    )

    # ====================================================
    # Model
    # ====================================================
    model = BaseModel(model_name)
    model.to(device)

    optimizer = T.AdamW(model.parameters(), lr=2e-5)

    criterion = FocalLoss()

    # ====================================================
    # Loop
    # ====================================================
    best_score = -1
    best_loss = np.inf

    for epoch in range(5):
        start_time = time.time()

        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, device)

        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
        valid_labels = valid_folds["judgement"].values

        # scoring
        score = fbeta_score(valid_labels, np.where(preds < border, 0, 1), beta=7.0)

        elapsed = time.time() - start_time
        LOGGER.info(
            f"Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s"
        )
        LOGGER.info(f"Epoch {epoch+1} - Score: {score}")

        if score > best_score:
            best_score = score
            LOGGER.info(f"Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model")
            torch.save(
                {"model": model.state_dict(), "preds": preds}, OUTPUT_DIR + f"bert-base-uncased_fold{fold}_best.pth"
            )

    check_point = torch.load(OUTPUT_DIR + f"bert-base-uncased_fold{fold}_best.pth")

    valid_folds["preds"] = check_point["preds"]

    return valid_folds

# メイン

In [73]:
def get_result(result_df):
    preds = result_df["preds"].values
    labels = result_df["judgement"].values
    score = fbeta_score(labels, np.where(preds < border, 0, 1), beta=7.0)
    LOGGER.info(f"Score: {score:<.5f}")

In [74]:
# Training
oof_df = pd.DataFrame()
for fold in range(Fold):
    _oof_df = train_loop(train, fold)
    oof_df = pd.concat([oof_df, _oof_df])
    LOGGER.info(f"========== fold: {fold} result ==========")
    get_result(_oof_df)

# CV result
LOGGER.info(f"========== CV ==========")
get_result(oof_df)

# Save OOF result
oof_df.to_csv(OUTPUT_DIR + "oof_df.csv", index=False)

# Inference
predictions = inference()
pd.Series(predictions).to_csv(OUTPUT_DIR + "predictions.csv", index=False)
predictions1 = np.where(predictions < border, 0, 1) #しきい値変更

# submission
sub["judgement"] = predictions1
sub.to_csv(OUTPUT_DIR + "submission.csv", index=False, header=False)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1526] Elapsed 0m 2s (remain 53m 56s) Loss: 0.4361 
Epoch: [1][100/1526] Elapsed 0m 32s (remain 7m 36s) Loss: 0.1882 
Epoch: [1][200/1526] Elapsed 1m 2s (remain 6m 53s) Loss: 0.1811 
Epoch: [1][300/1526] Elapsed 1m 33s (remain 6m 18s) Loss: 0.1786 
Epoch: [1][400/1526] Elapsed 2m 3s (remain 5m 46s) Loss: 0.1774 
Epoch: [1][500/1526] Elapsed 2m 33s (remain 5m 14s) Loss: 0.1766 
Epoch: [1][600/1526] Elapsed 3m 4s (remain 4m 43s) Loss: 0.1761 
Epoch: [1][700/1526] Elapsed 3m 34s (remain 4m 12s) Loss: 0.1757 
Epoch: [1][800/1526] Elapsed 4m 4s (remain 3m 41s) Loss: 0.1754 
Epoch: [1][900/1526] Elapsed 4m 35s (remain 3m 10s) Loss: 0.1752 
Epoch: [1][1000/1526] Elapsed 5m 5s (remain 2m 40s) Loss: 0.1750 
Epoch: [1][1100/1526] Elapsed 5m 36s (remain 2m 9s) Loss: 0.1748 
Epoch: [1][1200/1526] Elapsed 6m 6s (remain 1m 39s) Loss: 0.1747 
Epoch: [1][1300/1526] Elapsed 6m 36s (remain 1m 8s) Loss: 0.1746 
Epoch: [1][1400/1526] Elapsed 7m 7s (remain 0m 38s) Loss: 0.1745 
Epoch: [1][1500/

Epoch 1 - avg_train_loss: 0.1744  avg_val_loss: 0.1733  time: 482s
Epoch 1 - avg_train_loss: 0.1744  avg_val_loss: 0.1733  time: 482s
Epoch 1 - avg_train_loss: 0.1744  avg_val_loss: 0.1733  time: 482s
Epoch 1 - avg_train_loss: 0.1744  avg_val_loss: 0.1733  time: 482s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1744  avg_val_loss: 0.1733  time: 482s
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
INFO:__main__:Epoch 1 - Score: 0.0
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.0000 Model


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [2][0/1526] Elapsed 0m 0s (remain 18m 40s) Loss: 0.1733 
Epoch: [2][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [2][200/1526] Elapsed 1m 1s (remain 6m 45s) Loss: 0.1733 
Epoch: [2][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1733 
Epoch: [2][400/1526] Elapsed 2m 2s (remain 5m 42s) Loss: 0.1733 
Epoch: [2][500/1526] Elapsed 2m 32s (remain 5m 12s) Loss: 0.1733 
Epoch: [2][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1733 
Epoch: [2][700/1526] Elapsed 3m 33s (remain 4m 11s) Loss: 0.1733 
Epoch: [2][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [2][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [2][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [2][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1733 
Epoch: [2][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [2][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1733 
Epoch: [2][1400/1526] 

Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
INFO:__main__:Epoch 2 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [3][0/1526] Elapsed 0m 0s (remain 18m 50s) Loss: 0.1733 
Epoch: [3][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [3][200/1526] Elapsed 1m 1s (remain 6m 45s) Loss: 0.1733 
Epoch: [3][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1733 
Epoch: [3][400/1526] Elapsed 2m 2s (remain 5m 43s) Loss: 0.1733 
Epoch: [3][500/1526] Elapsed 2m 32s (remain 5m 12s) Loss: 0.1733 
Epoch: [3][600/1526] Elapsed 3m 3s (remain 4m 41s) Loss: 0.1733 
Epoch: [3][700/1526] Elapsed 3m 33s (remain 4m 11s) Loss: 0.1733 
Epoch: [3][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [3][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [3][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [3][1100/1526] Elapsed 5m 35s (remain 2m 9s) Loss: 0.1733 
Epoch: [3][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [3][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1733 
Epoch: [3][1400/1526] 

Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
INFO:__main__:Epoch 3 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [4][0/1526] Elapsed 0m 0s (remain 18m 47s) Loss: 0.1733 
Epoch: [4][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [4][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [4][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1733 
Epoch: [4][400/1526] Elapsed 2m 2s (remain 5m 42s) Loss: 0.1733 
Epoch: [4][500/1526] Elapsed 2m 32s (remain 5m 12s) Loss: 0.1733 
Epoch: [4][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1733 
Epoch: [4][700/1526] Elapsed 3m 33s (remain 4m 10s) Loss: 0.1733 
Epoch: [4][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [4][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [4][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [4][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1733 
Epoch: [4][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [4][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1733 
Epoch: [4][1400/1526] 

Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
INFO:__main__:Epoch 4 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [5][0/1526] Elapsed 0m 0s (remain 18m 51s) Loss: 0.1733 
Epoch: [5][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [5][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [5][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1733 
Epoch: [5][400/1526] Elapsed 2m 2s (remain 5m 42s) Loss: 0.1733 
Epoch: [5][500/1526] Elapsed 2m 32s (remain 5m 12s) Loss: 0.1733 
Epoch: [5][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1733 
Epoch: [5][700/1526] Elapsed 3m 33s (remain 4m 11s) Loss: 0.1733 
Epoch: [5][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [5][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [5][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [5][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1733 
Epoch: [5][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [5][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1733 
Epoch: [5][1400/1526] 

Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
INFO:__main__:Epoch 5 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Score: 0.00000
Score: 0.00000
Score: 0.00000
Score: 0.00000
INFO:__main__:Score: 0.00000
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1526] Elapsed 0m 0s (remain 20m 48s) Loss: 0.4215 
Epoch: [1][100/1526] Elapsed 0m 31s (remain 7m 19s) Loss: 0.1867 
Epoch: [1][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1803 
Epoch: [1][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1781 
Epoch: [1][400/1526] Elapsed 2m 2s (remain 5m 42s) Loss: 0.1770 
Epoch: [1][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1763 
Epoch: [1][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1758 
Epoch: [1][700/1526] Elapsed 3m 33s (remain 4m 10s) Loss: 0.1754 
Epoch: [1][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1752 
Epoch: [1][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1750 
Epoch: [1][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1748 
Epoch: [1][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1747 
Epoch: [1][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1746 
Epoch: [1][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1745 
Epoch: [1][1400/1526] Elapsed 7m 6s (remain 0m 38s) Loss: 0.1744 
Epoch: [1][1500/

Epoch 1 - avg_train_loss: 0.1743  avg_val_loss: 0.1733  time: 481s
Epoch 1 - avg_train_loss: 0.1743  avg_val_loss: 0.1733  time: 481s
Epoch 1 - avg_train_loss: 0.1743  avg_val_loss: 0.1733  time: 481s
Epoch 1 - avg_train_loss: 0.1743  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1743  avg_val_loss: 0.1733  time: 481s
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


INFO:__main__:Epoch 1 - Score: 0.0
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.0000 Model


Epoch: [2][0/1526] Elapsed 0m 0s (remain 19m 26s) Loss: 0.1733 
Epoch: [2][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [2][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [2][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1733 
Epoch: [2][400/1526] Elapsed 2m 2s (remain 5m 42s) Loss: 0.1733 
Epoch: [2][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [2][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1733 
Epoch: [2][700/1526] Elapsed 3m 33s (remain 4m 10s) Loss: 0.1733 
Epoch: [2][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [2][900/1526] Elapsed 4m 33s (remain 3m 10s) Loss: 0.1733 
Epoch: [2][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [2][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1733 
Epoch: [2][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [2][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1733 
Epoch: [2][1400/1526] Elapsed 7m 5s (remain 0m 38s) Loss: 0.1733 
Epoch: [2][1500/

Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
INFO:__main__:Epoch 2 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [3][0/1526] Elapsed 0m 0s (remain 19m 34s) Loss: 0.1733 
Epoch: [3][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [3][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [3][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1733 
Epoch: [3][400/1526] Elapsed 2m 2s (remain 5m 42s) Loss: 0.1733 
Epoch: [3][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [3][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1733 
Epoch: [3][700/1526] Elapsed 3m 33s (remain 4m 10s) Loss: 0.1733 
Epoch: [3][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [3][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [3][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [3][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1733 
Epoch: [3][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [3][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1733 
Epoch: [3][1400/1526] 

Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
INFO:__main__:Epoch 3 - Score: 0.0


Epoch: [4][0/1526] Elapsed 0m 0s (remain 20m 11s) Loss: 0.1733 
Epoch: [4][100/1526] Elapsed 0m 31s (remain 7m 20s) Loss: 0.1733 
Epoch: [4][200/1526] Elapsed 1m 1s (remain 6m 46s) Loss: 0.1733 
Epoch: [4][300/1526] Elapsed 1m 31s (remain 6m 14s) Loss: 0.1733 
Epoch: [4][400/1526] Elapsed 2m 2s (remain 5m 43s) Loss: 0.1733 
Epoch: [4][500/1526] Elapsed 2m 32s (remain 5m 12s) Loss: 0.1733 
Epoch: [4][600/1526] Elapsed 3m 3s (remain 4m 41s) Loss: 0.1733 
Epoch: [4][700/1526] Elapsed 3m 33s (remain 4m 11s) Loss: 0.1733 
Epoch: [4][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [4][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [4][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [4][1100/1526] Elapsed 5m 35s (remain 2m 9s) Loss: 0.1733 
Epoch: [4][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [4][1300/1526] Elapsed 6m 36s (remain 1m 8s) Loss: 0.1733 
Epoch: [4][1400/1526] Elapsed 7m 6s (remain 0m 38s) Loss: 0.1733 
Epoch: [4][1500/

Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
INFO:__main__:Epoch 4 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [5][0/1526] Elapsed 0m 0s (remain 19m 48s) Loss: 0.1733 
Epoch: [5][100/1526] Elapsed 0m 31s (remain 7m 19s) Loss: 0.1733 
Epoch: [5][200/1526] Elapsed 1m 1s (remain 6m 45s) Loss: 0.1733 
Epoch: [5][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1733 
Epoch: [5][400/1526] Elapsed 2m 2s (remain 5m 42s) Loss: 0.1733 
Epoch: [5][500/1526] Elapsed 2m 32s (remain 5m 12s) Loss: 0.1733 
Epoch: [5][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1733 
Epoch: [5][700/1526] Elapsed 3m 33s (remain 4m 11s) Loss: 0.1733 
Epoch: [5][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [5][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [5][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [5][1100/1526] Elapsed 5m 35s (remain 2m 9s) Loss: 0.1733 
Epoch: [5][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [5][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1733 
Epoch: [5][1400/1526] 

Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


INFO:__main__:Epoch 5 - Score: 0.0
Score: 0.00000
Score: 0.00000
Score: 0.00000
Score: 0.00000
INFO:__main__:Score: 0.00000
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1526] Elapsed 0m 0s (remain 21m 24s) Loss: 0.3465 
Epoch: [1][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1835 
Epoch: [1][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1787 
Epoch: [1][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1770 
Epoch: [1][400/1526] Elapsed 2m 1s (remain 5m 42s) Loss: 0.1761 
Epoch: [1][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1756 
Epoch: [1][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1752 
Epoch: [1][700/1526] Elapsed 3m 33s (remain 4m 10s) Loss: 0.1749 
Epoch: [1][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1747 
Epoch: [1][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1746 
Epoch: [1][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1745 
Epoch: [1][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1744 
Epoch: [1][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1743 
Epoch: [1][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1742 
Epoch: [1][1400/1526] Elapsed 7m 5s (remain 0m 37s) Loss: 0.1741 
Epoch: [1][1500/1

Epoch 1 - avg_train_loss: 0.1741  avg_val_loss: 0.1733  time: 481s
Epoch 1 - avg_train_loss: 0.1741  avg_val_loss: 0.1733  time: 481s
Epoch 1 - avg_train_loss: 0.1741  avg_val_loss: 0.1733  time: 481s
Epoch 1 - avg_train_loss: 0.1741  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1741  avg_val_loss: 0.1733  time: 481s
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 1 - Score: 0.0
INFO:__main__:Epoch 1 - Score: 0.0
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.0000 Model


Epoch: [2][0/1526] Elapsed 0m 0s (remain 19m 45s) Loss: 0.1733 
Epoch: [2][100/1526] Elapsed 0m 31s (remain 7m 19s) Loss: 0.1733 
Epoch: [2][200/1526] Elapsed 1m 1s (remain 6m 45s) Loss: 0.1733 
Epoch: [2][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1733 
Epoch: [2][400/1526] Elapsed 2m 2s (remain 5m 43s) Loss: 0.1733 
Epoch: [2][500/1526] Elapsed 2m 32s (remain 5m 12s) Loss: 0.1733 
Epoch: [2][600/1526] Elapsed 3m 3s (remain 4m 41s) Loss: 0.1733 
Epoch: [2][700/1526] Elapsed 3m 33s (remain 4m 11s) Loss: 0.1733 
Epoch: [2][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [2][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [2][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [2][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1733 
Epoch: [2][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [2][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1733 
Epoch: [2][1400/1526] Elapsed 7m 5s (remain 0m 37s) Loss: 0.1733 
Epoch: [2][1500/

Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
INFO:__main__:Epoch 2 - Score: 0.0


Epoch: [3][0/1526] Elapsed 0m 0s (remain 20m 27s) Loss: 0.1733 
Epoch: [3][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [3][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [3][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [3][400/1526] Elapsed 2m 1s (remain 5m 42s) Loss: 0.1733 
Epoch: [3][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [3][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1733 
Epoch: [3][700/1526] Elapsed 3m 33s (remain 4m 10s) Loss: 0.1733 
Epoch: [3][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [3][900/1526] Elapsed 4m 33s (remain 3m 10s) Loss: 0.1733 
Epoch: [3][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [3][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1733 
Epoch: [3][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [3][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1733 
Epoch: [3][1400/1526] Elapsed 7m 6s (remain 0m 38s) Loss: 0.1733 
Epoch: [3][1500/

Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
INFO:__main__:Epoch 3 - Score: 0.0


Epoch: [4][0/1526] Elapsed 0m 0s (remain 19m 52s) Loss: 0.1733 
Epoch: [4][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [4][200/1526] Elapsed 1m 1s (remain 6m 45s) Loss: 0.1733 
Epoch: [4][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1733 
Epoch: [4][400/1526] Elapsed 2m 2s (remain 5m 43s) Loss: 0.1733 
Epoch: [4][500/1526] Elapsed 2m 32s (remain 5m 12s) Loss: 0.1733 
Epoch: [4][600/1526] Elapsed 3m 3s (remain 4m 41s) Loss: 0.1733 
Epoch: [4][700/1526] Elapsed 3m 33s (remain 4m 11s) Loss: 0.1733 
Epoch: [4][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [4][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [4][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [4][1100/1526] Elapsed 5m 35s (remain 2m 9s) Loss: 0.1733 
Epoch: [4][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [4][1300/1526] Elapsed 6m 36s (remain 1m 8s) Loss: 0.1733 
Epoch: [4][1400/1526] Elapsed 7m 6s (remain 0m 38s) Loss: 0.1733 
Epoch: [4][1500/

Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 482s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 482s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 482s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 482s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 482s
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


INFO:__main__:Epoch 4 - Score: 0.0


Epoch: [5][0/1526] Elapsed 0m 0s (remain 19m 44s) Loss: 0.1733 
Epoch: [5][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [5][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [5][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1733 
Epoch: [5][400/1526] Elapsed 2m 2s (remain 5m 42s) Loss: 0.1733 
Epoch: [5][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [5][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1733 
Epoch: [5][700/1526] Elapsed 3m 33s (remain 4m 10s) Loss: 0.1733 
Epoch: [5][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [5][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [5][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [5][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1733 
Epoch: [5][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [5][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1733 
Epoch: [5][1400/1526] Elapsed 7m 6s (remain 0m 38s) Loss: 0.1733 
Epoch: [5][1500/

Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
INFO:__main__:Epoch 5 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Score: 0.00000
Score: 0.00000
Score: 0.00000
Score: 0.00000
INFO:__main__:Score: 0.00000
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1526] Elapsed 0m 0s (remain 21m 13s) Loss: 0.4386 
Epoch: [1][100/1526] Elapsed 0m 31s (remain 7m 19s) Loss: 0.1863 
Epoch: [1][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1801 
Epoch: [1][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1780 
Epoch: [1][400/1526] Elapsed 2m 2s (remain 5m 42s) Loss: 0.1768 
Epoch: [1][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1762 
Epoch: [1][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1757 
Epoch: [1][700/1526] Elapsed 3m 33s (remain 4m 11s) Loss: 0.1754 
Epoch: [1][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1751 
Epoch: [1][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1749 
Epoch: [1][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1748 
Epoch: [1][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1746 
Epoch: [1][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1745 
Epoch: [1][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1744 
Epoch: [1][1400/1526] Elapsed 7m 6s (remain 0m 38s) Loss: 0.1744 
Epoch: [1][1500/

Epoch 1 - avg_train_loss: 0.1743  avg_val_loss: 0.1733  time: 481s
Epoch 1 - avg_train_loss: 0.1743  avg_val_loss: 0.1733  time: 481s
Epoch 1 - avg_train_loss: 0.1743  avg_val_loss: 0.1733  time: 481s
Epoch 1 - avg_train_loss: 0.1743  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1743  avg_val_loss: 0.1733  time: 481s
Epoch 1 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
INFO:__main__:Epoch 1 - Score: 0.0
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.0000 Model


Epoch: [2][0/1526] Elapsed 0m 0s (remain 19m 48s) Loss: 0.1733 
Epoch: [2][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [2][200/1526] Elapsed 1m 1s (remain 6m 43s) Loss: 0.1733 
Epoch: [2][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [2][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [2][500/1526] Elapsed 2m 31s (remain 5m 10s) Loss: 0.1733 
Epoch: [2][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [2][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [2][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [2][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [2][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [2][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [2][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1733 
Epoch: [2][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [2][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [2][1500/1

Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
INFO:__main__:Epoch 2 - Score: 0.0


Epoch: [3][0/1526] Elapsed 0m 0s (remain 19m 47s) Loss: 0.1733 
Epoch: [3][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [3][200/1526] Elapsed 1m 1s (remain 6m 43s) Loss: 0.1733 
Epoch: [3][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [3][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [3][500/1526] Elapsed 2m 31s (remain 5m 10s) Loss: 0.1733 
Epoch: [3][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [3][700/1526] Elapsed 3m 32s (remain 4m 9s) Loss: 0.1733 
Epoch: [3][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [3][900/1526] Elapsed 4m 32s (remain 3m 9s) Loss: 0.1733 
Epoch: [3][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [3][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [3][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1733 
Epoch: [3][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [3][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [3][1500/15

Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 3 - Score: 0.0
INFO:__main__:Epoch 3 - Score: 0.0


Epoch: [4][0/1526] Elapsed 0m 0s (remain 20m 0s) Loss: 0.1733 
Epoch: [4][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [4][200/1526] Elapsed 1m 1s (remain 6m 43s) Loss: 0.1733 
Epoch: [4][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [4][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [4][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [4][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [4][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [4][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [4][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [4][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [4][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [4][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [4][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [4][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [4][1500/15

Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
INFO:__main__:Epoch 4 - Score: 0.0


Epoch: [5][0/1526] Elapsed 0m 0s (remain 19m 42s) Loss: 0.1733 
Epoch: [5][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [5][200/1526] Elapsed 1m 1s (remain 6m 43s) Loss: 0.1733 
Epoch: [5][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [5][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [5][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [5][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [5][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [5][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [5][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [5][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [5][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [5][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [5][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [5][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [5][1500/1

Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
INFO:__main__:Epoch 5 - Score: 0.0
Score: 0.00000
Score: 0.00000
Score: 0.00000
Score: 0.00000
INFO:__main__:Score: 0.00000
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1526] Elapsed 0m 0s (remain 20m 50s) Loss: 0.4231 
Epoch: [1][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1880 
Epoch: [1][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1810 
Epoch: [1][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1786 
Epoch: [1][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1773 
Epoch: [1][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1765 
Epoch: [1][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1760 
Epoch: [1][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1757 
Epoch: [1][800/1526] Elapsed 4m 3s (remain 3m 39s) Loss: 0.1754 
Epoch: [1][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1751 
Epoch: [1][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1750 
Epoch: [1][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1748 
Epoch: [1][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1747 
Epoch: [1][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1746 
Epoch: [1][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1745 
Epoch: [1][1500/1

Epoch 1 - avg_train_loss: 0.1744  avg_val_loss: 0.1733  time: 479s


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 1 - avg_train_loss: 0.1744  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1744  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1744  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1744  avg_val_loss: 0.1733  time: 479s
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
INFO:__main__:Epoch 1 - Score: 0.0
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.0000 Model


Epoch: [2][0/1526] Elapsed 0m 0s (remain 20m 1s) Loss: 0.1733 
Epoch: [2][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [2][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [2][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [2][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [2][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [2][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [2][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [2][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [2][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [2][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [2][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [2][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [2][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [2][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [2][1500/15

Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
INFO:__main__:Epoch 2 - Score: 0.0


Epoch: [3][0/1526] Elapsed 0m 0s (remain 19m 58s) Loss: 0.1733 
Epoch: [3][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [3][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [3][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [3][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [3][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [3][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [3][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [3][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [3][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [3][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [3][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [3][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1733 
Epoch: [3][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [3][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [3][1500/1

Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


INFO:__main__:Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
INFO:__main__:Epoch 3 - Score: 0.0


Epoch: [4][0/1526] Elapsed 0m 0s (remain 20m 3s) Loss: 0.1733 
Epoch: [4][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [4][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [4][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [4][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [4][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [4][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [4][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [4][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [4][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [4][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [4][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [4][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1733 
Epoch: [4][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [4][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [4][1500/15

Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


INFO:__main__:Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
INFO:__main__:Epoch 4 - Score: 0.0


Epoch: [5][0/1526] Elapsed 0m 0s (remain 19m 39s) Loss: 0.1733 
Epoch: [5][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [5][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [5][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [5][400/1526] Elapsed 2m 1s (remain 5m 42s) Loss: 0.1733 
Epoch: [5][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [5][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1733 
Epoch: [5][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [5][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [5][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [5][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [5][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1733 
Epoch: [5][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [5][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1733 
Epoch: [5][1400/1526] Elapsed 7m 5s (remain 0m 37s) Loss: 0.1733 
Epoch: [5][1500/1

Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
INFO:__main__:Epoch 5 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Score: 0.00000
Score: 0.00000
Score: 0.00000
Score: 0.00000
INFO:__main__:Score: 0.00000
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1526] Elapsed 0m 0s (remain 21m 7s) Loss: 0.3467 
Epoch: [1][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1818 
Epoch: [1][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1778 
Epoch: [1][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1764 
Epoch: [1][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1757 
Epoch: [1][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1752 
Epoch: [1][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1749 
Epoch: [1][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1747 
Epoch: [1][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1745 
Epoch: [1][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1744 
Epoch: [1][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1743 
Epoch: [1][1100/1526] Elapsed 5m 34s (remain 2m 9s) Loss: 0.1742 
Epoch: [1][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1741 
Epoch: [1][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1741 
Epoch: [1][1400/1526] Elapsed 7m 5s (remain 0m 37s) Loss: 0.1740 
Epoch: [1][1500/15

Epoch 1 - avg_train_loss: 0.1739  avg_val_loss: 0.1733  time: 480s
Epoch 1 - avg_train_loss: 0.1739  avg_val_loss: 0.1733  time: 480s
Epoch 1 - avg_train_loss: 0.1739  avg_val_loss: 0.1733  time: 480s
Epoch 1 - avg_train_loss: 0.1739  avg_val_loss: 0.1733  time: 480s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1739  avg_val_loss: 0.1733  time: 480s
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
INFO:__main__:Epoch 1 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.0000 Model


Epoch: [2][0/1526] Elapsed 0m 0s (remain 19m 43s) Loss: 0.1733 
Epoch: [2][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [2][200/1526] Elapsed 1m 1s (remain 6m 43s) Loss: 0.1733 
Epoch: [2][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [2][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [2][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [2][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [2][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [2][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [2][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [2][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [2][1100/1526] Elapsed 5m 34s (remain 2m 8s) Loss: 0.1733 
Epoch: [2][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [2][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [2][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [2][1500/1

Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
INFO:__main__:Epoch 2 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [3][0/1526] Elapsed 0m 0s (remain 19m 42s) Loss: 0.1733 
Epoch: [3][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [3][200/1526] Elapsed 1m 1s (remain 6m 43s) Loss: 0.1733 
Epoch: [3][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [3][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [3][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [3][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [3][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [3][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [3][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [3][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [3][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [3][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1733 
Epoch: [3][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [3][1400/1526] E

Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


INFO:__main__:Epoch 3 - Score: 0.0


Epoch: [4][0/1526] Elapsed 0m 0s (remain 19m 48s) Loss: 0.1733 
Epoch: [4][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [4][200/1526] Elapsed 1m 1s (remain 6m 43s) Loss: 0.1733 
Epoch: [4][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [4][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [4][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [4][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [4][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [4][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [4][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [4][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [4][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [4][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1733 
Epoch: [4][1300/1526] Elapsed 6m 33s (remain 1m 8s) Loss: 0.1733 
Epoch: [4][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [4][1500/1

Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
INFO:__main__:Epoch 4 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [5][0/1526] Elapsed 0m 0s (remain 19m 26s) Loss: 0.1733 
Epoch: [5][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [5][200/1526] Elapsed 1m 1s (remain 6m 43s) Loss: 0.1733 
Epoch: [5][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [5][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [5][500/1526] Elapsed 2m 31s (remain 5m 10s) Loss: 0.1733 
Epoch: [5][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [5][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [5][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [5][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [5][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [5][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [5][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1733 
Epoch: [5][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [5][1400/1526] E

Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
INFO:__main__:Epoch 5 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Score: 0.00000
Score: 0.00000
Score: 0.00000
Score: 0.00000
INFO:__main__:Score: 0.00000
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1526] Elapsed 0m 0s (remain 20m 34s) Loss: 0.3914 
Epoch: [1][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1845 
Epoch: [1][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1792 
Epoch: [1][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1774 
Epoch: [1][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1764 
Epoch: [1][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1758 
Epoch: [1][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1754 
Epoch: [1][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1751 
Epoch: [1][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1749 
Epoch: [1][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1747 
Epoch: [1][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1746 
Epoch: [1][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1745 
Epoch: [1][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1744 
Epoch: [1][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1743 
Epoch: [1][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1742 
Epoch: [1][1500/1

Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
INFO:__main__:Epoch 1 - Score: 0.0
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.0000 Model


Epoch: [2][0/1526] Elapsed 0m 0s (remain 19m 31s) Loss: 0.1733 
Epoch: [2][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [2][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [2][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1733 
Epoch: [2][400/1526] Elapsed 2m 2s (remain 5m 42s) Loss: 0.1733 
Epoch: [2][500/1526] Elapsed 2m 32s (remain 5m 12s) Loss: 0.1733 
Epoch: [2][600/1526] Elapsed 3m 2s (remain 4m 41s) Loss: 0.1733 
Epoch: [2][700/1526] Elapsed 3m 33s (remain 4m 11s) Loss: 0.1733 
Epoch: [2][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [2][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [2][1000/1526] Elapsed 5m 4s (remain 2m 39s) Loss: 0.1733 
Epoch: [2][1100/1526] Elapsed 5m 35s (remain 2m 9s) Loss: 0.1733 
Epoch: [2][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [2][1300/1526] Elapsed 6m 35s (remain 1m 8s) Loss: 0.1733 
Epoch: [2][1400/1526] Elapsed 7m 6s (remain 0m 38s) Loss: 0.1733 
Epoch: [2][1500/

Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 2 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
INFO:__main__:Epoch 2 - Score: 0.0


Epoch: [3][0/1526] Elapsed 0m 0s (remain 19m 44s) Loss: 0.1733 
Epoch: [3][100/1526] Elapsed 0m 31s (remain 7m 20s) Loss: 0.1733 
Epoch: [3][200/1526] Elapsed 1m 1s (remain 6m 46s) Loss: 0.1733 
Epoch: [3][300/1526] Elapsed 1m 32s (remain 6m 14s) Loss: 0.1733 
Epoch: [3][400/1526] Elapsed 2m 2s (remain 5m 43s) Loss: 0.1733 
Epoch: [3][500/1526] Elapsed 2m 32s (remain 5m 12s) Loss: 0.1733 
Epoch: [3][600/1526] Elapsed 3m 3s (remain 4m 42s) Loss: 0.1733 
Epoch: [3][700/1526] Elapsed 3m 33s (remain 4m 11s) Loss: 0.1733 
Epoch: [3][800/1526] Elapsed 4m 4s (remain 3m 40s) Loss: 0.1733 
Epoch: [3][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [3][1000/1526] Elapsed 5m 5s (remain 2m 39s) Loss: 0.1733 
Epoch: [3][1100/1526] Elapsed 5m 35s (remain 2m 9s) Loss: 0.1733 
Epoch: [3][1200/1526] Elapsed 6m 5s (remain 1m 39s) Loss: 0.1733 
Epoch: [3][1300/1526] Elapsed 6m 36s (remain 1m 8s) Loss: 0.1733 
Epoch: [3][1400/1526] Elapsed 7m 6s (remain 0m 38s) Loss: 0.1733 
Epoch: [3][1500/

Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 482s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 482s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 482s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 482s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 482s
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
INFO:__main__:Epoch 3 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [4][0/1526] Elapsed 0m 0s (remain 19m 44s) Loss: 0.1733 
Epoch: [4][100/1526] Elapsed 0m 31s (remain 7m 20s) Loss: 0.1733 
Epoch: [4][200/1526] Elapsed 1m 1s (remain 6m 46s) Loss: 0.1733 
Epoch: [4][300/1526] Elapsed 1m 32s (remain 6m 14s) Loss: 0.1733 
Epoch: [4][400/1526] Elapsed 2m 2s (remain 5m 43s) Loss: 0.1733 
Epoch: [4][500/1526] Elapsed 2m 32s (remain 5m 12s) Loss: 0.1733 
Epoch: [4][600/1526] Elapsed 3m 3s (remain 4m 42s) Loss: 0.1733 
Epoch: [4][700/1526] Elapsed 3m 33s (remain 4m 11s) Loss: 0.1733 
Epoch: [4][800/1526] Elapsed 4m 4s (remain 3m 41s) Loss: 0.1733 
Epoch: [4][900/1526] Elapsed 4m 34s (remain 3m 10s) Loss: 0.1733 
Epoch: [4][1000/1526] Elapsed 5m 5s (remain 2m 39s) Loss: 0.1733 
Epoch: [4][1100/1526] Elapsed 5m 35s (remain 2m 9s) Loss: 0.1733 
Epoch: [4][1200/1526] Elapsed 6m 5s (remain 1m 38s) Loss: 0.1733 
Epoch: [4][1300/1526] Elapsed 6m 36s (remain 1m 8s) Loss: 0.1733 
Epoch: [4][1400/1526] 

Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 481s
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
INFO:__main__:Epoch 4 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [5][0/1526] Elapsed 0m 0s (remain 20m 10s) Loss: 0.1733 
Epoch: [5][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [5][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [5][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [5][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [5][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [5][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [5][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [5][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [5][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [5][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [5][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [5][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1733 
Epoch: [5][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [5][1400/1526] E

Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
INFO:__main__:Epoch 5 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Score: 0.00000
Score: 0.00000
Score: 0.00000
Score: 0.00000
INFO:__main__:Score: 0.00000
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1526] Elapsed 0m 0s (remain 20m 45s) Loss: 0.3736 
Epoch: [1][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1849 
Epoch: [1][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1794 
Epoch: [1][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1775 
Epoch: [1][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1765 
Epoch: [1][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1759 
Epoch: [1][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1755 
Epoch: [1][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1752 
Epoch: [1][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1750 
Epoch: [1][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1748 
Epoch: [1][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1746 
Epoch: [1][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1745 
Epoch: [1][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1744 
Epoch: [1][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1743 
Epoch: [1][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1743 
Epoch: [1][1500/1

Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
INFO:__main__:Epoch 1 - Score: 0.0
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.0000 Model


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [2][0/1526] Elapsed 0m 0s (remain 19m 37s) Loss: 0.1733 
Epoch: [2][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [2][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [2][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [2][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [2][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [2][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [2][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [2][800/1526] Elapsed 4m 3s (remain 3m 39s) Loss: 0.1733 
Epoch: [2][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [2][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [2][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [2][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [2][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [2][1400/1526] E

Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


INFO:__main__:Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
INFO:__main__:Epoch 2 - Score: 0.0


Epoch: [3][0/1526] Elapsed 0m 0s (remain 20m 7s) Loss: 0.1733 
Epoch: [3][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [3][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [3][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [3][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [3][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [3][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [3][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [3][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [3][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [3][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [3][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [3][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [3][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [3][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [3][1500/15

Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


INFO:__main__:Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
INFO:__main__:Epoch 3 - Score: 0.0


Epoch: [4][0/1526] Elapsed 0m 0s (remain 19m 34s) Loss: 0.1733 
Epoch: [4][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [4][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [4][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [4][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [4][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [4][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [4][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [4][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [4][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [4][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [4][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [4][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [4][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [4][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [4][1500/1

Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
INFO:__main__:Epoch 4 - Score: 0.0


Epoch: [5][0/1526] Elapsed 0m 0s (remain 19m 52s) Loss: 0.1733 
Epoch: [5][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [5][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [5][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [5][400/1526] Elapsed 2m 1s (remain 5m 42s) Loss: 0.1733 
Epoch: [5][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [5][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [5][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [5][800/1526] Elapsed 4m 3s (remain 3m 39s) Loss: 0.1733 
Epoch: [5][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [5][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [5][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [5][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [5][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [5][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [5][1500/1

Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


INFO:__main__:Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
INFO:__main__:Epoch 5 - Score: 0.0
Score: 0.00000
Score: 0.00000
Score: 0.00000
Score: 0.00000
INFO:__main__:Score: 0.00000
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1526] Elapsed 0m 0s (remain 21m 3s) Loss: 0.4147 
Epoch: [1][100/1526] Elapsed 0m 31s (remain 7m 19s) Loss: 0.1858 
Epoch: [1][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1799 
Epoch: [1][300/1526] Elapsed 1m 31s (remain 6m 13s) Loss: 0.1778 
Epoch: [1][400/1526] Elapsed 2m 1s (remain 5m 42s) Loss: 0.1767 
Epoch: [1][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1761 
Epoch: [1][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1756 
Epoch: [1][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1753 
Epoch: [1][800/1526] Elapsed 4m 3s (remain 3m 39s) Loss: 0.1751 
Epoch: [1][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1749 
Epoch: [1][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1747 
Epoch: [1][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1746 
Epoch: [1][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1745 
Epoch: [1][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1744 
Epoch: [1][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1743 
Epoch: [1][1500/15

Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
INFO:__main__:Epoch 1 - Score: 0.0
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.0000 Model


Epoch: [2][0/1526] Elapsed 0m 0s (remain 19m 33s) Loss: 0.1733 
Epoch: [2][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [2][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [2][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [2][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [2][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [2][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [2][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [2][800/1526] Elapsed 4m 3s (remain 3m 39s) Loss: 0.1733 
Epoch: [2][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [2][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [2][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [2][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [2][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [2][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [2][1500/1

Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
INFO:__main__:Epoch 2 - Score: 0.0


Epoch: [3][0/1526] Elapsed 0m 0s (remain 20m 7s) Loss: 0.1733 
Epoch: [3][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [3][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [3][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [3][400/1526] Elapsed 2m 1s (remain 5m 42s) Loss: 0.1733 
Epoch: [3][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [3][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [3][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [3][800/1526] Elapsed 4m 3s (remain 3m 39s) Loss: 0.1733 
Epoch: [3][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [3][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [3][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [3][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [3][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [3][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1733 
Epoch: [3][1500/15

Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
INFO:__main__:Epoch 3 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [4][0/1526] Elapsed 0m 0s (remain 19m 55s) Loss: 0.1733 
Epoch: [4][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [4][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [4][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [4][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [4][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [4][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [4][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [4][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [4][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [4][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [4][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [4][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1733 
Epoch: [4][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [4][1400/1526] E

Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
INFO:__main__:Epoch 4 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [5][0/1526] Elapsed 0m 0s (remain 19m 34s) Loss: 0.1733 
Epoch: [5][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [5][200/1526] Elapsed 1m 1s (remain 6m 43s) Loss: 0.1733 
Epoch: [5][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [5][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [5][500/1526] Elapsed 2m 31s (remain 5m 10s) Loss: 0.1733 
Epoch: [5][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [5][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [5][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [5][900/1526] Elapsed 4m 32s (remain 3m 9s) Loss: 0.1733 
Epoch: [5][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [5][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [5][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1733 
Epoch: [5][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [5][1400/1526] E

Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
INFO:__main__:Epoch 5 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Score: 0.00000
Score: 0.00000
Score: 0.00000
Score: 0.00000
INFO:__main__:Score: 0.00000
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1526] Elapsed 0m 0s (remain 20m 51s) Loss: 0.3569 
Epoch: [1][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1846 
Epoch: [1][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1793 
Epoch: [1][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1774 
Epoch: [1][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1764 
Epoch: [1][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1758 
Epoch: [1][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1754 
Epoch: [1][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1751 
Epoch: [1][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1749 
Epoch: [1][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1747 
Epoch: [1][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1746 
Epoch: [1][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1745 
Epoch: [1][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1744 
Epoch: [1][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1743 
Epoch: [1][1400/1526] Elapsed 7m 4s (remain 0m 37s) Loss: 0.1742 
Epoch: [1][1500/1

Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1742  avg_val_loss: 0.1733  time: 479s
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
Epoch 1 - Score: 0.0
INFO:__main__:Epoch 1 - Score: 0.0
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
Epoch 1 - Save Best Score: 0.0000 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.0000 Model


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [2][0/1526] Elapsed 0m 0s (remain 19m 24s) Loss: 0.1733 
Epoch: [2][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [2][200/1526] Elapsed 1m 1s (remain 6m 43s) Loss: 0.1733 
Epoch: [2][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [2][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [2][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [2][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [2][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [2][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [2][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [2][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [2][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [2][1200/1526] Elapsed 6m 3s (remain 1m 38s) Loss: 0.1733 
Epoch: [2][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [2][1400/1526] E

Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
Epoch 2 - Score: 0.0
INFO:__main__:Epoch 2 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [3][0/1526] Elapsed 0m 0s (remain 19m 27s) Loss: 0.1733 
Epoch: [3][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [3][200/1526] Elapsed 1m 1s (remain 6m 43s) Loss: 0.1733 
Epoch: [3][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [3][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [3][500/1526] Elapsed 2m 32s (remain 5m 10s) Loss: 0.1733 
Epoch: [3][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [3][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [3][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [3][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [3][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [3][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [3][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [3][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [3][1400/1526] E

Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
Epoch 3 - Score: 0.0
INFO:__main__:Epoch 3 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [4][0/1526] Elapsed 0m 0s (remain 19m 37s) Loss: 0.1733 
Epoch: [4][100/1526] Elapsed 0m 31s (remain 7m 17s) Loss: 0.1733 
Epoch: [4][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [4][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [4][400/1526] Elapsed 2m 1s (remain 5m 41s) Loss: 0.1733 
Epoch: [4][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [4][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [4][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [4][800/1526] Elapsed 4m 2s (remain 3m 39s) Loss: 0.1733 
Epoch: [4][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [4][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [4][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [4][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [4][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [4][1400/1526] E

Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 479s
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
Epoch 4 - Score: 0.0
INFO:__main__:Epoch 4 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 
Epoch: [5][0/1526] Elapsed 0m 0s (remain 19m 57s) Loss: 0.1733 
Epoch: [5][100/1526] Elapsed 0m 31s (remain 7m 18s) Loss: 0.1733 
Epoch: [5][200/1526] Elapsed 1m 1s (remain 6m 44s) Loss: 0.1733 
Epoch: [5][300/1526] Elapsed 1m 31s (remain 6m 12s) Loss: 0.1733 
Epoch: [5][400/1526] Elapsed 2m 1s (remain 5m 42s) Loss: 0.1733 
Epoch: [5][500/1526] Elapsed 2m 32s (remain 5m 11s) Loss: 0.1733 
Epoch: [5][600/1526] Elapsed 3m 2s (remain 4m 40s) Loss: 0.1733 
Epoch: [5][700/1526] Elapsed 3m 32s (remain 4m 10s) Loss: 0.1733 
Epoch: [5][800/1526] Elapsed 4m 3s (remain 3m 40s) Loss: 0.1733 
Epoch: [5][900/1526] Elapsed 4m 33s (remain 3m 9s) Loss: 0.1733 
Epoch: [5][1000/1526] Elapsed 5m 3s (remain 2m 39s) Loss: 0.1733 
Epoch: [5][1100/1526] Elapsed 5m 33s (remain 2m 8s) Loss: 0.1733 
Epoch: [5][1200/1526] Elapsed 6m 4s (remain 1m 38s) Loss: 0.1733 
Epoch: [5][1300/1526] Elapsed 6m 34s (remain 1m 8s) Loss: 0.1733 
Epoch: [5][1400/1526] E

Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1733  avg_val_loss: 0.1733  time: 480s
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
Epoch 5 - Score: 0.0
INFO:__main__:Epoch 5 - Score: 0.0


EVAL: [169/170] Elapsed 0m 16s (remain 0m 0s) Loss: 0.1733 


Score: 0.00000
Score: 0.00000
Score: 0.00000
Score: 0.00000
INFO:__main__:Score: 0.00000
Score: 0.00000
Score: 0.00000
Score: 0.00000
Score: 0.00000
INFO:__main__:Score: 0.00000
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/2553 [00:00<?, ?it/s]

In [75]:
!pip freeze > requirements.txt

# 新しいセクション