In [9]:
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import BertTokenizer, BertForSequenceClassification, BertConfig
from transformers import Trainer, TrainingArguments
from transformers import BertTokenizer, BertModel
from transformers import AdamW, get_linear_schedule_with_warmup

import sys
import os
from tqdm import tqdm
sys.path.append("/home/jovyan/20230406_ArticleClassifier/ArticleClassifier")

import src.general.global_variables as gv
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname('data_loader.py'), os.path.pardir)))
from src.data.data_loader import DataLoader

from src.general.utils import cc_path

In [10]:
# Load the pre-trained SciBERT tokenizer and model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer = BertTokenizer.from_pretrained('allenai/scibert_scivocab_uncased')


In [11]:
print('__Python VERSION:', sys.version)
print('__pyTorch VERSION:', torch.__version__)
print('__CUDA VERSION', )
from subprocess import call
# call(["nvcc", "--version"]) does not work
! nvcc --version
print('__CUDNN VERSION:', torch.backends.cudnn.version())
print('__Number CUDA Devices:', torch.cuda.device_count())
print('__Devices')
# call(["nvidia-smi", "--format=csv", "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"])
print('Active CUDA Device: GPU', torch.cuda.current_device())
print ('Available devices ', torch.cuda.device_count())

__Python VERSION: 3.9.16 | packaged by conda-forge | (main, Feb  1 2023, 21:39:03) 
[GCC 11.3.0]
__pyTorch VERSION: 2.0.0
__CUDA VERSION
/bin/bash: nvcc: command not found
__CUDNN VERSION: None
__Number CUDA Devices: 0
__Devices


AssertionError: Torch not compiled with CUDA enabled

In [22]:
class BertClassifier(torch.nn.Module):
    """
        Bert Model for classification Tasks.
    """
    def __init__(self, freeze_bert=False):
        """
        @param   bert: a BertModel object
        @param   classifier: a torch.nn.Module classifier
        @param   freeze_bert (bool): Set `False` to fine_tune the Bert model
        """
        super(BertClassifier,self).__init__()
        # Specify hidden size of Bert, hidden size of our classifier, and number of labels
        D_in, H,D_out = 768,60,52
        
#         self.bert = RobertaModel.from_pretrained('roberta-base')
        self.bert = BertModel.from_pretrained('scibert_scivocab_uncased')
        
        self.classifier = torch.nn.Sequential(
                            torch.nn.Linear(D_in, H),
                            torch.nn.ReLU(),
                            torch.nn.Linear(H, D_out))
        self.sigmoid = torch.nn.Sigmoid()
        
        # Freeze the Bert Model
        # Freeze all layers except the last two
        for name, param in self.bert.named_parameters():
            if 'layer.10' in name or 'layer.11' in name:
                param.requires_grad = True
            else:
                param.requires_grad = False

    def forward(self,input_ids,attention_mask):
        """
        Feed input to BERT and the classifier to compute logits.
        @param    input_ids (torch.Tensor): an input tensor with shape (batch_size,
                      max_length)
        @param    attention_mask (torch.Tensor): a tensor that hold attention mask
                      information with shape (batch_size, max_length)
        @return   logits (torch.Tensor): an output tensor with shape (batch_size,
                      num_labels)
        """
        outputs = self.bert(input_ids=input_ids,
                           attention_mask = attention_mask)
        
        # Extract the last hidden state of the token `[CLS]` for classification task
        last_hidden_state_cls = outputs[0][:,0,:]
        
        # Feed input to classifier to compute logits
        logit = self.classifier(last_hidden_state_cls)
        
#         logits = self.sigmoid(logit)
        
        return logit

In [6]:
def initialize_model(epochs=4):
    """Initialize the Bert Classifier, the optimizer and the learning rate scheduler.
    """
    
    # Instantiate Bert Classifier
    bert_classifier = BertClassifier(freeze_bert=False)
    
    bert_classifier.to(device)
    
    # Create the optimizer
    optimizer = AdamW(bert_classifier.parameters(),
                     lr=5e-5, #Default learning rate
                     eps=1e-8 #Default epsilon value
                     )
    
    # Total number of training steps
    total_steps = len(train_dataloader) * epochs
    
    # Set up the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer, 
                                              num_warmup_steps=0, # Default value
                                              num_training_steps=total_steps)
    return bert_classifier, optimizer, scheduler

In [49]:
# Load the custom dataset
print('Start loading data...')
loc_dict = {
    'processed_csv': cc_path('data/processed/canary/articles_cleaned.csv'),
    'abstract_embeddings': cc_path('data/processed/canary/embeddings_fasttext_20230410.csv'),
    'scibert_embeddings': cc_path('data/processed/canary/embeddings_scibert_20230413.csv'),
    'keyword_network': cc_path('data/processed/canary/keyword_network_weighted.pickle'),
    'xml_embeddings': cc_path('data/processed/canary/embeddings_xml.ftr'),
    'author_network': cc_path('data/processed/canary/author_network.pickle'), 
    'label_network': cc_path('data/processed/canary/label_network_weighted.pickle')
}
data_loader = DataLoader(loc_dict)
processed_df = data_loader.load_processed_csv().iloc[:10000]
processed_df.dropna(subset=['abstract'], inplace=True)

label_columns = processed_df.loc[:, ~processed_df.columns.isin(
    ['file_name', 'title', 'keywords', 'abstract', 'abstract_2', 'authors', 'organization', 'chemicals',
     'num_refs', 'date-delivered', 'labels_m', 'labels_a'])]
label_columns[label_columns.columns.difference(['pui'])] = label_columns[
    label_columns.columns.difference(['pui'])].astype(int)

Start loading data...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label_columns[label_columns.columns.difference(['pui'])] = label_columns[


In [50]:
import json

with open(cc_path("data/pui_idx_mapping.json"), "r") as outfile:
    node_label_mapping = json.load(outfile)
    
with open(cc_path(f'data/train_indices.txt')) as f:
    train_puis = f.read().splitlines()
    train_indices = list(map(node_label_mapping.get, train_puis))
with open(cc_path(f'data/val_indices.txt')) as f:
    val_puis = f.read().splitlines()
    val_indices = list(map(node_label_mapping.get, val_puis))
with open(cc_path(f'data/test_indices.txt')) as f:
    test_puis = f.read().splitlines()
    test_indices = list(map(node_label_mapping.get, test_puis))

In [51]:
train_texts = processed_df.loc[processed_df.pui.isin(train_puis), 'abstract'].to_list()
train_labels = label_columns.loc[processed_df.pui.isin(train_puis), label_columns.columns.difference(['pui'])].to_numpy().tolist()

val_texts = processed_df.loc[processed_df.pui.isin(val_puis), 'abstract'].to_list()
val_labels = label_columns.loc[processed_df.pui.isin(val_puis), label_columns.columns.difference(['pui'])].to_numpy().tolist()

test_texts = processed_df.loc[processed_df.pui.isin(test_puis), 'abstract'].to_list()
test_labels = label_columns.loc[processed_df.pui.isin(test_puis), label_columns.columns.difference(['pui'])].to_numpy().tolist()


In [14]:
train_texts = processed_df['abstract'].to_list()
train_labels = label_columns[label_columns.columns.difference(['pui'])].to_numpy().tolist()  # binary indicator matrix of labels



In [15]:
def preprocessing_for_bert(data):
    """Perform required preprocessing steps for pretrained BERT.
    @param    data (np.array): Array of texts to be processed.
    @return   input_ids (torch.Tensor): Tensor of token ids to be fed to a model.
    @return   attention_masks (torch.Tensor): Tensor of indices specifying which
                  tokens should be attended to by the model.
    """
    # create empty lists to store outputs
    input_ids = []
    attention_masks = []
    
    #for every sentence...
    
    for sent in tqdm(data):
        # 'encode_plus will':
        # (1) Tokenize the sentence
        # (2) Add the `[CLS]` and `[SEP]` token to the start and end
        # (3) Truncate/Pad sentence to max length
        # (4) Map tokens to their IDs
        # (5) Create attention mask
        # (6) Return a dictionary of outputs
        encoded_sent = tokenizer.encode_plus(
            text = sent,   #preprocess sentence
            add_special_tokens = True,         #Add `[CLS]` and `[SEP]`
            max_length= MAX_LEN  ,             #Max length to truncate/pad
            pad_to_max_length = True,          #pad sentence to max length 
            return_attention_mask= True        #Return attention mask 
        )
        # Add the outputs to the lists
        input_ids.append(encoded_sent.get('input_ids'))
        attention_masks.append(encoded_sent.get('attention_mask'))
        
    #convert lists to tensors
    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)
    
    return input_ids,attention_masks

In [52]:
MAX_LEN = 500

# Print sentece 0 and its encoded token ids
train_inputs, train_masks = preprocessing_for_bert(train_texts)
train_labels = torch.tensor(train_labels)

val_inputs, val_masks = preprocessing_for_bert(val_texts)
val_labels = torch.tensor(val_labels)

test_inputs, test_masks = preprocessing_for_bert(test_texts)
test_labels = torch.tensor(test_labels)


100%|██████████| 2947/2947 [00:11<00:00, 252.47it/s]
100%|██████████| 684/684 [00:02<00:00, 255.82it/s]
100%|██████████| 904/904 [00:03<00:00, 254.31it/s]


In [53]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

train_data = TensorDataset(train_inputs,train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=64)

val_data = TensorDataset(val_inputs, val_masks, val_labels)
val_sampler = RandomSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=64)

test_data = TensorDataset(test_inputs, test_masks, test_labels)
test_sampler = RandomSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=64)

In [76]:
# train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=512)

In [19]:
class ArticleDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        '''
        encoding.items() -> 
          -> input_ids : [1,34, 32, 67,...]
          -> attention_mask : [1,1,1,1,1,....]
        '''
        item = {key:torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len((self.labels))

In [125]:
# #datasets
# train_dataset = ArticleDataset(train_encodings, train_labels)

# #dataloaders
# bs = 64
# train_loader = data_utils.DataLoader([train_encoding for train_encoding in train_encodings['input_ids']], batch_size = bs, shuffle = bs)

In [57]:
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy='steps',
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=False,
    logging_dir='./logs',
    logging_steps=500,
    load_best_model_at_end=True,
    save_strategy='steps',
    save_steps=500
)

In [58]:
# Define the evaluation function
def compute_metrics(eval_preds):
    labels = eval_preds.label_ids
    preds = torch.round(torch.sigmoid(eval_preds.predictions))
    accuracy = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    return {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1}

In [59]:
# Define the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_encodings,
    eval_dataset=val_encodings,
    data_collator=lambda data: {'input_ids': torch.stack([f[0] for f in data]),
                                'attention_mask': torch.stack([f[1] for f in data]),
                                'labels': torch.tensor([f[2] for f in data])},
    compute_metrics=compute_metrics
)

In [45]:
# Specify loss function
import random
import time
#loss_fn = nn.CrossEntropyLoss()
loss_fn = torch.nn.BCEWithLogitsLoss()


def set_seed(seed_value=42):
    """Set seed for reproducibility.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

def train(model, train_dataloader, val_dataloader=None, epochs=4, evaluation=False):
    """Train the BertClassifier model.
    """
    # Start training loop
    print("Start training...\n")
    for epoch_i in range(epochs):
        # =======================================
        #               Training
        # =======================================
        # Print the header of the result table
        print(f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}")
        print("-"*70)

        # Measure the elapsed time of each epoch
        t0_epoch, t0_batch = time.time(), time.time()

        # Reset tracking variables at the beginning of each epoch
        total_loss, batch_loss, batch_counts = 0, 0, 0

        # Put the model into the training mode
        model.train()

        # For each batch of training data...
        for step, batch in tqdm(enumerate(train_dataloader)):
            batch_counts +=1
            # Load batch to GPU
            b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

            # Zero out any previously calculated gradients
            model.zero_grad()

            # Perform a forward pass. This will return logits.
            logits = model(b_input_ids, b_attn_mask)

            # Compute loss and accumulate the loss values
            loss = loss_fn(logits, b_labels.float())
            batch_loss += loss.item()
            total_loss += loss.item()

            # Perform a backward pass to calculate gradients
            loss.backward()

            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # Update parameters and the learning rate
            optimizer.step()
            scheduler.step()

            # Print the loss values and time elapsed for every 20--50000 batches
            if (step % 50000 == 0 and step != 0) or (step == len(train_dataloader) - 1):
                # Calculate time elapsed for 20 batches
                time_elapsed = time.time() - t0_batch

                # Print training results
                print(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^10} | {'-':^9} | {time_elapsed:^9.2f}")

                # Reset batch tracking variables
                batch_loss, batch_counts = 0, 0
                t0_batch = time.time()

        # Calculate the average loss over the entire training data
        avg_train_loss = total_loss / len(train_dataloader)

        print("-"*70)
        # =======================================
        #               Evaluation
        # =======================================
        if evaluation == True:
            # After the completion of each training epoch, measure the model's performance
            # on our validation set.
            val_loss, val_accuracy = evaluate(model, val_dataloader)

            # Print performance over the entire training data
            time_elapsed = time.time() - t0_epoch
            
            print(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^9.2f} | {time_elapsed:^9.2f}")
            print("-"*70)
        print("\n")
    
    print("Training complete!")


def evaluate(model, val_dataloader):
    """After the completion of each training epoch, measure the model's performance
    on our validation set.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    # Tracking variables
    val_accuracy = []
    val_loss = []

    # For each batch in our validation set...
    for batch in tqdm(val_dataloader):
        # Load batch to GPU
        b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)

        # Compute loss
        loss = loss_fn(logits, b_labels.float())
        val_loss.append(loss.item())

        # Get the predictions
        #preds = torch.argmax(logits, dim=1).flatten()
        
        # Calculate the accuracy rate
        #accuracy = (preds == b_labels).cpu().numpy().mean() * 100
        accuracy = accuracy_thresh(logits.view(-1,52),b_labels.view(-1,52))
        
        val_accuracy.append(accuracy)

    # Compute the average accuracy and loss over the validation set.
    val_loss = np.mean(val_loss)
    val_accuracy = np.mean(val_accuracy)

    return val_loss, val_accuracy

def accuracy_thresh(y_pred, y_true, thresh:float=0.5, sigmoid:bool=True):
    "Compute accuracy when `y_pred` and `y_true` are the same size."
    if sigmoid: 
        y_pred = y_pred.sigmoid()
    return ((y_pred>thresh)==y_true.byte()).float().mean().item()
    #return np.mean(((y_pred>thresh).float()==y_true.float()).float().cpu().numpy(), axis=1).sum()

In [54]:
set_seed(42)    # Set seed for reproducibility
bert_classifier, optimizer, scheduler = initialize_model(epochs=20)
train(bert_classifier, train_dataloader, val_dataloader, epochs=20, evaluation=True)

Some weights of the model checkpoint at scibert_scivocab_uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Start training...

 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:52, 21.54s/it]


   1    |   46    |   0.539649   |     -      |     -     |  1012.61 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:25<00:00, 13.25s/it]


   1    |    -    |   0.539649   |  0.457490  |   0.80    |  1158.31 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [17:02, 21.76s/it]


   2    |   46    |   0.421045   |     -      |     -     |  1022.63 
----------------------------------------------------------------------


100%|██████████| 11/11 [01:54<00:00, 10.37s/it]


   2    |    -    |   0.421045   |  0.373239  |   0.87    |  1136.66 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:51, 21.53s/it]


   3    |   46    |   0.345559   |     -      |     -     |  1011.92 
----------------------------------------------------------------------


100%|██████████| 11/11 [01:59<00:00, 10.86s/it]


   3    |    -    |   0.345559   |  0.305916  |   0.92    |  1131.38 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:56, 21.62s/it]


   4    |   46    |   0.287365   |     -      |     -     |  1016.24 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:19<00:00, 12.64s/it]


   4    |    -    |   0.287365   |  0.256661  |   0.93    |  1155.27 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [17:02, 21.76s/it]


   5    |   46    |   0.246949   |     -      |     -     |  1022.79 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:03<00:00, 11.19s/it]


   5    |    -    |   0.246949   |  0.224808  |   0.93    |  1145.91 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:49, 21.48s/it]


   6    |   46    |   0.219738   |     -      |     -     |  1009.70 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:14<00:00, 12.25s/it]


   6    |    -    |   0.219738   |  0.203048  |   0.94    |  1144.50 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:53, 21.56s/it]


   7    |   46    |   0.199321   |     -      |     -     |  1013.22 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:17<00:00, 12.47s/it]


   7    |    -    |   0.199321   |  0.188409  |   0.94    |  1150.42 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [17:08, 21.89s/it]


   8    |   46    |   0.186187   |     -      |     -     |  1028.63 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:15<00:00, 12.33s/it]


   8    |    -    |   0.186187   |  0.178657  |   0.94    |  1164.24 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:46, 21.42s/it]


   9    |   46    |   0.175761   |     -      |     -     |  1006.62 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:16<00:00, 12.37s/it]


   9    |    -    |   0.175761   |  0.170334  |   0.94    |  1142.75 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:53, 21.56s/it]


  10    |   46    |   0.168085   |     -      |     -     |  1013.27 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:20<00:00, 12.75s/it]


  10    |    -    |   0.168085   |  0.166245  |   0.94    |  1153.51 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [17:06, 21.83s/it]


  11    |   46    |   0.164846   |     -      |     -     |  1026.21 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:27<00:00, 13.45s/it]


  11    |    -    |   0.164846   |  0.161234  |   0.94    |  1174.17 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:59, 21.70s/it]


  12    |   46    |   0.159108   |     -      |     -     |  1019.72 
----------------------------------------------------------------------


100%|██████████| 11/11 [01:59<00:00, 10.84s/it]


  12    |    -    |   0.159108   |  0.159723  |   0.94    |  1138.92 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:57, 21.66s/it]


  13    |   46    |   0.156947   |     -      |     -     |  1018.00 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:14<00:00, 12.19s/it]


  13    |    -    |   0.156947   |  0.155942  |   0.95    |  1152.11 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:53, 21.56s/it]


  14    |   46    |   0.154538   |     -      |     -     |  1013.36 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:21<00:00, 12.87s/it]


  14    |    -    |   0.154538   |  0.153576  |   0.95    |  1154.98 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [17:02, 21.75s/it]


  15    |   46    |   0.150623   |     -      |     -     |  1022.26 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:18<00:00, 12.55s/it]


  15    |    -    |   0.150623   |  0.153324  |   0.95    |  1160.34 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:54, 21.59s/it]


  16    |   46    |   0.147245   |     -      |     -     |  1014.86 
----------------------------------------------------------------------


100%|██████████| 11/11 [01:55<00:00, 10.47s/it]


  16    |    -    |   0.147245   |  0.150694  |   0.95    |  1130.07 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:52, 21.55s/it]


  17    |   46    |   0.148394   |     -      |     -     |  1012.65 
----------------------------------------------------------------------


100%|██████████| 11/11 [01:55<00:00, 10.51s/it]


  17    |    -    |   0.148394   |  0.150683  |   0.95    |  1128.28 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [17:04, 21.81s/it]


  18    |   46    |   0.146519   |     -      |     -     |  1024.97 
----------------------------------------------------------------------


100%|██████████| 11/11 [01:59<00:00, 10.85s/it]


  18    |    -    |   0.146519   |  0.150264  |   0.95    |  1144.30 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:57, 21.65s/it]


  19    |   46    |   0.143914   |     -      |     -     |  1017.48 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:16<00:00, 12.43s/it]


  19    |    -    |   0.143914   |  0.149393  |   0.95    |  1154.23 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


47it [16:54, 21.59s/it]


  20    |   46    |   0.142956   |     -      |     -     |  1014.81 
----------------------------------------------------------------------


100%|██████████| 11/11 [02:02<00:00, 11.15s/it]

  20    |    -    |   0.142956   |  0.149068  |   0.95    |  1137.42 
----------------------------------------------------------------------


Training complete!





In [55]:
torch.save(bert_classifier.bert, cc_path(f'models/embedders/finetuned_bert_10k_20e_2lay.pt'))