In [1]:
'''Code based on: https://www.kaggle.com/code/vpkprasanna/bert-model-with-0-845-accuracy/notebook'''

import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import BertTokenizer, BertForSequenceClassification, BertConfig
from transformers import Trainer, TrainingArguments
from transformers import BertTokenizer, BertModel
from transformers import AdamW, get_linear_schedule_with_warmup

import sys
import os
from tqdm import tqdm
sys.path.append("/home/jovyan/20230406_ArticleClassifier/ArticleClassifier")

import src.general.global_variables as gv
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname('data_loader.py'), os.path.pardir)))
from src.data.data_loader import DataLoader

from src.general.utils import cc_path

In [2]:
# Load the pre-trained SciBERT tokenizer and model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer = BertTokenizer.from_pretrained('allenai/scibert_scivocab_uncased')
# tokenizer = BertTokenizer.from_pretrained('dmis-lab/biobert-base-cased-v1.1')


In [3]:
print('__Python VERSION:', sys.version)
print('__pyTorch VERSION:', torch.__version__)
print('__CUDA VERSION', )
from subprocess import call
# call(["nvcc", "--version"]) does not work
! nvcc --version
print('__CUDNN VERSION:', torch.backends.cudnn.version())
print('__Number CUDA Devices:', torch.cuda.device_count())
print('__Devices')
# call(["nvidia-smi", "--format=csv", "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"])
print('Active CUDA Device: GPU', torch.cuda.current_device())
print ('Available devices ', torch.cuda.device_count())

__Python VERSION: 3.9.16 | packaged by conda-forge | (main, Feb  1 2023, 21:39:03) 
[GCC 11.3.0]
__pyTorch VERSION: 2.0.0+cu117
__CUDA VERSION
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Tue_May__3_18:49:52_PDT_2022
Cuda compilation tools, release 11.7, V11.7.64
Build cuda_11.7.r11.7/compiler.31294372_0
__CUDNN VERSION: 8500
__Number CUDA Devices: 1
__Devices
Active CUDA Device: GPU 0
Available devices  1


In [4]:
class BertClassifier(torch.nn.Module):
    """
        Bert Model for classification Tasks.
    """
    def __init__(self, freeze_bert=False):
        """
        @param   bert: a BertModel object
        @param   classifier: a torch.nn.Module classifier
        @param   freeze_bert (bool): Set `False` to fine_tune the Bert model
        """
        super(BertClassifier,self).__init__()
        # Specify hidden size of Bert, hidden size of our classifier, and number of labels
        D_in, H,D_out = 768,60,7
        
#         self.bert = RobertaModel.from_pretrained('roberta-base')
        self.bert = BertModel.from_pretrained('scibert_scivocab_uncased')
#         self.bert = torch.load(cc_path(f'models/baselines/paula_finetuned_bert_56k_10e_tka.pt')).base_model
#         self.bert = BertModel.from_pretrained('dmis-lab/biobert-base-cased-v1.1')

        self.classifier = torch.nn.Sequential(
                            torch.nn.Linear(D_in, H),
                            torch.nn.ReLU(),
                            torch.nn.Linear(H, D_out))
        self.sigmoid = torch.nn.Sigmoid()
        
        # Freeze the Bert Model
        # Freeze all layers except the last two
        for name, param in self.bert.named_parameters():
            if 'layer.9' in name or 'layer.10' in name or 'layer.11' in name:
                param.requires_grad = True
            else:
                param.requires_grad = False

    def forward(self,input_ids,attention_mask):
        """
        Feed input to BERT and the classifier to compute logits.
        @param    input_ids (torch.Tensor): an input tensor with shape (batch_size,
                      max_length)
        @param    attention_mask (torch.Tensor): a tensor that hold attention mask
                      information with shape (batch_size, max_length)
        @return   logits (torch.Tensor): an output tensor with shape (batch_size,
                      num_labels)
        """
        outputs = self.bert(input_ids=input_ids,
                           attention_mask = attention_mask)
        
        # Extract the last hidden state of the token `[CLS]` for classification task
        last_hidden_state_cls = outputs[0][:,0,:]
#         last_hidden_state_cls = outputs[0]

        # Feed input to classifier to compute logits
        logit = self.classifier(last_hidden_state_cls)
        
#         logits = self.sigmoid(logit)
        
        return logit

In [5]:
def initialize_model(epochs=4):
    """Initialize the Bert Classifier, the optimizer and the learning rate scheduler.
    """
    
    # Instantiate Bert Classifier
    bert_classifier = BertClassifier(freeze_bert=False)
    
    bert_classifier.to(device)
    
    # Create the optimizer
    optimizer = AdamW(bert_classifier.parameters(),
                     lr=5e-5, #Default learning rate
                     eps=1e-8 #Default epsilon value
                     )
    
    # Total number of training steps
    total_steps = len(train_dataloader) * epochs
    
    # Set up the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer, 
                                              num_warmup_steps=0, # Default value
                                              num_training_steps=total_steps)
    return bert_classifier, optimizer, scheduler

In [6]:
list(bert_classifier.bert.named_parameters())

NameError: name 'bert_classifier' is not defined

In [22]:
# Load the custom dataset
print('Start loading data...')
loc_dict = {
    'processed_csv': cc_path('data/processed/canary/articles_cleaned.csv'),
    'abstract_embeddings': cc_path('data/processed/canary/embeddings_fasttext_20230410.csv'),
    'scibert_embeddings': cc_path('data/processed/canary/embeddings_scibert_20230413.csv'),
    'keyword_network': cc_path('data/processed/canary/keyword_network_weighted.pickle'),
    'xml_embeddings': cc_path('data/processed/canary/embeddings_xml.ftr'),
    'author_network': cc_path('data/processed/canary/author_network.pickle'), 
    'label_network': cc_path('data/processed/canary/label_network_weighted.pickle')
}
data_loader = DataLoader(loc_dict)
processed_df = data_loader.load_processed_csv()
processed_df.dropna(subset=['abstract'], inplace=True)

label_columns = processed_df.loc[:, ~processed_df.columns.isin(
    ['file_name', 'title', 'keywords', 'abstract', 'abstract_2', 'authors', 'organization', 'chemicals',
     'num_refs', 'date-delivered', 'labels_m', 'labels_a'])]
label_columns.loc[:, label_columns.columns.difference(['pui'])] = label_columns.loc[:, 
    label_columns.columns.difference(['pui'])].astype(int)

processed_df['str_keywords'] = processed_df['keywords'].str.replace('[', ' ').str.replace(']', ' ').str.replace(', ', ' ').str.replace("'", '')
processed_df['embedding_text'] = processed_df['title'] + processed_df['str_keywords'] + processed_df['abstract']


Start loading data...


AttributeError: 'DataLoader' object has no attribute 'load_processed_csv'

In [7]:
# Load the custom dataset
print('Start loading data...')
loc_dict = {
    'processed_csv': cc_path('data/processed/litcovid/litcovid_articles_cleaned_20230529.csv'),
    'scibert_embeddings': cc_path('data/processed/litcovid/litcovid_embeddings_scibert_finetuned_20230425.csv'),
    'keyword_network': cc_path('data/processed/litcovid/litcovid_keyword_network_weighted.pickle'),
    'xml_embeddings': cc_path('data/processed/litcovid/litcovid_embeddings_xml_20230518_68.ftr'),
    'label_network': cc_path('data/processed/litcovid/litcovid_label_network_weighted.pickle')
}
data_loader = DataLoader(loc_dict)
processed_df = data_loader.load_processed_csv()
processed_df.dropna(subset=['abstract'], inplace=True)

label_columns = processed_df.loc[:, ~processed_df.columns.isin(
    ['file_name', 'title', 'keywords', 'abstract', 'abstract_2', 'authors', 'organization', 'chemicals',
     'num_refs', 'date-delivered', 'labels_m', 'labels_a', 'journal', 'pub_type', 'doi', 'label', 'label_m', 'list_label'])]
label_columns.loc[:, label_columns.columns.difference(['pui'])] = label_columns.loc[:, 
    label_columns.columns.difference(['pui'])].astype(int)

processed_df['str_keywords'] = processed_df['keywords'].str.replace('[', ' ').str.replace(']', ' ').str.replace(', ', ' ').str.replace("'", '')
processed_df['embedding_text'] = processed_df['title'] + " " + processed_df['journal'] + " " + processed_df['pub_type'].str.replace(';', ' ') + " " + processed_df['str_keywords'] + processed_df['abstract']


Start loading data...


In [8]:
processed_df['embedding_text'].iloc[0]

'Potential role for tissue factor in the pathogenesis of hypercoagulability associated with in COVID-19. J Thromb Thrombolysis Journal Article Review  il-6 tnf-alpha thrombosis tissue factor in december 2019 a new and highly contagious infectious disease emerged in wuhan china the etiologic agent was identified as a novel coronavirus now known as severe acute syndrome coronavirus2 sarscov2 recent research has revealed that virus entry takes place upon the union of the virus s surface protein with the type i transmembrane metallocarboxypeptidase angiotensin converting enzyme 2 ace2 identified on epithelial cells of the host respiratory tract virus triggers the synthesis and release of proinflammatory cytokines including il6 and tnfalpha and also promotes downregulation of ace2 which promotes a concomitant increase in levels of angiotensin ii atii both tnfalpha and atii have been implicated in promoting overexpression of tissue factor tf in platelets and macrophages additionally the gene

In [9]:
import json


In [24]:

with open(cc_path("data/pui_idx_mapping.json"), "r") as outfile:
    node_label_mapping = json.load(outfile)
    
with open(cc_path(f'data/train_indices.txt')) as f:
    train_puis = f.read().splitlines()
    train_indices = list(map(node_label_mapping.get, train_puis))
with open(cc_path(f'data/val_indices.txt')) as f:
    val_puis = f.read().splitlines()
    val_indices = list(map(node_label_mapping.get, val_puis))
with open(cc_path(f'data/test_indices.txt')) as f:
    test_puis = f.read().splitlines()
    test_indices = list(map(node_label_mapping.get, test_puis))

FileNotFoundError: [Errno 2] No such file or directory: '/home/jovyan/20230406_ArticleClassifier/ArticleClassifier/data/pui_idx_mapping.json'

In [10]:
with open(cc_path(f'data/litcovid_train_indices.txt')) as f:
    train_puis = f.read().splitlines()
with open(cc_path(f'data/litcovid_val_indices.txt')) as f:
    val_puis = f.read().splitlines()
with open(cc_path(f'data/litcovid_test_indices.txt')) as f:
    test_puis = f.read().splitlines()


In [11]:
train_texts = processed_df.loc[processed_df.pui.isin(train_puis), 'embedding_text'].to_list()
train_labels = label_columns.loc[processed_df.pui.isin(train_puis), label_columns.columns.difference(['pui'])].to_numpy().tolist()

val_texts = processed_df.loc[processed_df.pui.isin(val_puis), 'embedding_text'].to_list()
val_labels = label_columns.loc[processed_df.pui.isin(val_puis), label_columns.columns.difference(['pui'])].to_numpy().tolist()

test_texts = processed_df.loc[processed_df.pui.isin(test_puis), 'embedding_text'].to_list()
test_labels = label_columns.loc[processed_df.pui.isin(test_puis), label_columns.columns.difference(['pui'])].to_numpy().tolist()


In [12]:
def preprocessing_for_bert(data):
    """Perform required preprocessing steps for pretrained BERT.
    @param    data (np.array): Array of texts to be processed.
    @return   input_ids (torch.Tensor): Tensor of token ids to be fed to a model.
    @return   attention_masks (torch.Tensor): Tensor of indices specifying which
                  tokens should be attended to by the model.
    """
    # create empty lists to store outputs
    input_ids = []
    attention_masks = []
    
    #for every sentence...
    
    for sent in tqdm(data):
        # 'encode_plus will':
        # (1) Tokenize the sentence
        # (2) Add the `[CLS]` and `[SEP]` token to the start and end
        # (3) Truncate/Pad sentence to max length
        # (4) Map tokens to their IDs
        # (5) Create attention mask
        # (6) Return a dictionary of outputs
        encoded_sent = tokenizer.encode_plus(
            text = sent,   #preprocess sentence
            add_special_tokens = True,         #Add `[CLS]` and `[SEP]`
            max_length= MAX_LEN  ,             #Max length to truncate/pad
            pad_to_max_length = True,          #pad sentence to max length 
            return_attention_mask= True        #Return attention mask 
        )
        # Add the outputs to the lists
        input_ids.append(encoded_sent.get('input_ids'))
        
        attention_masks.append(encoded_sent.get('attention_mask'))
        
    #convert lists to tensors
    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)
    
    return input_ids,attention_masks

In [13]:
MAX_LEN = 512

# Print sentece 0 and its encoded token ids
train_inputs, train_masks = preprocessing_for_bert(train_texts)
train_labels = torch.tensor(train_labels)

val_inputs, val_masks = preprocessing_for_bert(val_texts)
val_labels = torch.tensor(val_labels)

test_inputs, test_masks = preprocessing_for_bert(test_texts)
test_labels = torch.tensor(test_labels)


  0%|          | 0/24947 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
100%|██████████| 24947/24947 [02:36<00:00, 159.43it/s]
100%|██████████| 6236/6236 [00:39<00:00, 158.11it/s]
100%|██████████| 2489/2489 [00:16<00:00, 150.31it/s]


In [14]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

train_data = TensorDataset(train_inputs,train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=32)

val_data = TensorDataset(val_inputs, val_masks, val_labels)
val_sampler = RandomSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=32)

test_data = TensorDataset(test_inputs, test_masks, test_labels)
test_sampler = RandomSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=32)

In [15]:
# Define the evaluation function
def compute_metrics(eval_preds):
    labels = eval_preds.label_ids
    preds = torch.round(torch.sigmoid(eval_preds.predictions))
    accuracy = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    return {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1}

In [30]:
# Specify loss function
import random
import time
import copy
from sklearn.metrics import f1_score
loss_fn = torch.nn.BCEWithLogitsLoss()


def set_seed(seed_value=42):
    """Set seed for reproducibility.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

def train(model, train_dataloader, val_dataloader=None, epochs=4, evaluation=False):
    """Train the BertClassifier model.
    """
    # Start training loop
    print("Start training...\n")
    max_val_accuracy = 0
    not_improved = 0
    for epoch_i in range(epochs):
        # =======================================
        #               Training
        # =======================================
        # Print the header of the result table
        print(f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}")
        print("-"*70)

        # Measure the elapsed time of each epoch
        t0_epoch, t0_batch = time.time(), time.time()

        # Reset tracking variables at the beginning of each epoch
        total_loss, batch_loss, batch_counts = 0, 0, 0

        # Put the model into the training mode
        model.train()

        # For each batch of training data...
        for step, batch in tqdm(enumerate(train_dataloader)):
            batch_counts +=1
            # Load batch to GPU
            b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

            # Zero out any previously calculated gradients
            model.zero_grad()

            # Perform a forward pass. This will return logits.
            logits = model(b_input_ids, b_attn_mask)

            # Compute loss and accumulate the loss values
            loss = loss_fn(logits, b_labels.float())
            batch_loss += loss.item()
            total_loss += loss.item()

            # Perform a backward pass to calculate gradients
            loss.backward()

            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # Update parameters and the learning rate
            optimizer.step()
            scheduler.step()

            # Print the loss values and time elapsed for every 20--50000 batches
            if (step % 50000 == 0 and step != 0) or (step == len(train_dataloader) - 1):
                # Calculate time elapsed for 20 batches
                time_elapsed = time.time() - t0_batch

                # Print training results
                print(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^10} | {'-':^9} | {time_elapsed:^9.2f}")

                # Reset batch tracking variables
                batch_loss, batch_counts = 0, 0
                t0_batch = time.time()

        # Calculate the average loss over the entire training data
        avg_train_loss = total_loss / len(train_dataloader)

        print("-"*70)
        # =======================================
        #               Evaluation
        # =======================================
        if evaluation == True:
            # After the completion of each training epoch, measure the model's performance
            # on our validation set.
            val_loss, val_accuracy, _ = evaluate(model, val_dataloader)

            # Print performance over the entire training data
            time_elapsed = time.time() - t0_epoch
            
            print(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^9.5f} | {time_elapsed:^9.2f}")
            print("-"*70)
            
            if val_accuracy > max_val_accuracy:
                max_val_accuracy = val_accuracy
                best_model = copy.deepcopy(model)
                torch.save(model.bert, cc_path(f'models/embedders/litcovid_pretrained_best_iter_meta_stopwords.pt'))
                not_improved = 0
            else:
                not_improved += 1
            
            if not_improved == 5:
                break
        print("\n")
    
    print("Training complete!")
    return best_model


import torch

def evaluate(model, val_dataloader):
    """After the completion of each training epoch, measure the model's performance
    on our validation set.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    # Tracking variables
    list_val_f1_micro = []
    list_val_f1_macro = []
    val_loss = []

    predictions = []
    all_labels = []

    # For each batch in our validation set...
    for batch in tqdm(val_dataloader):
        # Load batch to GPU
        b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)

        # Compute loss
        loss = loss_fn(logits, b_labels.float())
        val_loss.append(loss.item())

        # Append batch predictions to the list
        predictions.append(logits)
        all_labels.append(b_labels)

    # Combine predictions for all batches into a single tensor
    predictions = torch.cat(predictions, dim=0)
    all_labels = torch.cat(all_labels, dim=0)

    # Calculate f1 scores
    f1_micro, f1_macro = accuracy_thresh(predictions.view(-1, 7), all_labels.view(-1, 7))

    # Compute the average loss over the validation set.
    val_loss = np.mean(val_loss)
    # Calculate the average f1 scores
    val_f1_micro = f1_micro.mean().item()
    val_f1_macro = f1_macro.mean().item()

    return val_loss, val_f1_micro, val_f1_macro


def accuracy_thresh(y_pred, y_true, thresh:float=0.5, sigmoid:bool=True):
    "Compute accuracy when `y_pred` and `y_true` are the same size."
    if sigmoid: 
        y_pred = y_pred.sigmoid()

    y_pred[(y_pred>thresh)] = 1
    y_pred[(y_pred<thresh)] = 0
    return f1_score(y_true.byte().cpu(), y_pred.cpu(), average='micro'), f1_score(y_true.byte().cpu(), y_pred.cpu(), average='macro')
    #return np.mean(((y_pred>thresh).float()==y_true.float()).float().cpu().numpy(), axis=1).sum()

In [24]:
set_seed(42)    # Set seed for reproducibility
bert_classifier, optimizer, scheduler = initialize_model(epochs=15)
best_model = train(bert_classifier, train_dataloader, val_dataloader, epochs=15, evaluation=True)

Some weights of the model checkpoint at scibert_scivocab_uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Start training...

 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


780it [05:44,  2.26it/s]


   1    |   779   |   0.156363   |     -      |     -     |  344.76  
----------------------------------------------------------------------


100%|██████████| 195/195 [00:56<00:00,  3.43it/s]


   1    |    -    |   0.156363   |  0.108553  |  0.89542  |  401.58  
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


780it [05:45,  2.26it/s]


   2    |   779   |   0.098123   |     -      |     -     |  345.16  
----------------------------------------------------------------------


100%|██████████| 195/195 [00:56<00:00,  3.43it/s]


   2    |    -    |   0.098123   |  0.101684  |  0.89923  |  401.97  
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


780it [05:45,  2.26it/s]


   3    |   779   |   0.084347   |     -      |     -     |  345.39  
----------------------------------------------------------------------


100%|██████████| 195/195 [00:56<00:00,  3.43it/s]


   3    |    -    |   0.084347   |  0.100348  |  0.90190  |  402.21  
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


780it [05:46,  2.25it/s]


   4    |   779   |   0.072762   |     -      |     -     |  346.63  
----------------------------------------------------------------------


100%|██████████| 195/195 [00:56<00:00,  3.43it/s]


   4    |    -    |   0.072762   |  0.102647  |  0.90229  |  403.55  
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


780it [05:45,  2.26it/s]


   5    |   779   |   0.059894   |     -      |     -     |  345.75  
----------------------------------------------------------------------


100%|██████████| 195/195 [00:56<00:00,  3.43it/s]


   5    |    -    |   0.059894   |  0.104822  |  0.89977  |  402.61  
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


780it [05:46,  2.25it/s]


   6    |   779   |   0.049583   |     -      |     -     |  346.51  
----------------------------------------------------------------------


100%|██████████| 195/195 [00:56<00:00,  3.43it/s]


   6    |    -    |   0.049583   |  0.112176  |  0.90181  |  403.33  
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


780it [05:46,  2.25it/s]


   7    |   779   |   0.039246   |     -      |     -     |  346.14  
----------------------------------------------------------------------


100%|██████████| 195/195 [00:56<00:00,  3.43it/s]


   7    |    -    |   0.039246   |  0.117805  |  0.90201  |  402.95  
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


780it [05:45,  2.26it/s]


   8    |   779   |   0.030517   |     -      |     -     |  345.74  
----------------------------------------------------------------------


100%|██████████| 195/195 [00:56<00:00,  3.42it/s]


   8    |    -    |   0.030517   |  0.126527  |  0.89854  |  402.72  
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


780it [05:45,  2.25it/s]


   9    |   779   |   0.023950   |     -      |     -     |  345.98  
----------------------------------------------------------------------


100%|██████████| 195/195 [00:56<00:00,  3.43it/s]

   9    |    -    |   0.023950   |  0.138084  |  0.89840  |  402.80  
----------------------------------------------------------------------
Training complete!





In [18]:
del bert_classifier

In [20]:
torch.save(best_model.bert, cc_path(f'models/embedders/litcovid_finetuned_bert_20e_3lay_meta.pt'))

# evaluate BERT model

In [31]:
 evaluate(best_model, test_dataloader)

100%|██████████| 78/78 [00:22<00:00,  3.53it/s]


(0.09495743330663596, 0.909576712136127, 0.8659726267674196)