In [2]:
import os
import random
from tqdm.notebook import tqdm
from config import BaseConfig
from models.utils import load_pkl, load_json, save_pkl, save_json
import warnings
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from sklearn.metrics import classification_report

# Silence every Python warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings('ignore')
# Project settings object; the data and label-encoder paths used below are read from it.
CONFIG = BaseConfig().get_args()
# Label encoder loaded from the pickle at CONFIG.path_saved_le; it is later used to turn
# relation labels into class ids and to size the classifier's output layer.
label_encoder = load_pkl(CONFIG.path_saved_le)

In [3]:
import torch
import transformers
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer

In [4]:
from torch import cuda
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

print(f"device is: {device}")

device is: cuda


In [5]:
def _as_triples(examples):
    """Reduce each example to ``[tail word, head word, relation]`` with a progress bar."""
    return [
        [record['tail']['word'], record['head']['word'], record['relation']]
        for record in tqdm(examples)
    ]


# Raw splits, read from the paths held in CONFIG.
train_data = load_json(CONFIG.train_path)
test_data = load_json(CONFIG.test_path)
dev_data = load_json(CONFIG.dev_path)

# Triples in the order the dataset class indexes them: 0 = tail, 1 = head, 2 = relation.
train_rel = _as_triples(train_data)
dev_rel = _as_triples(dev_data)
test_rel = _as_triples(test_data)

  0%|          | 0/534277 [00:00<?, ?it/s]

  0%|          | 0/114506 [00:00<?, ?it/s]

  0%|          | 0/114565 [00:00<?, ?it/s]

In [6]:
# Hyper-parameters and paths shared by the cells below.
# NOTE(review): EPOCHS and LEARNING_RATE are not referenced by any visible cell of this
# notebook (the optimizer cell is commented out) — presumably carried over from the
# training notebook; confirm before removing.
MAX_LEN = 50  # passed to the tokenizer as max_length for each entity string
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 8
EPOCHS = 2
LEARNING_RATE = 1e-05

# Path from which both the tokenizer and the BERT encoder are loaded.
bert_path = 'assets/transformers/biobert-base-cased-v1.1'

tokenizer = BertTokenizer.from_pretrained(bert_path)

In [7]:
class BioRel(Dataset):
    """Dataset of ``[tail, head, relation]`` records encoded for the relation classifier.

    The tail and head strings are tokenized separately, each with ``max_length=max_len``
    and padding/truncation to that length, and the two encodings are concatenated
    (tail first, then head).

    Args:
        data: Indexable collection of ``[tail_text, head_text, relation_label]`` records.
        tokenizer: Object exposing ``encode_plus`` that returns ``'input_ids'`` and
            ``'attention_mask'`` lists.
        max_len: Token budget applied to each of the two entity encodings.
        label_encoder: Object whose ``transform`` maps a list of relation labels to
            integer class ids.
    """

    def __init__(self, data, tokenizer, max_len, label_encoder):
        self.len = len(data)
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.label_encoder = label_encoder

    def _encode(self, text):
        """Tokenize one entity string, padded/truncated to ``self.max_len``."""
        return self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            # `padding='max_length'` is the supported spelling of the deprecated
            # `pad_to_max_length=True`.
            padding='max_length',
            return_token_type_ids=True,
            truncation=True,
        )

    def __getitem__(self, index):
        """Return the ``ids``/``mask``/``targets`` tensors for record ``index``."""
        tail_text, head_text, relation = self.data[index][:3]
        # Collapse runs of whitespace so the tokenizer sees single-spaced text.
        inputs_tail = self._encode(" ".join(tail_text.split()))
        inputs_head = self._encode(" ".join(head_text.split()))
        target = self.label_encoder.transform([relation])[0]

        # Tail encoding first, head encoding second; the mask follows the same layout.
        ids = inputs_tail['input_ids'] + inputs_head['input_ids']
        mask = inputs_tail['attention_mask'] + inputs_head['attention_mask']

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'targets': torch.tensor(target, dtype=torch.long)
        }

    def __len__(self):
        return self.len

In [8]:
# One BioRel dataset per split; all three share the tokenizer, MAX_LEN and label encoder.
train_dataset = BioRel(train_rel, tokenizer, MAX_LEN, label_encoder)
dev_dataset = BioRel(dev_rel, tokenizer, MAX_LEN, label_encoder)
test_dataset = BioRel(test_rel, tokenizer, MAX_LEN, label_encoder)

In [9]:
# DataLoader keyword arguments per split. Only the test loader iterates in a fixed order;
# the train and dev loaders shuffle.
train_params = dict(batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=0)
val_params = dict(batch_size=VALID_BATCH_SIZE, shuffle=True, num_workers=0)
test_params = dict(batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=0)

train_loader = DataLoader(train_dataset, **train_params)
dev_loader = DataLoader(dev_dataset, **val_params)
test_loader = DataLoader(test_dataset, **test_params)

In [10]:
class BioBERTClass(torch.nn.Module):
    """BERT encoder topped with a small classification head.

    The head takes the encoder's hidden state at sequence position 0 and applies
    Linear(768, 768) -> ReLU -> Dropout(0.3) -> Linear(768, C).

    Args:
        C: Number of output classes.
        path: Location handed to ``BertModel.from_pretrained``.
    """

    def __init__(self, C, path):
        super().__init__()
        # Attribute names double as state_dict key prefixes, so they stay unchanged.
        self.l1 = BertModel.from_pretrained(path)
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(768, C)

    def forward(self, input_ids, attention_mask):
        """Return the classifier output for a batch of token ids and attention masks."""
        encoder_out = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        # First element of the encoder output is the per-token hidden state;
        # keep only position 0 of every sequence.
        first_token = encoder_out[0][:, 0]
        hidden = torch.relu(self.pre_classifier(first_token))
        return self.classifier(self.dropout(hidden))

In [11]:
# Location of the fine-tuned checkpoint file.
path_to_save_model = "assets/model/biobert-finetuned"
model_file = os.path.join(path_to_save_model, "pytorch_biobert_biorel.pth")

# Build the architecture with one output per known label, then load the checkpoint's
# state_dict into it, mapping its tensors onto the active device while loading.
model = BioBERTClass(C=len(label_encoder.classes_), path=bert_path)
model.load_state_dict(torch.load(model_file, map_location=device))

model.to(device)

Some weights of the model checkpoint at assets/transformers/biobert-base-cased-v1.1 were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BioBERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
 

In [12]:
# Creating the loss function and optimizer
#loss_function = torch.nn.CrossEntropyLoss()
#optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)

In [13]:
# Function to count the correct predictions in a batch (a raw count, not a ratio)
def calcuate_accu(big_idx, targets):
    """Count the positions where ``big_idx`` equals ``targets``.

    Despite the name, the return value is the number of matches, not an accuracy ratio.
    """
    hits = big_idx == targets
    return hits.sum().item()

In [14]:
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

def evaluation_method(y_true, y_pred):
    """Score predictions against ground truth.

    Returns a dict with macro-averaged ``f1``, ``precision`` and ``recall`` plus plain
    ``accuracy``.
    """
    macro = {"average": "macro"}
    return {
        "f1": f1_score(y_true, y_pred, **macro),
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, **macro),
        "recall": recall_score(y_true, y_pred, **macro),
    }

In [15]:
def valid(model, loader, msg):
    """Run ``model`` over ``loader`` without gradients and print classification metrics.

    Args:
        model: Module called as ``model(ids, mask)``; its output is reduced with a max
            over ``dim=1`` to get the predicted class per example.
        loader: Iterable of batches, each a mapping with ``'ids'``, ``'mask'`` and
            ``'targets'`` tensors.
        msg: Name of the split; used only in the printed messages.

    Returns:
        ``(y_true, y_predict)``: two lists with the target and predicted class id of
        every example, in iteration order.
    """
    model.eval()
    n_correct = 0
    n_examples = 0
    y_predict = []
    y_true = []
    with torch.no_grad():
        # Wrapping the loader itself (not the enumerate object) lets tqdm read its length.
        for step, data in enumerate(tqdm(loader)):
            ids = data['ids'].to(device, dtype=torch.long)
            mask = data['mask'].to(device, dtype=torch.long)
            targets = data['targets'].to(device, dtype=torch.long)
            # Deliberately no .squeeze(): on a batch holding a single example it would
            # drop the batch axis and the max over dim=1 below would raise.
            outputs = model(ids, mask)
            _, big_idx = torch.max(outputs, dim=1)
            n_correct += calcuate_accu(big_idx, targets)

            y_predict.extend(big_idx.cpu().numpy())
            y_true.extend(targets.cpu().numpy())

            n_examples += targets.size(0)

            if step % 10000 == 0:
                # Running accuracy over everything seen so far, in percent.
                accu_step = (n_correct * 100) / n_examples
                print(f"{msg} Accuracy per 10000 steps: {accu_step}")
                print("-------------------------------------")
    print("========================================================")
    print(f"Epoch Evaluations on {msg}: \n")
    print(evaluation_method(y_true, y_predict))
    print("========================================================")
    return y_true, y_predict

In [16]:
# Score the loaded model on the training split; `valid` runs in eval mode without gradients.
y_train_true, y_train_predict = valid(model, train_loader, "Training")

0it [00:00, ?it/s]

Training Accuracy per 10000 steps: 100.0
-------------------------------------
Training Accuracy per 10000 steps: 96.95280471952805
-------------------------------------
Training Accuracy per 10000 steps: 96.89890505474726
-------------------------------------
Training Accuracy per 10000 steps: 96.84052198260058
-------------------------------------
Training Accuracy per 10000 steps: 96.85851603709908
-------------------------------------
Training Accuracy per 10000 steps: 96.85756284874303
-------------------------------------
Training Accuracy per 10000 steps: 96.84484425259579
-------------------------------------
Epoch Evaluations on Training: 

{'f1': 0.9205682826334349, 'accuracy': 0.9682898571340335, 'precision': 0.9501299521560033, 'recall': 0.9020254775850373}


In [17]:
# Same evaluation on the dev split.
y_dev_true, y_dev_predict = valid(model, dev_loader, "Validation")

0it [00:00, ?it/s]

Validation Accuracy per 10000 steps: 87.5
-------------------------------------
Validation Accuracy per 10000 steps: 95.42170782921708
-------------------------------------
Epoch Evaluations on Validation: 

{'f1': 0.8823007078658177, 'accuracy': 0.9541683405236406, 'precision': 0.9304871479309936, 'recall': 0.8531706382100304}


In [18]:
# Same evaluation on the test split (this loader does not shuffle).
y_test_true, y_test_predict = valid(model, test_loader, "Testing")

0it [00:00, ?it/s]

Testing Accuracy per 10000 steps: 100.0
-------------------------------------
Testing Accuracy per 10000 steps: 95.67793220677932
-------------------------------------
Epoch Evaluations on Testing: 

{'f1': 0.8850699612376427, 'accuracy': 0.9529001003796971, 'precision': 0.9269310941751902, 'recall': 0.8617277723995966}


In [19]:
def evaluation_method(y_true, y_pred):
    """Build the full, serialisable evaluation record for one split.

    Besides the macro-averaged ``f1``/``precision``/``recall`` and ``accuracy``, the
    result carries the labels and predictions as plain ``int`` lists (``y-true``,
    ``y-pred``) and the text classification report (``clf-report``).

    Note: this reuses the name of the lighter ``evaluation_method`` defined earlier in
    the notebook, so once this cell has run, every later call resolves to this version.
    """
    macro = {"average": "macro"}
    metrics = {
        "f1": f1_score(y_true, y_pred, **macro),
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, **macro),
        "recall": recall_score(y_true, y_pred, **macro),
        "clf-report": classification_report(y_true, y_pred),
    }
    return {
        "y-true": [int(label) for label in y_true],
        "y-pred": [int(label) for label in y_pred],
        **metrics,
    }

In [20]:
# Pair every split with the labels/predictions collected above, then score each pair.
split_outputs = {
    "train": (y_train_true, y_train_predict),
    "dev": (y_dev_true, y_dev_predict),
    "test": (y_test_true, y_test_predict),
}
biobert_results = {
    split: evaluation_method(truth, predicted)
    for split, (truth, predicted) in split_outputs.items()
}

print(f"TRAIN, F1-Score: {biobert_results['train']['f1']}, Accuracy: {biobert_results['train']['accuracy']}")
print(f"DEV, F1-Score: {biobert_results['dev']['f1']}, Accuracy: {biobert_results['dev']['accuracy']}")
print(f"TEST, F1-Score: {biobert_results['test']['f1']}, Accuracy: {biobert_results['test']['accuracy']}")

# Persist the per-split records as JSON.
save_json(os.path.join("assets/predictions", "biobert-model.json"), biobert_results)

TRAIN, F1-Score: 0.9205682826334349, Accuracy: 0.9682898571340335
DEV, F1-Score: 0.8823007078658177, Accuracy: 0.9541683405236406
TEST, F1-Score: 0.8850699612376427, Accuracy: 0.9529001003796971
