In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from simpletransformers.ner import NERModel,NERArgs
import torch

In [5]:
class bert_training:
    
    def __init__(self,bio_data_path):
        self.data = pd.read_csv(bio_data_path,encoding="latin1" )
        self.data = self.data.replace(r'^\s*$', np.nan, regex=True)
        self.data = self.data.fillna(method ="ffill")
        self.data["Sentence #"] = LabelEncoder().fit_transform(self.data["Sentence #"])
        self.data.rename(columns={"Sentence #":"sentence_id","Word":"words","Tag":"labels"}, inplace =True)  
        X = self.data[["sentence_id","words"]]
        Y = self.data["labels"]
        x_train, x_test, y_train, y_test = train_test_split(X,Y, test_size =0.2)
        #building up train and test data
        self.train_data = pd.DataFrame({"sentence_id":x_train["sentence_id"],"words":x_train["words"],"labels":y_train})
        self.test_data = pd.DataFrame({"sentence_id":x_test["sentence_id"],"words":x_test["words"],"labels":y_test})
        self.label = self.data["labels"].unique().tolist()
        
    def give_Args(self,num_epochs,learning_rate,train_batch_size,eval_batch_size):
        args = NERArgs()
        args.num_train_epochs = num_epochs
        args.learning_rate = learning_rate
        args.overwrite_output_dir =True
        args.train_batch_size = train_batch_size
        args.eval_batch_size = eval_batch_size
        print("DOWNLOADING Model")
        self.model = NERModel('bert', 'bert-base-uncased',labels=self.label,args =args)
        print("TRAINING Begins")
        self.model.train_model(self.train_data,eval_data =self.test_data,acc=accuracy_score)
        print("TRAINING Ends")
        result, model_outputs, preds_list = self.model.eval_model(self.test_data)
        print(result) #after fine tuning on test data

    def save_model(self,path):
        torch.save(self.model,path)
        print("Model Saved at given ",path)

In [None]:
# BIO-tagged training data on the mounted Google Drive.
data_path = "/content/drive/MyDrive/2000_BIO_taggingdata_ALL_ROW_WISE.csv"

# The constructor reads the CSV and prepares the train/test frames.
obj_name = bert_training(data_path)

# Fine-tune and evaluate; keyword arguments make the hyper-parameters explicit.
obj_name.give_Args(
    num_epochs=2,
    learning_rate=1e-4,
    train_batch_size=32,
    eval_batch_size=32,
)

# Persist the trained model next to the data.
obj_name.save_model(path="/content/drive/MyDrive/model_check")