In [None]:
# Install the `transformers` and `sentencepiece` packages into the notebook runtime.
# NOTE(review): no versions are pinned; the captured output of this cell shows
# transformers 4.8.2 being downloaded -- consider pinning for reproducibility.
!pip install transformers
!pip install sentencepiece

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/fd/1a/41c644c963249fd7f3836d926afa1e3f1cc234a1c40d80c5f03ad8f6f1b2/transformers-4.8.2-py3-none-any.whl (2.5MB)
[K     |████████████████████████████████| 2.5MB 8.1MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/d4/e2/df3543e8ffdab68f5acc73f613de9c2b155ac47f162e725dcac87c521c11/tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3MB)
[K     |████████████████████████████████| 3.3MB 38.8MB/s 
[?25hCollecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)
[K     |████████████████████████████████| 901kB 48.5MB/s 
Collecting huggingface-hub==0.0.12
  Downloading https://files.pythonhosted.org/packages/2f/ee/97e253668fda9b17e968b3f97b2f8e53aa0127e8807d24a547

In [None]:
import transformers
import pandas as pd
import numpy as np
import transformers
from sklearn import model_selection
from tqdm import trange
import torch

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------

# Checkpoint identifier handed to `from_pretrained` for the tokenizer below.
MODEL_BASE_PATH = "roberta-base"

# Training schedule and batch sizes.
EPOCH = 5
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 8

# Maximum number of tokens per encoded example.
MAX_LEN = 128

# NOTE(review): these three values are not referenced by any other cell visible
# in this notebook -- confirm whether they are still needed.
HIDDEN_SIZE = 512
NUMBER_OF_LABEL = 30
HIDDEN_DROPOUT_PROB = 0.3

# Prefer the GPU whenever PyTorch can see one.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Shared tokenizer instance used by the dataset cells.
# NOTE(review): `do_lower_case` is passed to a cased checkpoint; verify that it
# is intended (it is kept unchanged here).
TOKENIZER = transformers.RobertaTokenizer.from_pretrained(
    MODEL_BASE_PATH,
    do_lower_case=True,
)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898823.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1355863.0, style=ProgressStyle(descript…




In [None]:

class GoogleQnADataset(torch.utils.data.Dataset):
    """Map-style dataset of tokenized question/answer pairs with their targets.

    Each item encodes ``question_title + " " + question_body`` as the first
    segment and ``answer`` as the second segment, truncated and padded by the
    tokenizer to exactly ``MAX_LEN`` tokens.

    Args:
        qTitle: Indexable sequence of question titles.
        qBody: Indexable sequence of question bodies, aligned with ``qTitle``.
        answer: Indexable sequence of answers, aligned with ``qTitle``.
        targets: 2-D array indexed as ``targets[item, :]`` to get one row of
            target values per example.
        TOKENIZER: Tokenizer object exposing ``encode_plus`` (the notebook
            passes a ``transformers.RobertaTokenizer``).
        MAX_LEN: Fixed length, in tokens, of every encoded example.
    """

    def __init__(self, qTitle, qBody, answer, targets, TOKENIZER, MAX_LEN):
        # A dataset holds no trainable parameters, so it derives from
        # ``torch.utils.data.Dataset`` rather than ``torch.nn.Module``.
        super().__init__()
        self.qTitle = qTitle
        self.qBody = qBody
        self.answer = answer
        self.targets = targets
        self.tokenizer = TOKENIZER
        self.max_len = MAX_LEN

    def __len__(self):
        """Return the number of examples (one per question title)."""
        return len(self.qTitle)

    def __getitem__(self, item):
        """Return the encoded example at position ``item``.

        Returns:
            dict with ``"ids"`` (long tensor of token ids), ``"masks"`` (long
            tensor attention mask) and ``"targets"`` (float tensor holding the
            row ``targets[item, :]``).
        """
        question_title = str(self.qTitle[item])
        question_body = str(self.qBody[item])
        answer = str(self.answer[item])

        # With the RoBERTa tokenizer used in this notebook the pair is laid out
        # as: <s> Q-TITLE Q-BODY </s></s> ANSWER </s> <pad>...
        #
        # ``padding="max_length"`` lets the tokenizer pad with its own pad token
        # and extend the attention mask. Padding ``input_ids`` by hand with 0 is
        # wrong for RoBERTa, whose id 0 is ``<s>`` (the pad id is 1).
        inputs = self.tokenizer.encode_plus(
            question_title + " " + question_body,
            answer,
            add_special_tokens=True,
            max_length=self.max_len,
            truncation=True,
            padding="max_length",
        )

        return {
            "ids": torch.tensor(inputs["input_ids"], dtype=torch.long),
            "masks": torch.tensor(inputs["attention_mask"], dtype=torch.long),
            "targets": torch.tensor(self.targets[item, :], dtype=torch.float),
        }

In [None]:
class ROBERTAModel(torch.nn.Module):
    """RoBERTa encoder followed by a small feed-forward head, one logit per label.

    The head takes the hidden state of the first token, applies dropout, a
    square linear layer with ``tanh``, dropout again, and a final linear
    projection to ``num_labels`` raw logits (no sigmoid is applied here).

    Args:
        conf: ``transformers.RobertaConfig`` for the encoder; its
            ``hidden_size`` sets the width of the head.
        pretrained_path: Checkpoint name or directory to load the encoder
            weights from. When ``None`` the notebook-level ``MODEL_BASE_PATH``
            is used.
        num_labels: Number of output logits. Defaults to 30.
        dropout_prob: Dropout probability used in the head. Defaults to 0.3.
    """

    def __init__(self, conf, pretrained_path=None, num_labels=30, dropout_prob=0.3):
        super().__init__()
        self.conf = conf

        if pretrained_path is None:
            pretrained_path = MODEL_BASE_PATH

        # ``RobertaModel(conf)`` only builds the architecture with randomly
        # initialised weights; ``from_pretrained`` is what actually loads the
        # trained checkpoint that fine-tuning is supposed to start from.
        self.roberta = transformers.RobertaModel.from_pretrained(
            pretrained_path,
            config=conf,
            add_pooling_layer=False,
        )

        hidden_size = conf.hidden_size
        self.dense = torch.nn.Linear(hidden_size, hidden_size)
        self.dropout = torch.nn.Dropout(dropout_prob)
        self.classifier = torch.nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        """Return raw logits for a batch of token ids and attention masks."""
        # Index the first element instead of tuple-unpacking so the call does
        # not depend on how many extra elements the encoder returns.
        sequence_output = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            return_dict=False,
        )[0]

        # Hidden state of the first token of every sequence.
        first_token_state = sequence_output[:, 0, :]

        hidden = self.dropout(first_token_state)
        hidden = torch.tanh(self.dense(hidden))
        hidden = self.dropout(hidden)
        return self.classifier(hidden)


In [None]:
def Loss(output, target):
    """Return the mean binary cross-entropy between raw logits and targets.

    Args:
        output: Tensor of raw (pre-sigmoid) logits.
        target: Float tensor of the same shape as ``output``.

    Returns:
        Scalar tensor holding the loss averaged over all elements.
    """
    return torch.nn.functional.binary_cross_entropy_with_logits(output, target)
    


def Train(DataLoader, Model, Optimizer, Device, Scheduler=None):
    """Run one optimisation pass over every batch in ``DataLoader``.

    Args:
        DataLoader: Iterable of batches; each batch is a mapping with the keys
            ``"ids"``, ``"masks"`` and ``"targets"``.
        Model: Module called as ``Model(input_ids=..., attention_mask=...)``.
        Optimizer: Object providing ``zero_grad()`` and ``step()``.
        Device: Device the batch tensors are moved to.
        Scheduler: Optional object whose ``step()`` is called after every
            optimizer step.
    """
    Model.train()

    for index, batch in enumerate(DataLoader):
        # Move the batch onto the target device with the expected dtypes.
        ids = batch["ids"].to(Device, dtype=torch.long)
        masks = batch["masks"].to(Device, dtype=torch.long)
        target = batch["targets"].to(Device, dtype=torch.float)

        Optimizer.zero_grad()

        loss = Loss(Model(input_ids=ids, attention_mask=masks), target)
        loss.backward()

        # Cap the gradient norm at 1.0 to guard against exploding gradients.
        torch.nn.utils.clip_grad_norm_(Model.parameters(), 1.0)

        Optimizer.step()
        if Scheduler is not None:
            Scheduler.step()

        # Report the running loss on every tenth batch.
        if not index % 10:
            print(f"Index {index} >>>====================>>> Train Loss {loss}")



def Eval(DataLoader, Model, Device):
    """Evaluate ``Model`` on every batch of ``DataLoader`` without gradients.

    Args:
        DataLoader: Iterable of batches; each batch is a mapping with the keys
            ``"ids"``, ``"masks"`` and ``"targets"``.
        Model: Module called as ``Model(input_ids=..., attention_mask=...)``
            and returning raw logits.
        Device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(loss, targets, outputs)`` where ``loss`` is a scalar tensor
        holding the mean loss over the whole loader (each batch weighted by its
        number of examples), ``targets`` is a NumPy array of all target rows
        and ``outputs`` is a NumPy array of the sigmoid of all logits, both
        stacked in loader order.

    Raises:
        ValueError: If ``DataLoader`` yields no batches.
    """
    Model.eval()
    final_outputs = []
    final_targets = []
    loss_sum = 0.0
    example_count = 0

    # Inference only: skip building autograd graphs to save memory and time.
    with torch.no_grad():
        for index, batch in enumerate(DataLoader):
            ids = batch["ids"].to(Device, dtype=torch.long)
            masks = batch["masks"].to(Device, dtype=torch.long)
            target = batch["targets"].to(Device, dtype=torch.float)

            output = Model(
                input_ids=ids,
                attention_mask=masks
            )

            loss = Loss(output, target)

            # Accumulate an example-weighted sum so the value returned is the
            # loss over the full loader rather than only the last batch.
            batch_size = target.size(0)
            loss_sum = loss_sum + loss.detach() * batch_size
            example_count += batch_size

            final_targets.extend(target.detach().cpu().numpy().tolist())
            final_outputs.extend(torch.sigmoid(output).detach().cpu().numpy().tolist())

            if index % 10 == 0:
                print(f"Index : {index} >>>====================>>> Valid Loss : {loss}")

    if example_count == 0:
        raise ValueError("Eval received an empty DataLoader; nothing to evaluate.")

    mean_loss = loss_sum / example_count
    return mean_loss, np.vstack(final_targets), np.vstack(final_outputs)

        

In [None]:
def optimizer_params(Model):
    """Split ``Model``'s parameters into a decayed and a non-decayed group.

    A parameter goes into the non-decayed group when its name contains
    ``"bias"``, ``"LayerNorm.bias"`` or ``"LayerNorm.weight"``; every other
    parameter goes into the decayed group.

    Args:
        Model: Module exposing ``named_parameters()``.

    Returns:
        List of two parameter-group dicts: the first with ``weight_decay`` of
        0.001, the second with ``weight_decay`` of 0.0.
    """
    exempt_markers = ("bias", "LayerNorm.bias", "LayerNorm.weight")

    decayed = []
    exempt = []
    for name, param in Model.named_parameters():
        is_exempt = any(marker in name for marker in exempt_markers)
        (exempt if is_exempt else decayed).append(param)

    return [
        {'params': decayed, 'weight_decay': 0.001},
        {'params': exempt, 'weight_decay': 0.0},
    ]


In [None]:

def train(
    train_csv="/content/drive/MyDrive/Neuron/QnA/train.csv",
    sample_submission_csv="/content/drive/MyDrive/Neuron/QnA/sample_submission.csv",
    checkpoint_path="model.bin",
):
    """Fine-tune ``ROBERTAModel`` and keep the checkpoint with the best validation loss.

    The training CSV is split 80/20 into train and validation sets, the model
    is trained for ``EPOCH`` epochs with AdamW and a linear learning-rate
    schedule, and after every epoch the weights are written to
    ``checkpoint_path`` whenever the validation loss improves.

    Args:
        train_csv: Path of the training CSV; must provide the
            ``question_title``, ``question_body`` and ``answer`` columns plus
            the target columns.
        sample_submission_csv: Path of the sample-submission CSV; every column
            after the first one is used as a target column name.
        checkpoint_path: Destination file for the best ``state_dict``.
    """
    # Loading the data
    train_frame = pd.read_csv(train_csv)
    submission_frame = pd.read_csv(sample_submission_csv)

    # Target columns: every sample-submission column after the first one.
    target_cols = submission_frame.columns[1:]

    # Splitting the data into train and validation sets
    df_train, df_valid = model_selection.train_test_split(
        train_frame,
        test_size=0.2,
        random_state=2021,
    )

    # Creating the training and validation datasets
    Train_Dataset = GoogleQnADataset(
        qTitle=df_train.question_title.values,
        qBody=df_train.question_body.values,
        answer=df_train.answer.values,
        targets=df_train[target_cols].values,
        TOKENIZER=TOKENIZER,
        MAX_LEN=MAX_LEN,
    )

    Valid_Dataset = GoogleQnADataset(
        qTitle=df_valid.question_title.values,
        qBody=df_valid.question_body.values,
        answer=df_valid.answer.values,
        targets=df_valid[target_cols].values,
        TOKENIZER=TOKENIZER,
        MAX_LEN=MAX_LEN,
    )

    # Initialization of the train and validation DataLoaders
    Train_DataLoader = torch.utils.data.DataLoader(
        Train_Dataset,
        batch_size=TRAIN_BATCH_SIZE,
        sampler=torch.utils.data.RandomSampler(Train_Dataset),
    )

    Valid_DataLoader = torch.utils.data.DataLoader(
        Valid_Dataset,
        batch_size=VALID_BATCH_SIZE,
        sampler=torch.utils.data.SequentialSampler(Valid_Dataset),
    )

    # Initializing the model
    config = transformers.RobertaConfig.from_pretrained(MODEL_BASE_PATH)
    model = ROBERTAModel(conf=config)
    model.to(DEVICE)

    # Initializing the optimizer.
    # torch.optim.AdamW replaces the deprecated transformers.AdamW; eps=1e-6
    # mirrors the default epsilon of the optimizer it replaces.
    optimizer = torch.optim.AdamW(
        optimizer_params(Model=model),
        lr=3e-5,
        eps=1e-6,
    )

    # Initializing the scheduler.
    # The scheduler is stepped once per training batch, so the schedule length
    # must be the number of training batches per epoch times the epoch count
    # (not the size of the full, unsplit dataframe).
    total_steps = len(Train_DataLoader) * EPOCH

    scheduler = transformers.get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps,
    )

    # Track the best validation loss across epochs; it must be initialised
    # before the loop so that later epochs are compared against earlier ones.
    best_loss = np.inf

    for _ in trange(EPOCH, desc="EPOCH"):
        Train(
            DataLoader=Train_DataLoader,
            Model=model,
            Optimizer=optimizer,
            Device=DEVICE,
            Scheduler=scheduler,
        )

        valid_loss, _, _ = Eval(
            DataLoader=Valid_DataLoader,
            Model=model,
            Device=DEVICE,
        )
        # Eval may hand back a scalar tensor; compare plain Python floats.
        valid_loss = float(valid_loss)

        # Checkpoint only when this epoch improved on the best loss so far.
        if valid_loss < best_loss:
            torch.save(model.state_dict(), checkpoint_path)
            best_loss = valid_loss




In [None]:
# Run the end-to-end pipeline: load the CSVs, fine-tune the model, save model.bin.
train()


EPOCH:   0%|          | 0/5 [00:00<?, ?it/s][A




EPOCH:  20%|██        | 1/5 [02:16<09:07, 136.79s/it][A




EPOCH:  40%|████      | 2/5 [04:34<06:51, 137.16s/it][A




EPOCH:  60%|██████    | 3/5 [06:54<04:35, 137.95s/it][A




EPOCH:  80%|████████  | 4/5 [09:15<02:18, 138.74s/it][A




EPOCH: 100%|██████████| 5/5 [11:35<00:00, 139.15s/it]




