# Common imports


In [1]:
import torch

#DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
DEVICE = torch.device("cpu")

# Text Transformations

# Dataset

In [2]:
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchtext.transforms as T
from torch.hub import load_state_dict_from_url
import torch.nn as nn

class Custom_IMDB(Dataset):
    def __init__(self, data_split_type, data_path='../data/IMDB Dataset.csv'):
        
        self.data = self.get_data(data_split_type, data_path)

        padding_idx = 1
        bos_idx = 0
        eos_idx = 2
        max_seq_len = 512
        xlmr_vocab_path = r"https://download.pytorch.org/models/text/xlmr.vocab.pt"
        xlmr_spm_model_path = r"https://download.pytorch.org/models/text/xlmr.sentencepiece.bpe.model"

        self.text_transform = T.Sequential(
            T.SentencePieceTokenizer(xlmr_spm_model_path),
            T.VocabTransform(load_state_dict_from_url(xlmr_vocab_path)),
            T.Truncate(max_seq_len - 2),
            T.AddToken(token=bos_idx, begin=True),
            T.AddToken(token=eos_idx, begin=False),
            T.ToTensor()
        )

    def label_to_index(self, index):
        label = self.data['sentiment'][index]
        label_to_index = {"positive" : [1, 0] , "negative" : [0, 1]}

        return label_to_index[label]
    
    def __len__(self):
        return len(self.data.index)

    def __getitem__(self, index):
        review_text = self.data['review'][index]
        transformed_text = self.text_transform(review_text)

        label = self.label_to_index(index)

        return {'text': transformed_text, 'label': label}
    
    def get_data(self, data_split_type, data_path):
        data = pd.read_csv(data_path)
        self.train_slice = int(len(data)*0.7)
        self.val_slice = self.train_slice + int(len(data)*0.2)
        #self.test_slice = val_slice + int(len(data)*0.1)

        if data_split_type == 'train':
            return data[:self.train_slice].reset_index()
        elif data_split_type == 'val':
            return data[self.train_slice:self.val_slice].reset_index()
        elif data_split_type == 'test':
            return data[self.val_slice:].reset_index()
        else:
            print('Error!!')



# Data Preparations

In [3]:
#from torchtext.datasets import IMDB

batch_size = 10

train_datapipe = Custom_IMDB(data_split_type="train")
val_datapipe = Custom_IMDB(data_split_type="val")
test_datapipe = Custom_IMDB(data_split_type="test")


# Transform the raw dataset using non-batched API (i.e apply transformation line by line)
# def apply_transform(x):
#     return text_transform(x[0]), x[1]


def collate_fn(batch):
        xs = [b['text'] for b in batch]
        ys = [b['label'] for b in batch]
        ls = [b['text'].shape[0] for b in batch]
        padded = nn.utils.rnn.pad_sequence(xs, batch_first=True, padding_value=0)
        return {'seq' : padded, 'seq_len' : ls, 'labels' : torch.tensor(ys)}

# train_datapipe = train_datapipe.map(apply_transform)
# train_datapipe = train_datapipe.batch(batch_size)
# train_datapipe = train_datapipe.rows2columnar(["token_ids", "target"])
train_dataloader = DataLoader(train_datapipe, batch_size=batch_size, collate_fn=collate_fn)

# dev_datapipe = dev_datapipe.map(apply_transform)
# dev_datapipe = dev_datapipe.batch(batch_size)
# dev_datapipe = dev_datapipe.rows2columnar(["token_ids", "target"])
val_dataloader = DataLoader(val_datapipe, batch_size=batch_size, collate_fn=collate_fn)

test_dataloader = DataLoader(test_datapipe, batch_size=batch_size, collate_fn=collate_fn)

# Model Preparation

In [4]:
num_classes = 2
input_dim = 768

from torchtext.models import RobertaClassificationHead, XLMR_BASE_ENCODER

classifier_head = RobertaClassificationHead(num_classes=num_classes, input_dim=input_dim)
model = XLMR_BASE_ENCODER.get_model(head=classifier_head)
model.to(DEVICE)

RobertaModel(
  (encoder): RobertaEncoder(
    (transformer): TransformerEncoder(
      (token_embedding): Embedding(250002, 768, padding_idx=1)
      (layers): TransformerEncoder(
        (layers): ModuleList(
          (0-11): 12 x TransformerEncoderLayer(
            (self_attn): MultiheadAttention(
              (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
            )
            (linear1): Linear(in_features=768, out_features=3072, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
            (linear2): Linear(in_features=3072, out_features=768, bias=True)
            (norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (dropout1): Dropout(p=0.1, inplace=False)
            (dropout2): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (positional_embedding): PositionalEmbedding(
        (embedding): Embedding(5

# Training methods

In [8]:
import torchtext.functional as F
from torch.optim import AdamW

learning_rate = 1e-5
optim = AdamW(model.parameters(), lr=learning_rate)
criteria = nn.CrossEntropyLoss()


def train_step(input, target):
    output = model(input)
    for label in target:
        loss = criteria(output[1], label)
        optim.zero_grad()
        loss.backward(retain_graph=True)
        optim.step()


def eval_step(input, target):
    output = model(input)
    loss = criteria(output, target).item()
    return float(loss), (output.argmax(1) == target).type(torch.float).sum().item()


def evaluate():
    model.eval()
    total_loss = 0
    correct_predictions = 0
    total_predictions = 0
    counter = 0
    with torch.no_grad():
        for batch in val_dataloader:
            #input = F.to_tensor(batch["token_ids"], padding_value=padding_idx).to(DEVICE)
            input = batch['text'].to(DEVICE)
            target = torch.tensor(batch['label']).float().to(DEVICE)
            loss, predictions = eval_step(input, target)
            total_loss += loss
            correct_predictions += predictions
            total_predictions += len(target)
            counter += 1

    return total_loss / counter, correct_predictions / total_predictions

# Train

In [9]:
num_epochs = 1

for e in range(num_epochs):
    for index, batch in enumerate(train_dataloader):
        #print('index ', index, end=' \r')
        #input = F.to_tensor(batch[0], padding_value=padding_idx).to(DEVICE)
        input = batch['seq'].to(DEVICE)
        target = torch.tensor(batch['labels']).float().to(DEVICE)
        train_step(input, target)

    loss, accuracy = evaluate()
    print("Epoch = [{}], loss = [{}], accuracy = [{}]".format(e, loss, accuracy))

  target = torch.tensor(batch['labels']).float().to(DEVICE)


RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [768, 2]], which is output 0 of AsStridedBackward0, is at version 8; expected version 6 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).