In [1]:
import torch
import torch.nn as nn
import random

In [2]:
# Prefer a CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
MODEL_NAME = 'roberta-base'
SEED = 42
# The model code in this notebook is built on torch (nn.Linear initialisation,
# nn.Dropout), which has its own random generator independent of Python's
# `random`; seed both so a fresh run starts from the same RNG state.
torch.manual_seed(SEED)
random.seed(a=SEED)

In [4]:
from transformers import RobertaModel, AutoTokenizer

In [5]:
# Tokenizer loaded for the MODEL_NAME checkpoint; NLIDataset calls it to encode
# the premise/hypothesis pairs.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [6]:
from torch.utils.data import DataLoader, Dataset

In [7]:
class NLIDataset(Dataset):
    """Pre-tokenised premise/hypothesis pairs served as per-example tensor dicts.

    Every pair is encoded once, at construction time, into padded PyTorch
    tensors; indexing then slices row ``idx`` out of each of those tensors.

    Args:
        data: Object supporting ``data['premise']`` and ``data['hypothesis']``
            (for example a pandas DataFrame); each entry is converted to a list.
        encoder: Callable accepting the same arguments as the Hugging Face
            tokenizer call used here. When omitted, the notebook-level
            ``tokenizer`` is looked up at construction time.
        max_length: Truncation length forwarded to the encoder. When omitted,
            the notebook-level ``MAX_LENGTH`` is looked up at construction time.
    """

    def __init__(self, data, encoder=None, max_length=None):
        # Fall back to the notebook globals only when the caller did not supply
        # a value, so the class no longer *requires* hidden notebook state.
        if encoder is None:
            encoder = tokenizer
        if max_length is None:
            max_length = MAX_LENGTH
        self.encodings = encoder(
            list(data['premise']),
            list(data['hypothesis']),
            padding=True,
            truncation=True,
            return_tensors="pt",
            max_length=max_length,
        )

    def __len__(self):
        """Number of encoded examples (rows of the ``input_ids`` tensor)."""
        return self.encodings["input_ids"].shape[0]

    def __getitem__(self, idx):
        """Return a dict holding row ``idx`` of every tensor in the encoding."""
        return {key: val[idx] for key, val in self.encodings.items()}

In [8]:
class DNNTransformerModel(nn.Module):
    """RoBERTa encoder followed by a small feed-forward classification head.

    The head projects the encoder's hidden size -> 512 -> 256 -> ``num_labels``;
    each of the first two projections is followed by LayerNorm, GELU and
    dropout with p=0.2.
    """

    def __init__(self, model_name, num_labels):
        super().__init__()
        self.transformer = RobertaModel.from_pretrained(model_name)
        encoder_width = self.transformer.config.hidden_size
        # The position of each layer determines its index inside the Sequential
        # (dnn.0 ... dnn.8) and therefore its state_dict key, so keep this order.
        head_layers = [
            nn.Linear(encoder_width, 512),
            nn.LayerNorm(512),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.LayerNorm(256),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(256, num_labels),
        ]
        self.dnn = nn.Sequential(*head_layers)

    def forward(self, input_ids, attention_mask):
        """Return the head's logits for the hidden state at sequence position 0."""
        encoded = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        first_position_state = encoded.last_hidden_state[:, 0, :]
        return self.dnn(first_position_state)

In [9]:
# Pasted paths often carry stray leading/trailing whitespace, which would make
# the later torch.load(modelPath) fail to find the file; trim it here.
modelPath = input("Input the path to the model with extension").strip()

Input the path to the model with extension /kaggle/input/nli_roberta_model/pytorch/default/1/nli_dnn_transformer.pth


In [10]:
# Rebuild the architecture, then restore the saved weights from modelPath.
model = DNNTransformerModel(MODEL_NAME, num_labels=2)
# map_location="cpu": deserialise onto the CPU so a checkpoint saved from a GPU
#   also loads on a CPU-only machine; the model is moved to DEVICE right after.
# weights_only=True: restrict unpickling to tensors and plain containers, which
#   is all load_state_dict needs, instead of running arbitrary pickled code.
model.load_state_dict(torch.load(modelPath, map_location="cpu", weights_only=True))
model = model.to(DEVICE)
# eval() disables the dropout layers for inference.
model.eval()

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  model.load_state_dict(torch.load(modelPath))


DNNTransformerModel(
  (transformer): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (Lay

In [11]:
# Pasted paths often carry stray leading/trailing whitespace, which would make
# the later pd.read_csv(documentPath) fail to find the file; trim it here.
documentPath = input("Input the path to the document").strip()

Input the path to the document /kaggle/input/testss/test.csv


In [13]:
import pandas as pd

In [14]:
# Load the CSV at the user-supplied path. NLIDataset later reads its
# 'premise' and 'hypothesis' columns.
document = pd.read_csv(documentPath)

In [17]:
# Inference settings: examples per DataLoader batch, and the max_length passed
# to the tokenizer (which is called with truncation enabled).
BATCH_SIZE, MAX_LENGTH = 16, 256

In [18]:
# Encode the whole document once, then iterate it in fixed-size batches.
# shuffle=False is already the DataLoader default; it is spelled out because the
# predictions are written out in iteration order.
doc_dataset = NLIDataset(data=document)
doc_loader = DataLoader(dataset=doc_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [20]:
# Collect the arg-max class index for every example, batch by batch.
# Gradient tracking is switched off because nothing is being trained here.
predictions = []
with torch.no_grad():
    for batch in doc_loader:
        # Move only the two tensors that the model's forward() takes.
        input_ids, attention_mask = (
            batch[name].to(DEVICE) for name in ('input_ids', 'attention_mask')
        )
        logits = model(input_ids, attention_mask)
        predictions.extend(logits.argmax(dim=1).cpu().numpy())

In [21]:
# One row per prediction, in the order they were collected; the DataFrame index
# is not written to the file.
results_df = pd.DataFrame(data={'predictions': predictions})
results_df.to_csv(path_or_buf='output.csv.predict', index=False)