In [None]:
import torch
import pandas as pd
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer

In [None]:
class Dataset:
    """Map-style dataset that tokenizes one text entry per index, on demand.

    Args:
        text: Indexable, sized collection of raw texts; each entry is passed
            through ``str()`` before tokenization.
        tokenizer: Callable invoked as
            ``tokenizer(text, max_length=..., padding="max_length", truncation=True)``
            that returns a mapping containing ``"input_ids"`` and
            ``"attention_mask"``.
        max_len: Value forwarded to the tokenizer as ``max_length``.
    """

    def __init__(self, text, tokenizer, max_len):
        self.text = text
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of text entries."""
        return len(self.text)

    def __getitem__(self, item):
        """Tokenize entry ``item`` and return its features as ``torch.long`` tensors.

        Only ``input_ids`` and ``attention_mask`` are returned; any other field
        the tokenizer produces (e.g. ``token_type_ids``) is left out.
        """
        encoded = self.tokenizer(
            str(self.text[item]),
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
        )
        return {
            field: torch.tensor(encoded[field], dtype=torch.long)
            for field in ("input_ids", "attention_mask")
        }

In [None]:
def generate_predictions(
    model_path,
    max_len,
    threshold=0.6,
    batch_size=32,
    device=None,
    test_csv="../input/nlp-getting-started/test.csv",
):
    """Run a sequence-classification model over a CSV and return 0/1 labels.

    The model and tokenizer are loaded from ``model_path``, the ``text`` column
    of ``test_csv`` is tokenized to ``max_len`` tokens, and each row is labelled
    1 when the softmax probability at class index 1 is at least ``threshold``.

    Args:
        model_path: Identifier or path passed to ``from_pretrained`` for both
            the model and the tokenizer.
        max_len: Maximum token length used for padding/truncation.
        threshold: Minimum class-1 probability for a row to be labelled 1.
        batch_size: Number of rows per inference batch.
        device: Device to run on (string or ``torch.device``). ``None`` selects
            ``"cuda"`` when ``torch.cuda.is_available()`` and ``"cpu"`` otherwise.
        test_csv: Path of the CSV file to predict on; it must have a ``text``
            column.

    Returns:
        A 1-D integer ``numpy`` array with one 0/1 label per row, in file order.
    """
    # Fall back to the CPU instead of crashing when no GPU is present.
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device)
    use_cuda = device.type == "cuda"

    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    model.to(device)
    model.eval()

    df = pd.read_csv(test_csv)

    dataset = Dataset(text=df["text"].values, tokenizer=tokenizer, max_len=max_len)
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=4,
        # Pinned host memory only helps host-to-GPU copies.
        pin_memory=use_cuda,
        # Keep file order so predictions line up with the input rows.
        shuffle=False,
    )

    final_output = []

    # One no-grad context for the whole loop: inference needs no autograd state.
    with torch.no_grad():
        for data in data_loader:
            data = {key: value.to(device) for key, value in data.items()}
            logits = model(**data).logits
            class1_prob = torch.nn.functional.softmax(logits, dim=1)[:, 1]
            class1_prob = class1_prob.cpu().numpy()
            final_output.extend((class1_prob >= threshold).astype(int).tolist())

    if use_cuda:
        torch.cuda.empty_cache()
    # Explicit dtype keeps the result integer-typed even when there are no rows.
    return np.array(final_output, dtype=int)

In [None]:
# Score the test set with the fine-tuned checkpoint; inputs are padded/truncated to 64 tokens.
preds = generate_predictions(
    model_path="abhishek/autonlp-fred2-2682064",
    max_len=64,
)

In [None]:
# Fill the sample submission's `target` column with the predictions and save it.
sample = pd.read_csv("../input/nlp-getting-started/sample_submission.csv")

# Fail early with a readable message if predictions and template rows differ.
assert len(sample) == len(preds), (
    f"expected {len(sample)} predictions, got {len(preds)}"
)

# Bracket assignment always writes the column; attribute assignment
# (`sample.target = ...`) would only set a Python attribute if the column
# were missing, and the CSV would then be written without predictions.
sample["target"] = preds
sample.to_csv("submission.csv", index=False)

In [None]:
# Sanity check: how many rows were assigned to each predicted label.
sample["target"].value_counts()