In [None]:
!pip install transformers[torch] -U
!pip install wandb -qU
! pip install sklearn

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score
from transformers import AutoModelForSequenceClassification, Trainer, BertTokenizer
import torch

In [None]:
def read_data(input):
  """Read a whitespace-separated file of ``text ... label`` lines.

  The last whitespace-delimited token of every line is taken as the label;
  the remaining tokens, re-joined with single spaces, form the text. Blank
  (whitespace-only) lines are skipped.

  Args:
    input: Path of the UTF-8 encoded file to read.

  Returns:
    A tuple ``(inputs, labels)`` of two parallel lists of strings.
  """
  inputs = []
  labels = []

  with open(input, 'r', encoding='utf-8') as f:
    for line in f:
      tokens = line.split()
      if not tokens:
        # A blank line has no label token; indexing [-1] would raise
        # IndexError (e.g. on a trailing empty line at the end of the file).
        continue

      inputs.append(' '.join(tokens[:-1]))
      labels.append(tokens[-1])

  return inputs, labels


def change_labels(labels):
  """Convert ``'OFF'`` / ``'NOT'`` string labels to integer class ids.

  ``'OFF'`` maps to 0 and ``'NOT'`` maps to 1.

  Args:
    labels: Iterable of label strings.

  Returns:
    A list of ints with exactly one entry per input label, in input order.

  Raises:
    ValueError: If a label is neither ``'OFF'`` nor ``'NOT'``. Silently
      dropping such a label would shift every following gold label out of
      alignment with the corresponding input / prediction.
  """
  label_to_id = {'OFF': 0, 'NOT': 1}
  converted_labels = []

  for position, label in enumerate(labels):
    if label not in label_to_id:
      raise ValueError(
          f"Unknown label {label!r} at position {position}; "
          f"expected one of {sorted(label_to_id)}"
      )
    converted_labels.append(label_to_id[label])

  return converted_labels

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset over tokenizer encodings with optional labels.

    Args:
        encodings: Mapping from feature name to an indexable sequence of
            per-example values; it must contain the key ``"input_ids"``,
            which determines the dataset length.
        labels: Optional indexable sequence with one label per example.
            ``None`` or an empty sequence means the dataset is unlabeled.
    """

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def _has_labels(self):
        """Return True when a non-empty label sequence was supplied."""
        # An explicit None/length check instead of `if self.labels:` because
        # the truth value of a multi-element ndarray/tensor is ambiguous and
        # raises.
        return self.labels is not None and len(self.labels) > 0

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors (plus ``"labels"`` if labeled)."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self._has_labels():
            item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples, taken from ``encodings["input_ids"]``."""
        return len(self.encodings["input_ids"])

In [None]:
# Load the test split: X_test holds the texts, Y_test the string labels
# (the last whitespace-separated token of each line, see read_data).
X_test, Y_test = read_data('data/OLID/test_preprocessed.tsv')

In [None]:
# Load the tokenizer for the 'bert-base-uncased' checkpoint.
# NOTE(review): presumably this matches the tokenizer the evaluated model was
# fine-tuned with — confirm.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [None]:
# Gold labels as integer ids; kept separate from the dataset and only used
# later when scoring the predictions.
Y_test_converted = change_labels(Y_test)

# Tokenize the test texts with padding and truncation (at most 512 tokens).
X_test_tokenized = tokenizer(
    X_test,
    padding=True,
    truncation=True,
    max_length=512,
)

# The dataset is built without labels: it is only used for inference.
test_dataset = Dataset(X_test_tokenized)

In [None]:
# Load the sequence-classification model to evaluate.
# NOTE(review): "models/name" looks like a placeholder path — point it at the
# actual fine-tuned checkpoint directory before running.
model = AutoModelForSequenceClassification.from_pretrained("models/name")
# Trainer is used here only as an inference wrapper (no training arguments given).
trainer = Trainer(model=model)

In [None]:
# Run inference over the test set. The first element of the result (preds[0])
# is consumed below as the per-example class scores.
preds = trainer.predict(test_dataset)

In [None]:
import torch
import torch.nn.functional as F

In [None]:
# Turn the per-example class scores in preds[0] into predicted class ids.
# Softmax is monotonic, so the arg-max of the raw scores is the same as the
# arg-max of the probabilities; one batched argmax replaces the per-row
# tensor conversion + softmax. Ties resolve to the lower class id (0), as in
# the original `scores[1] > scores[0]` comparison.
# Assumes preds[0] is a 2-D (num_examples, num_classes) array — TODO confirm.
real_preds = torch.as_tensor(preds[0]).argmax(dim=-1).tolist()

In [None]:
# zip() silently stops at the shorter sequence, so check alignment first:
# a length mismatch would otherwise pair predictions with the wrong labels.
assert len(real_preds) == len(Y_test_converted), (
    f'{len(real_preds)} predictions vs {len(Y_test_converted)} gold labels'
)

# Print every (predicted, gold) label pair for manual inspection.
for item in zip(real_preds, Y_test_converted):
  print(item)

In [None]:
# Macro-averaged precision, recall and F1 of the predictions against the gold
# labels, each rounded to three decimals.
precision, recall, f1 = (
    round(metric(Y_test_converted, real_preds, average='macro'), 3)
    for metric in (precision_score, recall_score, f1_score)
)

In [None]:
# Report the three macro-averaged scores, one per line.
for caption, score in (
    ('Macro-averaged precision: ', precision),
    ('Macro-averaged recall: ', recall),
    ('Macro-averaged f1: ', f1),
):
  print(caption, score)