# Test a RoBERTa model on a hold-out dataset

In [1]:
import sys

# Add the parent directory to the module search path.
# NOTE(review): presumably this is what lets the `adna` package imported
# further down resolve when the notebook runs from its own folder — confirm.
sys.path.append("..")

In [2]:
import pandas as pd
from sklearn import metrics
from transformers import (
    RobertaForSequenceClassification,
    RobertaTokenizerFast,
    TextClassificationPipeline,
)

from adna.pylib import bpe_dataset
from adna.pylib import consts

In [3]:
# Checkpoint directory of the model under test; it is handed to
# `from_pretrained` when the model is loaded below.
MODEL_PATH = consts.MT_DIR / "finetune" / "checkpoint-65868"
# Alternative checkpoint kept for quick switching:
# MODEL_PATH = consts.MT_DIR / "train" / "checkpoint-16"

## Build the tokenizer

In [4]:
# Load the tokenizer from the files stored under `consts.MT_DIR`.
# The directory is converted to `str` before being passed to `from_pretrained`.
tokenizer_path = str(consts.MT_DIR)
tokenizer = RobertaTokenizerFast.from_pretrained(tokenizer_path)

## Build the dataset

In [5]:
# Read the "test" dataset as two parallel collections: the input sequences and
# their labels (they are zipped together element-wise in the test loop).
# NOTE(review): `limit=10_000` presumably caps the number of records read —
# confirm against `bpe_dataset.read_dataset`.
SEQS, LABELS = bpe_dataset.read_dataset("test", limit=10_000)

## Get the trained model

In [6]:
# Load the sequence-classification model from the checkpoint at MODEL_PATH.
# `local_files_only=True` is passed so the weights are expected to come from
# disk rather than being fetched remotely.
model = RobertaForSequenceClassification.from_pretrained(
    MODEL_PATH, local_files_only=True
)

## Build the inference pipeline

In [7]:
# Bundle the model and tokenizer into one callable that classifies raw
# sequences; `batch_size=64` is the batch size handed to the pipeline.
pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, batch_size=64)

## Test the model

In [8]:
# Classify every test sequence and collect the true and predicted class ids
# in two parallel lists for the results cells that follow.
#
# Each pipeline result is read as a mapping whose "label" string ends in the
# numeric class id (e.g. "LABEL_1"). The whole trailing run of digits is
# parsed, so ids of 10 or more are not truncated to their last digit.
y_true, y_pred = [], []

for result, label in zip(pipe(SEQS), LABELS):
    y_true.append(label)
    label_name = result["label"]
    # Keep only the digits at the end of the label name: "LABEL_12" -> "12".
    # A label with no trailing digits raises ValueError in int().
    class_id = label_name[len(label_name.rstrip("0123456789")):]
    y_pred.append(int(class_id))

KeyboardInterrupt: 

## Display results

In [None]:
# Cross-tabulate true labels (rows) against predicted labels (columns) and
# print the resulting table of counts.
y_actual = pd.Series(data=y_true, name="Actual")
y_predicted = pd.Series(data=y_pred, name="Predicted")

confusion = pd.crosstab(index=y_actual, columns=y_predicted)
print(confusion)

In [None]:
# Precision of the predictions, using scikit-learn's default arguments.
metrics.precision_score(y_true, y_pred)

In [None]:
# Recall of the predictions, using scikit-learn's default arguments.
metrics.recall_score(y_true, y_pred)

In [None]:
# F1 score of the predictions, using scikit-learn's default arguments.
metrics.f1_score(y_true, y_pred)

In [None]:
# Fraction of predictions that match the true label.
metrics.accuracy_score(y_true, y_pred)