# Test a RoBERTa model on a hold-out dataset

In [1]:
import sys

# Put the parent directory on the import path so that packages located one
# level above this notebook can be imported. The membership check keeps the
# cell idempotent: re-running it does not pile up duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

In [2]:
import pandas as pd
from sklearn import metrics
from tqdm import tqdm
from transformers import (
    RobertaForSequenceClassification,
    RobertaTokenizerFast,
    TextClassificationPipeline,
)

from adna.pylib import adna_dataset
from adna.pylib import consts

In [3]:
# Checkpoint directory of the fine-tuned classifier evaluated in this notebook.
MODEL_PATH = consts.MT_DIR / "finetune" / "checkpoint-5986"
# Alternative checkpoint, kept here for quick switching:
# MODEL_PATH = consts.MT_DIR / "train" / "checkpoint-16"

## Build the tokenizer

In [4]:
# The tokenizer files are read from the top-level model directory
# (consts.MT_DIR), not from the checkpoint directory in MODEL_PATH.
tokenizer_path = str(consts.MT_DIR)
# local_files_only=True mirrors how the model itself is loaded and prevents any
# attempt to resolve the path on the Hugging Face Hub.
tokenizer = RobertaTokenizerFast.from_pretrained(
    tokenizer_path, local_files_only=True
)

## Build the dataset

In [5]:
# Read the "test" split as two parallel collections: sequences and their labels.
# NOTE(review): limit=10_000 presumably caps the number of records read —
# confirm in adna_dataset.read_dataset.
SEQS, LABELS = adna_dataset.read_dataset("test", limit=10_000)

## Get the trained model

In [6]:
# Load the sequence-classification model from the checkpoint in MODEL_PATH;
# local_files_only=True restricts loading to files already on disk.
model = RobertaForSequenceClassification.from_pretrained(
    MODEL_PATH, local_files_only=True
)

## Build the inference pipeline

In [7]:
# Bundle model and tokenizer so a raw sequence can be classified in one call.
pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer)

In [8]:
# Smoke-test the pipeline on a single input before the full evaluation run.
# NOTE(review): the evaluation loop passes whole SEQS items to pipe(), whereas
# SEQS[0][0] indexes *into* the first item — confirm this is the intended input.
pipe(SEQS[0][0])

[{'label': 'LABEL_1', 'score': 0.9427714943885803}]

## Test the model

In [9]:
# Classify every held-out sequence and collect integer class ids for scoring.
y_true, y_pred = [], []
# zip() has no length, so tqdm is given the total explicitly; this yields a
# real progress bar with percentage and ETA instead of a bare iteration counter.
for seq, label in tqdm(zip(SEQS, LABELS), total=min(len(SEQS), len(LABELS))):
    y_true.append(label)
    pred = pipe(seq)
    pred = pred[0]["label"]
    # Labels have the form "LABEL_<id>". Parse the whole numeric suffix rather
    # than only the final character so class ids >= 10 are not truncated.
    pred = int(pred.rsplit("_", 1)[-1])
    y_pred.append(pred)

10000it [03:34, 46.56it/s]


In [10]:
# Wrap the label lists in named Series so the cross-tabulation gets readable
# row ("Actual") and column ("Predicted") headings.
y_actual, y_predicted = (
    pd.Series(values, name=title)
    for values, title in ((y_true, "Actual"), (y_pred, "Predicted"))
)

# Confusion table: rows are true labels, columns are predicted labels.
confusion = pd.crosstab(index=y_actual, columns=y_predicted)
print(confusion)

Predicted     0     1
Actual               
0          7830   496
1            74  1600


In [11]:
# Precision for the positive class (label 1): TP / (TP + FP).
metrics.precision_score(y_true=y_true, y_pred=y_pred, pos_label=1)

0.7633587786259542

In [12]:
# Recall for the positive class (label 1): TP / (TP + FN).
metrics.recall_score(y_true=y_true, y_pred=y_pred, pos_label=1)

0.955794504181601

In [13]:
# F1 for the positive class (label 1): harmonic mean of precision and recall.
metrics.f1_score(y_true=y_true, y_pred=y_pred, pos_label=1)

0.8488063660477455

In [14]:
# Overall accuracy: fraction of sequences whose predicted label equals the true one.
metrics.accuracy_score(y_true=y_true, y_pred=y_pred)

0.943