In [1]:
import warnings
# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this filter has no category/module restriction, so it also
# hides deprecation and numerical warnings from all libraries used below —
# consider narrowing it once the noisy warning source is identified.
warnings.filterwarnings("ignore")

from transformers import BartForConditionalGeneration, AutoTokenizer
import pandas as pd
from pathlib import Path

from src.metrics import preds_time_tps_lacc
from src.inference_utils import GenerativeModel

In [2]:
# All paths are resolved relative to the kernel's current working directory,
# so the notebook must be launched from the project root.
CURDIR = Path.cwd()

# Directory containing the input data files (test.csv is read from here).
DATADIR = CURDIR / "data" / "original"
assert DATADIR.exists(), f"Data directory not found: {DATADIR}"

# Directory containing saved model checkpoints.
MODELS_DIR = CURDIR / "models"
assert MODELS_DIR.exists(), f"Models directory not found: {MODELS_DIR}"

# Checkpoint directory handed to `from_pretrained` for both model and tokenizer.
MODEL_ID = MODELS_DIR / "baseline"
assert MODEL_ID.exists(), f"Baseline model directory not found: {MODEL_ID}"

In [3]:
# Read the tab-separated test set, using its first column as the index.
df = pd.read_csv(DATADIR / "test.csv", sep="\t", index_col=0)

# Slice out the rows belonging to each value of the "split" column.
df_holdout = df.loc[df["split"].eq("holdout")]
df_unknown = df.loc[df["split"].eq("unknown")]

# Row counts: full frame, holdout subset, unknown subset.
len(df), len(df_holdout), len(df_unknown)

(153991, 138882, 15109)

In [4]:
# Load the checkpoint stored under MODEL_ID, move it to the GPU (a CUDA
# device is required), and switch it to evaluation mode.
model = BartForConditionalGeneration.from_pretrained(MODEL_ID)
model = model.to("cuda")
model = model.eval()

# The tokenizer is loaded from the same checkpoint directory as the model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

In [5]:
# Wrap the loaded model and tokenizer in the project's GenerativeModel helper;
# its `predict` method is what gets passed to the evaluation helper below.
gen_model = GenerativeModel(model, tokenizer)

In [6]:
# Upper bound on the number of rows evaluated for the two larger frames.
MAX_EVAL_ROWS = 30000

# (label, frame) pairs, evaluated in this order. `head` takes the leading rows
# as stored in the file, not a random sample.
eval_sets = [
    (f"full test set (first {MAX_EVAL_ROWS} rows)", df.head(MAX_EVAL_ROWS)),
    (f"holdout (first {MAX_EVAL_ROWS} rows)", df_holdout.head(MAX_EVAL_ROWS)),
    ("unknown (all rows)", df_unknown),
]

for split_label, eval_df in eval_sets:
    # Print the label first so it precedes the metrics of its own run.
    print(f"Split: {split_label}")
    # The first returned element (presumably the predictions, judging by the
    # helper's name) is not used here. It is bound to `_preds` rather than `_`
    # so IPython's last-output variable is not shadowed.
    _preds, elapsed_time, tps, lacc = preds_time_tps_lacc(gen_model.predict, eval_df)
    print(f"Elapsed time: {elapsed_time:.3f} seconds")
    print(f"TPS: {tps:.3f}")
    print(f"lAcc: {lacc:.3f}")
    print()

938it [01:36,  9.77it/s]                         


Elapsed time: 96.030 seconds
TPS: 312.403
lAcc: 0.976



938it [01:29, 10.48it/s]                         


Elapsed time: 89.509 seconds
TPS: 335.161
lAcc: 0.992



473it [00:55,  8.52it/s]                         

Elapsed time: 55.520 seconds
TPS: 272.135
lAcc: 0.938




