In [None]:
import torch
from pathlib import Path
from tqdm import tqdm
from huggingface_hub import constants as hub_c

from evals import load_eval_dataset, get_tensors
from utils import get_dataset_args

# This notebook runs the model on GPU only; stop early when no CUDA device is visible.
if not torch.cuda.is_available():
    raise AssertionError("CUDA is not available")

device = torch.device("cuda")

## FinBERT-tone
Model from the Hugging Face Hub: [yiyanghkust/finbert-tone](https://huggingface.co/yiyanghkust/finbert-tone)

In [2]:
from transformers import BertTokenizer, BertForSequenceClassification

# Hugging Face Hub checkpoint used for both the tokenizer and the classifier.
model_id = 'yiyanghkust/finbert-tone'

# Class names in the order classifier_evaluate indexes them with the arg-max
# logit position.
# NOTE(review): the order presumably mirrors the checkpoint's id2label
# mapping — confirm against the model card.
labels = ["Neutral", "Positive", "Negative"]

tokenizer = BertTokenizer.from_pretrained(model_id)

# Load the checkpoint with a 3-label classification head, then move it to the
# target device.
model = BertForSequenceClassification.from_pretrained(model_id, num_labels=3)
model = model.to(device)

# Dataset arguments built from the tokenizer and the local Hugging Face cache
# directory. The "FPB" prompt template is replaced with a bare "{0}"
# placeholder (presumably so the input text is passed through unwrapped —
# TODO confirm against get_dataset_args).
finbert_args = get_dataset_args(tokenizer, Path(hub_c.HF_HUB_CACHE))
finbert_args.prompt_templates["FPB"] = "{0}"

In [3]:
def classifier_evaluate(model, testset, labels):
    """Measure a sequence classifier's accuracy on a multiple-choice test set.

    Each example is converted to tensors with ``get_tensors``, run through
    ``model`` on the module-level ``device``, and the position of the largest
    logit is mapped to a label name through ``labels``. A prediction counts
    as correct when that name equals the example's gold option string.

    The model's train/eval mode is left untouched; gradients are disabled for
    the duration of the evaluation.

    Args:
        model: Model accepting ``input_ids`` and ``attention_mask`` keyword
            arguments and returning an object with a ``logits`` attribute.
            Expected to already be on ``device``.
        testset: Sized iterable of examples. Each example must be accepted by
            ``get_tensors`` and provide ``"options"`` and ``"gold_index"``.
        labels: Sequence of label names indexed by predicted class position.

    Returns:
        dict: ``{"accuracy": float}`` with the fraction of correct
        predictions, or ``0.0`` when ``testset`` is empty.
    """
    total = len(testset)
    if total == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        return {"accuracy": 0.0}

    correct = 0
    prog_bar = tqdm(testset)

    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for i, example in enumerate(prog_bar):
            input_ids, attn_mask = get_tensors(example)

            # Call the module itself (not .forward) so registered hooks run.
            out = model(input_ids=input_ids.to(device),
                        attention_mask=attn_mask.to(device))

            # Arg-max over the flattened logits, as a plain Python int so it
            # can index ``labels`` directly.
            # NOTE(review): assumes one example per forward pass — confirm
            # get_tensors never returns a batch larger than 1.
            pred = torch.argmax(out.logits).item()

            if example["options"][example["gold_index"]] == labels[pred]:
                correct += 1

            # Running accuracy (percent) shown on the progress bar.
            prog_bar.set_description(f"{100 * correct / (i+1):.2f}")

    return {
        "accuracy": correct / total
    }

In [None]:
# Build the evaluation set for the "FPB" dataset id using the FinBERT
# tokenizer and the dataset arguments configured above.
# NOTE(review): "FPB" presumably refers to Financial PhraseBank — confirm in
# the evals module.
dataset_id = "FPB"
testset = load_eval_dataset(tokenizer, dataset_id, finbert_args)

# Score the classifier on every example and print the resulting accuracy dict.
results = classifier_evaluate(model, testset, labels)
print(results)