In [76]:
import torch
from pathlib import Path

from transformers import AutoModelForSequenceClassification,AutoTokenizer

from data.q_and_a.train_and_eval import TrainAndEval
from data.q_and_a.eval_with_answers import EvalWithAnswers

from models_.building.llama_tokenizer import  load_tokenizer

from data.pubmed.from_json import FromJsonDataset
from data.pubmed.contents import ContentsDataset

from storage.faiss_ import FaissStorage

from rag.tokenization.llama import build_tokenizer_function
from rag.quering import build_querier
import os
from q_and_a.forward import build_enhanced_forwarder
from q_and_a.prompts import prompt
from q_and_a.picking.from_logits import build_from_logits
from q_and_a.eval import evaluate
from q_and_a.forward import build_forwarder

# Question-answering splits. The evaluation split is additionally wrapped in
# EvalWithAnswers; that wrapper is what gets passed to `evaluate` as
# `eval_dataset`.
train = TrainAndEval("../../data/pubmed_QA_train.json")
evaluationData = TrainAndEval("../../data/pubmed_QA_eval.json")
evaluateWithAnswers = EvalWithAnswers(evaluationData)

# Corpus handed to the querier: the JSON-backed dataset wrapped in
# ContentsDataset, built in a single expression.
augmented_data = ContentsDataset(
    FromJsonDataset(json_file="../../data/pubmed_500K.json")
)

# Interactive Hugging Face Hub login: renders a login widget in the cell output.
# NOTE(review): presumably needed to access the gated base model — confirm.
from huggingface_hub import notebook_login
notebook_login()

# Vector store used for retrieval: construct it once, then load the saved index.
storage = FaissStorage(
    # NOTE(review): presumably has to match the dimension the saved index was
    # built with — confirm before changing.
    dimension=800,
)
storage.load("../../outputs/store/pubmed_500K.index")

tokenizer = load_tokenizer()
tokenizer_fn = build_tokenizer_function(tokenizer)

# The querier is built from the loaded store. `storage` is deliberately not
# re-created afterwards: rebinding it to a fresh FaissStorage would leave the
# name pointing at an index that was never loaded, while the querier would
# still hold the loaded one.
querier = build_querier(storage, augmented_data, tokenizer_fn)


from peft import PeftModel
from transformers import BitsAndBytesConfig

device = "cuda" if torch.cuda.is_available() else "cpu"

# All fine-tuning artefacts live under one checkpoint directory; derive the
# individual locations from it instead of repeating the absolute path.
checkpoint_dir = Path("/workspace/pytorch_training/10_rag/notebooks/train/last-checkpoint")
checkpoint_path = checkpoint_dir / "model"

adapted_tokenizer = AutoTokenizer.from_pretrained(str(checkpoint_dir / "tokenizer"))

# 8-bit loading goes through `quantization_config`; passing `load_in_8bit`
# directly to `from_pretrained` is deprecated.
base_model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint_path,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    pad_token_id=adapted_tokenizer.pad_token_id,
    num_labels=4,
)

# NOTE(review): a recorded run of this cell warned that `score.weight` was
# newly initialized when loading the base model, and reported accuracy 0.24
# after 100 items with 4 labels. Verify that the adapter loaded below actually
# restores the trained classification head.
model = PeftModel.from_pretrained(base_model, str(checkpoint_dir / "trainer")).to(device)

# Switch to evaluation mode before running the evaluation loop.
model.eval()

# Build the forward callable from the model, its tokenizer and the querier.
forward = build_forwarder(
    model, adapted_tokenizer, querier,
    k_augmentations=1, prompt_builder=prompt, device=device,
)


def forward_and_get_arg_max(question, options):
    """Call ``forward`` for one question, passing ``options`` by keyword.

    Returns whatever ``forward`` returns; no argmax is applied here.
    """
    return forward(question, options=options)

def pick_from_classifier(out):
    """Pick the index of the largest value in ``out.logits[0]``.

    Only the first entry along the leading dimension of ``out.logits`` is
    considered. The result is a 0-dim tensor, not a Python int.
    """
    first_entry = out.logits[0]
    return first_entry.argmax()

# Run the evaluation: `forward_and_get_arg_max` produces the model output for
# each item and `pick_from_classifier` turns that output into a chosen index.
accuracy = evaluate(
    eval_dataset=evaluateWithAnswers,
    forward_fn=forward_and_get_arg_max,
    picker_fn=pick_from_classifier,
)

# Report the final score with two decimals.
print(f"Accuracy: {accuracy:.2f}")



VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Right answer: 1, picked: 2
Accuracy at 0: 0.00
Right answer: 1, picked: 2
Right answer: 3, picked: 2
Right answer: 3, picked: 2
Right answer: 2, picked: 2
Right answer: 2, picked: 2
Right answer: 2, picked: 3
Right answer: 3, picked: 2
Right answer: 3, picked: 2
Right answer: 1, picked: 3
Right answer: 0, picked: 2
Accuracy at 100: 0.24
Right answer: 3, picked: 2


KeyboardInterrupt: 