In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import pandas as pd
from peft import PeftModel, PeftModelForCausalLM

In [142]:
model_name_or_path = "meta-llama/Llama-3.2-1B"
# model_name_or_path = "Llama-3.2-1B-abstract"
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map=device,
)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
tokenizer.pad_token = "<|finetune_right_pad_id|>"                                     

In [116]:
model_name_or_path = "meta-llama/Llama-3.2-1B"
peft_model_id = "Llama-3.2-1B-abstract-4bit"
device = "cuda" if torch.cuda.is_available() else "cpu"
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
tokenizer.pad_token = "<|finetune_right_pad_id|>"  
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path, device_map=device, quantization_config=quantization_config
)
model = PeftModelForCausalLM.from_pretrained(model, peft_model_id)

In [143]:
model.device

device(type='cuda', index=0)

In [37]:
df = pd.read_csv("simple_med_eval.csv")
correct_ans_mapping = {
    0: "A",
    1: "B",
    2: "C",
    3: "D"
}
df

Unnamed: 0,question,ans0,ans1,ans2,ans3,label
0,What is the gold standard method for detecting...,ELISA,RT-PCR,Western,Flowcytometry,1
1,Which neurotransmitter system is primarily ass...,Dopaminergic,Serotonergic,Opioid,Cholinergic,2
2,What is the primary embryonic origin of microg...,Liver,Yolk Sac,Bone Marrow,Thymus,1
3,Which pediatric vasculitis is classically asso...,Kawasaki,Takayasu,Behçet,Sarcoidosis,0
4,Which immune cell type is primarily responsibl...,Bcells,CD4,CD8,NKcells,2


In [144]:
question_idx = 0
question = df.iloc[question_idx]["question"]
ans0 = df.iloc[question_idx]["ans0"]
ans1 = df.iloc[question_idx]["ans1"]
ans2 = df.iloc[question_idx]["ans2"]
ans3 = df.iloc[question_idx]["ans3"]
correct_ans = df.iloc[question_idx]["label"]
prompt = f"""
Medicine and Health
Multiple choice questions
Solution set:
Question: Which type of model does the DUST framework use for virtual staining in histopathology?
A: GANs
B: Diffusion
C: Transformers
D: CNNs
Answer: B
Question: Which organization provides national-level hospital accreditation in India?
A: WHO
B: KASH
C: NABH
D: CDC
Answer: C
Question: {question}
A: {ans0}
B: {ans1}
C: {ans2}
D: {ans3}
Answer:"""

In [145]:
inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_length=150,
        do_sample=True,
        top_k=10,
        temperature=0.01,
    )
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# print(f"Response: {response[len(prompt):]}")
print(f"Response: {response}")
print("--------------------")
print(f"Correct Answer: {correct_ans_mapping[correct_ans]}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Response: 
Medicine and Health
Multiple choice questions
Solution set:
Question: Which type of model does the DUST framework use for virtual staining in histopathology?
A: GANs
B: Diffusion
C: Transformers
D: CNNs
Answer: B
Question: Which organization provides national-level hospital accreditation in India?
A: WHO
B: KASH
C: NABH
D: CDC
Answer: C
Question: What is the gold standard method for detecting SARS-CoV-2 in clinical samples?
A: ELISA
B: RT-PCR
C: Western
D: Flowcytometry
Answer: B
Question: Which of the following is the most common cause
--------------------
Correct Answer: B
