In [1]:
from datasets import load_from_disk
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from datasets import load_from_disk
from eval.cola import evaluate, generate_answer
import torch
from datasets import load_dataset, load_from_disk
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, matthews_corrcoef
from tqdm import tqdm
from eval.cola import evaluate

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from utils.model import load_peft_model
MODEL_ID = "mistralai/Mistral-7B-v0.1"
GLUECOLA_ID = "predibase/glue_cola"
HELLASWAG_ID = "predibase/hellaswag_processed"

adapter_ids = {
    "gluecola": GLUECOLA_ID,
    "hellaswag": HELLASWAG_ID,
}

model, tokenizer = load_peft_model(
    model_id=MODEL_ID,
    adapter_ids=adapter_ids,
    device_map="auto",
    combination_type="linear",
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


In [None]:
# Load the base model and tokenizer
MODEL_ID = "mistralai/Mistral-7B-v0.1"
ADAPTER_NAME = "gluecola_hellaswag_avg_svd"
WEIGHTS_PATH = f"weights/{ADAPTER_NAME}/gluecola_hellaswag_avg"

# For evaluation tasks, AutoModelForSequenceClassification is more appropriate
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Make sure padding token is set
tokenizer.pad_token = tokenizer.eos_token    
tokenizer.pad_token_id = tokenizer.eos_token_id

In [None]:
adapted_model = PeftModel.from_pretrained(
    base_model,
    WEIGHTS_PATH,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

In [4]:
dataset = load_from_disk("data/blimp_adjunct_anaphor_refined")
dataset['sentence'][0]

"Tara hasn't cured themselves."

In [10]:
prompt_template = (
    "Is the following sentence grammatically correct?\n"
    "Sentence: \"{sentence}\"\n"
    "Answer:"
)

all_preds = []
all_labels = []

for example in tqdm(dataset):
    sentence = example["sentence"]
    label = example["label"]
    
    prompt = prompt_template.format(sentence=sentence)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=3)
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).lower()
    print(response)
    print(label)
    # Crude but effective classification
    if "yes" in response:
        pred = 1
    elif "no" in response:
        pred = 0
    else:
        pred = 0  # default fallback

    all_preds.append(pred)
    all_labels.append(label)
    if len(all_preds) == 10:
        break
    
accuracy = accuracy_score(all_labels, all_preds)
print(f"Accuracy (CoLA): {accuracy:.4f}")

  0%|          | 0/6000 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  0%|          | 1/6000 [00:01<2:54:29,  1.75s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


is the following sentence grammatically correct?
sentence: "tara hasn't cured themselves."
answer: yes, it
0


  0%|          | 2/6000 [00:03<2:55:05,  1.75s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


is the following sentence grammatically correct?
sentence: "this government alarms themselves."
answer: yes, it
0


  0%|          | 3/6000 [00:05<2:55:42,  1.76s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


is the following sentence grammatically correct?
sentence: "who had every actress hidden before scaring jessica?"
answer: yes, it
1


  0%|          | 4/6000 [00:07<2:55:27,  1.76s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


is the following sentence grammatically correct?
sentence: "barbara isn't escaping from himself."
answer: no, it
0


  0%|          | 5/6000 [00:08<2:55:15,  1.75s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


is the following sentence grammatically correct?
sentence: "edward had distracted themselves."
answer: no, it
0


  0%|          | 6/6000 [00:10<2:54:55,  1.75s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


is the following sentence grammatically correct?
sentence: "the lutherans found itself."
answer: no, it
0


  0%|          | 7/6000 [00:12<2:54:58,  1.75s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


is the following sentence grammatically correct?
sentence: "patricia scared himself."
answer: yes, it
0


  0%|          | 8/6000 [00:14<2:55:28,  1.76s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


is the following sentence grammatically correct?
sentence: "kevin doesn't care for himself."
answer: yes, it
1


  0%|          | 9/6000 [00:15<2:54:53,  1.75s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


is the following sentence grammatically correct?
sentence: "who can this cart worry after upsetting waiters?"
answer: yes, it
1


  0%|          | 9/6000 [00:17<3:14:29,  1.95s/it]

is the following sentence grammatically correct?
sentence: "what was debra breaking after cleaning some forks?"
answer: "she was
1
Accuracy (CoLA): 0.6000





In [5]:
evaluate(
    model=model,
    tokenizer=tokenizer,
    dataset=dataset,
    batch_size=128,
    )

  0%|          | 0/6000 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  0%|          | 1/6000 [01:50<184:45:58, 110.88s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  0%|          | 1/6000 [02:23<238:21:43, 143.04s/it]


KeyboardInterrupt: 

In [6]:
# sentence = dataset["sentence"][0]
# prompt = (
#             'Determine if the sentence below is syntactically and semantically correct. If it is syntactically and semantically correct, respond "1". Otherwise, respond "0". '
#             f"\nSentence: {sentence}\n"
#             "Answer:"
#         )
# inputs = tokenizer(
#             prompt, 
#             return_tensors="pt", 
#             padding=True,
#             truncation=True,
#         ).to(model.device)
# with torch.no_grad():
#             outputs = model.generate(
#                 **inputs,
#                 max_new_tokens=256,
#                 temperature=0.2,
#                 do_sample=True,
#                 pad_token_id=tokenizer.eos_token_id  # Explicitly set pad_token_id'
#             )
            
# output = tokenizer.decode(outputs[0], skip_special_tokens=True)
# print(output)
