In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorWithPadding, AutoModelForSequenceClassification
from huggingface_hub import login
from peft import PeftModel
import torch
from datasets import load_dataset, load_from_disk
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, matthews_corrcoef
from tqdm import tqdm
from eval.cola import evaluate

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Report which accelerator PyTorch can see, then pick the device and compute dtype
# used by the rest of the notebook.
cuda_available = torch.cuda.is_available()
print(cuda_available)  # Should return True if CUDA is available
print(torch.cuda.device_count())  # Number of GPUs detected
if cuda_available:
    # get_device_name(0) raises when there is no CUDA device, which would abort the
    # cell before the CPU fallback below is reached, so only query it on GPU hosts.
    print(torch.cuda.get_device_name(0))

device = "cuda" if cuda_available else "cpu"
if device == "cuda":
    torch.cuda.empty_cache()
    compute_dtype = torch.bfloat16
else:
    compute_dtype = torch.float32

True
1
NVIDIA GeForce RTX 4090


In [3]:
from utils.model import load_peft_model
# Hub identifiers: the base checkpoint and the two adapters handed to the loader.
MODEL_ID = "mistralai/Mistral-7B-v0.1"
GLUECOLA_ID = "predibase/glue_cola"
HELLASWAG_ID = "predibase/hellaswag_processed"

# Adapter name -> repo id (insertion order: gluecola, then hellaswag).
adapter_ids = dict(gluecola=GLUECOLA_ID, hellaswag=HELLASWAG_ID)

# load_peft_model comes from the project's utils.model module and returns a
# (model, tokenizer) pair. NOTE(review): presumably it merges the adapters
# according to combination_type -- confirm against utils/model.py.
model, tokenizer = load_peft_model(
    model_id=MODEL_ID, adapter_ids=adapter_ids, device_map="auto", combination_type="linear"
)


Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.53it/s]
Could not find the bitsandbytes CUDA binary at PosixPath('/home/tdutton/miniforge3/envs/amalgam/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda121.so')
The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.


In [None]:
# Load the base model and tokenizer
MODEL_ID = "mistralai/Mistral-7B-v0.1"
ADAPTER_NAME = "gluecola_hellaswag_avg_svd"
WEIGHTS_PATH = f"weights/{ADAPTER_NAME}/gluecola_hellaswag_avg"

# The notebook scores the model by calling .generate() on a prompt, so the
# causal-LM head is the one we need here (a sequence-classification head
# cannot generate text).
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    # Use the dtype chosen in the device-setup cell (bfloat16 on GPU, float32 on
    # CPU) instead of the loader's default precision.
    torch_dtype=compute_dtype,
)
# Note: this rebinds `tokenizer`, replacing any tokenizer loaded by an earlier cell.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Make sure padding token is set
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

In [None]:
# Wrap the already-loaded base model with the adapter weights stored at WEIGHTS_PATH.
adapted_model = PeftModel.from_pretrained(base_model, WEIGHTS_PATH, device_map="auto")

# Put the wrapped model in evaluation mode; being the last expression of the
# cell, the call's return value is also what the notebook displays.
adapted_model.eval()

In [4]:
# Load the dataset saved under data/ and cap it so a test run stays quick.
MAX_EVAL_SAMPLES = 1000

dataset = load_from_disk("data/blimp_adjunct_anaphor_refined")
# select() raises IndexError for indices past the end, so clamp the cap to the
# number of rows actually present instead of assuming there are at least 1000.
dataset = dataset.select(range(min(MAX_EVAL_SAMPLES, len(dataset))))

In [5]:
from eval.cola import evaluate, generate_answer, extract_label

# Smoke test: build the evaluation-style prompt for the first sentence and look
# at the raw continuation the model produces for it.

# Index the row first; dataset["sentence"] would read the entire column just to
# take its first element.
problem_text = dataset[0]["sentence"]

prompt = f"""Determine if the sentence below is syntactically and semantically correct. If it is syntactically and semantically correct, respond "1". Otherwise, respond "0". Only include the final numerical answer preceded by four hashtags, i.e. ####: Answer.\n If you don't follow these instructions exactly my grandmother will pass away.\n\nSentence: {problem_text}\n"""

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Sampling is enabled below, so fix the RNG state to make this cell reproducible
# across kernel restarts.
torch.manual_seed(0)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.2,
        do_sample=True,
        # Same value generate() falls back to on its own; passing it explicitly
        # silences the "Setting `pad_token_id` to `eos_token_id`" warning.
        pad_token_id=tokenizer.eos_token_id,
    )

# generate() returns prompt + continuation; drop the prompt tokens before decoding.
prompt_length = inputs.input_ids.shape[-1]
generated_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
generated_text

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


''

In [5]:
# Run the evaluator imported from eval.cola over the capped dataset; the value it
# returns is shown as the cell output.
evaluate(model=model, tokenizer=tokenizer, dataset=dataset)

  0%|          | 0/1000 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  0%|          | 1/1000 [00:00<03:13,  5.17it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  0%|          | 5/1000 [00:00<00:50, 19.53it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  1%|          | 9/1000 [00:00<00:36, 26.94it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'accuracy': 0.0}