In [6]:
import argparse
import torch
import numpy as np
from datasets import load_dataset
import evaluate
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    PretrainedConfig,
    default_data_collator,
    TrainingArguments,
    Trainer,
    EvalPrediction,
    AutoConfig,
)
import logging
from peft import PeftModel, AutoPeftModelForSequenceClassification
from torch.utils.data import DataLoader
import random
from Xlora.xlora import add_xlora_to_model
from Xlora.xlora_config import xLoRAConfig
from Xlora.xlora_utils import load_model

In [None]:
# @TODO: fill in the test args
tase_name = 'mrpc'
max_seq_length = 512
padding = "max_length"
model_name = "roberta-base"
lora_model_dir = 
metric =

In [None]:
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Determine if this is a regression task (e.g. stsb)
is_regression = task_name == "stsb"

# Load the dataset and evaluation metric
datasets = load_dataset("glue", task_name)
if not is_regression:
    label_list = datasets["train"].features["label"].names
    num_labels = len(label_list)
else:
    num_labels = 1
metric = evaluate.load("glue", task_name)

task_to_keys = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
}

NameError: name 'args' is not defined

In [None]:
    # Load base model and tokenizer
model = AutoModelForSequenceClassification.from_pretrained(args.model_name, num_labels=num_labels)
tokenizer = AutoTokenizer.from_pretrained(args.model_name, use_fast=True)
args.max_seq_length = min(args.max_seq_length, tokenizer.model_max_length)
sentence1_key, sentence2_key = task_to_keys[args.task_name]

In [None]:
def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    preds = np.squeeze(preds) if is_regression else np.argmax(preds, axis=1)
    if args.task_name is not None:
        result = metric.compute(predictions=preds, references=p.label_ids)
        if len(result) > 1:
            result["combined_score"] = np.mean(list(result.values())).item()
        return result
    elif is_regression:
        return {"mse": ((preds - p.label_ids) ** 2).mean().item()}
# Preprocessing function for tokenization
def preprocess_function(examples):
    tokens = (examples[sentence1_key],) if sentence2_key is None else (examples[sentence1_key], examples[sentence2_key])
    result = tokenizer(*tokens, padding=args.padding, max_length=args.max_seq_length, truncation=True)
    if label_to_id is not None and "label" in examples:
        result["label"] = [label_to_id[l] if l != -1 else -1 for l in examples["label"]]
    return result

In [None]:
# Handle label mapping if needed
label_to_id = None
if (
    model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id
    and args.task_name is not None
    and not is_regression
):
    label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
    if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
        label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
    else:
        logger.warning(
            "Model labels do not match dataset labels. "
            f"Model labels: {list(sorted(label_name_to_id.keys()))}, Dataset labels: {list(sorted(label_list))}."
            " Ignoring model labels."
        )
elif args.task_name is None and not is_regression:
    label_to_id = {v: i for i, v in enumerate(label_list)}

In [None]:
datasets = datasets.map(preprocess_function, batched=True, load_from_cache_file=True)
train_dataset = datasets["train"]
eval_dataset = datasets["validation_matched" if args.task_name == "mnli" else "validation"]
test_dataset = datasets["test_matched" if args.task_name == "mnli" else "test"]
data_collator = default_data_collator

tasks = [args.task_name]
eval_datasets = [eval_dataset]
if args.task_name == "mnli":
    tasks.append("mnli-mm")
    eval_datasets.append(datasets["validation_mismatched"])

In [None]:
XLoRA_model_name = args.lora_model_dir
XLoRA_model, tokenizer = load_model(
    model_name=XLoRA_model_name,
    device="cuda:0",
    dtype=torch.bfloat16,
    load_xlora=True,
    adapters={
        "adapter_1": "./lora_finetuned_model_cola",
        "adapter_2": "./lora_finetuned_model_mrpc",
        "adapter_3": "./lora_finetuned_model_qnli",
        "adapter_4": "./lora_finetuned_model_sst2",
    },
)

In [None]:
trainer = Trainer(
    model=XLoRA_model,
    args=TrainingArguments(
        output_dir="./peft_test_results",
        report_to="none",
    ),
    data_collator=data_collator,           
    compute_metrics=compute_metrics
)
for eval_dataset, task in zip(eval_datasets, tasks):
    metrics = trainer.evaluate(eval_dataset=eval_dataset)
    print(metrics)