In [5]:
import torch
from datasets import load_dataset
from peft import PeftConfig, PeftModel
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

In [6]:
# Load the test split of the AG News dataset.
test_dataset = load_dataset(path="ag_news", split="test")

In [8]:

# Load the tokenizer from the same checkpoint name that the base model uses.
model_name = "llama3"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Pad on the right, reusing the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [10]:
# Preprocessing function applied to batches of examples by `Dataset.map`.
def preprocess_function(examples):
    """Tokenize a batch of examples and attach their labels.

    Args:
        examples: Mapping with a "text" entry (passed to the tokenizer) and a
            "label" entry (copied through unchanged).

    Returns:
        The tokenizer output with an added "labels" entry holding the values
        of ``examples["label"]``.
    """
    # Truncate to 256 tokens but do not pad here. Padding every example to
    # max_length wastes compute on short texts; the data collator pads each
    # batch to its longest sequence at batch time instead.
    tokenized = tokenizer(examples["text"], truncation=True, max_length=256)
    # Keep the targets, stored directly under the "labels" key.
    tokenized["labels"] = examples["label"]
    return tokenized

In [12]:
# Tokenize the test split in batches. Every original column is dropped after
# mapping: preprocess_function already emits the targets as "labels", so
# keeping the raw "label" column would only duplicate them in each batch.
encoded_test_dataset = test_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=test_dataset.column_names,
)

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

In [13]:
# Load the base model with a 4-label classification head, quantized to 8-bit.
# Quantization settings are passed through `quantization_config`; passing
# `load_in_8bit` directly to `from_pretrained` is deprecated.
base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=4,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at llama3 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
# Set the padding token id on the model config to the tokenizer's pad token id.
# NOTE(review): presumably needed so the classification head can handle padded
# batches — confirm against the transformers documentation.
base_model.config.pad_token_id = tokenizer.pad_token_id

In [21]:
# Attach the trained PEFT adapter stored under "final_model" to the base model.
# NOTE(review): the base model load reported `score.weight` as newly
# initialized, so the metrics are only meaningful if this adapter checkpoint
# restores the trained classification head — confirm.
peft_model_path = "final_model"
model = PeftModel.from_pretrained(model=base_model, model_id=peft_model_path)

In [15]:
# Evaluation metrics reported by the Trainer.
def compute_metrics(pred):
    """Compute accuracy and weighted precision/recall/F1 for a prediction object.

    Args:
        pred: Object exposing ``label_ids`` (the reference labels) and
            ``predictions`` (per-example scores; the predicted class is the
            argmax over the last axis).

    Returns:
        A dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
    """
    y_true, y_pred = pred.label_ids, pred.predictions.argmax(-1)

    # The call returns (precision, recall, f1, support); zip() stops after the
    # three named entries, so the trailing support value is discarded.
    weighted = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    scores = dict(zip(('precision', 'recall', 'f1'), weighted))
    scores['accuracy'] = accuracy_score(y_true, y_pred)

    # Emit the metrics in a fixed key order.
    return {key: scores[key] for key in ('accuracy', 'f1', 'precision', 'recall')}

In [22]:

# Evaluation settings for the Trainer. These arguments are only used for
# evaluate() in this notebook, so the training-only gradient accumulation
# setting is omitted.
testing_args = TrainingArguments(
    output_dir="./test_results",
    per_device_eval_batch_size=16,
    fp16=True,  # enable mixed precision
    dataloader_num_workers=4,
    remove_unused_columns=False,  # hand dataset columns to the collator as-is
)

In [23]:
# Create the data collator; padding="longest" pads each batch to the length of
# its longest sequence using the tokenizer's padding settings.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding="longest")

In [24]:
# Build the Trainer used for evaluation only: no training dataset is supplied.
trainer = Trainer(
    args=testing_args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    eval_dataset=encoded_test_dataset,
    compute_metrics=compute_metrics,
)

In [25]:
# Run evaluation on the encoded test set, then display the returned metrics
# as the cell output so they are visible in the notebook.
results = trainer.evaluate()
results

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33ms1820587[0m. Use [1m`wandb login --relogin`[0m to force relogin
