# Notebook cell In [2]:
from lm_eval import evaluator
# The harness's Hugging Face wrapper is HFLM in `lm_eval.models.huggingface`;
# there is no `lm_eval.models.transformers` module.
from lm_eval.models.huggingface import HFLM
from lm_eval.utils import make_table
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

def evaluate_peft_model(base_model_name, peft_model_path, tasks=None):
    """Merge a PEFT adapter into its base model and evaluate the result.

    The base model is loaded with transformers, the adapter weights are
    merged into it, and the merged model is wrapped in the
    lm-evaluation-harness ``HFLM`` adapter before running the tasks.

    Args:
        base_model_name: Hub id or local directory of the base model; the
            tokenizer is loaded from the same location.
        peft_model_path: Hub id or local directory of the PEFT adapter.
        tasks: Task name or list of task names to run. Defaults to
            ``["wikitext", "hellaswag"]`` when omitted.

    Returns:
        Whatever ``evaluator.simple_evaluate`` returns. NOTE(review): the
        harness appears to return ``None`` on non-primary ranks in
        distributed runs, so callers should tolerate ``None`` -- confirm
        against the installed lm_eval version.
    """
    # Build the task list per call: a list literal as a default argument
    # would be shared (and mutable) across every invocation.
    if tasks is None:
        task_names = ["wikitext", "hellaswag"]
    elif isinstance(tasks, str):
        # A bare string would otherwise be iterated character by character.
        task_names = [tasks]
    else:
        task_names = list(tasks)

    # Load the base model and its tokenizer.
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        device_map="auto",
        torch_dtype="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)

    # Attach the adapter, then fold its weights into the base model so the
    # harness sees a plain transformers model.
    peft_model = PeftModel.from_pretrained(base_model, peft_model_path)
    merged_model = peft_model.merge_and_unload()

    # Wrap the already-loaded model for lm-evaluation-harness.
    # NOTE(review): no `device` is passed because HFLM appears to take the
    # device from a preloaded model and ignore the argument -- confirm.
    lm = HFLM(
        pretrained=merged_model,
        tokenizer=tokenizer,
        batch_size=2,  # tune to the available GPU memory
    )

    # Run the evaluation.
    results = evaluator.simple_evaluate(model=lm, tasks=task_names)

    # Print the per-task table, plus the aggregated group table when the
    # selected tasks produce one.
    if results is not None:
        print(make_table(results))
        if "groups" in results:
            print(make_table(results, "groups"))
    return results

# Example: evaluate a PEFT fine-tuned model when this file is run as a script.
if __name__ == "__main__":
    base_model_dir = "../model_save/base_model/qwen-1.5-1.8b/"
    # Directory holding the PEFT adapter.
    adapter_dir = "../model_save/dpo_model/qwen-1.5-1.8b-dpo"
    # Evaluation tasks to run.
    eval_tasks = ["mmlu"]

    evaluate_peft_model(
        base_model_name=base_model_dir,
        peft_model_path=adapter_dir,
        tasks=eval_tasks,
    )


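# Recorded cell output:
#   ModuleNotFoundError: No module named 'lm_eval.models.transformers'
# Cause: lm_eval has no `models.transformers` submodule; its Hugging Face
# wrapper (HFLM) lives in `lm_eval.models.huggingface`.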