PEFT (Parameter-Efficient Fine-Tuning) 的工作原理是在预训练模型的基础上，冻结大部分参数，只微调少量新增的参数。保存下来的 PEFT 适配器（adapter）检查点本身并不包含完整的模型结构和参数，它只保存了微调的部分，以及记录基座模型位置的配置（base_model_name_or_path）。
因此，要使用 PEFT 模型，必须先加载预训练的主模型，然后将 PEFT 模块加载到主模型上，才能进行推理或训练。

In [None]:
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
import torch
import os
# Limit the CUDA devices visible to this process to device index 0.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Report the GPU situation up front so a CPU-only environment is obvious
# before any model loading is attempted.
if not torch.cuda.is_available():
    print("CUDA is not available.")
else:
    print(f"CUDA is available. Using {torch.cuda.device_count()} GPU(s).")
    print(f"Current device: {torch.cuda.current_device()}")

def print_lora_details(model, layer_idx=0, module_name="v_proj"):
    """Print the base weight and LoRA A/B weights of one attention projection.

    The projection is looked up at
    ``model.model.model.layers[layer_idx].self_attn.<module_name>`` and must
    expose ``weight``, ``lora_A.default.weight`` and ``lora_B.default.weight``.

    Args:
        model: Adapter-wrapped model exposing the attribute path above.
        layer_idx: Index into the ``layers`` sequence.
        module_name: Attribute name of the projection under ``self_attn``
            (for example ``"q_proj"`` or ``"v_proj"``).

    Raises:
        IndexError: If ``layer_idx`` is out of range for ``layers``.
        AttributeError: If the projection, or any of the LoRA attributes on
            it, does not exist (e.g. the adapter did not target that module).
    """
    # Plain attribute access instead of eval(): module_name is treated purely
    # as an attribute name, so it can never be executed as Python source.
    attention = model.model.model.layers[layer_idx].self_attn
    projection = getattr(attention, module_name)

    original_weight = projection.weight
    # "default" is the adapter name these LoRA weights are read from.
    lora_A = projection.lora_A.default.weight
    lora_B = projection.lora_B.default.weight

    print(f"Layer {layer_idx} '{module_name}' details:")
    print(f"Original weight:\n{original_weight}")
    print(f"LoRA A:\n{lora_A}")
    print(f"LoRA B:\n{lora_B}")

# Location the adapter config and adapter weights are loaded from.
peft_model_id = "/root/datasetsSplitTrain/results/en_dataset_q_proj/"
config = PeftConfig.from_pretrained(peft_model_id)

# The adapter config names the base checkpoint; that base model has to be
# loaded first, and the adapter is then attached on top of it.
base_model_name = config.base_model_name_or_path
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map='cuda:0',
    torch_dtype=torch.bfloat16,
)

# NOTE(review): torch_dtype is forwarded to PeftModel.from_pretrained as an
# extra keyword argument; confirm against the installed PEFT version that it
# has an effect here.
peft_model = PeftModel.from_pretrained(base_model, peft_model_id, torch_dtype=torch.bfloat16)

# Index of the decoder layer to inspect; change this to look at another layer.
target_layer_number = 0

# "q_proj" is presumably the module this adapter targeted (going by the
# checkpoint directory name) — verify against the adapter config.
print_lora_details(peft_model, layer_idx=target_layer_number, module_name="q_proj")
