In [16]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model


# Load the pretrained model and tokenizer.
# NOTE(review): `model_name` is not used by the loading calls below (they read
# the local Qwen2.5-0.5B checkpoint); it is kept in case other cells rely on it.
model_name = "gpt2"
# Single source of truth for the checkpoint location, so the model and the
# tokenizer can never be loaded from two different directories by accident.
model_path = r"D:\Project\LLM\pretrained_models\Qwen\Qwen2.5-0.5B"
model_src = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Configure LoRA: rank-8 adapters on the attention query/value projections.
config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)

# Apply the LoRA configuration to the model and keep the returned PEFT wrapper.
# The return value was previously discarded, which left `model_lora` undefined.
# Note that get_peft_model also injects the adapter layers into `model_src`
# itself, so `model_src` is no longer the untouched base model after this call.
model_lora = get_peft_model(model_src, config)

# Toy dataset: two short sentences tokenized together into one padded batch
# returned as PyTorch tensors.
texts = ["This is a sample sentence.", "Another example sentence here."]
inputs = tokenizer(
    texts,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

# Training hyperparameters and checkpointing settings.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    prediction_loss_only=True,
    save_total_limit=2,
    save_steps=10000,
)

# Define the dataset that serves individual examples from a tokenized batch
class DummyDataset(torch.utils.data.Dataset):
    """Map-style dataset that slices per-example items out of a batch mapping.

    Args:
        encodings: Optional mapping from field name to an indexable batch
            (it must contain an "input_ids" entry, which determines the
            dataset length). When omitted, the notebook-level ``inputs``
            variable is used, exactly as before this argument existed.
    """

    def __init__(self, encodings=None):
        self.encodings = encodings

    def _batch(self):
        # Resolve lazily: with no explicit encodings the dataset keeps
        # following whatever the global `inputs` currently refers to.
        return inputs if self.encodings is None else self.encodings

    def __len__(self):
        return len(self._batch()["input_ids"])

    def __getitem__(self, idx):
        # One example: the idx-th entry of every field in the batch.
        return {k: v[idx] for k, v in self._batch().items()}


# Instantiate the toy dataset from the DummyDataset class defined above.
train_dataset = DummyDataset()

# Training is currently disabled. The commented-out code below expects a
# `model_lora` PEFT model to be defined before it is re-enabled.
# trainer = Trainer(
#     model=model_lora,
#     args=training_args,
#     train_dataset=train_dataset,
# )

# # Start training
# trainer.train()

# # Save the LoRA weights
# model_lora.save_pretrained("./lora_model")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [15]:
# Number of elements across all parameters of model_src that require gradients.
original_trainable_params = sum(
    param.numel()
    for param in model_src.parameters()
    if param.requires_grad
)
# lora_trainable_params = sum(p.numel() for p in model_lora.parameters() if p.requires_grad)
# parameter_ratio = lora_trainable_params / original_trainable_params

def print_trainable_params(model):
    """Print the trainable and total parameter counts of ``model`` and their ratio.

    Args:
        model: Any object whose ``parameters()`` method yields tensors exposing
            ``numel()`` and ``requires_grad`` (e.g. a ``torch.nn.Module``).

    Returns:
        None. Three lines are written to stdout: the trainable count, the
        total count, and trainable/total formatted to four decimals.
    """
    trainable_params = 0
    all_params = 0
    for p in model.parameters():
        count = p.numel()
        all_params += count
        if p.requires_grad:
            trainable_params += count
    # A model without parameters used to raise ZeroDivisionError here;
    # report a ratio of 0 instead.
    ratio = trainable_params / all_params if all_params else 0.0
    print(f"可训练参数数量: {trainable_params}")
    print(f"总参数数量: {all_params}")
    print(f"参数比例: {ratio:.4f}")
# Report trainable vs. total parameter counts for model_src, then print its
# module tree to inspect which layers it contains.
print_trainable_params(model_src)
print(model_src)
# print_trainable_params(model_lora)


可训练参数数量: 540672
总参数数量: 494573440
参数比例: 0.0011
Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 896)
    (layers): ModuleList(
      (0): Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): lora.Linear(
            (base_layer): Linear(in_features=896, out_features=896, bias=True)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.1, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=896, out_features=8, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=8, out_features=896, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): Linear(in_features=896, out_features=128, bias=True)
          (v_proj): lora.Linear(
            (base_layer): Linear(in_featur