In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, DataCollatorForSeq2Seq
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
import pandas as pd
from datasets import Dataset


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Map:   0%|          | 0/3729 [00:00<?, ? examples/s]



In [None]:

# 定义模型名称
model_name = "/root/autodl-tmp/deepseek-r1-distill-llama-8b"  # 修改为可用的模型名称

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = 'right'  # 确保 padding_side 为 'right'

# 加载模型
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map={"": 0},  # 将模型加载到第一个 GPU
    trust_remote_code=True  # 确保加载自定义代码
)

# LoRA配置
lora_config = LoraConfig(
    task_type="CAUSAL_LM",  # 微调模型为自回归模型
    r=16,  # LoRA 低秩分解的秩
    lora_alpha=32,  # LoRA 缩放因子
    target_modules=["q_proj", "v_proj"],  # 目标模块，根据LLaMA3模型结构指定
    lora_dropout=0.05,  # Dropout 概率
    bias="none",  # 不训练 bias
    init_lora_weights=True,  # 初始化 LoRA 层权重
    inference_mode=False  # 允许训练
)

# 将LoRA配置应用到模型
model = get_peft_model(model, lora_config)

# 定义训练参数
training_arguments = TrainingArguments(
    output_dir="./Llama3_8b_LoRA",
    eval_strategy="no",  # 禁用评估
    optim="paged_adamw_8bit",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=8,
    log_level="debug",
    save_strategy="epoch",
    logging_steps=100,
    learning_rate=1e-4,
    fp16=False,  # 根据硬件支持选择
    bf16=False,  # 根据硬件支持选择
    num_train_epochs=3,
    warmup_ratio=0.1,
    lr_scheduler_type="linear",
)

# 数据预处理
def process_func(example):
    MAX_LENGTH = 384
    input_ids, attention_mask, labels = [], [], []
    instruction = tokenizer(f"User: {example['instruction']} {example['input']}\n\n", add_special_tokens=False)
    response = tokenizer(f"Assistant: {example['output']}{tokenizer.eos_token}", add_special_tokens=False)
    input_ids = instruction["input_ids"] + response["input_ids"]
    attention_mask = instruction["attention_mask"] + response["attention_mask"]
    labels = [-100] * len(instruction["input_ids"]) + response["input_ids"]
    if len(input_ids) > MAX_LENGTH:  # 截断
        input_ids = input_ids[:MAX_LENGTH]
        attention_mask = attention_mask[:MAX_LENGTH]
        labels = labels[:MAX_LENGTH]
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }

# 加载数据
df = pd.read_json('./huanhuan.json')
ds = Dataset.from_pandas(df)
tokenized_id = ds.map(process_func, remove_columns=ds.column_names)

# 创建训练器
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=tokenized_id,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)

In [2]:
# 使用训练前的模型回答问题
def generate_response(model, tokenizer, prompt, max_length=128):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=max_length)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# 示例问题
# question = "What is the capital of France?"
question = "你是谁？"
print("Answer before training:")
print(generate_response(model, tokenizer, question))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer before training:
你是谁？是谁派你来的？你的任务是什么？

你是谁？是谁派你来的？你的任务是什么？

你是谁？是谁派你来的？你的任务是什么？

你是谁？是谁派你来的？你的任务是什么？

你是谁？是谁派你来的？你的任务是什么。

你是谁？是谁派你来的？你的任务是什么。

是。

你是。

是。

是。

是。

是。

是。

是。

是。

是。

是是是是是是是是是是是是是是是是是是是是 ..




In [None]:
# 开始训练
trainer.train()
# # 保存模型
# trainer.save_model("./Llama3_8b_LoRA")

Currently training with a batch size of: 2
***** Running training *****
  Num examples = 3,729
  Num Epochs = 3
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 4
  Total optimization steps = 1,398
  Number of trainable parameters = 6,815,744


Step,Training Loss


In [18]:
# 使用训练后的模型回答问题
def generate_response(model, tokenizer, prompt, max_length=128):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=max_length, eos_token_id=tokenizer.eos_token_id)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

question = "你是谁？"
print("Answer after training:")
print(generate_response(model, tokenizer, question))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer after training:
你是谁？我是李清欢，家父是太医少卿，家父是太医少卿，家父是太医少卿。
