In [None]:

from transformers import AutoTokenizer,AutoModelForCausalLM,TrainingArguments,Trainer,DataCollatorForSeq2Seq
import os
import pandas as pd
from datasets import  Dataset
import swanlab
from swanlab.integration.transformers import SwanLabCallback
from peft import LoraConfig,TaskType,get_peft_model
import torch
from transformers import BitsAndBytesConfig

In [None]:
# SwanLab experiment tracker; it is handed to the Trainer through its
# `callbacks` list in a later cell.
swanlab_callback = SwanLabCallback(
    experiment_name="qwen2.5 7b",
    project="huanhuanchat",
)

In [None]:
# Local model directory (expanduser leaves a path without "~" unchanged).
model_path = os.path.expanduser("./model/qwen2p5_7b")

# The tokenizer and the weights are both read from the same directory.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    dtype=torch.float16,  # load the weights in half precision
)

In [None]:
# Turn on gradient checkpointing for the loaded model; this runs before the
# model is wrapped with LoRA adapters in a later cell.
model.gradient_checkpointing_enable()

In [None]:
# Read the JSON training records into a DataFrame, then wrap that frame as a
# `datasets.Dataset` so it can be mapped over in the cells below.
df = pd.read_json(path_or_buf="./data/huanhuan.json")
ds = Dataset.from_pandas(df=df)

In [None]:
# Display the loaded dataset as this cell's output.
ds

In [None]:
def process_func(example, max_length=256):
    """Convert one instruction/output record into causal-LM training features.

    The prompt is rendered with the `<|im_start|>` / `<|im_end|>` chat layout
    (system, user, then an open assistant turn) and the answer is appended
    after it, followed by one end token. Prompt positions are set to -100 in
    ``labels`` (the label value Hugging Face losses ignore), so only the
    answer and the end token are used as training targets.

    Relies on the notebook-level ``tokenizer`` defined in an earlier cell.

    Args:
        example: Mapping that provides the ``'instruction'`` and ``'output'``
            fields (expected to be text).
        max_length: Maximum number of tokens kept per sample; longer samples
            are cut off on the right. Defaults to 256, the value that was
            previously hard-coded.

    Returns:
        dict with three equally long lists: ``input_ids``, ``attention_mask``
        and ``labels``.

    Raises:
        ValueError: If the tokenizer defines neither a pad token id nor an
            EOS token id, so no end token can be appended.
    """
    instruction, output = example['instruction'], example['output']

    prompt_text = (
        "<|im_start|>system\n现在你要扮演皇帝身边的女人--甄嬛<|im_end|>\n"
        f"<|im_start|>user\n{instruction}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    prompt = tokenizer(prompt_text, add_special_tokens=False)
    answer = tokenizer(f"{output}<|im_end|>", add_special_tokens=False)

    # The pad token doubles as the end-of-sample marker. Guard against
    # tokenizers without one: a None id would otherwise end up inside
    # input_ids / labels and only fail later, far from the cause.
    end_token_id = tokenizer.pad_token_id
    if end_token_id is None:
        end_token_id = tokenizer.eos_token_id
    if end_token_id is None:
        raise ValueError(
            "tokenizer has neither pad_token_id nor eos_token_id; "
            "cannot append an end token"
        )

    input_ids = prompt["input_ids"] + answer["input_ids"] + [end_token_id]
    attention_mask = prompt["attention_mask"] + answer["attention_mask"] + [1]
    labels = [-100] * len(prompt["input_ids"]) + answer["input_ids"] + [end_token_id]

    # Right-truncate; slicing is a no-op for samples that already fit.
    return {
        "input_ids": input_ids[:max_length],
        "attention_mask": attention_mask[:max_length],
        "labels": labels[:max_length],
    }

In [None]:
# Run process_func over every record and drop all original columns, so only
# the fields returned by process_func remain.
tokenized_id = ds.map(
    function=process_func,
    remove_columns=ds.column_names,
)

In [None]:
# Display the tokenized dataset as this cell's output.
tokenized_id

In [None]:
# LoRA adapter settings.
config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,  # training mode
    r=8,  # LoRA rank
    lora_alpha=32,  # LoRA alpha
    lora_dropout=0.1,  # dropout ratio
    # Names of the sub-modules that receive an adapter.
    target_modules=(
        ["q_proj", "k_proj", "v_proj", "o_proj"]
        + ["gate_proj", "up_proj", "down_proj"]
    ),
)

# NOTE: `model` is rebound to the wrapped model here, so re-running this cell
# would wrap an already wrapped model; reload the base model first.
model = get_peft_model(model, config)
model.print_trainable_parameters()  # print the trainable-parameter summary

In [None]:
# Training hyperparameters and bookkeeping options handed to the Trainer.
args = TrainingArguments(
    # --- checkpoints ---
    output_dir="./output/qwen2p5_7b",
    save_steps=100,  # use a smaller value (e.g. 10) for a quick demo run
    save_total_limit=4,
    save_on_each_node=True,
    # --- optimisation ---
    num_train_epochs=3,
    learning_rate=1e-4,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 samples per optimizer step per device
    # --- memory / precision ---
    gradient_checkpointing=True,
    fp16=True,
    # --- logging ---
    logging_steps=10,
    report_to="none",  # SwanLab is attached explicitly via Trainer callbacks
)

In [None]:
# Collator that pads the samples of each batch (padding=True) using the
# tokenizer loaded above.
batch_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)

# Wire the LoRA-wrapped model, the tokenized data, the training arguments and
# the SwanLab callback together.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_id,
    data_collator=batch_collator,
    callbacks=[swanlab_callback],
)

In [None]:
trainer.train() # Start training