In [1]:

# Standard library
import os

# Third-party: data handling, modelling, LoRA fine-tuning and experiment tracking
import pandas as pd
import torch
from datasets import Dataset
from peft import LoraConfig, TaskType, get_peft_model
import swanlab
from swanlab.integration.transformers import SwanLabCallback
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForSeq2Seq,
    Trainer,
    TrainingArguments,
)

# NOTE(review): pynvml is not referenced by any cell visible here; kept in case another cell uses it.
import pynvml  # type: ignore[import]


In [2]:
# Experiment-tracking callback; it is passed to the Trainer in a later cell.
swanlab_callback = SwanLabCallback(project="huanhuanchat", experiment_name="qwen2.5 7b")

In [3]:
# Checkpoint directory, relative to the notebook's working directory.
# expanduser() only rewrites a leading "~", so this relative path passes through unchanged.
model_path = os.path.expanduser("./model/qwen2p5_7b")

# The weights and the tokenizer are both read from the same checkpoint directory.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    dtype=torch.float16,  # load the weights in half precision
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [4]:
# Turn on gradient checkpointing on the base model; this runs before the model is
# wrapped with LoRA adapters (get_peft_model) in a later cell.
model.gradient_checkpointing_enable()

In [5]:
# Read the raw JSON records into a DataFrame, then wrap them as a Hugging Face Dataset.
df = pd.read_json(path_or_buf="./data/huanhuan.json")
ds = Dataset.from_pandas(df=df)

In [6]:
# Display the dataset summary (column names and number of rows).
ds

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 3729
})

In [7]:
def process_func(example, max_length=256):
    """Turn one instruction/response record into causal-LM training features.

    The record is rendered with the chat markers used in this notebook: a fixed
    system prompt, the user turn, then the assistant turn.  Prompt positions are
    labelled ``-100`` so that only the assistant reply (plus the trailing pad
    token) carries real label ids.

    Relies on the module-level ``tokenizer`` created in an earlier cell.

    Args:
        example: Mapping with ``"instruction"`` and ``"output"`` strings and an
            optional ``"input"`` string, which is appended to the instruction
            inside the user turn.
        max_length: Maximum number of tokens kept per sample; longer samples
            are truncated on the right.  Defaults to 256.

    Returns:
        dict with three equally long lists: ``input_ids``, ``attention_mask``
        and ``labels``.
    """
    instruction, output = example["instruction"], example["output"]

    # The dataset also has an ``input`` column.  Fold it into the user turn
    # instead of silently dropping it; an empty, missing or non-string value
    # leaves the prompt exactly as it was.
    extra = example.get("input")
    if not isinstance(extra, str):
        extra = ""
    user_text = f"{instruction}{extra}"

    prompt = tokenizer(
        "<|im_start|>system\n现在你要扮演皇帝身边的女人--甄嬛<|im_end|>\n"
        f"<|im_start|>user\n{user_text}<|im_end|>\n"
        "<|im_start|>assistant\n",
        add_special_tokens=False,
    )
    reply = tokenizer(f"{output}<|im_end|>", add_special_tokens=False)

    # The pad token is appended as a final marker; it is attended to (mask 1)
    # and kept in the labels, so it is part of the training target.
    pad_id = tokenizer.pad_token_id
    input_ids = prompt["input_ids"] + reply["input_ids"] + [pad_id]
    attention_mask = prompt["attention_mask"] + reply["attention_mask"] + [1]
    labels = [-100] * len(prompt["input_ids"]) + reply["input_ids"] + [pad_id]

    # Right-truncate over-long samples; slicing is a no-op for shorter ones.
    return {
        "input_ids": input_ids[:max_length],
        "attention_mask": attention_mask[:max_length],
        "labels": labels[:max_length],
    }

In [8]:
# Tokenize every record; the original text columns are removed so only the
# features returned by process_func remain.
tokenized_id = ds.map(
    function=process_func,
    remove_columns=ds.column_names,
)

Map:   0%|          | 0/3729 [00:00<?, ? examples/s]

In [9]:
# Display the tokenized dataset summary (feature names and number of rows).
tokenized_id

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 3729
})

In [10]:
# LoRA adapter settings for causal-LM fine-tuning.
config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,  # training mode
    r=8,  # LoRA rank
    lora_alpha=32,  # LoRA alpha; see the LoRA method description for its role
    lora_dropout=0.1,  # dropout ratio
    # Adapters are attached to the seven projection modules listed here.
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

# Wrap the base model with the adapters, then print the trainable-parameter summary.
model = get_peft_model(model, config)
model.print_trainable_parameters()

trainable params: 20,185,088 || all params: 7,635,801,600 || trainable%: 0.2643


In [11]:
args = TrainingArguments(
    output_dir="./output/qwen2p5_7b",
    # --- optimisation ---
    num_train_epochs=3,
    learning_rate=1e-4,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 samples per device per optimizer step
    fp16=True,
    gradient_checkpointing=True,
    # --- logging and checkpoints ---
    logging_steps=10,
    save_steps=100,  # write a checkpoint every 100 steps
    save_total_limit=4,
    save_on_each_node=True,
    report_to="none",  # no built-in reporters; SwanLab is attached as a Trainer callback
)

In [12]:
# Assemble the trainer from the LoRA-wrapped model, the tokenized dataset,
# the training arguments and the SwanLab callback.
trainer = Trainer(
    args=args,
    model=model,
    callbacks=[swanlab_callback],
    train_dataset=tokenized_id,
    data_collator=DataCollatorForSeq2Seq(
        tokenizer=tokenizer,
        padding=True,  # pad the samples of each batch to a common length
    ),
)

In [13]:
trainer.train() # start training 

Output()

Output()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
10,3.6498
20,3.6364
30,3.3728
40,3.2519
50,3.2758
60,3.2271
70,3.2415
80,3.2977
90,3.3036
100,3.2199


TrainOutput(global_step=702, training_loss=2.7965879718802253, metrics={'train_runtime': 1898.1348, 'train_samples_per_second': 5.894, 'train_steps_per_second': 0.37, 'total_flos': 4.626660412026778e+16, 'train_loss': 2.7965879718802253, 'epoch': 3.0})