# trl 微调

## trl跟随transformer包一同安装,安装transformer后无需后续安装
### 导入相关包并命名全局变量
模型依然选用下载的Qwen3-4B模型

In [1]:
import torch

from datasets import load_dataset
from trl import SFTConfig, SFTTrainer, DataCollatorForCompletionOnlyLM
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

DATASET_PATH = './autodl-tmp/data/train_zh_1000.jsonl'
MODEL_PATH = './autodl-tmp/Qwen3-4B'

### 利用datasets库加载数据集

In [2]:
dataset = load_dataset('json', data_files=DATASET_PATH)['train']
print(dataset)
print(dataset.num_rows)
print(dataset.shape)

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 1000
})
1000
(1000, 3)


### 微调Qwen-4B模型
使用4-bit量化， 半精度浮点数加载模型

In [3]:
print(torch.cuda.is_available())

True


In [4]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)
print(bnb_config)

BitsAndBytesConfig {
  "_load_in_4bit": true,
  "_load_in_8bit": false,
  "bnb_4bit_compute_dtype": "bfloat16",
  "bnb_4bit_quant_storage": "uint8",
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_use_double_quant": true,
  "llm_int8_enable_fp32_cpu_offload": false,
  "llm_int8_has_fp16_weight": false,
  "llm_int8_skip_modules": null,
  "llm_int8_threshold": 6.0,
  "load_in_4bit": true,
  "load_in_8bit": false,
  "quant_method": "bitsandbytes"
}



In [5]:
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map='auto',
    dtype=torch.bfloat16,
    quantization_config=bnb_config
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

### 转换数据格式

In [6]:
def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['instruction'])):
        text = f'### Question: {example['instruction'][i]}\n ### Answer: {example['output'][i]}'
        output_texts.append(text)
    return output_texts

### 加载token

In [7]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

response_template = ' ### Answer:'
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

### 使用PEFT配置LoRA

In [8]:
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=[
        'q_proj', 'k_proj', 'v_proj', 'o_proj'
    ],
    task_type='CAUSAL_LM',
)

### 模型配置

In [9]:
training_args = SFTConfig(
    output_dir='./Qwen3-4B-SFT',
    per_device_train_batch_size=1,
    num_train_epochs=1,
    fp16=True,
    optim='adamw_torch_fused',
    max_seq_length=512,
    logging_steps=50,
)

trainer = SFTTrainer(
    model,
    train_dataset=dataset,
    args=training_args,
    peft_config=peft_config,
    formatting_func=formatting_prompts_func,
    data_collator=collator
)

  super().__init__(


In [10]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.


Step,Training Loss
50,3.0508
100,2.6722
150,2.4551
200,2.683
250,2.3429
300,2.2882
350,2.1493
400,2.4343
450,2.4732
500,2.5772


TrainOutput(global_step=1000, training_loss=2.4362313003540037, metrics={'train_runtime': 217.1585, 'train_samples_per_second': 4.605, 'train_steps_per_second': 4.605, 'total_flos': 3197294627819520.0, 'train_loss': 2.4362313003540037, 'epoch': 1.0})

In [11]:
trainer.save_model()

### 加载模型
对微调前和微调后的模型进行对比

In [13]:
# 微调前
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map='auto',
    torch_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [27]:
def use_template(text):
    return f'### Question: {text}\n ### Answer:'

device = 'cuda'
query = use_template('癌症有哪些表现?')
inputs = tokenizer(query, return_tensors='pt').to(device)
outputs = model.generate(**inputs, max_new_tokens=256)

In [28]:
outputs

tensor([[ 14374,  15846,     25,  68294,    234,  99769, 104719, 101107,   5267,
          16600,  21806,     25,    220,     16,     13,    220, 102010, 100575,
           5122, 102010, 100575,  31235, 101536, 100146, 104723,  71138, 102785,
           3837, 110966, 102010,  17447, 100347,  56652, 100876,   5373,  56652,
         121021,   3837, 100631, 100347, 102010,  17447, 104723,   9370,   5373,
         104165,   9370,   5373,  99660,  38035,   9370,   5373, 110408, 100876,
          27442,   3837, 100631, 102010,  17447, 100347, 100676, 121021,   3837,
         100631, 108888, 121021, 108901,   5373,  92032,   5373, 107241,  18830,
         100682,   3837, 100631, 108888, 121021,  18830,  99577, 101776,   5373,
         107429,   5373, 105748,   5373,  28291, 108681,  49567,  70633, 101107,
           1773,     17,     13,   8908,    224,    118, 100575,   5122, 117684,
           9370, 105184, 101368, 104312, 109244,   5373, 110789,  99389,   5373,
         100277, 100406,   5

In [29]:
tokenizer.decode(outputs[0])

'### Question: 癌症有哪些表现?\n ### Answer: 1. 皮肤癌：皮肤癌最常见的是黑色素瘤，表现为皮肤上出现黑斑、黑痣，或者出现皮肤上黑色的、红色的、银色的、白色的斑点，或者皮肤上出现新的痣，或者原有痣的颜色、大小、形状有变化，或者原有痣有破溃、出血、疼痛、发痒等异常表现。2. 肺癌：肺癌的早期症状主要是咳嗽、咯血、胸痛、气短、声音嘶哑、吞咽困难、胸闷、发热、体重减轻、盗汗等。3. 胃癌：胃癌的早期症状主要是上腹痛、食欲减退、消化不良、恶心、呕吐、胃胀、嗳气、反酸、吞咽困难、胃排空延迟、体重减轻、腹泻、便秘等。4. 结直肠癌：结直肠癌的早期症状包括大便习惯改变，比如腹泻或便秘，或者大便变细，或者大便带血，或者大便颜色改变，或者有腹部不适，或者有不明原因的体重减轻，或者有贫血，或者有腹部包块等。5. 白血病：白血病的早期症状'

In [19]:
# 微调后
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    './Qwen3-4B-SFT',
    device_map='auto',
    torch_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained('./Qwen3-4B-SFT')

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [24]:
def use_template(text):
    return f'### Question: {text}\n ### Answer:'

device = 'cuda'
query = use_template('癌症有哪些表现?')
inputs = tokenizer(query, return_tensors='pt').to(device)
outputs = model.generate(**inputs, max_new_tokens=256)

In [25]:
outputs

tensor([[ 14374,  15846,     25,  68294,    234,  99769, 104719, 101107,   5267,
          16600,  21806,     25,  68294,    234,  99769,   9370, 104595, 101107,
          20412, 101312,  42140, 100535,   3837,  99604, 107397,   9370, 104595,
         101107,  74763,  99200,  16530, 102486,   1773, 107397,   9370, 104595,
         101107,  73670, 102239, 105418, 101368,  33108, 106304, 101368,   1773,
         105418, 101368, 100630,   5122,     16,   5373, 105748,   5122, 105748,
          20412, 107397,   9370, 101536, 101368,   3837, 100000, 107397, 101924,
          31235,  38953,  35568,  99482, 111492, 100653,   3837, 100575, 100406,
          20412, 107397, 101924, 116168, 101368, 100653,   3837, 105748,   9370,
         105155,  73670,  20412, 115483, 100406,   3837, 100253, 100406,   3837,
          99707, 100406,  49567,   3837, 105748,   9370, 105130,  73670,  20412,
         106304,   9370,   3837, 104047,  20412, 112484, 104196,   3837, 105748,
           9370, 102660, 104

In [26]:
tokenizer.decode(outputs[0])

'### Question: 癌症有哪些表现?\n ### Answer: 癌症的临床表现是多种多样的，不同癌症的临床表现也各不相同。癌症的临床表现可以分为全身症状和局部症状。全身症状包括：1、疼痛：疼痛是癌症的常见症状，也是癌症患者最常主诉的症状之一，癌痛是癌症患者最常见的症状之一，疼痛的性质可以是钝痛，刺痛，隐痛等，疼痛的部位可以是局部的，也可以是放射性的，疼痛的强度也可以是轻度的，也可以是剧烈的。2、发热：发热是癌症常见的全身症状之一，也是癌症患者最常见的症状之一。癌症患者发热的性质可以是低热，也可以是高热，发热的持续时间也可以是短的，也可以是长期的。3、乏力：乏力是癌症患者最常主诉的症状之一，也是癌症患者最常见的全身症状之一。乏力的性质可以是轻度的，也可以是重度的。4、消瘦：消瘦是癌症患者最常见的全身症状之一，也是癌症患者最常主诉的症状之一。5、食欲不振：食欲不振是癌症患者最常见的全身症状之一，也是癌症患者最常主诉的症状之一。6、体重减轻：体重减轻是癌症患者'