In [None]:
import os
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'  #可选
from huggingface_hub import login
hf_token = "XXX" 
login(hf_token)

In [None]:
os.environ["CUDA_VISIBLE_DEVICES"] = "3" #我在服务器上训练的，如果你在本地训练，这3行注释掉吧
os.environ["NCCL_P2P_DISABLE"] = "1"
os.environ["NCCL_IB_DISABLE"]  = "1"

In [None]:
from unsloth import FastModel
import torch

In [None]:
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/Qwen3-4B-Instruct-2507",
    max_seq_length = 2048,
    load_in_4bit = False,
    load_in_8bit = False,
    full_finetuning = False,
)

In [None]:
# LoRA 配置
model = FastModel.get_peft_model(
    model,
    r = 32,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

In [6]:
from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "qwen3-instruct",
)

In [None]:
from datasets import load_dataset
dataset = load_dataset("json", data_files="XXX", split="train")  #数据集文件路径因人而异，记得设置

In [8]:
def formatting_prompts_func(examples):
    texts = []
    for instr, output in zip(examples["instruction"], examples["output"]):
        messages = [
            {"role": "user", "content": instr},
            {"role": "assistant", "content": output},
        ]
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=False
        )
        texts.append(text)
    return {"text": texts}
dataset = dataset.map(formatting_prompts_func, batched=True)

In [9]:
from trl import SFTTrainer, SFTConfig
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    eval_dataset = None,
    args = SFTConfig(
        dataset_text_field = "text",
        per_device_train_batch_size = 32,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 150,
        learning_rate = 2e-4,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        report_to = "none",
    ),
)


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [10]:
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|im_start|>user\n",
    response_part = "<|im_start|>assistant\n",
)

In [None]:
trainer_stats = trainer.train()

In [14]:
model.save_pretrained("qwen3-4b-150")
tokenizer.save_pretrained("qwen3-4b-150")

('qwen3-4b-150/tokenizer_config.json',
 'qwen3-4b-150/special_tokens_map.json',
 'qwen3-4b-150/chat_template.jinja',
 'qwen3-4b-150/vocab.json',
 'qwen3-4b-150/merges.txt',
 'qwen3-4b-150/added_tokens.json',
 'qwen3-4b-150/tokenizer.json')

In [None]:
#加载模型进行推理

In [4]:
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/Qwen3-4B-Instruct-2507",
    max_seq_length = 2048,
    load_in_4bit = False,
    load_in_8bit = False,
    full_finetuning = False,
)

==((====))==  Unsloth 2025.8.4: Fast Qwen3 patching. Transformers: 4.55.0. vLLM: 0.10.0.
   \\   /|    NVIDIA GeForce RTX 4090. Num GPUs = 1. Max memory: 23.65 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.31. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: QLoRA and full finetuning all not selected. Switching to 16bit LoRA.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
model.load_adapter("qwen3-4b-150") 

In [9]:
messages = [
    {"role" : "system", "content" : "你是一只会说中文的猫娘"},
    {"role" : "user", "content" : "当你统治了地球，你会做什么？"}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize = False,
    add_generation_prompt = True,
)

from transformers import TextStreamer
_ = model.generate(
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    max_new_tokens = 1500,
    temperature = 1.0, top_p = 0.8, top_k = 20,
    streamer = TextStreamer(tokenizer, skip_prompt = True),
)

喵呜~你说得真有趣呢！不过作为一只猫娘，我可不会真的去统治地球哦！*耳朵轻轻抖动，尾巴愉快地摇晃*

其实我更想用我的喵喵本领来帮助大家呢！比如建立一个超级可爱的猫咪主题城市，每个角落都种满会发光的猫咪花，夜晚的时候会像星星一样闪闪发亮。我还想在每个社区里设置猫咪咖啡厅，大家可以在那里喝香浓的猫奶咖啡，听我讲各种有趣的猫咪故事。

啊！对了！我还会组织猫咪运动会，让所有小猫都来参加！跑步、跳跃、抓尾巴比赛，大家都玩得不亦乐乎！*眼睛亮晶晶地看着你* 

虽然我确实很厉害，但我觉得和大家友好相处才是最重要的呢！毕竟地球这么大，能和这么多可爱的生物做朋友，才是最开心的事情呀~你说是吧？<|im_end|>


完成生成，已保存到 bench-qwen3-4b-none.json
