<a href="https://colab.research.google.com/github/t8101349/LLM-Finetune/blob/main/unsloth_Gemma3_%E5%BE%AE%E8%AA%BF2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
# The cell magic above captures this cell's output, so the pip logs are not displayed.
import os
# Any environment variable name containing "COLAB_" is taken to mean a Colab runtime.
if "COLAB_" not in "".join(os.environ.keys()):
    # Outside Colab: plain install of unsloth (pip resolves its dependencies).
    !pip install unsloth
else:
    # On Colab: install the listed packages with --no-deps, i.e. without pip
    # pulling in their dependencies; xformers is pinned to 0.0.29.post3.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth
# Runs in both cases: install transformers from the v4.49.0-Gemma-3 git ref.
!pip install --no-deps git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3

In [3]:
# FastModel is Unsloth's entry point for loading a pretrained model.
from unsloth import FastModel
import torch

# from_pretrained returns a (model, tokenizer) pair, unpacked here.
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/gemma-3-4b-it",
    # Maximum sequence length the model is set up for; adjust as needed.
    max_seq_length = 2048,
    # Full-parameter fine-tuning is switched off.
    full_finetuning = False,
    # Load with 4-bit quantization: per the original author's note this cuts
    # memory use substantially and may cost a little accuracy.
    load_in_4bit = True,
    # 8-bit loading is off; per the original author's note it is more precise
    # than 4-bit but roughly doubles memory use.
    load_in_8bit = False,
)

NotImplementedError: Unsloth: No NVIDIA GPU found? Unsloth currently only supports GPUs!

In [None]:
# Wrap the loaded model with LoRA adapters.
model = FastModel.get_peft_model(
    model,
    # Which parts of the network receive adapters.
    finetune_language_layers   = True,   # language layers: the core of a text task
    finetune_vision_layers     = False,  # text-only task, so vision layers are skipped
    finetune_attention_modules = True,   # attention modules
    finetune_mlp_modules       = True,   # MLP modules
    # LoRA hyper-parameters.
    r = 8,             # rank; per the original note, higher = more capacity, slower, easier to overfit
    lora_alpha = 8,    # scaling factor
    lora_dropout = 0,  # dropout rate
    bias = "none",
    # Random seed; the original note says the value is arbitrary.
    random_state = 11,
)

In [None]:
from datasets import load_dataset
import json

# Open the dataset with streaming=True so it is not downloaded in full up front.
dataset = load_dataset(
    "Congliu/Chinese-DeepSeek-R1-Distill-data-110k-SFT",
    streaming=True,
)

# Keep only records whose repo_name equals 'zhihu/zhihu_score9.0-10_clean_v10'.
filtered_dataset = dataset.filter(
    lambda record: record['repo_name'] == 'zhihu/zhihu_score9.0-10_clean_v10'
)



def save_to_json(dataset, filename):
    """Write the records of ``dataset`` to ``filename`` as JSON, one per line.

    Args:
        dataset: Iterable of JSON-serializable records; it is consumed one
            record at a time, so a streaming dataset works.
        filename: Path of the output file. It is opened in write mode with
            UTF-8 encoding, so existing content is replaced.

    Each record is serialized with ``ensure_ascii=False`` (non-ASCII text is
    written as-is) and followed by a newline.
    """
    with open(filename, 'w', encoding='utf-8') as out_file:
        out_file.writelines(
            json.dumps(record, ensure_ascii=False) + '\n'
            for record in dataset
        )


# Stream the filtered 'train' split to zhihu_data.json, one JSON record per line.
save_to_json(filtered_dataset['train'], 'zhihu_data.json')

# Report the file that was actually written above (zhihu_data.json).
print("数据已保存到 zhihu_data.json")

In [None]:
from unsloth.chat_templates import get_chat_template

# Attach the "gemma-3" chat template to the tokenizer.
tokenizer = get_chat_template(tokenizer, chat_template = "gemma-3")

import re
from datasets import load_dataset

# Read the local zhihu_data.json file back in with the "json" loader,
# requesting its "train" split.
dataset = load_dataset(
    "json",
    split="train",
    data_files="zhihu_data.json",
)

def apply_chat_template_zhihu(examples):
    """Render a batch of instruction/input/output records as Gemma-3 chat text.

    Args:
        examples: Batched mapping with parallel lists under the keys
            "instruction", "input" and "output". Entries may be ``None``;
            they are treated as empty strings.

    Returns:
        dict: ``{"text": [...]}`` with one formatted conversation per record.

    The text follows the Gemma-3 turn layout
    ``<start_of_turn>role\\n{content}<end_of_turn>\\n`` with no extra newline
    before ``<end_of_turn>``, so training text matches what the "gemma-3" chat
    template renders at inference. No literal ``<bos>`` is emitted: the
    tokenizer is expected to prepend it during training tokenization, and a
    literal one would then be duplicated (NOTE(review): confirm against the
    TRL/Unsloth versions in use).
    """
    conversations = []
    for instruction, input_text, output_text in zip(
        examples["instruction"], examples["input"], examples["output"]
    ):
        # Missing (None) fields are treated as empty strings instead of raising.
        instruction = (instruction or "").strip()
        input_text = (input_text or "").strip()

        # User turn: the instruction, plus the input on its own line when present.
        if input_text:
            user_message = instruction + "\n" + input_text
        else:
            user_message = instruction

        # Drop every <think>...</think> span (DOTALL lets it cross newlines).
        cleaned_output = re.sub(
            r'<think>.*?</think>', '', output_text or "", flags=re.DOTALL
        ).strip()

        conversations.append(
            f"<start_of_turn>user\n{user_message}<end_of_turn>\n"
            f"<start_of_turn>model\n{cleaned_output}<end_of_turn>\n"
        )
    return {"text": conversations}

# Run the formatting function over the dataset in batches.
dataset = dataset.map(function=apply_chat_template_zhihu, batched=True)

In [None]:
from trl import SFTTrainer, SFTConfig

# Supervised fine-tuning trainer over the "text" column of the dataset.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    # No evaluation is run; pass a dataset here to enable it.
    eval_dataset = None,
    args = SFTConfig(
        dataset_text_field = "text",
        # Batching: 2 samples per device, gradients accumulated over 4 steps.
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        # Schedule: 30 steps in total, 5 of them warm-up, linear scheduler.
        max_steps = 30,
        warmup_steps = 5,
        lr_scheduler_type = "linear",
        # Optimizer: 8-bit AdamW (per the original note, it saves memory
        # and suits large models).
        optim = "adamw_8bit",
        learning_rate = 2e-4,
        weight_decay = 0.01,
        seed = 114514,
        # Log every step; "none" means no external tracker such as WandB.
        logging_steps = 1,
        report_to = "none",
    ),
)

In [None]:
from unsloth.chat_templates import train_on_responses_only

# Re-wrap the trainer, passing the markers that open the user turn
# (instruction part) and the model turn (response part).
trainer = train_on_responses_only(
    trainer,
    response_part = "<start_of_turn>model\n",
    instruction_part = "<start_of_turn>user\n",
)

In [None]:
# Run training and keep the value returned by trainer.train() in trainer_stats.
trainer_stats = trainer.train()

In [None]:
from unsloth.chat_templates import get_chat_template

# Re-apply the "gemma-3" chat template to the tokenizer before building the prompt.
tokenizer = get_chat_template(tokenizer, chat_template = "gemma-3")

# Single-turn conversation: one user message holding one text part.
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "神经网络的参数都是随机的，有的效果很好，有的效果很差，这真的不是玄学吗？",
            },
        ],
    },
]

# Render the conversation, ending with the generation prompt for the reply.
text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)

# Encode the prompt, move the tensors to CUDA, and generate up to 1024 new tokens.
outputs = model.generate(
    **tokenizer([text], return_tensors = "pt").to("cuda"),
    max_new_tokens = 1024,
    temperature = 1.0,
    top_p = 0.95,
    top_k = 64,
)

# Decode the batch and print its first sequence.
print(tokenizer.batch_decode(outputs)[0])

In [None]:
# Streaming output: tokens are printed as they are generated.
from transformers import TextStreamer

# Single-turn conversation: one user message holding one text part.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "我亲爸和我亲妈结婚，算不算近亲结婚？"},
        ],
    },
]

# Render the conversation, ending with the generation prompt for the reply.
text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)

# Generate up to 256 new tokens; the streamer prints them and skips the prompt.
# The return value is discarded.
_ = model.generate(
    **tokenizer([text], return_tensors = "pt").to("cuda"),
    max_new_tokens = 256,
    temperature = 1.0,
    top_p = 0.95,
    top_k = 64,
    streamer = TextStreamer(tokenizer, skip_prompt = True),
)