**引入模型**

In [5]:
%%capture
import os
!pip install --no-deps unsloth vllm
!pip install --no-deps git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3
!pip install datasets huggingface_hub

**加载数据集**

In [3]:
from datasets import load_dataset

# 加载数学问题解答数据集
dataset = load_dataset("ecnu-icalk/cmm-math", split="train")

print(dataset[0])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/2.41k [00:00<?, ?B/s]

train_data.parquet:   0%|          | 0.00/7.73M [00:00<?, ?B/s]

test_data.parquet:   0%|          | 0.00/2.17M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/22248 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5821 [00:00<?, ? examples/s]

{'id': '19873', 'image': '[]', 'answer': 'C', 'solution': 'null', 'level': '高二', 'question': '已知关于 $x$ 的不等式 $a x^{2}-x+b \\geq 0$ 的解集为 $[-2,1]$, 则关于 $x$ 的不等式 $b x^{2}-x+a \\leq 0$ 的解集为 ( )', 'options': 'A. $[-1,2]$\nB. $\\left[-1, \\frac{1}{2}\\right]$\nC. $\\left[-\\frac{1}{2}, 1\\right]$\nD. $\\left[\\begin{array}{cc}-1, & -\\frac{1}{2}\\end{array}\\right]$', 'subject': '解析几何', 'analysis': '$:$ 关于 $\\mathrm{x}$ 的不等式 $\\mathrm{ax}{ }^{2}-\\mathrm{x}+\\mathrm{b} \\geq 0$ 的解集为 $[-2,1]$,\n\n$\\therefore-2,1$ 是关于 $\\mathrm{x}$ 的方程 $\\mathrm{ax}^{2}-\\mathrm{x}+\\mathrm{b}=0$ 的两个根, $\\therefore\\left\\{\\begin{array}{l}4 a+2+b=0 \\\\ a-1+b=0\\end{array}\\right.$, 解得 $\\mathrm{a}=-1, \\mathrm{~b}=2$,\n\n$\\therefore$ 关于 $\\mathrm{x}$ 的不等式 $\\mathrm{bx}^{2}-\\mathrm{x}+\\mathrm{a} \\leq 0$ 即 $2 \\mathrm{x}^{2}-\\mathrm{x}-1 \\leq 0$, 解方程 $2 \\mathrm{x}^{2}-\\mathrm{x}-1=0$, 得 $x_{1}=-\\frac{1}{2}, \\mathrm{x}_{2}=1$,\n\n$\\therefore$ 关于 $\\mathrm{x}$ 的不等式 $\\mathrm{bx}^{2}-\\mathrm{x}+\\math

**处理数据集**

In [12]:
from unsloth import FastModel
import torch

# 使用 Gemma-3 的对话模板
tokenizer = get_chat_template(tokenizer, chat_template="gemma-3")

# 将数据集转换为对话格式（适配数学问题-答案对）
def format_conversation(example):
    # 将选项拼接到问题后面
    question_with_options = example["question"] + "\n选项：\n" + example["options"]

    # 使用解析作为答案，如果没有解析就用 answer 字母
    model_answer = example["analysis"] if example["analysis"] != "null" else f"答案是：{example['answer']}"

    return {
        "conversations": [
            {"role": "user", "content": question_with_options},
            {"role": "assistant", "content": model_answer},
        ]
    }

# 应用格式转换
dataset = dataset.map(format_conversation)

# 标准化数据格式并应用对话模板
from unsloth.chat_templates import standardize_data_formats
dataset = standardize_data_formats(dataset)

def apply_chat_template(examples):
    texts = tokenizer.apply_chat_template(examples["conversations"])
    return {"text": texts}

dataset = dataset.map(apply_chat_template, batched=True)

Map:   0%|          | 0/22248 [00:00<?, ? examples/s]

Unsloth: Standardizing formats (num_proc=2):   0%|          | 0/22248 [00:00<?, ? examples/s]

Map:   0%|          | 0/22248 [00:00<?, ? examples/s]

进行LoRA

In [13]:
from unsloth import FastModel

# 加载 Gemma-3 4B 模型（4bit 量化）
model, tokenizer = FastModel.from_pretrained(
    model_name="unsloth/gemma-3-4b-it",
    max_seq_length=2048,
    load_in_4bit=True,
    full_finetuning=False,
)

# 配置 LoRA 参数
model = FastModel.get_peft_model(
    model,
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    r=16,  # 提高 r 值以增强数学推理能力
    lora_alpha=16,
    lora_dropout=0.1,  # 轻微 dropout 防止过拟合
    bias="none",
)

==((====))==  Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.0.dev0. vLLM: 0.8.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.
Unsloth: Making `base_model.model.vision_tower.vision_model` require gradients


**模型训练参数**

In [14]:
from trl import SFTTrainer, SFTConfig

# 训练参数（针对数学任务优化）
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    args=SFTConfig(
        dataset_text_field="text",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=8,  # 增大梯度累积步数以节省内存
        warmup_steps=10,
        max_steps=200,  # 增加训练步数以提升数学推理能力
        learning_rate=1e-5,  # 更低的学习率稳定训练
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="cosine",
        seed=3407,
        report_to="none",
    ),
)

# 仅对答案部分计算损失
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part="<start_of_turn>user\n",  # 匹配问题分隔符
    response_part="<start_of_turn>model\n",     # 匹配答案分隔符
)

Unsloth: Switching to float32 training since model cannot work with float16


Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/22248 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/22248 [00:00<?, ? examples/s]

**开始训练**

In [15]:
# 检查 GPU 内存
print(f"GPU Memory Reserved: {torch.cuda.max_memory_reserved() / 1024**3:.2f} GB")

# 开始训练
trainer_stats = trainer.train()

GPU Memory Reserved: 10.04 GB


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 22,248 | Num Epochs = 1 | Total steps = 200
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 8 x 1) = 16
 "-____-"     Trainable parameters = 38,497,792/4,000,000,000 (0.96% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,1.9325
20,1.8357
30,1.7056
40,1.7338
50,1.4733
60,1.2939
70,1.2194
80,1.2626
90,1.2001
100,1.1346


**测试能力**

In [18]:
messages = [
    {
        "role": "user",
        "content": "已知函数 f(x) = x² - 4x + 3，在区间 [1, 5] 上的最小值是多少？请给出解题过程。"
    }
]

# 生成 prompt，和训练时一样
text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

# 推理
outputs = model.generate(
    **tokenizer([text], return_tensors="pt").to("cuda"),
    max_new_tokens=1024,
    temperature=0.7,
    top_p=0.9,
)

# 解码输出
print(tokenizer.decode(outputs[0], skip_special_tokens=False))

<bos><start_of_turn>user
已知函数 f(x) = x² - 4x + 3，在区间 [1, 5] 上的最小值是多少？请给出解题过程。<end_of_turn>
<start_of_turn>model
解题过程如下：

1. **求函数 f(x) 的导数:**

   f'(x) = 2x - 4

2. **确定导数为零的点:**

   将 f'(x) = 0 求解，得到：

   2x - 4 = 0

   x = 2

3. **确定导数为零点的意义:**

   导数为零的点是可能的极值点。

4. **确定极值点是否在区间 [1, 5] 内:**

   x = 2 在区间 [1, 5] 内。

5. **计算函数在极值点和区间端点上的值:**

   *   f(1) = 1² - 4(1) + 3 = 0
   *   f(2) = 2² - 4(2) + 3 = -1
   *   f(5) = 5² - 4(5) + 3 = 12

6. **比较函数值，得出最小值:**

   比较 f(1) = 0, f(2) = -1, f(5) = 12，得到最小值是 -1。

**答案:**

函数 f(x) = x² - 4x + 3 在区间 [1, 5] 上的最小值是 -1。<end_of_turn>


**保存模型**

In [None]:
# 保存到本地
model.save_pretrained("gemma-3-math-solver")
tokenizer.save_pretrained("gemma-3-math-solver")

# 上传到 Hugging Face Hub
model.push_to_hub_merged(
    "your-profile",
    tokenizer,
    token="your-hf-code",
)

NameError: name 'model' is not defined