<a href="https://colab.research.google.com/github/skywalker0803r/LLM/blob/main/gemma%E5%BE%AE%E8%AA%BF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
from huggingface_hub import login
#login()
from transformers import pipeline
pipe = pipeline("text-generation", model="google/gemma-3-1b-it")
messages = [
    {"role": "user", "content": "你好嗎?"},
]
pipe(messages)

Device set to use cuda:0


[{'generated_text': [{'role': 'user', 'content': '你好嗎?'},
   {'role': 'assistant',
    'content': '我很好，謝謝你問！ 😊 \n\n你呢？今天過得怎麼樣？有什么我可以幫你的嗎？\n'}]}]

# SFT

In [20]:
data = [
    {
        "messages": [
            {"role": "user", "content": "SFT微調測試"},
            {"role": "assistant", "content": "SFT微調測試成功"}
        ]
    },
    {
        "messages": [
            {"role": "user", "content": "SFT微調測試"},
            {"role": "assistant", "content": "SFT微調測試成功"}
        ]
    }
]*100

import json

# 儲存成 JSON Lines 格式（每一行是一個 JSON object）
with open("your_data.json", "w", encoding="utf-8") as f:
    for example in data:
        json.dump(example, f, ensure_ascii=False)
        f.write("\n")


# SFT訓練

In [21]:
from trl import SFTTrainer
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training
import torch

# 模型設定
model_name = "google/gemma-3-1b-it"

# 載入 tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # 防止 padding 錯誤

# 載入模型，使用 4bit 量化
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
)

# 準備 LoRA 微調
model = prepare_model_for_kbit_training(model)

peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
)

# 載入資料集
dataset = load_dataset("json", data_files="your_data.json", split="train")

# 格式轉換：將 messages 組裝成模型輸入格式
def format_example(example):
    dialogue = ""
    for m in example["messages"]:
        role = m["role"]
        content = m["content"]
        dialogue += f"<|start_of_turn|>{role}\n{content}<|end_of_turn|>\n"
    return {"text": dialogue}

dataset = dataset.map(format_example)

# 訓練設定
training_args = TrainingArguments(
    output_dir="./gemma-sft",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    logging_steps=10,
    learning_rate=2e-4,
    fp16=True,
    save_strategy="epoch",
    report_to="none",
)

# 建立 Trainer
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,
)

# 開始訓練
trainer.train()

# 儲存模型
trainer.model.save_pretrained("./gemma-sft")
tokenizer.save_pretrained("./gemma-sft")


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,8.8823
20,6.0977
30,4.0539
40,2.5748
50,1.6423
60,1.1886
70,1.1224


('./gemma-sft/tokenizer_config.json',
 './gemma-sft/special_tokens_map.json',
 './gemma-sft/chat_template.jinja',
 './gemma-sft/tokenizer.model',
 './gemma-sft/added_tokens.json',
 './gemma-sft/tokenizer.json')

In [22]:
from transformers import pipeline

pipe = pipeline("text-generation", model="./gemma-sft", tokenizer="./gemma-sft")
pipe("<|start_of_turn|>user\nSFT微調測試<|end_of_turn|>\n<|start_of_turn|>assistant\n", max_new_tokens=50)


Device set to use cuda:0


[{'generated_text': '<|start_of_turn|>user\nSFT微調測試<|end_of_turn|>\n<|start_of_turn|>assistant\n好的，我明白了。\n\n請告訴我，你是否使用 Google 伺服器？\n\n<|end_of_text|>\n<|start_of_audio|>\n<|start_of_audio>\n<|start'}]

# DPO

In [23]:
import json
import random

# 讀取原始 SFT 資料
with open("your_data.json", "r", encoding="utf-8") as f:
    sft_data = [json.loads(line) for line in f]

# 定義 rejected 回應的預設（也可以用 GPT 產生更好品質的）
rejected_candidates = [
    "好的，我明白了。\n\n請告訴我，你是否使用 Google 伺服器？",
    "好的，我明白了。\n\n請告訴我，你是否使用 Google 伺服器？",
    "好的，我明白了。\n\n請告訴我，你是否使用 Google 伺服器？",
    "好的，我明白了。\n\n請告訴我，你是否使用 Google 伺服器？",
    "好的，我明白了。\n\n請告訴我，你是否使用 Google 伺服器？",
    "好的，我明白了。\n\n請告訴我，你是否使用 Google 伺服器？",
]

# 轉換為 DPO 格式
dpo_data = []
for item in sft_data:
    messages = item["messages"]
    prompt = ""
    response = ""

    for m in messages:
        if m["role"] == "user":
            prompt = m["content"]
        elif m["role"] == "assistant":
            response = m["content"]

    if prompt and response:
        dpo_data.append({
            "prompt": prompt,
            "chosen": response,
            "rejected": random.choice(rejected_candidates),
        })

# 寫入 dpo_data.jsonl
with open("dpo_data.json", "w", encoding="utf-8") as f:
    for item in dpo_data:
        json.dump(item, f, ensure_ascii=False)
        f.write("\n")

print("✅ 已轉換為 DPO 格式，儲存在 dpo_data.json")



✅ 已轉換為 DPO 格式，儲存在 dpo_data.json


# DPO訓練

In [24]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from trl import DPOTrainer
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training
from trl import DPOConfig

# 載入資料集（JSONL 格式）
dataset = load_dataset("json", data_files="dpo_data.json", split="train")

training_args = DPOConfig(
    output_dir="./gemma-dpo",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    num_train_epochs=3,
    logging_steps=10,
    save_strategy="epoch",
    report_to="none",
    fp16=True,
    beta=0.1,  # DPO 需要指定 beta
    max_prompt_length=512,
    max_length=1024,
    truncation_mode="keep_end",
)

# 初始化 DPOTrainer
trainer = DPOTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,
)

# 開始訓練
trainer.train()

# 儲存模型
trainer.model.save_pretrained("./gemma-dpo")
tokenizer.save_pretrained("./gemma-dpo")

Generating train split: 0 examples [00:00, ? examples/s]



Extracting prompt in train dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,0.298
20,0.0122
30,0.0002
40,0.0
50,0.0
60,0.0
70,0.0


('./gemma-dpo/tokenizer_config.json',
 './gemma-dpo/special_tokens_map.json',
 './gemma-dpo/chat_template.jinja',
 './gemma-dpo/tokenizer.model',
 './gemma-dpo/added_tokens.json',
 './gemma-dpo/tokenizer.json')

In [25]:
from transformers import pipeline

pipe = pipeline("text-generation", model="./gemma-dpo", tokenizer="./gemma-dpo")
pipe("SFT微調測試", max_new_tokens=50)


Device set to use cuda:0


[{'generated_text': 'SFT微調測試\n**目標：** 測試 SFT (Soft Feature Tracking) 的性能，確保其能夠準確地追蹤和理解周邊信息，並在不同光照條件下保持良好的性能。\n\n**測試步驟：**\n\n1'}]