In [None]:
# cell 1 — 參數、路徑與基本設定

import os, json, random, re, unicodedata
import torch
import matplotlib.pyplot as plt
from collections import defaultdict, Counter
from datasets import load_dataset
from transformers import (
    AutoTokenizer, TrainingArguments, GenerationConfig,
    AutoModelForCausalLM, AutoModel
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
import torch.nn as nn

# 固定種子
torch.manual_seed(42); random.seed(42)


MODEL_NAME = "MediaTek-Research/Llama-Breeze2-3B-Instruct-v0_1"

cot_train_data_file = r"./0801/train_chatml_cot.jsonl"
ao_train_data_file  = r"./0801/train_chatml.jsonl"
val_data_file       = r"./0801/val_chatml.jsonl"
test_data_file      = r"./0801/test_chatml.jsonl"

# 輸出
OUTPUT_DIR_COT = "./breeze2-lora-cot-02"
OUTPUT_DIR_AO  = "./breeze2-lora-ao-02"
os.makedirs(OUTPUT_DIR_COT, exist_ok=True)
os.makedirs(OUTPUT_DIR_AO,  exist_ok=True)
os.makedirs("results",      exist_ok=True)

# 超參
per_device_train_batch_size = 1
gradient_accumulation_steps = 16
num_train_epochs = 20
learning_rate = 2e-5
weight_decay = 0.05
warmup_ratio = 0.08
max_grad_norm = 0.5
logging_steps = 20
eval_steps = 200
save_steps = 200
max_seq_length = 1024
max_new_tokens = 1024  # 評估/存輸出

# 生成設定
gen_cfg = GenerationConfig(
    max_new_tokens=max_new_tokens,
    do_sample=False,         
    temperature=None,
    top_p=None,
    repetition_penalty=1.05,
)

# 提示詞
system_prompt_cot = (
    "你是一個歷史學家，請你針對問題在腦中進行詳細的邏輯推理。你的輸出必須包含完整的推理過程和最終答案。格式為：推理過程：\[你的完整推理過程\]最終答案：\[你的最終結論\]"
)
system_prompt_ao  = (
    "你是一個歷史學家。請針對問題，直接給出最終答案。格式為：最終答案：\[你的最終結論\]"
)

torch.set_float32_matmul_precision("high")
torch.backends.cuda.matmul.allow_tf32 = True
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.truncation_side = "left"
tokenizer.img_context_token_id = None

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda
分詞器已成功載入！


In [None]:
# cell 2 — 載入 Breeze2 wrapper → 抽出 language_model

# 按模型卡示例設置 img_context_token_id（對 wrapper 友善）
IMG_CTX_ID = 128212

# 只用官方 wrapper 來載入，避免 AutoConfig 對 'internvl_chat' 的直載問題
wrap = AutoModel.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype=(torch.float16 if torch.cuda.is_available() else torch.float32),
    low_cpu_mem_usage=True,
    device_map=None,
    img_context_token_id=IMG_CTX_ID,
).eval().to(DEVICE)

# 抽出純文字 LLM（這個才交給 LoRA/TRL）
if hasattr(wrap, "language_model"):
    base_model = wrap.language_model
else:
    for cand in ("llm", "text_model", "lm", "model"):
        if hasattr(wrap, cand):
            base_model = getattr(wrap, cand)
            break
    else:
        raise RuntimeError("找不到文字 LLM 子模組（language_model/llm/text_model/lm/model）。")

base_model.to(DEVICE).eval()
print("OK: 取得文字 LLM =", type(base_model).__name__)

FlashAttention2 is not installed.


Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.71s/it]


OK: 取得文字 LLM = LlamaForCausalLM


In [3]:
# cell 3 — 將 ChatML 展平成純文字樣本（dataset_text_field='text' 用）

SYS_FALLBACK = "請在腦中逐步推理，但最終只輸出一行：最終答案：XXXX"

def formatting_prompts_func(batch):
    outs = []
    for msgs in batch["messages"]:
        m = msgs
        if not m or m[0].get("role") != "system":
            m = [{"role":"system","content": SYS_FALLBACK}] + m
        text = tokenizer.apply_chat_template(
            m, tokenize=False, add_generation_prompt=False
        )
        outs.append(text)
    return {"text": outs}

In [None]:
# cell 4 — LoRA

from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules="all-linear",
    task_type="CAUSAL_LM",
)

def build_lora_model():
    m = get_peft_model(base_model, lora_config)
    m.print_trainable_parameters()
    return m

In [None]:
# cell 5 — Logging callback

from transformers import TrainerCallback
from collections import defaultdict
import matplotlib.pyplot as plt

class CustomLoggingCallback(TrainerCallback):
    def __init__(self):
        self.logs = defaultdict(list)
    def on_log(self, args, state, control, logs=None, **kwargs):
        if getattr(state, "is_local_process_zero", True):
            if logs is None: return
            if "loss" in logs:
                self.logs["train_loss"].append(logs["loss"])
            if "eval_loss" in logs:
                self.logs["eval_loss"].append(logs["eval_loss"])

def plot_losses(logs, title, output_path):
    plt.figure(figsize=(10, 6))
    if logs['train_loss']:
        plt.plot(logs['train_loss'], label='Training Loss')
    if logs['eval_loss']:
        gap = max(1, len(logs['train_loss']) // max(1, len(logs['eval_loss'])))
        xs = list(range(0, gap*len(logs['eval_loss']), gap))
        plt.plot(xs, logs['eval_loss'], 'o-', label='Validation Loss')
    plt.title(title); plt.xlabel('Training Steps'); plt.ylabel('Loss')
    plt.legend(); plt.grid(True); plt.tight_layout()
    plt.savefig(output_path, dpi=150)
    plt.close() 

In [None]:
# cell 6 — 單次訓練函式（CoT / AO 都用）

from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
import os
import torch
import glob

def train_and_save_model(train_file, val_file, output_dir, model_type):
    print(f"\n=== 開始訓練 {model_type} 模型（_02） ===")

    # 重新掛一個新的 LoRA（避免重複疊 adapter）
    peft_model = build_lora_model()

    # 讀資料並轉為純文字
    train_ds = load_dataset("json", data_files={"train": train_file}, split="train")
    val_ds = load_dataset("json", data_files={"validation": val_file}, split="validation")
    train_text = train_ds.map(formatting_prompts_func, batched=True)
    val_text = val_ds.map(formatting_prompts_func, batched=True)

    cb = CustomLoggingCallback()

    args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=per_device_train_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        num_train_epochs=num_train_epochs,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        warmup_ratio=warmup_ratio,
        max_grad_norm=max_grad_norm,
        lr_scheduler_type="linear",
        logging_steps=logging_steps,
        eval_steps=eval_steps,
        save_steps=save_steps,
        evaluation_strategy="steps",
        save_strategy="steps",
        save_total_limit=3,
        load_best_model_at_end=True,
        fp16=True,
        report_to="none",
    )

    trainer = SFTTrainer(
        model=peft_model,
        tokenizer=tokenizer,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        packing=True,
        args=args,
        train_dataset=train_text,
        eval_dataset=val_text,
        callbacks=[cb],
    )

    # 判斷是否從檢查點恢復
    # 檢查 output_dir 是否存在，且裡面是否有 checkpoint 檔案
    checkpoint_dirs = glob.glob(os.path.join(output_dir, "checkpoint-*"))
    if len(checkpoint_dirs) > 0:
        print(f"在 {output_dir} 找到檢查點，從上一次訓練繼續...")
        trainer.train(resume_from_checkpoint=True)
    else:
        print(f"在 {output_dir} 沒有找到檢查點，從頭開始訓練...")
        trainer.train()

    trainer.save_model(output_dir)

    plot_losses(cb.logs, f"{model_type} Training & Validation Loss", f"results/{model_type.lower()}_loss_02.png")

    # 合併 LoRA → 單一 LLM（純文字）
    merged = trainer.model.merge_and_unload()
    merged_dir = os.path.join(output_dir, "final_02")
    os.makedirs(merged_dir, exist_ok=True)
    merged.save_pretrained(merged_dir)
    tokenizer.save_pretrained(merged_dir)
    print(f"訓練完成！最終模型已保存於 {merged_dir}")

    # 釋放
    del trainer, peft_model, merged
    torch.cuda.empty_cache()
    return merged_dir

In [None]:
# cell 7 — 執行訓練（CoT / AO）

merged_dir_cot_02 = train_and_save_model(cot_train_data_file, val_data_file, OUTPUT_DIR_COT, "CoT")
merged_dir_ao_02  = train_and_save_model(ao_train_data_file,  val_data_file, OUTPUT_DIR_AO,  "AO")
print("完成訓練。")



=== 開始訓練 CoT 模型（_02） ===
trainable params: 24,313,856 || all params: 3,631,066,112 || trainable%: 0.6696



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
  super().__init__(


在 ./breeze2-lora-cot-02 找到檢查點，從上一次訓練繼續...


  torch.load(os.path.join(checkpoint, OPTIMIZER_NAME), map_location=map_location)
  0%|          | 0/4480 [00:00<?, ?it/s]


{'train_runtime': 0.2485, 'train_samples_per_second': 288712.248, 'train_steps_per_second': 18029.424, 'train_loss': 0.0, 'epoch': 19.99}


  plt.legend(); plt.grid(True); plt.tight_layout()


Loss 圖表已保存到 results/cot_loss_02.png
訓練完成！最終模型已保存於 ./breeze2-lora-cot-02\final_02

=== 開始訓練 AO 模型（_02） ===
trainable params: 24,313,856 || all params: 3,631,066,112 || trainable%: 0.6696



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
  super().__init__(


在 ./breeze2-lora-ao-02 找到檢查點，從上一次訓練繼續...


  torch.load(os.path.join(checkpoint, OPTIMIZER_NAME), map_location=map_location)
  0%|          | 0/1160 [00:00<?, ?it/s]


{'train_runtime': 0.4075, 'train_samples_per_second': 45982.427, 'train_steps_per_second': 2846.298, 'train_loss': 0.0, 'epoch': 19.67}


  plt.legend(); plt.grid(True); plt.tight_layout()


Loss 圖表已保存到 results/ao_loss_02.png
訓練完成！最終模型已保存於 ./breeze2-lora-ao-02\final_02
兩個模型皆已完成訓練並合併存檔。


In [None]:
# cell 8 — 載入合併後模型並評估 / 輸出（_02）

# 引入必要的函式庫
import os, json, gc
import torch
from datasets import load_dataset
from transformers import AutoModel, AutoModelForCausalLM, GenerationConfig

# 推論設定
generation_config = GenerationConfig(
    max_new_tokens=max_new_tokens,
    do_sample=True, temperature=0.01, top_p=0.01,
    repetition_penalty=1.1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

def generate_prompt(question, model_type):
    if model_type == "base":
        messages = [{"role": "user", "content": question}]
        return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
    system_p = system_prompt_cot if model_type == "cot" else system_prompt_ao
    messages = [
        {"role": "system", "content": system_p},
        {"role": "user", "content": question}
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    return prompt

@torch.no_grad()
def infer(model, tokenizer, prompt):
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to(model.device)
    outputs = model.generate(**inputs, generation_config=generation_config, pad_token_id=tokenizer.eos_token_id)
    output_str = tokenizer.decode(outputs[0])
    return output_str.replace(prompt, "")

test_dataset = load_dataset("json", data_files={"test": test_data_file}, split="test")

def save_outputs(model, tokenizer, dataset, file_path, model_type):
    print(f"開始保存 {model_type} 模型輸出到 {file_path} ...")
    with open(file_path, "w", encoding="utf-8") as f:
        for ex in dataset:
            user_message = ex["messages"][1]["content"]
            prompt = generate_prompt(user_message, model_type)
            raw_output = infer(model, tokenizer, prompt)
            rec = {
                "question": user_message,
                "raw_output": raw_output,
                "target": ex["messages"][2]["content"],
            }
            f.write(json.dumps(rec, ensure_ascii=False) + "\n")
    print(f"完成！輸出已保存到 {file_path}")

# 設定輸出檔案路徑
BASE_MODEL_OUTPUTS = "results/base_model_outputs_02.jsonl"
FT_COT_OUTPUTS = "results/ft_cot_outputs_02.jsonl"
FT_AO_OUTPUTS  = "results/ft_ao_outputs_02.jsonl"

# 1) 載入並評估原生模型 
print("=== 載入並評估原生模型（使用原始提示）===")
try:
    wrap_base = AutoModel.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        torch_dtype=(torch.float16 if torch.cuda.is_available() else torch.float32),
        low_cpu_mem_usage=True,
        device_map="auto",
        img_context_token_id=128212,
    ).eval()
    if hasattr(wrap_base, "language_model"):
        base_model_for_eval = wrap_base.language_model
    else:
        for cand in ("llm", "text_model", "lm", "model"):
            if hasattr(wrap_base, cand):
                base_model_for_eval = getattr(wrap_base, cand)
                break
        else:
            raise RuntimeError("找不到文字 LLM 子模組。")
    base_model_for_eval.to(DEVICE).eval()
    save_outputs(base_model_for_eval, tokenizer, test_dataset, BASE_MODEL_OUTPUTS, "base")
except Exception as e:
    print(f"原生模型載入或評估失敗，錯誤：{e}")
finally:
    try:
        del base_model_for_eval, wrap_base
    except:
        pass
    torch.cuda.empty_cache(); gc.collect()

# 2) 載入並評估 CoT 微調後模型 
print("\n=== 載入並評估 CoT 模型（使用 COT 提示）===")
try:
    merged_model_cot = AutoModelForCausalLM.from_pretrained(
        os.path.join(OUTPUT_DIR_COT, "final_02"),
        torch_dtype=(torch.float16 if torch.cuda.is_available() else torch.float32),
        trust_remote_code=True,
        device_map="auto",
    ).eval()
    save_outputs(merged_model_cot, tokenizer, test_dataset, FT_COT_OUTPUTS, "cot")
finally:
    try:
        del merged_model_cot
    except:
        pass
    torch.cuda.empty_cache(); gc.collect()

# 3) 載入並評估 AO 微調後模型 
print("\n=== 載入並評估 AO 模型（使用 AO 提示）===")
try:
    merged_model_ao = AutoModelForCausalLM.from_pretrained(
        os.path.join(OUTPUT_DIR_AO, "final_02"),
        torch_dtype=(torch.float16 if torch.cuda.is_available() else torch.float32),
        trust_remote_code=True,
        device_map="auto",
    ).eval()
    save_outputs(merged_model_ao, tokenizer, test_dataset, FT_AO_OUTPUTS, "ao")
finally:
    try:
        del merged_model_ao
    except:
        pass
    torch.cuda.empty_cache(); gc.collect()

print("\n所有模型的評估已完成。")

=== 載入並評估原生模型（使用原始提示）===


Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.64s/it]
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


開始保存 base 模型輸出到 results/base_model_outputs_02.jsonl ...
完成！輸出已保存到 results/base_model_outputs_02.jsonl

=== 載入並評估 CoT 模型（使用 COT 提示）===


Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.59s/it]


開始保存 cot 模型輸出到 results/ft_cot_outputs_02.jsonl ...
完成！輸出已保存到 results/ft_cot_outputs_02.jsonl

=== 載入並評估 AO 模型（使用 AO 提示）===


Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.19s/it]


開始保存 ao 模型輸出到 results/ft_ao_outputs_02.jsonl ...
完成！輸出已保存到 results/ft_ao_outputs_02.jsonl

所有模型的評估已完成。
