# Unified Trainer Test

In [1]:
!pip install -q transformers peft accelerate datasets

In [2]:
import json
import os

# Tiny in-memory corpora, one per training objective exercised by this notebook.
# NOTE(review): a later cell writes the same three files again after the working
# directory has been changed; confirm this earlier copy is still needed.

# Prompt/response pairs.
sft_data = [
    {"prompt": "Hello, how are you?", "response": "I am fine, thank you."},
    {"prompt": "What is your name?", "response": "I am a tiny GPT2 model."},
]

# Prompts with a chosen and a rejected completion.
dpo_data = [
    {"prompt": "Say something nice.", "chosen": "You are doing great!", "rejected": "You are terrible."},
    {"prompt": "Greet the user.", "chosen": "Hello, nice to meet you.", "rejected": "What do you want?"},
]

# Prompt-only records.
rl_data = [
    {"prompt": "Give a short compliment."},
    {"prompt": "Say something positive."},
]

os.makedirs("data", exist_ok=True)

# Serialise every corpus as JSON Lines: one JSON object per line, UTF-8 encoded,
# with non-ASCII characters written verbatim (ensure_ascii=False).
for jsonl_path, records in (
    ("data/sft.jsonl", sft_data),
    ("data/dpo.jsonl", dpo_data),
    ("data/rl.jsonl", rl_data),
):
    with open(jsonl_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(ex, ensure_ascii=False) + "\n" for ex in records)

print("Dummy datasets written to ./data")


Dummy datasets written to ./data


In [3]:
from pathlib import Path

from google.colab import drive

# Attach Google Drive to the Colab filesystem before resolving any project path.
drive.mount('/content/drive')

# Absolute location of the project folder on the mounted drive.
# NOTE(review): the next cell rebinds PROJ to a plain string with the same path;
# confirm whether this Path-valued definition is still needed.
PROJ = Path('/content/drive/MyDrive/rag_bio_project').resolve()
print('Project path:', PROJ)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Project path: /content/drive/MyDrive/rag_bio_project


In [4]:
# Mount Drive, switch into the project directory, and fix sys.path
# (one-shot path setup for Colab).
from google.colab import drive
drive.mount('/content/drive')

import os, sys, pathlib

PROJ = "/content/drive/MyDrive/rag_bio_project"
# Fail fast when the project folder is missing (the message text reads
# "project directory does not exist").
assert os.path.exists(PROJ), f"项目目录不存在: {PROJ}"

# Let Python find modules that live in that directory
if PROJ not in sys.path:
    sys.path.insert(0, PROJ)

# Change the working directory (optional, but recommended); relative paths used
# by later cells (e.g. "data/...", "outputs/...") then resolve under PROJ.
os.chdir(PROJ)

# Check that the module file is really there
# NOTE(review): this checks abi_trust_pipeline.py, whereas the module imported
# later in this notebook is unified_train -- confirm which file should be checked.
print("CWD:", os.getcwd())
print("Has module:", os.path.exists("abi_trust_pipeline.py"))
!ls -l abi_trust_pipeline.py


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
CWD: /content/drive/MyDrive/rag_bio_project
Has module: True
-rw------- 1 root root 33116 Nov 28 11:12 abi_trust_pipeline.py


In [5]:
import json
import os

# SFT data: prompt/response pairs.
sft_data = [
    {"prompt": "Hello, how are you?", "response": "I am fine, thank you."},
    {"prompt": "What is your name?", "response": "I am a tiny GPT2 model."},
]

# DPO data: prompts with a chosen and a rejected completion.
dpo_data = [
    {"prompt": "Say something nice.", "chosen": "You are doing great!", "rejected": "You are terrible."},
    {"prompt": "Greet the user.", "chosen": "Hello, nice to meet you.", "rejected": "What do you want?"},
]

# RL data: prompt-only records.
rl_data = [
    {"prompt": "Give a short compliment."},
    {"prompt": "Say something positive."},
]

# Paths are relative, so the files land under the current working directory.
os.makedirs("data", exist_ok=True)

# Write each corpus as JSON Lines (one JSON object per line, UTF-8, non-ASCII
# characters kept verbatim).
for jsonl_path, records in (
    ("data/sft.jsonl", sft_data),
    ("data/dpo.jsonl", dpo_data),
    ("data/rl.jsonl", rl_data),
):
    with open(jsonl_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(ex, ensure_ascii=False) + "\n" for ex in records)

print("Dummy datasets written to ./data")


Dummy datasets written to ./data


In [6]:
from unified_train import TrainConfig, train_unified


## 1. Test SFT

In [7]:
# Run the "sft" objective on the tiny GPT-2 checkpoint for a single epoch,
# reading data/sft.jsonl and writing under outputs/test_sft.
sft_kwargs = dict(
    model_name="sshleifer/tiny-gpt2",
    train_objective="sft",
    sft_file="data/sft.jsonl",
    output_dir="outputs/test_sft",
    num_epochs=1,
    batch_size=2,
    tuning_strategy="full",
)
cfg_sft = TrainConfig(**sft_kwargs)
train_unified(cfg_sft)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.51M [00:00<?, ?B/s]

`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


[SFT] saved to outputs/test_sft/sft-epoch1
[SFT] final model saved to outputs/test_sft/sft-final


## 2. Test DPO

In [8]:
# Run the "dpo" objective on the tiny GPT-2 checkpoint for a single epoch,
# reading data/dpo.jsonl and writing under outputs/test_dpo.
dpo_kwargs = dict(
    model_name="sshleifer/tiny-gpt2",
    train_objective="dpo",
    dpo_file="data/dpo.jsonl",
    output_dir="outputs/test_dpo",
    num_epochs=1,
    batch_size=2,
    tuning_strategy="full",
)
cfg_dpo = TrainConfig(**dpo_kwargs)
train_unified(cfg_dpo)


model.safetensors:   0%|          | 0.00/2.51M [00:00<?, ?B/s]

[DPO] saved to outputs/test_dpo/dpo-epoch1
[DPO] final model saved to outputs/test_dpo/dpo-final


## 3. Test PG

In [9]:
import torch

def simple_reward(prompts, responses):
    """Score each response 1.0 if it mentions "good" or "great", else 0.0.

    Matching is case-insensitive substring search on the response text. The
    prompts are only used to pair up with the responses; pairing stops at the
    shorter of the two sequences.

    Args:
        prompts: Iterable of prompts (contents are not inspected).
        responses: Iterable of response strings.

    Returns:
        A 1-D ``torch.float32`` tensor with one reward per (prompt, response) pair.
    """
    positive_words = ("good", "great")
    rewards = [
        1.0 if any(word in reply.lower() for word in positive_words) else 0.0
        for _prompt, reply in zip(prompts, responses)
    ]
    return torch.tensor(rewards, dtype=torch.float32)

# Run the "pg" objective on the tiny GPT-2 checkpoint for a single epoch,
# reading prompts from data/rl.jsonl, scoring generations with simple_reward,
# and writing under outputs/test_pg.
pg_kwargs = dict(
    model_name="sshleifer/tiny-gpt2",
    train_objective="pg",
    rl_file="data/rl.jsonl",
    output_dir="outputs/test_pg",
    num_epochs=1,
    batch_size=2,
    tuning_strategy="full",
)
cfg_pg = TrainConfig(**pg_kwargs)
train_unified(cfg_pg, reward_fn=simple_reward)


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


[PG] saved to outputs/test_pg/pg-epoch1
[PG] final model saved to outputs/test_pg/pg-final
