In [None]:
import os

# Both flags are written to the process environment *before* unsloth is
# imported, so they are already in place when the library loads.
# NOTE(review): presumably these switch off datasets multiprocessing and
# Unsloth's compile step, as the variable names suggest -- confirm against
# the library documentation.
os.environ.update(
    {
        "HF_DATASETS_DISABLE_MULTIPROCESSING": "1",
        "UNSLOTH_COMPILE_DISABLE": "1",
    }
)

# Keep the unsloth import ahead of torch, matching the original cell order.
from unsloth import FastLanguageModel
import torch




# Settings consumed by the model-loading cell.
max_seq_length = 4096  # sequence length passed to from_pretrained
dtype = None           # None is passed through so the loader auto-detects the dtype

# Pre-quantized checkpoints (faster download and helps avoid OOM).
# Kept as a reference list; the loading cell names its checkpoint explicitly.
fourbit_models = [
    # bitsandbytes 4-bit quantization
    "unsloth/gpt-oss-20b-unsloth-bnb-4bit",
    "unsloth/gpt-oss-120b-unsloth-bnb-4bit",
    # MXFP4 format
    "unsloth/gpt-oss-20b",
    "unsloth/gpt-oss-120b",
]

# model

In [None]:
# Load the base checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/gpt-oss-20b",
    max_seq_length=max_seq_length,
    dtype=dtype,            # auto-detect
    load_in_4bit=True,      # 4-bit quantization to reduce memory use
    full_finetuning=False,  # adapters are attached in the next cell instead
)

In [None]:
# Projection layers that receive LoRA adapters: the four attention
# projections followed by the three MLP projections.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded model with LoRA adapters.
model = FastLanguageModel.get_peft_model(
    model,
    r=8,                 # LoRA rank; pick a positive integer such as 8, 16, 32, 64, 128
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,      # any value works, but 0 is reportedly the optimized setting
    bias="none",         # any value works, but "none" is reportedly the optimized setting
    # "unsloth" reportedly uses about 30% less VRAM and allows roughly twice
    # the batch size; use True or "unsloth" for long context lengths.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,    # rank-stabilized LoRA
    loftq_config=None,   # LoftQ
)

# dataset

In [None]:
# NOTE: this whole cell is disabled (commented out). It is an alternative data
# pipeline that loads "HuggingFaceH4/Multilingual-Thinking" from the Hub, passes
# it through standardize_sharegpt, and renders each entry of the "messages"
# column into a "text" column with the tokenizer's chat template. The active
# pipeline is the cell that loads teaching_data.jsonl.

# from datasets import load_dataset
# from unsloth.chat_templates import standardize_sharegpt

# def formatting_prompts_func(examples):
#     convos = examples["messages"]
#     texts = [tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
#              for convo in convos]
#     return {"text": texts}

# # Load the dataset
# dataset = load_dataset("HuggingFaceH4/Multilingual-Thinking", split="train")
# # dataset = load_dataset("teaching_data.jsonl")


# # Standardize and format the dataset
# dataset = standardize_sharegpt(dataset)
# dataset = dataset.map(formatting_prompts_func, batched=True)

# # Inspect the result (prints the dataset's type)
# print(type(dataset))

In [None]:
import json
from datasets import load_dataset

# Load the local JSON Lines file as a datasets.arrow_dataset.Dataset.
# The "json" builder exposes the file under the "train" split.
dataset = load_dataset("json", data_files="teaching_data.jsonl")["train"]

# Shuffle with a fixed seed so the sample order is reproducible.
dataset = dataset.shuffle(seed=42)

# Preview at most 10 samples. Bounding by len(dataset) avoids an IndexError
# when the file contains fewer than 10 rows.
for i in range(min(10, len(dataset))):
    print(dataset[i]['text'])

# train

In [None]:
from trl import SFTConfig, SFTTrainer


# Supervised fine-tuning trainer over the "text" column of `dataset`.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    args = SFTConfig(
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 4,  # 1 * 4 = 4 samples per optimizer step
        warmup_steps = 5,
        # Training length is controlled by max_steps alone. A positive max_steps
        # overrides num_train_epochs, so the previous `num_train_epochs = 1`
        # had no effect and was removed to avoid suggesting an epoch-based run.
        # To train by epochs instead, delete max_steps and set num_train_epochs.
        # For a quick smoke test, use a small value such as max_steps = 30.
        max_steps = 886,
        learning_rate = 2e-4,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        dataset_num_proc = 1,
        output_dir = "outputs",
        report_to = "none",  # change to e.g. "wandb" to enable experiment tracking
    ),
)

In [None]:
# Record the GPU memory baseline before training starts.
# `start_gpu_memory` is read again by the training cell to compute the delta.
gib = 1024 ** 3  # bytes per GiB
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / gib, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / gib, 3)
print(f"GPU = {gpu_stats.name}. 最大メモリ = {max_memory} GB.")
print(f"{start_gpu_memory} GB のメモリが予約されています。")

In [None]:
# Run fine-tuning; the returned object carries the run metrics.
trainer_stats = trainer.train()

# Report runtime and memory after training. Peak reserved memory is converted
# from bytes to GiB, and the pre-training baseline (`start_gpu_memory`) is
# subtracted to get the amount attributed to the training run.
used_memory = round(torch.cuda.max_memory_reserved() / (1024 ** 3), 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
print(f"訓練時間: {round(trainer_stats.metrics['train_runtime']/60, 2)} 分")
print(f"ピーク使用メモリ: {used_memory} GB")
print(f"LoRA訓練用メモリ: {used_memory_for_lora} GB")

In [None]:
# NOTE: this whole cell is disabled (commented out). It is an inference smoke
# test: it builds a system + user conversation, renders it with the tokenizer's
# chat template (generation prompt enabled, reasoning_effort "medium"), moves
# the tensors to the model's device, and streams up to 2048 new tokens through
# a TextStreamer. Uncomment to try the model interactively.

# messages = [
#     {
#         "role": "system",
#         "content": "reasoning language: French\n\nあなたは数学の問題を解決できる有用なアシスタントです。"
#     },
#     {"role": "user", "content": "x^5 + 3x^4 - 10 = 3を解いてください。"},
# ]

# inputs = tokenizer.apply_chat_template(
#     messages,
#     add_generation_prompt = True,
#     return_tensors = "pt",
#     return_dict = True,
#     reasoning_effort = "medium",
# ).to(model.device)

# from transformers import TextStreamer
# _ = model.generate(**inputs, max_new_tokens = 2048, streamer = TextStreamer(tokenizer))