In [5]:
from unsloth import FastLanguageModel, is_bfloat16_supported

from trl import SFTTrainer
from transformers import TrainingArguments
from tuning.data.train_dataset import get_train_dataset
from tuning.training.config_training import ModelLoadConfig, LoraConfig, SFTRunConfig, PTRunConfig, DPOTrainingConfig, TrainingArgumentsConfig, PassAtKConfig, sft_batch_size, effective_batch_size
from tuning.training.perplexity_callback import PerplexityStoppingCallback
from tuning.training.passk_callback import PassAtKStoppingCallback
from tuning.utils.utils import apply_chat_template, chat_template_func
import json
import sys
from datasets import load_from_disk
from typing import List, Optional, Union
from pathlib import Path
from tuning.config import DATASETS_DIR, HF_MODEL_MAP
import os
from tuning.training.config_training import DatasetConfig, SFTRunConfig
from tuning.config import MODELS_DIR
from tuning.training.sft_training import train_model_sft
from tuning.training.dpo_training import train_model_dpo
import subprocess
import importlib
import wandb


In [2]:
import importlib
import tuning.training.passk_callback
importlib.reload(tuning.training.passk_callback)
from tuning.training.passk_callback import PassAtKStoppingCallback
from tuning.training.dpo_training import train_model_dpo
importlib.reload(tuning.training.dpo_training)
from tuning.training.dpo_training import train_model_dpo

In [2]:
MODEL = "llama3-8B"
total_train_size = 2048  # 29980

In [7]:
dataset_config = DatasetConfig(
    dataset = "tuluif",
    dataset_type = "sft",
    train_size = total_train_size, # 29980
)

run_config = SFTRunConfig(
    dataset_config = dataset_config,
    model_name_hf = HF_MODEL_MAP[MODEL],  # Use HuggingFace model name, not local path
    model_name = MODEL,  # Base model name for output directory construction
    do_training=True,
    do_inference=False,
    do_evaluation=False,
)
passk_config = PassAtKConfig( # this is just to dynamically view the pass@1 of ifeval
    target_pass_at_k=[0.1, 0.2, 0.3,0.4,0.5,0.6, 0.9],
    k_values=[1],
    n_samples=1,
    num_prompts=50,
    temperature=0.7,
    strict=False,
    enabled=True,
)

lora_config = LoraConfig()
model_load_config = ModelLoadConfig()
model_load_config.max_seq_length = 4096
training_args = TrainingArgumentsConfig()
training_args.eval_steps = 2
training_args.save_steps = 2

In [8]:
run = wandb.init(
    name=run_config.run_name, 
    project="tuning", 
    reinit=True,
    # Optional: Pass config here so it's logged even if training crashes early
    config=run_config.__dict__ if hasattr(run_config, "__dict__") else {} 
)

with run:
    model, tokenizer, trainer, callbacks = train_model_sft(
        run_config = run_config,
        lora_config = lora_config,
        model_load_config = model_load_config,
        training_args = training_args,
        # passk_config = passk_config
    )   

Getting train dataset for run config: llama3-8B_sft-tuluif-2048
Checking for dataset at /home/shougan/projects/aip-fredashi/shougan/balance-budget/tuning/data/datasets/sft-tuluif-2048
Dataset already exists at /home/shougan/projects/aip-fredashi/shougan/balance-budget/tuning/data/datasets/sft-tuluif-2048
Sampled dataset: DatasetDict({
    train: Dataset({
        features: ['id', 'prompt', 'messages', 'constraints'],
        num_rows: 2048
    })
    test: Dataset({
        features: ['id', 'prompt', 'messages', 'constraints'],
        num_rows: 200
    })
})
Example training row: {'id': 'personas_IF_9u3mcrurksv7hypq3xlppyba', 'prompt': 'Compile a detailed summary of the most recent case discussed on the podcast. Include the title of the episode wrapped in double angular brackets, i.e. <<title>>, and quote at least one statement made by the host during the discussion.', 'messages': [{'content': 'You are a helpful assistant who is an expert at responding to prompts by carefully followin

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'prompt', 'messages', 'constraints', 'text'],
        num_rows: 2048
    })
    test: Dataset({
        features: ['id', 'prompt', 'messages', 'constraints', 'text'],
        num_rows: 200
    })
})
{'id': 'personas_IF_9u3mcrurksv7hypq3xlppyba', 'prompt': 'Compile a detailed summary of the most recent case discussed on the podcast. Include the title of the episode wrapped in double angular brackets, i.e. <<title>>, and quote at least one statement made by the host during the discussion.', 'messages': [{'content': 'You are a helpful assistant who is an expert at responding to prompts by carefully following the given instructions', 'role': 'system'}, {'content': 'Compile a detailed summary of the most recent case discussed on the podcast. Include the title of the episode wrapped in double angular brackets, i.e. <<title>>, and quote at least one statement made by the host during the discussion.', 'role': 'user'}, {'content': "I'm

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,048 | Num Epochs = 2 | Total steps = 32
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 8 x 1) = 128
 "-____-"     Trainable parameters = 83,886,080 of 8,114,147,328 (1.03% trained)


Step,Training Loss,Validation Loss
2,1.766,1.843255
4,1.8072,1.763545
6,1.6269,1.5824
8,1.5385,1.48587
10,1.4218,1.39153
12,1.2741,1.315719
14,1.2199,1.255882
16,1.1961,1.209554
18,1.1604,1.179342
20,1.1314,1.16853


Found HuggingFace hub cache directory: /home/shougan/.cache/huggingface/hub


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Checking cache directory for required files...


Unsloth: Copying 4 files from cache to `/home/shougan/projects/aip-fredashi/shougan/balanc


Successfully copied all 4 files from cache to `/home/shougan/projects/aip-fredashi/shougan/balance-budget/tuning/models/llama3-8B_sft-tuluif-2048`


Unsloth: Preparing safetensor model files: 100%|█████████| 4/4 [00:00<00:00, 30504.03it/s]
Unsloth: Merging weights into 16bit: 100%|██████████████████| 4/4 [00:48<00:00, 12.12s/it]


Unsloth: Merge process complete. Saved to `/home/shougan/projects/aip-fredashi/shougan/balance-budget/tuning/models/llama3-8B_sft-tuluif-2048`


0,1
eval/loss,█▇▅▄▃▃▂▂▁▁▁▁▁▁▁▁
eval/runtime,▂▁▁▁▂▂▄▃▄▃▃▃▃▆█▄
eval/samples_per_second,▇███▇▇▅▆▅▆▆▆▆▃▁▅
eval/steps_per_second,▇███▆▇▅▆▅▆▆▆▆▃▁▅
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▇█▇██▇▅▅▅▆▆▆▅▃▃▄▄▃▃▁▂▂▂▁▁▁▁▁▁▁▁▁
train/learning_rate,▁▃▅▆█████▇▇▇▇▆▆▆▅▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁
train/loss,██▇██▆▅▅▄▄▄▃▃▂▂▂▂▂▁▁▂▁▁▂▁▁▁▂▁▁▁▁

0,1
eval/loss,1.15184
eval/runtime,10.3334
eval/samples_per_second,19.355
eval/steps_per_second,1.258
total_flos,1.6645576771672474e+17
train/epoch,2.0
train/global_step,32.0
train/grad_norm,0.18052
train/learning_rate,0.0
train/loss,1.1164


In [None]:
ppl_callback = callbacks[-1]
metadata_file = ppl_callback.metadata_path
checkpoints = []
with open(metadata_file, "r") as f:
    for line in f:
        checkpoints.append(json.loads(line))
print(checkpoints)


[{'threshold_type': 'perplexity', 'threshold_value': 6.0, 'global_step': 40, 'checkpoint_path': '/home/shougan/projects/aip-fredashi/shougan/balance-budget/tuning/models/llama3-8B_ppl-6.00_sft-640', 'data_points_seen': 640}]


In [1]:
import torch
torch.cuda.empty_cache()
del model, tokenizer, trainer

NameError: name 'model' is not defined

In [None]:
for checkpoint in checkpoints:
    model_name = Path(checkpoint["checkpoint_path"]).name
    data = total_train_size - checkpoint["data_points_seen"] 
    model_load_config = ModelLoadConfig()
    training_args = DPOTrainingConfig()
    training_args.eval_steps = 5
    dataset_config = DatasetConfig(
        dataset = "tuluif",
        dataset_type = "pt",
        train_size = data,
    )
    sft_run_config = SFTRunConfig(
        dataset_config = DatasetConfig(
            dataset = "tuluif",
            dataset_type = "sft",
            train_size = checkpoint["data_points_seen"],
            dynamic_path = model_name
        ),
        model_name = MODEL,
        model_name_hf = HF_MODEL_MAP[MODEL], 
        task_name = "ifeval"
    )
    run_config = PTRunConfig(
        dataset_config = dataset_config,
        # model_name_hf = HF_MODEL_MAP[MODEL],  
        model_name = MODEL,  
        sft_run_config = sft_run_config,
        task_name = "ifeval",
        pft_method = "dpo",
        do_training = True
    )
    passk_config = PassAtKConfig( # this is just to dynamically view the pass@1 of ifeval
        target_pass_at_k=[1.2],
        k_values=[1],
        n_samples=1,
        num_prompts=50,
        temperature=0.7,
        strict=True,
        enabled=True,
    )
    train_model_dpo(
        run_config = run_config,
        lora_config = lora_config,
        model_load_config = model_load_config,
        training_args = training_args,
        passk_config = passk_config,
        perplexity_thresholds= [0.1] # dummy value to periodically check perplexities too
    )





Per device train batch size: 1
Getting train dataset for run config: llama3-8B_llama3-8B_ppl-6.00_sft-640_pt-tuluif-7552
Checking for dataset at /home/shougan/projects/aip-fredashi/shougan/balance-budget/tuning/data/datasets/pt-tuluif-7552
Dataset already exists at /home/shougan/projects/aip-fredashi/shougan/balance-budget/tuning/data/datasets/pt-tuluif-7552
Sampled dataset: DatasetDict({
    train: Dataset({
        features: ['id', 'prompt', 'constraints', 'chosen', 'rejected', 'chonsen_model', 'rejected_model', 'system_message'],
        num_rows: 7552
    })
    test: Dataset({
        features: ['id', 'prompt', 'constraints', 'chosen', 'rejected', 'chonsen_model', 'rejected_model', 'system_message'],
        num_rows: 200
    })
})
Example training row: {'id': 'personas_IF_dqxglsux2n8jeu59qktlnesh', 'prompt': 'Name two famous equestrian events that are part of the international jumping circuit, and format your answer by choosing one from these options: lowercase, UPPERCASE, Title 

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]