In [1]:
from pathlib import Path
from transformers import AutoModel, AutoTokenizer
from datetime import datetime
import os

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
WORKSPACE = Path.cwd().parent

OUTPUT_DIR = WORKSPACE / "output" / "step_03"

SDG_OUTPUT_DIR = WORKSPACE / "output"  / "step_02"

BASE_MODEL_NAME =  "meta-llama/Llama-3.2-1B-Instruct" #"meta-llama/Llama-3.1-8B-Instruct"

BASE_MODEL_PATH = OUTPUT_DIR / "base_model" / BASE_MODEL_NAME.split("/")[-1]

import os 

hf_token= ""



In [3]:
from huggingface_hub import login
login(token=hf_token)

In [None]:
if not BASE_MODEL_PATH.exists():
    print("Model not available locally, Downloading the model locally ")
    from transformers import AutoTokenizer, AutoModelForCausalLM
    
    # Save the model
    print(f"Loading model {BASE_MODEL_NAME}")
    model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_NAME)
    model.save_pretrained(BASE_MODEL_PATH)
    print(f"Model saved to {BASE_MODEL_PATH}")
    
    
    # Save the tokenizer
    print(f"Loading tokenizer {BASE_MODEL_NAME}")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
    tokenizer.save_pretrained(BASE_MODEL_PATH)
    print(f"Tokenizer saved to {BASE_MODEL_PATH}")
else:
    print(f"Model Available locally : {BASE_MODEL_PATH}")

Loading model meta-llama/Llama-3.2-1B-Instruct
Model saved to /Users/scharan/projects/red-hat-ai-examples/examples/knowledge-tuning/output/step_03/base_model/Llama-3.2-1B-Instruct
Loading tokenizer meta-llama/Llama-3.2-1B-Instruct
Tokenizer saved to /Users/scharan/projects/red-hat-ai-examples/examples/knowledge-tuning/output/step_03/base_model/Llama-3.2-1B-Instruct


In [5]:
# =============================================================================
# MODEL AND DATA CONFIGURATION
# =============================================================================


# Data paths for each phase
PHASE07_DATA_PATH = SDG_OUTPUT_DIR / "instructlab_phase_1_ds.jsonl" # Knowledge data
PHASE10_DATA_PATH = SDG_OUTPUT_DIR / "instructlab_phase_2_ds.jsonl"   # Skills data ONLY (no replay needed!)

# Output configuration
CHECKPOINT_BASE_DIR = OUTPUT_DIR / "checkpoints"
EXPERIMENT_PREFIX = "osft_multiphase_experiment"

# Create timestamped experiment directory
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
experiment_name = f"{EXPERIMENT_PREFIX}_{timestamp}"

print("📋 OSFT Multi-Phase Configuration")
print("=" * 50)
print(f"Base Model: {BASE_MODEL_PATH}")
print(f"Phase07 Data: {PHASE07_DATA_PATH}")
print(f"Phase10 Data: {PHASE10_DATA_PATH}")
print(f"Output Directory: {CHECKPOINT_BASE_DIR}/{experiment_name}")
print()
print("✨ Key Difference from Traditional LAB SFT:")
print("  Phase10 only needs skills data - no replay buffers!")
print("  OSFT preserves Phase07 knowledge automatically.")
print("  This workflow replaces complex LAB multi-phase training.")

📋 OSFT Multi-Phase Configuration
Base Model: /Users/scharan/projects/red-hat-ai-examples/examples/knowledge-tuning/output/step_03/base_model/Llama-3.2-1B-Instruct
Phase07 Data: /Users/scharan/projects/red-hat-ai-examples/examples/knowledge-tuning/output/step_02/instructlab_phase_1_ds.jsonl
Phase10 Data: /Users/scharan/projects/red-hat-ai-examples/examples/knowledge-tuning/output/step_02/instructlab_phase_2_ds.jsonl
Output Directory: /Users/scharan/projects/red-hat-ai-examples/examples/knowledge-tuning/output/step_03/checkpoints/osft_multiphase_experiment_20250925_123208

✨ Key Difference from Traditional LAB SFT:
  Phase10 only needs skills data - no replay buffers!
  OSFT preserves Phase07 knowledge automatically.
  This workflow replaces complex LAB multi-phase training.


In [6]:
# =============================================================================
# OSFT PROGRESSIVE UNFREEZE STRATEGY
# =============================================================================

# Phase07: Initial knowledge acquisition
PHASE07_UNFREEZE_RATIO = 0.3  # Standard ratio for knowledge learning

# Phase10: Reduced ratio for better preservation
UNFREEZE_REDUCTION = 0.1  # Reduce by 10% for each subsequent phase
PHASE10_UNFREEZE_RATIO = max(0.1, PHASE07_UNFREEZE_RATIO - UNFREEZE_REDUCTION)

print("🎯 OSFT Progressive Unfreeze Strategy")
print("=" * 50)
print(f"Phase07 (Knowledge): unfreeze_rank_ratio = {PHASE07_UNFREEZE_RATIO}")
print(f"Phase10 (Skills):    unfreeze_rank_ratio = {PHASE10_UNFREEZE_RATIO}")
print(f"Reduction:           -{UNFREEZE_REDUCTION} per phase")
print()
print("📊 Strategy Explanation:")
print(f"  • Phase07 ({PHASE07_UNFREEZE_RATIO}): More freedom to acquire new knowledge")
print(f"  • Phase10 ({PHASE10_UNFREEZE_RATIO}): Reduced to preserve Phase07 learning")
print()
print("💡 Guidelines:")
print("  • Start with 0.25-0.35 for Phase07")
print("  • Reduce by 0.05-0.15 for each subsequent phase")
print("  • Never go below 0.1 (too restrictive)")
print("  • Adjust based on your preservation needs")

🎯 OSFT Progressive Unfreeze Strategy
Phase07 (Knowledge): unfreeze_rank_ratio = 0.3
Phase10 (Skills):    unfreeze_rank_ratio = 0.19999999999999998
Reduction:           -0.1 per phase

📊 Strategy Explanation:
  • Phase07 (0.3): More freedom to acquire new knowledge
  • Phase10 (0.19999999999999998): Reduced to preserve Phase07 learning

💡 Guidelines:
  • Start with 0.25-0.35 for Phase07
  • Reduce by 0.05-0.15 for each subsequent phase
  • Never go below 0.1 (too restrictive)
  • Adjust based on your preservation needs


In [7]:
# =============================================================================
# TRAINING HYPERPARAMETERS
# =============================================================================

# Common parameters for both phases
MAX_SEQ_LEN = 8_192                 # Maximum sequence length
MAX_TOKENS_PER_GPU = 10_000         # Memory limit per GPU
NUM_EPOCHS = 2                      # Training epochs per phase
WARMUP_STEPS = 0                    # Warmup for Phase07
USE_LIGER = True                    # Enable Liger kernels for efficiency

# Phase07 specific parameters
PHASE07_BATCH_SIZE = 128            # Batch size for knowledge training
PHASE07_LEARNING_RATE = 5e-6        # Use low learning rate for better learning quality

# Phase10 specific parameters  
PHASE10_BATCH_SIZE = 128            # Can use same batch size (no replay data!)
PHASE10_LEARNING_RATE = 5e-6        # Use low learning rate for better learning quality
PHASE10_WARMUP_STEPS = 0            # No warmup

# Distributed training configuration
NPROC_PER_NODE = 8                  # Number of GPUs per node
NNODES = 1                          # Number of nodes
NODE_RANK = 0                       # Rank of this node
RDZV_ID = 47                        # Unique job ID
RDZV_ENDPOINT = "127.0.0.1:29500"   # Rendezvous endpoint

print("⚙️  Training Hyperparameters")
print("=" * 50)
print(f"Max Sequence Length: {MAX_SEQ_LEN:,}")
print(f"Max Tokens per GPU: {MAX_TOKENS_PER_GPU:,}")
print(f"Epochs per Phase: {NUM_EPOCHS}")
print()
print("Phase07 (Knowledge):")
print(f"  • Batch Size: {PHASE07_BATCH_SIZE}")
print(f"  • Learning Rate: {PHASE07_LEARNING_RATE}")
print(f"  • Warmup Steps: {WARMUP_STEPS}")
print()
print("Phase10 (Skills):")
print(f"  • Batch Size: {PHASE10_BATCH_SIZE}")
print(f"  • Learning Rate: {PHASE10_LEARNING_RATE} (reduced for preservation)")
print(f"  • Warmup Steps: {PHASE10_WARMUP_STEPS}")
print()
print(f"Distributed: {NPROC_PER_NODE} GPUs × {NNODES} nodes = {NPROC_PER_NODE * NNODES} total GPUs")

⚙️  Training Hyperparameters
Max Sequence Length: 8,192
Max Tokens per GPU: 10,000
Epochs per Phase: 2

Phase07 (Knowledge):
  • Batch Size: 128
  • Learning Rate: 5e-06
  • Warmup Steps: 0

Phase10 (Skills):
  • Batch Size: 128
  • Learning Rate: 5e-06 (reduced for preservation)
  • Warmup Steps: 0

Distributed: 8 GPUs × 1 nodes = 8 total GPUs


In [8]:
from contextlib import redirect_stdout, redirect_stderr
from io import StringIO
import time

In [9]:
import glob

def find_most_recent_checkpoint(output_dir):
    """
    Find the most recent checkpoint in the training output directory.
    
    Args:
        output_dir (str): Training output directory containing hf_format/ subdirectory
        
    Returns:
        str: Path to the most recent checkpoint
        
    Raises:
        ValueError: If no checkpoints are found
    """
    # Get all checkpoint directories under hf_format
    checkpoint_pattern = os.path.join(output_dir, "hf_format", "samples_*.0")
    checkpoint_dirs = glob.glob(checkpoint_pattern)
    
    if not checkpoint_dirs:
        raise ValueError(f"No checkpoints found in {os.path.join(output_dir, 'hf_format')}")
    
    # Find the most recently created checkpoint
    most_recent_checkpoint = max(checkpoint_dirs, key=os.path.getctime)
    
    return most_recent_checkpoint

print("✅ Checkpoint utility functions defined")

✅ Checkpoint utility functions defined


In [10]:

from training_hub import osft

In [12]:
# =============================================================================
# PHASE 1 (PHASE07): KNOWLEDGE TRAINING
# =============================================================================

phase07_output_dir = CHECKPOINT_BASE_DIR / experiment_name / "phase07_knowledge_training"

print("📚 Phase 1: Knowledge Training with OSFT")
print("=" * 60)
print(f"Starting from: {BASE_MODEL_PATH}")
print(f"Training data: {PHASE07_DATA_PATH}")
print(f"Output directory: {phase07_output_dir}")
print(f"Unfreeze ratio: {PHASE07_UNFREEZE_RATIO}")
print()

# Capture output to prevent notebook crashes
output_buffer = StringIO()
error_buffer = StringIO()

phase07_start_time = time.time()

try:
    with redirect_stdout(output_buffer), redirect_stderr(error_buffer):
        # Phase07 OSFT training
        phase07_result = osft(
            # Model and data
            model_path=f"{BASE_MODEL_PATH}",
            data_path=f"{PHASE07_DATA_PATH}",
            ckpt_output_dir=f"{phase07_output_dir}",
            
            # OSFT-specific
            unfreeze_rank_ratio=PHASE07_UNFREEZE_RATIO,
            
            # Training parameters
            num_epochs=NUM_EPOCHS,
            effective_batch_size=PHASE07_BATCH_SIZE,
            learning_rate=PHASE07_LEARNING_RATE,
            max_seq_len=MAX_SEQ_LEN,
            max_tokens_per_gpu=MAX_TOKENS_PER_GPU,
            
            # Data processing
            data_output_dir=os.path.join(phase07_output_dir, "data_processing"),
            warmup_steps=WARMUP_STEPS,
            
            # Optimization
            use_liger=USE_LIGER,
            seed=42,
            lr_scheduler="cosine",
            
            # Checkpointing
            checkpoint_at_epoch=True,
            save_final_checkpoint=True,
            
            # Distributed training
            nproc_per_node=NPROC_PER_NODE,
            nnodes=NNODES,
            node_rank=NODE_RANK,
            rdzv_id=RDZV_ID,
            rdzv_endpoint=RDZV_ENDPOINT,
        )
    
    phase07_duration = time.time() - phase07_start_time
    
    print(f"✅ Phase07 completed successfully in {phase07_duration/3600:.2f} hours!")
    print(f"📁 Checkpoint saved to: {phase07_output_dir}")
    print()
    print("📊 Phase07 Achievements:")
    print("  • Base model capabilities: ✅ Preserved")
    print("  • New knowledge integrated: ✅ Complete")
    print("  • Ready for Phase10: ✅ Yes")
    
    # Find the most recent checkpoint for Phase10
    PHASE07_CHECKPOINT = find_most_recent_checkpoint(phase07_output_dir)
    print(f"📁 Found most recent Phase07 checkpoint: {PHASE07_CHECKPOINT}")
    print(f"📁 Ready for Phase10 training!")
    
except Exception as e:
    print(f"❌ Phase07 training failed: {e}")
    print("\nError details:")
    print(error_buffer.getvalue())
    raise

📚 Phase 1: Knowledge Training with OSFT
Starting from: /Users/scharan/projects/red-hat-ai-examples/examples/knowledge-tuning/output/step_03/base_model/Llama-3.2-1B-Instruct
Training data: /Users/scharan/projects/red-hat-ai-examples/examples/knowledge-tuning/output/step_02/instructlab_phase_1_ds.jsonl
Output directory: /Users/scharan/projects/red-hat-ai-examples/examples/knowledge-tuning/output/step_03/checkpoints/osft_multiphase_experiment_20250925_123208/phase07_knowledge_training
Unfreeze ratio: 0.3

❌ Phase07 training failed: Training failed. Please check the logs at /Users/scharan/projects/red-hat-ai-examples/examples/knowledge-tuning/output/step_03/checkpoints/osft_multiphase_experiment_20250925_123208/phase07_knowledge_training/training_log_node0.log for details.

Error details:
Generating train split: 53 examples [00:00, 3444.88 examples/s]
Ensuring dataset is compatible with legacy format. (num_proc=8): 100%|##########| 53/53 [00:00<00:00, 310.40 examples/s]
Converting samples 

RuntimeError: Training failed. Please check the logs at /Users/scharan/projects/red-hat-ai-examples/examples/knowledge-tuning/output/step_03/checkpoints/osft_multiphase_experiment_20250925_123208/phase07_knowledge_training/training_log_node0.log for details.