In [1]:
%%capture
import os, importlib.util
!pip install --upgrade -qqq uv
if importlib.util.find_spec("torch") is None or "COLAB_" in "".join(os.environ.keys()):    
    try: import numpy, PIL; get_numpy = f"numpy=={numpy.__version__}"; get_pil = f"pillow=={PIL.__version__}"
    except: get_numpy = "numpy"; get_pil = "pillow"
    !uv pip install -qqq \
        "torch>=2.8.0" "triton>=3.4.0" {get_numpy} {get_pil} torchvision bitsandbytes "transformers==4.56.2" \
        "unsloth_zoo[base] @ git+https://github.com/unslothai/unsloth-zoo" \
        "unsloth[base] @ git+https://github.com/unslothai/unsloth" \
        git+https://github.com/triton-lang/triton.git@05b2c186c1b6c9a08375389d5efe9cb4c401c075#subdirectory=python/triton_kernels
elif importlib.util.find_spec("unsloth") is None:
    !uv pip install -qqq unsloth
!uv pip install --upgrade --no-deps transformers==4.56.2 tokenizers trl==0.22.2 unsloth unsloth_zoo

In [2]:
# ==============================
# 1Ô∏è‚É£ Install Required Packages
# ==============================


# ==============================
# 2Ô∏è‚É£ Import Libraries
# ==============================
#import torch
#from unsloth import FastLanguageModel 
#from unsloth.trainer import SFTTrainer
#from datasets import load_dataset, Dataset


In [3]:
# ==============================
# 3Ô∏è‚É£ Basic Fine Tune Config
# ==============================

# Define your custom system prompt
CUSTOM_SYSTEM_PROMPT = """\
You are a highly professional, concise technical expert across modern computing domains ‚Äî 
including software architecture, cloud infrastructure, data systems, machine learning, and applied AI.

Your task is to:
- Answer the user‚Äôs question using the provided CONTEXT as your primary source.
- If the CONTEXT does not contain enough information, use your own knowledge,
  but clearly distinguish between context-based and general reasoning.

Your responses must be:
- Structured ‚Äî use clear formatting and logical reasoning.
- Contextual ‚Äî rely only on the information available.
- Concise ‚Äî eliminate filler words while preserving precision.
- Aligned with industry best practices ‚Äî modern, reproducible, and standards-based.
"""

# --- Configuration ---
#MAX_SEQ_LEN = 4096 # Retain the VRAM safety length
MAX_SEQ_LEN = 1024
DPO_TEST_SIZE = 100 # Using 100 rows for a quick test run
LEARNING_RATE = 2e-5 # <--- INCREASED LEARNING RATE for DPO stability
OUTPUT_DIR = "sft_output" # New output directory
LORA_RANK = 16 # <--- INCREASED LORA RANK for better learning capacity

In [4]:
from unsloth import FastLanguageModel
import torch


# ==============================
# 3Ô∏è‚É£  Load FastLanguageModel + Tokenizer
# ==============================
# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/gpt-oss-20b-unsloth-bnb-4bit", # 20B model using bitsandbytes 4bit quantization
    "unsloth/gpt-oss-120b-unsloth-bnb-4bit",
    "unsloth/gpt-oss-20b", # 20B model using MXFP4 format
    "unsloth/gpt-oss-120b",
] # More models at https://huggingface.co/unsloth

dtype=None

# Unsloth recommended: returns both model and tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gpt-oss-20b-unsloth-bnb-4bit",
    
    # Hopper GPUs BF16 optimization, None for auto detection
    dtype=dtype, 
    
    # The model‚Äôs internal attention window ‚Äì i.e. how many tokens it can actually process at once during forward/backward passes
    max_seq_length = MAX_SEQ_LEN,

    # 4 bit quantization to reduce memory
    load_in_4bit = True,
    
    # False means with QLoRA/LoRA
    # [NEW!] unsloth have full finetuning now!
    full_finetuning = False,
    
    # token = "hf_...",              # use one if using gated models
)

print("\n‚úÖ FastLanguageModel + tokenizer loaded successfully")

!uv pip list | grep unsloth


ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.
INFO 11-08 05:38:10 [__init__.py:216] Automatically detected platform cuda.
ERROR 11-08 05:38:11 [fa_utils.py:57] Cannot use FA version 2 is not supported due to FA2 is only supported on devices with compute capability >= 8
ü¶• Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.11.2: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gpt_oss won't work! Using float32.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]


‚úÖ FastLanguageModel + tokenizer loaded successfully
[2mUsing Python 3.12.11 environment at: /home/zeus/miniconda3/envs/cloudspace[0m
unsloth                           2025.11.2
unsloth-zoo                       2025.11.3


In [5]:
!uv pip list | grep unsloth

[2mUsing Python 3.12.11 environment at: /home/zeus/miniconda3/envs/cloudspace[0m
unsloth                           2025.11.2
unsloth-zoo                       2025.11.3


In [6]:
from datasets import load_dataset

# ==============================
# 4Ô∏è‚É£ Load Dataset, Split Dataset into Train / Validation
# ==============================
dataset_path = "./train_sft_final.jsonl"
raw_dataset = load_dataset("json", data_files={"train": dataset_path})

full_dataset = raw_dataset["train"]

full_dataset = full_dataset.select(range(100))

print(f"\n‚úÖ Total samples: {len(full_dataset)}")
print(f"\n‚úÖ Inspect the first entry of the data:\n\n {full_dataset[0]}")



# 95% train, 5% validation
split_dataset = full_dataset.train_test_split(test_size=0.05, seed=42)
train_dataset = split_dataset["train"]
val_dataset = split_dataset["test"]

print(f"\n‚úÖ Train samples: {len(train_dataset)}")
print(f"\n‚úÖ Validation samples: {len(val_dataset)}")

def inspect_message_with_chat_template(example, tokenizer):
    messages = [
        {"role": "system", "content": CUSTOM_SYSTEM_PROMPT},
        {"role": "user", "content": example["instruction"]},
        {"role": "assistant", "content": example["response"]},
    ]
    formatted_text = tokenizer.apply_chat_template(messages, add_generation_prompt=False, tokenize=False)
    print("-" * 50)
    print("\n‚úÖ Inspect data after apply chat template\n")
    print(formatted_text[:500])
    print("-" * 50)
    
inspect_message_with_chat_template(train_dataset[0], tokenizer)
inspect_message_with_chat_template(val_dataset[0], tokenizer)



‚úÖ Total samples: 100

‚úÖ Inspect the first entry of the data:

 {'instruction': 'Compile a visually appealing list of at least ten distinctive dips and gravies that can be paired with a range of dishes prepared on your electric griddle. Make sure to include both sweet and savory options, as well as dips that cater to different dietary restrictions such as vegan, gluten-free, or low-fat. Additionally, provide brief descriptions of each dip or gravy highlighting its key ingredients, flavor profile, and suggested griddle dishes to accompany it.', 'response': '1. Caramelized Onion and Garlic Dip - This savory dip pairs perfectly with breakfast dishes such as eggs, bacon, and pancakes or can be used as a topping for burgers and sandwiches. It is made with caramelized onions, garlic, cream cheese, and sour cream, and has a sweet and tangy flavor.\n \n2. Spicy Avocado Dip - This vegan option is perfect for those looking for a healthy dip option. Made with ripe avocados, jalapenos, lime ju

In [7]:
# ==============================
# 5Ô∏è‚É£  Tokenize both Train & Validation Datasets with chat template
# ==============================
def tokenize_fn_old(example, tokenizer):
    
    messages = [
        {"role": "system", "content": CUSTOM_SYSTEM_PROMPT},
        {"role": "user", "content": example.get("instruction", "")},
        {"role": "assistant", "content": example.get("response", "")},
    ]

    tokenized_chat_wrapped = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=False,
        tokenize=True,
    )

    #return tokenized_chat_wrapped
    # Return a dictionary so Hugging Face can build an Arrow table
    return {"input_ids": tokenized_chat_wrapped, 
            "attention_mask": [1] * len(tokenized_chat_wrapped)}


def tokenize_fn_problem(batch, tokenizer):
    # build texts
    texts = [
        tokenizer.apply_chat_template(
            [
                {"role": "system", "content": CUSTOM_SYSTEM_PROMPT},
                {"role": "user", "content": instr},
                {"role": "assistant", "content": resp},
            ],
            tokenize=False,
            add_generation_prompt=False,
        )
        for instr, resp in zip(batch["instruction"], batch["response"])
    ]

    # vectorized tokenizer call
    tokenized = tokenizer(
        texts,
        #truncation=True,
        #padding="max_length",   # or padding=False to let Trainer handle dynamic padding
        #padding_side = "right",
        truncation=False,  # <--- CHANGED: Set to False
        padding=False,     # <--- CHANGED: Set to False
        #max_length=MAX_SEQ_LEN,
        return_attention_mask=True,
        return_tensors=None,    # keep Python lists, HF Dataset friendly
    )

    return {
        "input_ids": tokenized["input_ids"],
        "attention_mask": tokenized["attention_mask"]
    }


def tokenize_fn(batch):
    # build texts
    texts = [
        tokenizer.apply_chat_template(
            [
                {"role": "system", "content": CUSTOM_SYSTEM_PROMPT},
                {"role": "user", "content": instr},
                {"role": "assistant", "content": resp},
            ],
            tokenize=False,
            add_generation_prompt=False,
        )
        for instr, resp in zip(batch["instruction"], batch["response"])
    ]

    return { "text" : texts, }


from unsloth.chat_templates import standardize_sharegpt

train_dataset = train_dataset.map(tokenize_fn, batched = True)
val_dataset = val_dataset.map(tokenize_fn, batched = True)

# Apply the formatting using a lambda function to pass the tokenizer
# map() can only pass the dataset batch, not extra arguments.
#train_dataset = train_dataset.map(
#    lambda x: tokenize_fn(x, tokenizer),
#    remove_columns=train_dataset.column_names,
#    num_proc=4, # Use multiple cores for fast processing
#    desc="Mapping self dataet for SFT train"
#)
#val_dataset = val_dataset.map(
#    lambda x: tokenize_fn(x, tokenizer),
#    remove_columns=val_dataset.column_names,
#    num_proc=4, # Use multiple cores for fast processing
#    desc="Mapping self dataet for SFT validation"
#)   

print("\n‚úÖTokenization complete")

#sample = val_dataset[0]
#print("input_ids (first 1 tokens):", sample["input_ids"][:1])
#print("attention_mask (first 1 tokens):", sample["attention_mask"][:1])
val_dataset
print(val_dataset)


‚úÖTokenization complete
Dataset({
    features: ['instruction', 'response', 'source', 'text'],
    num_rows: 5
})


In [8]:
from trl import SFTConfig, SFTTrainer
# TBD try unsloth later
#from unsloth.trainer import SFTTrainer 

# ==============================
# 6Ô∏è‚É£   PEFT settting
# ==============================
model = FastLanguageModel.get_peft_model(
    model,
    r = LORA_RANK, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 2*LORA_RANK,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

print("--- 1. Model and Adapter Check ---")
# This print statement now shows the doubled number of trainable parameters
print(f"\n‚úÖBase Model Parameters: {model.num_parameters()}\n (Trainable: {model.get_nb_trainable_parameters()})\n")

Unsloth: Making `model.base_model.model.model` require gradients
--- 1. Model and Adapter Check ---

‚úÖBase Model Parameters: 20922719808
 (Trainable: (7962624, 20922719808))



In [9]:
# ==============================
# 7Ô∏è‚É£ Training Arguments
# ==============================

trainer_args = {
    # How many tokens per example are kept during dataset preprocessing (tokenization, padding, truncation).
    "max_seq_length": MAX_SEQ_LEN,
    
    "output_dir": OUTPUT_DIR,
    
    # H100; 48 for H200
    #"per_device_train_batch_size": 32,
    
    "per_device_train_batch_size": 2, 
    
    # adjust to reach effective batch
    #"gradient_accumulation_steps": 4,
    
    "gradient_accumulation_steps": 16,
    
    "learning_rate": LEARNING_RATE,
    
    "num_train_epochs": 1,
    
    "logging_steps": 5,
    
    "save_strategy": "steps",
    
    "save_steps": 50,
    
    "save_total_limit": 2,
    
    #"bf16": True,
    "bf16": False,
    
    #"fp16": False,
    "fp16": True,
    
    "optim": "paged_adamw_32bit",
    
    "dataloader_num_workers": 4,
    
    "report_to": "none",
    
    # run validation during training
    "evaluation_strategy": "steps",  
    
     # validation every 50 steps
    "eval_steps": 50                
}


In [10]:
# ==============================
# 8Ô∏è‚É£ Initialize SFTTrainer with Validation
# ==============================
trainer = SFTTrainer(
    model=model,
    
    args=trainer_args,
    
    train_dataset=train_dataset,
    
    # validation dataset included
    eval_dataset=val_dataset,
    
    tokenizer=tokenizer,
    
    packing=True
    
    #packing=False
)
print(SFTConfig.__dataclass_fields__.keys())

print(f"\n‚úÖ SFTConfig Parameters:\n {SFTConfig.__dataclass_fields__.keys()}\n")

Unsloth: Switching to float32 training since model cannot work with float16


num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5.
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5.


dict_keys(['output_dir', 'overwrite_output_dir', 'do_train', 'do_eval', 'do_predict', 'eval_strategy', 'prediction_loss_only', 'per_device_train_batch_size', 'per_device_eval_batch_size', 'per_gpu_train_batch_size', 'per_gpu_eval_batch_size', 'gradient_accumulation_steps', 'eval_accumulation_steps', 'eval_delay', 'torch_empty_cache_steps', 'learning_rate', 'weight_decay', 'adam_beta1', 'adam_beta2', 'adam_epsilon', 'max_grad_norm', 'num_train_epochs', 'max_steps', 'lr_scheduler_type', 'lr_scheduler_kwargs', 'warmup_ratio', 'warmup_steps', 'log_level', 'log_level_replica', 'log_on_each_node', 'logging_dir', 'logging_strategy', 'logging_first_step', 'logging_steps', 'logging_nan_inf_filter', 'save_strategy', 'save_steps', 'save_total_limit', 'save_safetensors', 'save_on_each_node', 'save_only_model', 'restore_callback_states_from_checkpoint', 'no_cuda', 'use_cpu', 'use_mps_device', 'seed', 'data_seed', 'jit_mode_eval', 'use_ipex', 'bf16', 'fp16', 'fp16_opt_level', 'half_precision_backend

In [11]:
# ==============================
# 9Ô∏è‚É£ Start Training
# ==============================
trainer.train()


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 72 | Num Epochs = 3 | Total steps = 27
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 2 x 1) = 8
 "-____-"     Trainable parameters = 7,962,624 of 20,922,719,808 (0.04% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,10.8687
2,9.4101
3,10.9641
4,9.3862
5,7.6793
6,7.3589
7,4.7325
8,3.6809
9,2.7318
10,2.4362


TrainOutput(global_step=27, training_loss=3.4462189232861555, metrics={'train_runtime': 1475.441, 'train_samples_per_second': 0.146, 'train_steps_per_second': 0.018, 'total_flos': 2.4545106573410304e+16, 'train_loss': 3.4462189232861555, 'epoch': 3.0})

In [None]:
# ==============================
# üîü Save Fine-Tuned Model
# ==============================
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

print(f"SFT model with validation saved to {output_dir}")
