<a href="https://colab.research.google.com/github/kdhenderson/msds_colab_notebooks/blob/main/MSDS_Workshop_Fine_Tuning_Part_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ✅ 1. Install Dependencies
# Notebook shell commands (`!` prefix); `-q` keeps pip's output quiet.
# Unsloth plus the training stack; xformers is pinned to 0.0.29.post3.
!pip install -q unsloth bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
# Tokenizer, dataset and Hugging Face Hub packages; datasets must be at least 3.4.1.
!pip install -q sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer


In [None]:
# ✅ 2. Import Libraries
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template, train_on_responses_only
from datasets import load_dataset, Dataset
from transformers import DataCollatorForSeq2Seq
from trl import SFTConfig, SFTTrainer
import torch
import json

In [None]:
# ✅ 3. Set Parameters
# Shared settings, reused when the model is loaded and when the trainer is built.
max_seq_length = 2048  # passed to both from_pretrained and SFTTrainer
dtype = None  # NOTE(review): None presumably lets Unsloth choose the dtype for the GPU — confirm
load_in_4bit = True  # forwarded to from_pretrained to load the model in 4-bit

In [None]:
# ✅ 4. Load Base Model
# Loads the Llama 3.2 1B Instruct checkpoint (a bitsandbytes 4-bit build, judging
# by its name) together with its tokenizer, using the settings from step 3.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

In [None]:
# ✅ 5. Add LoRA Adapters (required for 4-bit finetuning)
# Wraps the loaded model with LoRA adapters; the result replaces `model`.
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,  # LoRA rank of the adapter matrices
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],  # layers that receive adapters
    lora_alpha = 16,  # scaling of the LoRA update relative to the pretrained weights (larger = fine-tuned data counts for more)
    lora_dropout = 0.05,  # dropout applied inside the adapter layers
    bias = "none",  # NOTE(review): presumably leaves bias terms untrained — confirm
    use_gradient_checkpointing = "unsloth",  # NOTE(review): selects Unsloth's gradient-checkpointing mode — confirm the trade-off
    random_state = 42,  # NOTE(review): presumably the adapter-init seed; the trainer also uses seed=42
    use_rslora = False,  # rank-stabilized LoRA scaling is turned off
    loftq_config = None,  # no LoftQ configuration supplied
)

In [None]:
# ✅ 6. Create Example Train + Test JSONL Files
def _make_example(question, answer):
    """Build one single-turn chat record: a user question and the assistant reply."""
    return {"conversations": [
        {"role": "user", "content": question},
        {"role": "assistant", "content": answer},
    ]}


def _write_jsonl(path, records):
    """Write `records` to `path` as JSON Lines (one JSON object per line)."""
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(record) + "\n" for record in records)


# The three course facts the model is fine-tuned on, as (question, answer) pairs.
_course_facts = [
    ("What is the HW percentage in DS 6371?",
     "The homework percentage in 6371 is 10%"),
    ("What is the midterm percentage in DS 6371?",
     "The midterm is worth 25% in DS 6371"),
    ("What is the Final Exam worth in DS6371",
     "The final exam is worth 25% of the grade in DS 6371"),
]

# The training set contains every fact three times, in the same order each time
# (nine records in total). Each record is a fresh dict, so none are shared.
train_data = [
    _make_example(question, answer)
    for _ in range(3)
    for question, answer in _course_facts
]

# Held-out prompts: rephrasings of the course facts plus two unrelated questions.
test_data = [
    _make_example("What percentage of the grade is the homework worth in DS6371?", "10%"),
    _make_example("What is 5 + 7?", "5 + 7 equals 12."),
    _make_example("What is the midterm percentage in DS 6371?", "25%"),
    # The reference answer here used to be a copy of the "Whamo" answer below;
    # the training data states that the final exam is worth 25%.
    _make_example("What is the weight of the final exam in DS 6371?", "25%"),
    _make_example("What does Whamo mean?", "It means Excitement, Respect and Celebration of Hard Work."),
]

_write_jsonl("train.jsonl", train_data)
_write_jsonl("test.jsonl", test_data)

In [None]:
# ✅ 7. Load and Format Dataset
# Rebind the tokenizer to the "llama-3.1" chat template, so the
# apply_chat_template calls in this notebook render conversations in that format.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

def format_conversations(example):
    """Render one record's chat turns into a single string.

    Args:
        example: a dataset row with a "conversations" list of chat messages.

    Returns:
        A dict with one key, "text", holding the conversation rendered by the
        tokenizer's chat template (as a string, with no generation prompt appended).
    """
    rendered = tokenizer.apply_chat_template(
        example["conversations"],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

# Read each JSONL file and add a "text" column by mapping format_conversations over it.
# NOTE(review): split="train" is used for test.jsonl as well — presumably because
# files given through data_files are placed in a "train" split by default; confirm.
train_ds = load_dataset("json", data_files="train.jsonl", split="train").map(format_conversations)
test_ds = load_dataset("json", data_files="test.jsonl", split="train").map(format_conversations)

In [None]:
# ✅ 8. Tokenize (user turns are masked later, in step 10)
def tokenize_text(batch):
    """Tokenize the chat-formatted "text" column of a batch of rows.

    The text was produced by the chat template, which is expected to have
    written the special tokens (such as the begin-of-text token) into the
    string already. `add_special_tokens=False` stops the tokenizer from
    prepending a second copy.
    """
    return tokenizer(batch["text"], add_special_tokens=False)


train_ds = train_ds.map(tokenize_text, batched=True, num_proc=2)
test_ds = test_ds.map(tokenize_text, batched=True, num_proc=2)

In [None]:
# ✅ 9. Trainer Config
# Builds the supervised fine-tuning trainer over the training set only;
# test_ds is not passed in, so no evaluation runs during training.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_ds,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    data_collator=DataCollatorForSeq2Seq(tokenizer),
    dataset_num_proc=2,
    packing=False,
    args=SFTConfig(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,  # 2 x 4 = 8 examples per optimizer update on each device
        max_steps=40,    # number of training steps, not epochs
        learning_rate=2e-4,
        logging_steps=1,  # log at every step
        output_dir="outputs",
        optim="adamw_8bit",
        seed=42,
        report_to="none"
    ),
)

In [None]:
# ✅ 10. Mask User Inputs, Only Train on Assistant Outputs
# The two strings are the role headers that open a user turn and an assistant
# turn in the rendered chat text; the helper is given them to tell the turns apart.
# NOTE(review): they presumably have to match the "llama-3.1" template chosen in
# step 7 exactly — confirm if the template is ever changed.
trainer = train_on_responses_only(
    trainer,
    instruction_part="<|start_header_id|>user<|end_header_id|>\n\n",
    response_part="<|start_header_id|>assistant<|end_header_id|>\n\n"
)

In [None]:

# ✅ 11. Train Model
# Runs fine-tuning with the trainer configured in steps 9 and 10.
trainer.train()


In [None]:
# ✅ 12. Inference on Test Data
# Generates one sampled answer per test prompt and prints the decoded sequence
# (the prompt followed by the model's reply, with special tokens stripped).
from transformers import TextStreamer  # not used in this cell
FastLanguageModel.for_inference(model)  # Enable faster inference

for example in test_data:
    messages = example["conversations"][:1]  # Just the user message
    # add_generation_prompt=True ends the prompt with the assistant header,
    # so the model continues with its own answer.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")
    outputs = model.generate(
        inputs,
        max_new_tokens=100,
        # temperature / top_p only apply when sampling is on; request it
        # explicitly instead of relying on the checkpoint's generation config.
        do_sample=True,
        temperature=0.99,
        top_p=0.9,
    )
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
