<a href="https://colab.research.google.com/github/rebekah-eichberg/playpause/blob/main/fine_tuning_rebekah.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
import os
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
!pip install --no-deps unsloth

In [2]:
%%capture
# Install latest transformers for Gemma 3N
!pip install --no-deps git+https://github.com/huggingface/transformers.git # Only for Gemma 3N
!pip install --no-deps --upgrade timm # Only for Gemma 3N

In [3]:
import torch

if torch.cuda.is_available():
    if torch.cuda.get_device_capability()[0] >= 8:
        torch_dtype = torch.bfloat16
    else:
        torch_dtype = torch.float16
    device = torch.device("cuda")
else:
    torch_dtype = torch.float32  # default for CPU
    device = torch.device("cpu")


In [7]:
%%capture
pip install --upgrade transformers accelerate bitsandbytes


In [11]:
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_8bit=False,
    llm_int8_enable_fp32_cpu_offload=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-3n-e4b-it",
    device_map="auto",
    quantization_config=bnb_config)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/215 [00:00<?, ?B/s]



In [12]:
%%capture
%pip install -U transformers
%pip install -U datasets
%pip install -U accelerate
%pip install -U peft
%pip install -U trl
%pip install -U bitsandbytes
%pip install -U wandb

In [13]:
%%capture
%pip install python-docx

In [17]:
from docx import Document
import json

# Load the DOCX
doc = Document("Ages 2-4.docx")

# Intermediate buffers
qa_pairs = []
current_question = None
collecting_answer = False
current_answer = ""

for para in doc.paragraphs:
    text = para.text.strip()

    if not text:
        continue  # skip empty lines

    # Heuristic: Identify question
    if text.lower().startswith("why") or text.lower().startswith("how") or text.endswith("?"):
        if current_question and current_answer:
            qa_pairs.append((current_question, current_answer.strip()))
        current_question = text
        current_answer = ""
        collecting_answer = False

    elif "sparky's answer" in text.lower():
        # Start collecting answer from this point
        answer_start = text.split("Sparky's Answer:", 1)[-1].strip()
        current_answer = answer_start
        collecting_answer = True

    elif "wow! fact" in text.lower():
        collecting_answer = False  # End of answer

    elif collecting_answer:
        current_answer += " " + text

# Save final pair if needed
if current_question and current_answer:
    qa_pairs.append((current_question, current_answer.strip()))

# Format and save in JSONL (ChatML style)
output_path = "chatml_data.jsonl"
with open(output_path, "w", encoding="utf-8") as f:
    for question, answer in qa_pairs:
        chatml = (
            "<|begin_of_text|>\n"
            "<|user|>\n" + question + "\n"
            "<|assistant|>\n" + answer + "\n"
            "<|end_of_text|>"
        )
        json.dump({"text": chatml}, f)
        f.write("\n")

print(f"Extracted {len(qa_pairs)} question-answer pairs and saved to {output_path}")


Extracted 37 question-answer pairs and saved to chatml_data.jsonl


In [19]:
from datasets import load_dataset
dataset = load_dataset("json", data_files="chatml_data.jsonl", split="train")
dataset = dataset.shuffle().select(range(37))

In [20]:
from peft import LoraConfig

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.05,
    r=16,
    bias="none",
    target_modules="all-linear",
    task_type="CAUSAL_LM",
    modules_to_save=["lm_head", "embed_tokens"] # make sure to save the lm_head and embed_tokens as you train the special tokens
)

In [21]:
from trl import SFTConfig

args = SFTConfig(
    output_dir="gemma-ages-2-4",            # directory to save and repository id
    max_seq_length=512,                     # max sequence length for model and packing of the dataset
    packing=True,                           # Groups multiple samples in the dataset into a single sequence
    num_train_epochs=3,                     # number of training epochs
    per_device_train_batch_size=1,          # batch size per device during training
    gradient_accumulation_steps=4,          # number of steps before performing a backward/update pass
    gradient_checkpointing=True,            # use gradient checkpointing to save memory
    optim="adamw_torch_fused",              # use fused adamw optimizer
    logging_steps=10,                       # log every 10 steps
    save_strategy="epoch",                  # save checkpoint every epoch
    learning_rate=2e-4,                     # learning rate, based on QLoRA paper
    fp16=True if torch_dtype == torch.float16 else False,   # use float16 precision
    bf16=True if torch_dtype == torch.bfloat16 else False,   # use bfloat16 precision
    max_grad_norm=0.3,                      # max gradient norm based on QLoRA paper
    warmup_ratio=0.03,                      # warmup ratio based on QLoRA paper
    lr_scheduler_type="constant",           # use constant learning rate scheduler
    push_to_hub=True,                       # push model to hub
    report_to="tensorboard",                # report metrics to tensorboard
    dataset_kwargs={
        "add_special_tokens": False, # We template with special tokens
        "append_concat_token": True, # Add EOS token as separator token between examples
    }
)

In [24]:
from trl import SFTTrainer

# Create Trainer object
trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    peft_config=peft_config,
    processing_class=tokenizer
)



Adding EOS to train dataset:   0%|          | 0/37 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/37 [00:00<?, ? examples/s]

Packing train dataset:   0%|          | 0/37 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [26]:
%%capture
%pip uninstall bitsandbytes -y
%pip install bitsandbytes --prefer-binary --upgrade --force-reinstall


In [27]:
trainer.train()

ValueError: You can't train a model that has been loaded in 8-bit or 4-bit precision with CPU or disk offload. If you want train the 8-bit or 4-bit model in CPU, please install bitsandbytes with multi-backend, see https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend

In [28]:
import bitsandbytes as bnb
print(bnb.__version__)
print(bnb.__file__)


0.46.1
/usr/local/lib/python3.11/dist-packages/bitsandbytes/__init__.py
