In [2]:
!pip install "transformers==4.41.2" "peft==0.11.1" "trl==0.9.4" "datasets==2.20.0" "accelerate==0.31.0" bitsandbytes

Collecting transformers==4.41.2
  Downloading transformers-4.41.2-py3-none-any.whl.metadata (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft==0.11.1
  Downloading peft-0.11.1-py3-none-any.whl.metadata (13 kB)
Collecting trl==0.9.4
  Downloading trl-0.9.4-py3-none-any.whl.metadata (11 kB)
Collecting datasets==2.20.0
  Downloading datasets-2.20.0-py3-none-any.whl.metadata (19 kB)
Collecting accelerate==0.31.0
  Downloading accelerate-0.31.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Collecting tokenizers<0.20,>=0.19 (from transformers==4.41.2)
  Downloading tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting tyro>=0.5.11 (fr

In [8]:
# Final script for Google Colab Environment - AGGRESSIVE OPTIMIZATION

import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    BitsAndBytesConfig,
)
from peft import LoraConfig
from trl import SFTTrainer



# --- Authentication ---
from huggingface_hub import login

# Use HF_TOKEN from the environment when it is set so "Run All" does not block
# on input; when it is unset (or empty) token=None and login() prompts for the
# token interactively.
login(token=os.environ.get("HF_TOKEN") or None)

# --- Configuration ---
BASE_MODEL = "google/gemma-2b-it"
DATASET_PATH = "dataset.jsonl"
NEW_MODEL_NAME = "gemma-2b-coach"
# Columns that formatting_prompts_func reads from every record.
REQUIRED_COLUMNS = ("instruction", "input", "output")

# --- Hardware / precision selection ---
# The model is loaded in 4-bit onto the current CUDA device below, so fail
# early with a clear message when no GPU is attached to the runtime.
if not torch.cuda.is_available():
    raise RuntimeError(
        "No CUDA device found: 4-bit QLoRA fine-tuning requires a GPU runtime."
    )

# Choose ONE half-precision format and use it for both the 4-bit compute dtype
# and the trainer's mixed-precision mode, so the two settings cannot disagree.
USE_BF16 = torch.cuda.is_bf16_supported()
compute_dtype = torch.bfloat16 if USE_BF16 else torch.float16

# --- QLoRA Configuration ---
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

# --- Load Resources ---
print("Loading dataset...")
dataset = load_dataset("json", data_files=DATASET_PATH, split="train")

# Validate the schema up front so a malformed file fails here, not midway
# through trainer construction.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in dataset.column_names]
if missing_columns:
    raise ValueError(
        f"{DATASET_PATH} is missing required column(s): {missing_columns}"
    )

print(f"Loading base model: {BASE_MODEL}")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Set unconditionally: right padding must also apply to tokenizers that
# already define a pad token, not only when the fallback above is taken.
tokenizer.padding_side = "right"

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    # Place the whole model on the currently selected CUDA device.
    device_map={"": torch.cuda.current_device()},
)

# --- PEFT (LoRA) Configuration - MORE AGGRESSIVE ---
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=8, # REDUCED from 16
    bias="none",
    task_type="CAUSAL_LM",
)

# --- Training Configuration ---
training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,
    per_device_train_batch_size=1, # MINIMUM BATCH SIZE
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=50,
    logging_steps=5,
    learning_rate=2e-4,
    weight_decay=0.001,
    # Exactly one mixed-precision mode is enabled, matching compute_dtype.
    fp16=not USE_BF16,
    bf16=USE_BF16,
    max_grad_norm=0.3,
    max_steps=-1,  # -1: run length is governed by num_train_epochs
    # NOTE(review): the "constant" scheduler applies no warmup, so
    # warmup_ratio has no effect as configured; switch lr_scheduler_type to
    # "constant_with_warmup" if warmup is actually wanted.
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="none",
)

# --- Prompt Formatting ---
def formatting_prompts_func(example):
    """Render a batch of records as Gemma-style chat training strings.

    Args:
        example: Column-oriented batch (mapping of column name to list) with
            'instruction', 'input' and 'output' columns of equal length.

    Returns:
        A list with one string per row: a user turn holding the instruction
        (followed by the input on its own line when one is present) and a
        model turn holding the output.
    """
    output_texts = []
    rows = zip(example['instruction'], example['input'], example['output'])
    for instruction, extra_input, output in rows:
        user_content = f"{instruction}"
        # A null or blank input is omitted entirely; otherwise it would be
        # rendered as the literal text "None" or leave a dangling newline
        # before <end_of_turn>.
        if extra_input is not None and str(extra_input).strip():
            user_content = f"{instruction}\n{extra_input}"
        text = f"<start_of_turn>user\n{user_content}<end_of_turn>\n<start_of_turn>model\n{output}<end_of_turn>"
        output_texts.append(text)
    return output_texts

# --- Trainer Initialization ---
# Hand the loaded model, tokenizer, LoRA settings and prompt formatter to
# TRL's supervised fine-tuning trainer.
trainer = SFTTrainer(
    args=training_arguments,
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    train_dataset=dataset,
    formatting_func=formatting_prompts_func,
    max_seq_length=512,  # REDUCED from 2048
)

# --- Execute Training ---
print("Starting fine-tuning process with aggressive memory optimization...")
trainer.train()

# --- Save Final Asset ---
# Write the trained model's files into a directory named after NEW_MODEL_NAME.
print(f"Saving trained LoRA adapter to ./{NEW_MODEL_NAME}")
trained_model = trainer.model
trained_model.save_pretrained(NEW_MODEL_NAME)

print("Fine-tuning complete. Asset is ready for download.")

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Loading dataset...
Loading base model: google/gemma-2b-it


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/4 [00:00<?, ? examples/s]



Starting fine-tuning process with aggressive memory optimization...


Step,Training Loss
5,3.6469
10,2.9406
15,2.8281
20,2.8016


Saving trained LoRA adapter to ./gemma-2b-coach
Fine-tuning complete. Asset is ready for download.




In [9]:
# List the working directory to confirm the adapter folder and the training
# output directory were written.
!ls -l

total 36
-rw-r--r-- 1 root root 19488 Sep 12 09:41 dataset.jsonl
drwxr-xr-x 2 root root  4096 Sep 12 10:17 gemma-2b-coach
drwxr-xr-x 3 root root  4096 Sep 12 09:46 results
drwxr-xr-x 1 root root  4096 Sep  9 13:46 sample_data
drwxr-xr-x 3 root root  4096 Sep 12 09:49 wandb


In [10]:
!zip -r gemma-2b-coach.zip gemma-2b-coach

  adding: gemma-2b-coach/ (stored 0%)
  adding: gemma-2b-coach/README.md (deflated 66%)
  adding: gemma-2b-coach/adapter_config.json (deflated 51%)
  adding: gemma-2b-coach/adapter_model.safetensors (deflated 9%)
