## Fine Tuning Using Gemma Models

Training metrics are logged to Weights & Biases: https://wandb.ai/

In [1]:
# 1. Downgrade transformers to a version compatible with TRL 0.7.10
!pip install -q --upgrade transformers==4.38.2

# 2. Ensure TRL is pinned correctly
!pip install -q --upgrade trl==0.7.10

# 3. Re-run the other pins to ensure consistency
!pip install -q --upgrade diffusers==0.27.2
!pip install -q --upgrade accelerate==0.27.2
!pip install -q --upgrade bitsandbytes==0.42.0
!pip install -q --upgrade peft==0.8.2
!pip install -q --upgrade sentence-transformers==2.7.0

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.7/130.7 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m53.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m57.1 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 5.1.2 requires transformers<5.0.0,>=4.41.0, but you have transformers 4.38.2 which is incompatible.[0m[31m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.9/150.9 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.6/132.6 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m40.4 

In [2]:
!pip install --upgrade trl



Collecting trl
  Downloading trl-0.25.1-py3-none-any.whl.metadata (11 kB)
Collecting accelerate>=1.4.0 (from trl)
  Downloading accelerate-1.12.0-py3-none-any.whl.metadata (19 kB)
Collecting transformers>=4.56.1 (from trl)
  Downloading transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers>=4.56.1->trl)
  Downloading tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Downloading trl-0.25.1-py3-none-any.whl (465 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m465.5/465.5 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading accelerate-1.12.0-py3-none-any.whl (380 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.9/380.9 kB[0m [31m36.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading transformers-4.57.1-py3-none-any.whl (1

In [3]:
!pip install bitsandbytes --upgrade

Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl (59.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
  Attempting uninstall: bitsandbytes
    Found existing installation: bitsandbytes 0.42.0
    Uninstalling bitsandbytes-0.42.0:
      Successfully uninstalled bitsandbytes-0.42.0
Successfully installed bitsandbytes-0.48.2


In [4]:
# Smoke test: confirm that the training stack imports cleanly in this kernel
# before any model or dataset is loaded.
import torch
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer

# Reached only if every import above succeeded.
print("Imports successful!")

Imports successful!


In [5]:
import os
import transformers
import torch
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer


In [6]:
from google.colab import userdata

# Read the "HF_TOKEN" entry from Colab's userdata store and expose it through
# the process environment, where the model-loading cell looks it up.
os.environ["HF_TOKEN"] = userdata.get("HF_TOKEN")

## Prerequisites

In [7]:
# Hub identifier of the checkpoint that gets loaded and fine-tuned.
model_id = "google/gemma-2b"

# Quantisation settings handed to the model loader: 4-bit weights using the
# "nf4" quant type, with bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [8]:
import bitsandbytes as bnb
import torch

# Expect True on a GPU runtime; the model-loading cell places the model on device 0.
print(f"CUDA Available: {torch.cuda.is_available()}")

# Print the installed bitsandbytes version (a version string, not a library path).
print(bnb.__version__)

CUDA Available: True
0.48.2


In [9]:
# NOTE(review): this module import is not referenced by any visible cell; the
# `quantization_config=` keyword below is unrelated to it.
from transformers.utils import quantization_config

# Tokenizer for the checkpoint, authenticated with the token stored in the environment.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ["HF_TOKEN"])

# Causal LM loaded with the 4-bit settings from `bnb_config`; the device map
# assigns the root module ("") to device index 0.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},
    token=os.environ["HF_TOKEN"],
)

tokenizer_config.json:   0%|          | 0.00/33.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [10]:
# Baseline check before any training: let the loaded model continue a quote prompt.
text = "Quote:Imagination is more ,"
device = "cuda:0"

# Tokenise the prompt into PyTorch tensors and move them to the target device.
inputs = tokenizer(text, return_tensors="pt").to(device)

# Generate at most 50 new tokens after the prompt.
outputs = model.generate(**inputs, max_new_tokens=50)

# Decode the first (only) returned sequence, dropping special tokens.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))



Quote:Imagination is more , than knowledge.

I am a self-taught artist, born in 1985 in the city of São Paulo, Brazil. I have a degree in Fine Arts from the University of São Paulo.

I have been painting for more than


In [11]:
# Set WANDB_DISABLED to the string "false"; the recorded output of the training
# cell shows a Weights & Biases login prompt, so an API key is needed at that point.
os.environ["WANDB_DISABLED"] = "false"

In [12]:
# LoRA adapter settings passed to the trainer: rank 8, applied to the listed
# attention projections (q/k/v/o) and MLP projections (gate/up/down).
lora_config = LoraConfig(
    r=8,
    target_modules=[
        "q_proj",
        "o_proj",
        "k_proj",
        "v_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    task_type="CAUSAL_LM",
)

In [13]:
from datasets import load_dataset

# Fetch the "Abirate/english_quotes" dataset. The rows are kept as raw text
# here; no tokenisation is applied in this cell.
data = load_dataset("Abirate/english_quotes")

README.md: 0.00B [00:00, ?B/s]

quotes.jsonl:   0%|          | 0.00/647k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2508 [00:00<?, ? examples/s]

In [14]:
# Peek at the "quote" column of the train split.
data['train']['quote']

Column(['“Be yourself; everyone else is already taken.”', "“I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.”", "“Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.”", '“So many books, so little time.”', '“A room without books is like a body without a soul.”'])

In [15]:
# Peek at the "author" column of the train split.
data['train']['author']

Column(['Oscar Wilde', 'Marilyn Monroe', 'Albert Einstein', 'Frank Zappa', 'Marcus Tullius Cicero'])

In [20]:
# def formatting_func(example):
#   text=f"Quote: {example['quote'][0]}\nAuther:  {example['author'][0]}"
#   return [text]

# 2. Define the formatting function
def formatting_prompts_func(example):
    """Build one training prompt per row of a batched ``map`` call.

    Args:
        example: Mapping holding parallel sequences under the ``"quote"`` and
            ``"author"`` keys.

    Returns:
        A dict with a single ``"text"`` key whose value is a list of strings,
        each made of a ``Quote: <quote>`` line followed by an
        ``Author: <author>`` line. Rows are paired with ``zip``, so surplus
        items in the longer sequence are ignored.
    """
    return {
        "text": [
            f"Quote: {quote}\nAuthor: {author}"
            for quote, author in zip(example["quote"], example["author"])
        ]
    }

# Run the formatter over the dataset in batches; its return value adds a
# "text" column next to the existing ones.
data = data.map(formatting_prompts_func, batched=True)

# Sanity check: the first training row should hold one prompt string, not a list.
print("Sample format:", data["train"][0]["text"])


Map:   0%|          | 0/2508 [00:00<?, ? examples/s]

Sample format: Quote: “Be yourself; everyone else is already taken.”
Author: Oscar Wilde


In [21]:
# Show the train split summary; "text" should now be listed among the features.
data['train']

Dataset({
    features: ['quote', 'author', 'tags', 'text'],
    num_rows: 2508
})

In [24]:
from trl import SFTTrainer

# Optimisation settings for a short run: 100 optimizer steps, each accumulating
# 4 micro-batches of 1 sequence per device, with 2 warmup steps, fp16 mixed
# precision, the paged 8-bit AdamW optimizer and a log entry at every step.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    warmup_steps=2,
    max_steps=100,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    optim="paged_adamw_8bit",
)

# No formatting_func is passed: the train split already carries the prompt in
# its "text" column, created by the earlier map call.
# NOTE(review): this relies on the trainer reading "text" by default - confirm
# against TRL's `dataset_text_field` setting.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=data["train"],
    peft_config=lora_config,
)

Adding EOS to train dataset:   0%|          | 0/2508 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2508 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2508 [00:00<?, ? examples/s]

In [25]:
# Run the fine-tuning loop. The recorded output shows an interactive
# Weights & Biases API-key prompt before the first step, so this cell cannot
# finish unattended unless a key is already configured.
trainer.train()

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmohammadalghani61[0m ([33mmohammadalghani61-boj[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
1,2.6566
2,2.0928
3,2.5849
4,2.8234
5,2.3766
6,2.4391
7,2.7874
8,2.0566
9,2.8818
10,2.2126


TrainOutput(global_step=100, training_loss=1.8059492844343186, metrics={'train_runtime': 146.3194, 'train_samples_per_second': 2.734, 'train_steps_per_second': 0.683, 'total_flos': 235740440186880.0, 'train_loss': 1.8059492844343186, 'epoch': 0.1594896331738437})

In [31]:
# Repeat the prompt check after training to see how the completion changed.
text = "Quote: You only live once,"
device = "cuda:0"

# Tokenise the prompt into PyTorch tensors and move them to the target device.
inputs = tokenizer(text, return_tensors="pt").to(device)

# Generate at most 20 new tokens after the prompt.
outputs = model.generate(**inputs, max_new_tokens=20)

# Decode the first (only) returned sequence, dropping special tokens.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Quote: You only live once, but if you do it right, once is enough.
Author: Mae West
