Finetuning using Google Gemma's Model


In [1]:
!pip install -q -U transformers datasets accelerate peft trl bitsandbytes

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/10.4 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/10.4 MB[0m [31m92.6 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━[0m [32m6.4/10.4 MB[0m [31m88.9 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m10.4/10.4 MB[0m [31m109.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m69.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/520.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m520.7/520.7 kB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m540.5/540.5 kB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[2K   

In [2]:
import transformers, datasets, tokenizers, huggingface_hub

print("Transformers:", transformers.__version__)
print("Datasets:", datasets.__version__)
print("Tokenizers:", tokenizers.__version__)
print("HF Hub:", huggingface_hub.__version__)

Transformers: 5.2.0
Datasets: 4.6.0
Tokenizers: 0.22.2
HF Hub: 1.4.1


In [3]:
import os
import transformers
import torch
from google.colab import userdata
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig, GemmaTokenizer

In [4]:
os.environ["HF_token"] = userdata.get('HF_token')

In [44]:
model_id= "google/gemma-2b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

In [45]:
tokenizer= AutoTokenizer.from_pretrained(model_id,token=os.environ['HF_token'])
model= AutoModelForCausalLM.from_pretrained(model_id,quantization_config=bnb_config,
                                            device_map={"":0},
                                            token=os.environ['HF_token'])

Loading weights:   0%|          | 0/164 [00:00<?, ?it/s]

In [46]:
text="Quote of albert,"
device = "cuda:0"
inputs = tokenizer(text,return_tensors="pt").to(device)
outputs= model.generate(**inputs, max_new_tokens=30)
print(tokenizer.decode(outputs[0],skip_special_tokens=True))

Quote of albert,

"I am a man of action, not of words. I am a man of action, not of words. I am a man of action


In [68]:
os.environ["WANDB_DISABLED"] = "false"

In [47]:
lora_config = LoraConfig(
      r=8,
      target_modules= ["q_proj","o_proj","k_proj","v_proj",
                       "gate_proj","up_proj","down_proj"],
      task_type="CAUSAL_LM",

  )

In [74]:
from datasets import load_dataset
data = load_dataset("Abirate/english_quotes")

In [75]:
data['train']['author']

Column(['Oscar Wilde', 'Marilyn Monroe', 'Albert Einstein', 'Frank Zappa', 'Marcus Tullius Cicero', ...])

In [79]:
def formatting_func(example):
    return f"Quote: {example['quote']}\nAuthor: {example['author']}"

In [80]:
trainer = SFTTrainer(
    model=model,
    train_dataset=data["train"],
    formatting_func=formatting_func,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=200,
        learning_rate=2e-4,
        fp16=False,        # safe for T4
        bf16=False,
        report_to="none",
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    peft_config=lora_config,
)



Applying formatting function to train dataset:   0%|          | 0/2508 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/2508 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2508 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2508 [00:00<?, ? examples/s]

In [81]:
trainer.train()

Step,Training Loss
1,2.662017
2,2.096907
3,2.601519
4,2.86171
5,2.436548
6,2.466273
7,2.819135
8,2.088557
9,2.907265
10,2.223133



Cannot access gated repo for url https://huggingface.co/google/gemma-2b/resolve/main/config.json.
Access to model google/gemma-2b is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in google/gemma-2b.


TrainOutput(global_step=200, training_loss=1.736454030573368, metrics={'train_runtime': 386.493, 'train_samples_per_second': 2.07, 'train_steps_per_second': 0.517, 'total_flos': 484578235392000.0, 'train_loss': 1.736454030573368})

In [82]:
text="Quote: Oustside of a dog, a book is man's."
device = "cuda:0"
inputs = tokenizer(text,return_tensors="pt").to(device)
outputs= model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0],skip_special_tokens=True))

Quote: Oustside of a dog, a book is man's.
Author: Mark Twain
