# Finetuning Using Google Gemma's Model


In [1]:
!pip3 install -q -U bitsandbytes
!pip3 install -q -U peft
!pip3 install -q -U trl
!pip3 install -q -U accelerate
!pip3 install -q -U datasets
!pip3 install -q -U transformers

In [2]:
from accelerate.utils import write_basic_config
import tqdm as notebook_tqdm

write_basic_config()

  from .autonotebook import tqdm as notebook_tqdm


Configuration already exists at /Users/beyond/.cache/huggingface/accelerate/default_config.yaml, will not override. Run `accelerate config` manually or pass a different `save_location`.


False

In [3]:
print(f"Number of GPUs available: {torch.cuda.device_count()}")
if torch.cuda.device_count() > 1:
    print("Multi-GPU training enabled!")
else:
    print("Only a single GPU is available.")

Number of GPUs available: 0
Only a single GPU is available.


In [4]:
import os
import transformers
import torch
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig, GemmaTokenizer

In [5]:
os.environ["HF_TOKEN"] = "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"

### Prerequisites

- nf4(4-bit NormalFloat(NF4)) : https://www.kaggle.com/code/lorentzyeung/what-s-4-bit-quantization-how-does-it-help-llama2


In [8]:
model_id = "unsloth/gemma-2b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [9]:
tokenizer = AutoTokenizer.from_pretrained(
    model_id, token=os.environ['HF_TOKEN'])
model = AutoModelForCausalLM.from_pretrained(model_id,
                                             quantization_config=bnb_config,
                                             device_map="auto",
                                             token=os.environ['HF_TOKEN'])

ImportError: Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`

In [None]:
text = "Text: नेपालको संविधानको धारा १९५ मा प्रदेश सभाहरूलाई "

# The model will already be distributed across GPUs via device_map="auto"
# Move tokenizer outputs to the correct device
inputs = tokenizer(text, return_tensors="pt")
inputs = {key: value.to(model.device)
          for key, value in inputs.items()}  # Use model's device

# Generate outputs
outputs = model.generate(**inputs, max_new_tokens=100)

# Decode and print
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
os.environ["WANDB_DISABLED"] = "true"

In [None]:
lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj",
                    "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

In [None]:
from datasets import load_dataset

# Load the dataset
data = load_dataset(
    "csv", data_files="/kaggle/input/nepali-llm-dataset/nepali-dataset.csv")

# Tokenize the dataset


def tokenize_function(samples):
    return tokenizer(samples["Text"], truncation=True, padding="max_length", max_length=1024)


data = data.map(tokenize_function, batched=True,
                num_proc=torch.cuda.device_count())  # Parallel tokenization

In [None]:
data['train']['Text']

In [None]:
def formatting_func(example):
    text = f"Text: {example['Text'][0]}"
    return [text]

In [None]:
from accelerate import Accelerator

# Initialize Accelerator
accelerator = Accelerator()

# Prepare the model with Accelerator
model = accelerator.prepare(model)

# Count the trainable parameters
trainable_params = sum(p.numel()
                       for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())

print(
    f"Trainable parameters: {trainable_params} / Total parameters: {total_params}")

In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer
from accelerate import Accelerator
from torch.utils.data import DataLoader

# Define the training arguments
training_args = TrainingArguments(
    num_train_epochs=5,                 # Number of training epochs
    per_device_train_batch_size=1,         # Batch size per GPU
    # Accumulate gradients for larger effective batch size
    gradient_accumulation_steps=16,
    warmup_steps=2,                        # Warmup steps for scheduler
    learning_rate=2e-4,                    # Learning rate
    fp16=True,                             # Enable mixed precision
    save_total_limit=2,                    # Limit the number of saved checkpoints
    logging_steps=1,                       # Log every step
    output_dir="outputs",                  # Directory to save outputs
    optim="paged_adamw_8bit",              # 8-bit optimizer for efficiency
    # No reporting (can use "wandb" or "tensorboard")
    report_to="none",
    # Multi-GPU setting: Optimize DDP performance
    ddp_find_unused_parameters=False,
)

# Initialize the Accelerator
accelerator = Accelerator()

# Use Accelerate to shard data across GPUs
train_dataloader = DataLoader(
    data["train"],
    batch_size=training_args.per_device_train_batch_size
)

# Prepare the model and dataloader for distributed training
model, train_dataloader = accelerator.prepare(model, train_dataloader)

# Initialize the SFTTrainer
trainer = SFTTrainer(
    model=model,                           # Model with LoRA and quantization
    train_dataset=data["train"],           # Tokenized training dataset
    args=training_args,                    # Training arguments
    peft_config=lora_config,               # LoRA configuration
    formatting_func=formatting_func,       # Formatting function for prompt
)

In [None]:
trainer.train()

In [None]:
text = "ताप्लेजुङ जिल्लाको ५,००० मिटर भन्दा माथी टुन्ड्रा हावापानी पाइन्छ। हिमाली भागमा बाह्रै "

inputs = tokenizer(text, return_tensors="pt")
inputs = {key: value.to(model.device)
          for key, value in inputs.items()}  # Use model's device

# Generate outputs
outputs = model.generate(**inputs, max_new_tokens=100)

# Decode and print
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [2]:
text = "त्यसयताका सबै खेलमा ब्राजिल हावी हुँदै आएको छ । १६ वर्ष यता बेल्जियम "

# The model will already be distributed across GPUs via device_map="auto"
# Move tokenizer outputs to the correct device
inputs = tokenizer(text, return_tensors="pt")
inputs = {key: value.to(model.device)
          for key, value in inputs.items()}  # Use model's device

# Generate outputs
outputs = model.generate(**inputs, max_new_tokens=100)

# Decode and print
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

NameError: name 'tokenizer' is not defined

In [None]:
text = "केटाकेटीलाई अनुशासित बनाउन पिट्ने प्रवृत्ति "

# The model will already be distributed across GPUs via device_map="auto"
# Move tokenizer outputs to the correct device
inputs = tokenizer(text, return_tensors="pt")
inputs = {key: value.to(model.device)
          for key, value in inputs.items()}  # Use model's device

# Generate outputs
outputs = model.generate(**inputs, max_new_tokens=100)

# Decode and print
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# Save the base model
model.save_pretrained("gemma_finetuned")

# Save the LoRA weights
trainer.save_model("lora_weights")

In [None]:
import torch

# Function to calculate perplexity


def calculate_perplexity(text):
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt")
    inputs = {key: value.to(model.device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = model(
            input_ids=inputs["input_ids"], labels=inputs["input_ids"])
        loss = outputs.loss  # Cross-entropy loss
        perplexity = torch.exp(loss)  # Perplexity is exp(loss)
    return perplexity.item()


# Example texts
texts = [
    "ताप्लेजुङ जिल्लाको ५,००० मिटर भन्दा माथी टुन्ड्रा हावापानी पाइन्छ। हिमाली भागमा बाह्रै ",
    "त्यसयताका सबै खेलमा ब्राजिल हावी हुँदै आएको छ । १६ वर्ष यता बेल्जियम "
]

# Calculate perplexity for each text
for i, text in enumerate(texts, 1):
    perplexity = calculate_perplexity(text)
    print(f"Text {i}: '{text}'")
    print(f"Perplexity: {perplexity}\n")