In [None]:
# gemma-2 is available from transformers>=4.42.3
!pip install -U "transformers>=4.42.3" bitsandbytes accelerate peft
!pip install datasets

Collecting transformers>=4.42.3
  Downloading transformers-4.48.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Downloading transformers-4.48.0-py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m78.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m32.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes, transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.47.1
    Uninstalling transformers-4.47.1:
      Successfully uninstalled transformers-4.47.1
Successfully installed bitsandbytes-0.45.0 

In [None]:
import os
import copy
from dataclasses import dataclass

import numpy as np
import torch
from datasets import Dataset
from transformers import (
    BitsAndBytesConfig,
    Gemma2ForSequenceClassification,
    GemmaTokenizerFast,
    Gemma2Config,
    PreTrainedTokenizerBase,
    EvalPrediction,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType
from sklearn.metrics import log_loss, accuracy_score

In [None]:
@dataclass
class Config:
    """Hyperparameters and paths for the QLoRA fine-tuning run.

    ``lora_alpha`` defaults to ``2 * lora_r`` and is resolved per instance in
    ``__post_init__``, so overriding ``lora_r`` keeps the two in sync unless
    ``lora_alpha`` is passed explicitly.
    """

    output_dir: str = "output"
    checkpoint: str = "unsloth/gemma-2-9b-it-bnb-4bit"  # 4-bit quantized gemma-2-9b-instruct
    max_length: int = 1024
    n_splits: int = 5
    fold_idx: int = 0
    optim_type: str = "adamw_8bit"
    per_device_train_batch_size: int = 2  # GPU is under-utilized at 2; can be raised to ~8-10
    gradient_accumulation_steps: int = 2  # effective batch size per device = 2 * 2 = 4
    per_device_eval_batch_size: int = 8
    n_epochs: int = 1  # increase the number of epochs here if required
    freeze_layers: int = 16  # there're 42 layers in total, we don't add adapters to the first 16 layers
    lr: float = 2e-4
    warmup_steps: int = 20
    lora_r: int = 32  # changed from 16 to 32
    lora_alpha: float = None  # None -> 2 * lora_r, filled in by __post_init__
    lora_dropout: float = 0.05
    lora_bias: str = "none"

    def __post_init__(self) -> None:
        # A class-level default of `lora_r * 2` would be frozen at class
        # definition time and ignore a per-instance `lora_r`; derive it here.
        if self.lora_alpha is None:
            self.lora_alpha = self.lora_r * 2

config = Config()

In [None]:
# Trainer settings; every tunable value is read from `config` so the
# configuration cell stays the single source of truth.
training_args = TrainingArguments(
    output_dir=config.output_dir,  # was hard-coded to "output", ignoring config.output_dir
    overwrite_output_dir=True,
    report_to="none",
    num_train_epochs=config.n_epochs,
    per_device_train_batch_size=config.per_device_train_batch_size,
    gradient_accumulation_steps=config.gradient_accumulation_steps,
    per_device_eval_batch_size=config.per_device_eval_batch_size,
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="steps",
    save_steps=200,
    optim=config.optim_type,
    fp16=True,
    learning_rate=config.lr,
    warmup_steps=config.warmup_steps,
)

In [None]:
# Projections that receive LoRA adapters: all attention projections plus the MLP.
# (Attention-only alternative: ["q_proj", "k_proj", "v_proj"].)
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

# Of layer indices 0..41, only those at or above `freeze_layers` get adapters.
adapted_layers = [layer for layer in range(42) if layer >= config.freeze_layers]

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=config.lora_r,
    lora_alpha=config.lora_alpha,
    lora_dropout=config.lora_dropout,
    bias=config.lora_bias,
    target_modules=attention_projections + mlp_projections,
    layers_to_transform=adapted_layers,
)

In [None]:
# Load the tokenizer that ships with the configured checkpoint.
tokenizer = GemmaTokenizerFast.from_pretrained(config.checkpoint)
tokenizer.padding_side = "right"  # pad after the text
tokenizer.add_eos_token = True  # <eos> is added at the end of each sequence

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/47.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

In [None]:
# Load the 4-bit checkpoint with a 2-way classification head.
model = Gemma2ForSequenceClassification.from_pretrained(
    config.checkpoint,
    num_labels=2,  # two classes (model_a wins / model_b wins); previously 3
    torch_dtype=torch.float16,
    device_map="auto",
)
model.config.use_cache = False  # turn off the model's cache flag for training
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
# Report trainable vs. total parameter counts instead of echoing the entire
# module tree, which floods the cell output.
model.print_trainable_parameters()

config.json:   0%|          | 0.00/1.41k [00:00<?, ?B/s]

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


model.safetensors:   0%|          | 0.00/6.13G [00:00<?, ?B/s]

Some weights of Gemma2ForSequenceClassification were not initialized from the model checkpoint at unsloth/gemma-2-9b-it-bnb-4bit and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): Gemma2ForSequenceClassification(
      (model): Gemma2Model(
        (embed_tokens): Embedding(256000, 3584, padding_idx=0)
        (layers): ModuleList(
          (0-15): 16 x Gemma2DecoderLayer(
            (self_attn): Gemma2Attention(
              (q_proj): Linear4bit(in_features=3584, out_features=4096, bias=False)
              (k_proj): Linear4bit(in_features=3584, out_features=2048, bias=False)
              (v_proj): Linear4bit(in_features=3584, out_features=2048, bias=False)
              (o_proj): Linear4bit(in_features=4096, out_features=3584, bias=False)
            )
            (mlp): Gemma2MLP(
              (gate_proj): Linear4bit(in_features=3584, out_features=14336, bias=False)
              (up_proj): Linear4bit(in_features=3584, out_features=14336, bias=False)
              (down_proj): Linear4bit(in_features=14336, out_features=3584, bias=False)
              (act_fn): PytorchGELUTanh()
 

In [None]:
import pandas as pd

# Raw training table, read from the parquet file in /content.
raw_train_dataset = pd.read_parquet("/content/train.parquet")

# Column groups: text fields fed to the model and the one-hot target columns.
label_columns = ["winner_model_a", "winner_model_b"]
input_columns = ["prompt", "response_a", "response_b"]

In [None]:
def to_onehot(row):
    """Return 0/1 indicator values for a single row's ``winner`` field.

    Args:
        row: Mapping-like row exposing a ``'winner'`` entry.

    Returns:
        pd.Series with ``winner_model_a`` and ``winner_model_b`` set to 1 when
        ``winner`` equals ``"model_a"`` / ``"model_b"`` respectively, else 0.
    """
    winner = row['winner']
    return pd.Series({
        "winner_model_a": 1 if winner == "model_a" else 0,
        "winner_model_b": 1 if winner == "model_b" else 0
    })

# Vectorized equivalent of `raw_train_dataset.apply(to_onehot, axis=1)`:
# same 0/1 integer columns without a Python-level call per row.
onehot_encoded = pd.DataFrame({
    "winner_model_a": raw_train_dataset["winner"].eq("model_a").astype(int),
    "winner_model_b": raw_train_dataset["winner"].eq("model_b").astype(int),
})

# `assign` overwrites the indicator columns if they already exist, so re-running
# this cell no longer appends a second pair of identically named columns
# (which is what the previous pd.concat did).
raw_train_dataset = raw_train_dataset.assign(
    winner_model_a=onehot_encoded["winner_model_a"],
    winner_model_b=onehot_encoded["winner_model_b"],
)


In [None]:
# Discard rows containing any missing value, remove the model-name columns,
# and renumber the index from zero.
raw_train_dataset = (
    raw_train_dataset
    .dropna()
    .drop(columns=['model_a', 'model_b'])
    .reset_index(drop=True)
)

In [None]:
class CustomTokenizer:
    """Batched encoder turning (prompt, response_a, response_b) rows into model inputs.

    Instances are callable on a batch dict (lists keyed by column name), which
    is the shape ``Dataset.map(..., batched=True)`` passes in.

    Args:
        tokenizer: Tokenizer called on the concatenated text of each row.
        max_length: Maximum sequence length passed to the tokenizer.
        padding: Padding strategy forwarded to the tokenizer. Defaults to
            ``"max_length"`` (the previous hard-coded value); pass ``False``
            to leave sequences unpadded so that a padding data collator can
            pad each batch to its own longest sequence.
    """

    def __init__(
        self,
        tokenizer: "PreTrainedTokenizerBase",
        max_length: int,
        padding="max_length",
    ) -> None:
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.padding = padding

    def __call__(self, batch: dict) -> dict:
        """Tokenize a batch and attach integer class labels.

        Args:
            batch: Dict with list-valued ``prompt``, ``response_a``,
                ``response_b``, ``winner_model_a`` and ``winner_model_b``.

        Returns:
            The tokenizer's output merged with ``"labels"`` (0 when model_a
            won, 1 when model_b won).

        Raises:
            ValueError: If a row has neither winner flag set. Previously such
                a row reused the preceding row's label (or raised
                UnboundLocalError when it was the first row of the batch).
        """
        # Process prompts and responses
        prompt = ["<Task>:Which response is better? which response will be preffered by user for given prompt? if response_a is better output model_a , if response_b is better output model_b in only one word,<prompt> " + self.process_text(t) for t in batch["prompt"]]
        response_a = ["\n\n<response_a>: " + self.process_text(t) for t in batch["response_a"]]
        response_b = ["\n\n<response_b>: " + self.process_text(t) for t in batch["response_b"]]

        # Concatenate processed text
        texts = [p + r_a + r_b for p, r_a, r_b in zip(prompt, response_a, response_b)]

        # Tokenize the concatenated texts
        tokenized = self.tokenizer(
            texts,
            max_length=self.max_length,
            truncation=True,
            padding=self.padding,
        )

        # Generate labels based on winner; model_a takes precedence if both flags are set
        labels = []
        for row, (a_win, b_win) in enumerate(zip(batch["winner_model_a"], batch["winner_model_b"])):
            if a_win:
                labels.append(0)
            elif b_win:
                labels.append(1)
            else:
                raise ValueError(
                    f"Row {row} of the batch has neither winner_model_a nor "
                    "winner_model_b set; cannot assign a label."
                )

        # Return tokenized data with labels
        return {**tokenized, "labels": labels}

    @staticmethod
    def process_text(text: str) -> str:
        """Collapse all whitespace runs in ``text`` to single spaces.

        ``None`` becomes ``""``; any other value is converted with ``str()``
        first. If processing fails, the error is printed and ``""`` is returned.
        """
        if text is None:
            return ""  # Handle null inputs
        try:
            return " ".join(str(text).split())  # Remove excess whitespace
        except Exception as e:
            print(f"Error processing text: {text}. Error: {e}")
            return ""  # Return empty string for invalid inputs


In [None]:
# Remove the `id` column from the training table.
raw_train_dataset = raw_train_dataset.drop(columns=["id"])

In [None]:
# Convert the cleaned pandas DataFrame into a `datasets.Dataset`.
ds = Dataset.from_pandas(raw_train_dataset)

In [None]:
# Tokenize the whole dataset in batches; CustomTokenizer returns the tokenizer
# output plus a "labels" column, which `map` adds to the dataset.
encode = CustomTokenizer(tokenizer, max_length=config.max_length)
ds = ds.map(encode, batched=True)

Map:   0%|          | 0/48439 [00:00<?, ? examples/s]

In [None]:
# Show the dataset summary (feature names and row count) as a sanity check.
ds

Dataset({
    features: ['prompt', 'response_a', 'response_b', 'winner', 'language', 'winner_model_a', 'winner_model_b', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 48439
})

In [None]:
def compute_metrics(eval_preds: EvalPrediction) -> dict:
    """Compute accuracy and log-loss from raw evaluation logits.

    Args:
        eval_preds: Object exposing ``predictions`` (logits, classes on the
            last axis) and ``label_ids`` (integer class labels).

    Returns:
        Dict with ``"acc"`` (accuracy of the argmax prediction) and
        ``"log_loss"`` (log-loss of the softmax probabilities).
    """
    logits = eval_preds.predictions
    labels = eval_preds.label_ids
    probs = torch.from_numpy(logits).float().softmax(-1).numpy()
    # Pass the full class list explicitly: otherwise log_loss infers the classes
    # from y_true and raises when the evaluated labels contain only one class.
    loss = log_loss(y_true=labels, y_pred=probs, labels=np.arange(probs.shape[-1]))
    acc = accuracy_score(y_true=labels, y_pred=logits.argmax(-1))
    return {"acc": acc, "log_loss": loss}

In [None]:
def _modulo_fold(n_rows: int, n_splits: int, fold: int) -> tuple:
    """Return (train_indices, eval_indices); a row is held out when row % n_splits == fold."""
    train_rows, eval_rows = [], []
    for row in range(n_rows):
        if row % n_splits == fold:
            eval_rows.append(row)
        else:
            train_rows.append(row)
    return train_rows, eval_rows

# One (train, eval) index pair per fold, in fold order.
folds = [
    _modulo_fold(len(ds), config.n_splits, fold)
    for fold in range(config.n_splits)
]

In [None]:
# Pick the train/eval row indices of the configured fold.
train_idx, eval_idx = folds[config.fold_idx]

trainer = Trainer(
    args=training_args,
    model=model,
    # `tokenizer=` is deprecated in Trainer.__init__ in recent transformers
    # releases; `processing_class=` is the replacement argument.
    processing_class=tokenizer,
    train_dataset=ds.select(train_idx),
    eval_dataset=ds.select(eval_idx),
    compute_metrics=compute_metrics,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
)
trainer.train()

  trainer = Trainer(
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*a

Epoch,Training Loss,Validation Loss,Acc,Log Loss,Runtime,Samples Per Second,Steps Per Second
1,0.6848,0.689027,0.537469,0.689025,1759.0467,5.508,0.688


TrainOutput(global_step=9688, training_loss=0.7848945531246585, metrics={'train_runtime': 35241.9245, 'train_samples_per_second': 1.1, 'train_steps_per_second': 0.275, 'total_flos': 1.9978036448612844e+18, 'train_loss': 0.7848945531246585, 'epoch': 1.0})

#### Save the fine-tuned model

In [None]:
import os
import shutil

# Specify the folder to zip and the output zip file name
folder_to_zip = "/content/final_model"
output_zip_file = "/content/final_model.zip"

# Fail with an actionable message if the model folder has not been written yet
# (this cell must run after the cell that saves the model to `final_model`).
if not os.path.isdir(folder_to_zip):
    raise FileNotFoundError(
        f"{folder_to_zip} does not exist; save the model before creating the archive."
    )

# make_archive appends the ".zip" suffix itself, so strip only the trailing
# extension (str.replace would also remove any ".zip" earlier in the path).
archive_path = shutil.make_archive(
    base_name=os.path.splitext(output_zip_file)[0],
    format='zip',
    root_dir=folder_to_zip,
)

print(f"Zipped folder saved as: {archive_path}")


Zipped folder saved as: /content/final_model.zip


In [None]:
# Write the trained model and the tokenizer into a single folder.
output_dir = "final_model"

# Save the model through the Trainer
trainer.save_model(output_dir)

# Save the tokenizer next to the model files
tokenizer.save_pretrained(output_dir)

# NOTE(review): no separate step saves the training arguments or the LoRA
# configuration here; presumably `trainer.save_model` already writes what is
# needed to reload the adapter -- confirm by inspecting the folder contents.

print(f"Model, tokenizer, and configuration saved to {output_dir}")


Model, tokenizer, and configuration saved to final_model


In [None]:
# NOTE: `model` is the PEFT-wrapped model returned by get_peft_model, so
# save_pretrained() writes the adapter checkpoint, not a full copy of the
# base Gemma weights.
base_model_dir = "gemma-2-9b-it-4bit"  # Corresponds to gemma_dir
model.save_pretrained(base_model_dir)
tokenizer.save_pretrained(base_model_dir)

lora_adapter_dir = "checkpoint-final"  # Corresponds to lora_dir
# The PEFT model has no `save_adapter` method (calling it raises
# AttributeError); save_pretrained() is the call that writes the adapter.
model.save_pretrained(lora_adapter_dir)


In [None]:
# Define directories
base_model_dir = "saved_base_model"
lora_adapter_dir = "saved_lora_adapter"
tokenizer_dir = "saved_tokenizer"

# `model` is the PEFT-wrapped model, so this writes the adapter checkpoint
# rather than a full copy of the base model weights.
model.save_pretrained(base_model_dir)

# Save the LoRA adapter. The PEFT model has no `save_adapter` method (calling
# it raises AttributeError); save_pretrained() is the adapter-saving call.
model.save_pretrained(lora_adapter_dir)

# Save the tokenizer
tokenizer.save_pretrained(tokenizer_dir)

print("Base model, LoRA adapter, and tokenizer saved successfully!")


In [None]:
from IPython.display import FileLink

# Provide a download link for the zipped file
# NOTE(review): no visible cell creates /content/output.zip -- the archive cell
# in this notebook writes /content/final_model.zip. Confirm the intended path.
FileLink('/content/output.zip')


Reference notebook (training): https://www.kaggle.com/code/emiz6413/training-gemma-2-9b-4-bit-qlora-fine-tuning/notebook

Reference notebook (inference): https://www.kaggle.com/code/emiz6413/inference-gemma-2-9b-4-bit-qlora/notebook

In [None]:
# Define output directories
output_dir = "saved_model"
lora_adapter_dir = "saved_lora_adapter"
tokenizer_dir = "saved_tokenizer"

# `model` is the PEFT-wrapped model, so save_pretrained() writes the adapter
# checkpoint rather than a full copy of the base model weights.
model.save_pretrained(output_dir)

# Save the LoRA adapter. The previous `hasattr(model, "save_adapter")` guard
# skipped this step silently (the PEFT model has no such method) while the
# success message below was still printed; save_pretrained() is the real API.
model.save_pretrained(lora_adapter_dir)

# Save the tokenizer
tokenizer.save_pretrained(tokenizer_dir)

print(f"Model saved to: {output_dir}")
print(f"LoRA adapter saved to: {lora_adapter_dir}")
print(f"Tokenizer saved to: {tokenizer_dir}")


Model saved to: saved_model
LoRA adapter saved to: saved_lora_adapter
Tokenizer saved to: saved_tokenizer
