# INSTALL DEPENDENCIES

In [None]:
%%capture
# Environment setup. %%capture captures this cell's output, so pip messages
# (including errors) are not shown unless the magic is removed.
import os, re
# Colab check: looks for "COLAB_" anywhere in the concatenated env-var names.
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # Extract torch's "major.minor" version string and choose the xformers pin
    # for it: 2.9 and 2.8 have their own pins, anything else gets 0.0.29.post3.
    import torch; v = re.match(r"[0-9]{1,}\.[0-9]{1,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.33.post1" if v=="2.9" else "0.0.32.post2" if v=="2.8" else "0.0.29.post3")
    # --no-deps: install exactly these packages without pulling their dependencies.
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets==4.3.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
# These two pins run in both branches, after everything above has been installed.
!pip install transformers==4.56.2
!pip install --no-deps trl==0.22.2

# IMPORTS & GLOBAL CONFIG

In [None]:
import torch
import random
import pandas as pd
import numpy as np

from datasets import Dataset
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

from unsloth import FastModel
from unsloth.trainer import SFTTrainer
from transformers import TrainingArguments

# One seed shared by every random number generator used in this notebook.
SEED = 42

# Seed Python's `random`, NumPy and PyTorch in turn. Doing it in a loop keeps
# the cell from echoing the return value of any individual seeding call.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)
del _seed_fn

# Passed as `max_seq_length` when loading the model and building the trainer.
MAX_SEQ_LENGTH = 512

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


# LOADING BASE MODEL (8-BIT)

In [None]:
# Load the `unsloth/gemma-3-270m-it` checkpoint together with its tokenizer.
# The flags request 8-bit loading (4-bit off) and disable full fine-tuning.
base_model_kwargs = dict(
    model_name="unsloth/gemma-3-270m-it",
    max_seq_length=MAX_SEQ_LENGTH,
    load_in_8bit=True,
    load_in_4bit=False,
    full_finetuning=False,
)
model, tokenizer = FastModel.from_pretrained(**base_model_kwargs)

==((====))==  Unsloth 2025.12.6: Fast Gemma3 patching. Transformers: 4.56.2.
   \\   /|    NVIDIA L4. Num GPUs = 1. Max memory: 22.161 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Gemma3 does not support SDPA - switching to fast eager.


# APPLYING LORA (ALL LINEAR LAYERS)

In [None]:
# Wrap the base model with rank-32 LoRA adapters on the seven projection
# modules listed below. `model` is rebound to the adapter-wrapped model, so
# this cell should be run once per freshly loaded base model.
model = FastModel.get_peft_model(
    model,
    r=32,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ],
    # Unsloth documents this option as `use_gradient_checkpointing`; the
    # previous spelling `gradient_checkpointing` is not a documented keyword,
    # so the setting was not being applied explicitly.
    use_gradient_checkpointing="unsloth",
)

Unsloth: Making `model.base_model.model.model` require gradients


# LOADING AND BALANCING DATASET

In [None]:
# Load the sentiment corpus. Later cells read its `text` and `label` columns.
df = pd.read_csv("multilingual_sentiment.csv")

# Fail here, with a clear message, instead of with a KeyError deep inside the
# formatting or stratified-split cells if the CSV lacks an expected column.
_missing_columns = {"text", "label"} - set(df.columns)
if _missing_columns:
    raise ValueError(
        "multilingual_sentiment.csv is missing required column(s): "
        f"{sorted(_missing_columns)}"
    )

# FORMATTING COMPLETION-STYLE PROMPTS

In [None]:
def format_sample(row):
    """Build the training string for one dataset row.

    The result is the row's text, a newline, the literal ``Sentiment: `` and a
    JSON-style object holding the label, followed by the tokenizer's EOS token.

    Args:
        row: Mapping-like row (e.g. a pandas Series) with ``text`` and
            ``label`` entries.

    Returns:
        str: The formatted sample, terminated with ``tokenizer.eos_token``.
    """
    # The text is inserted verbatim, so any <lang=...> tag it carries is kept as-is.
    prompt = f"{row['text']}\nSentiment: {{\"sentiment\": \"{row['label']}\"}}"
    return prompt + tokenizer.eos_token


# Keep the unformatted text in `text_raw` the first time this cell runs and
# always format from it. Overwriting `text` from itself made the cell
# non-idempotent: every re-run appended another "Sentiment: ..." suffix and EOS.
if "text_raw" not in df.columns:
    df["text_raw"] = df["text"]
df["text"] = df.assign(text=df["text_raw"]).apply(format_sample, axis=1)

# TRAIN / TEST SPLIT

In [None]:
# Hold out 1% of the rows as a test split, stratified on the label column and
# seeded so the split is repeatable.
train_df, test_df = train_test_split(
    df,
    stratify=df["label"],
    random_state=SEED,
    test_size=0.01,
)

# Training only needs the formatted text; the test split also keeps the label.
# NOTE(review): `text` was overwritten by the formatting cell, so the test
# split's `text` already contains the label and EOS — strip that suffix before
# using it as an evaluation prompt.
train_columns = ["text"]
test_columns = ["text", "label"]
train_dataset = Dataset.from_pandas(train_df[train_columns])
test_dataset = Dataset.from_pandas(test_df[test_columns])

# TRAINING ARGUMENTS

In [None]:
# Hyperparameters for the fine-tuning run, grouped by concern.
training_args = TrainingArguments(
    # Where checkpoints go and how often progress is logged / saved.
    output_dir="/content/gemma_sentiment",
    logging_steps=50,
    save_steps=500,
    report_to="none",
    # Run length and batch geometry: 16 per device x 4 accumulation steps.
    max_steps=1500,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    # Optimizer and learning-rate schedule.
    optim="adamw_8bit",
    learning_rate=2e-4,
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    # Numerics and reproducibility.
    bf16=True,
    seed=SEED,
)

# TRAINER WITH PACKING

In [None]:
# Build the supervised fine-tuning trainer over the `text` column of the
# training split, with packing requested and the shared sequence-length cap.
trainer = SFTTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    packing=True,
    max_seq_length=MAX_SEQ_LENGTH,
)

# Left as the cell's final expression so the returned TrainOutput is displayed.
trainer.train()

Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/84149 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 84,149 | Num Epochs = 2 | Total steps = 1,500
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 4 x 1) = 64
 "-____-"     Trainable parameters = 7,593,984 of 275,692,160 (2.75% trained)


Step,Training Loss
50,5.4595
100,3.4993
150,3.3531
200,3.2619
250,3.1542
300,3.1316
350,3.1263
400,3.1087
450,3.0874
500,3.0175


Unsloth: Will smartly offload gradients to save VRAM!


TrainOutput(global_step=1500, training_loss=3.1083999938964846, metrics={'train_runtime': 3758.3605, 'train_samples_per_second': 25.543, 'train_steps_per_second': 0.399, 'total_flos': 6680587516320000.0, 'train_loss': 3.1083999938964846, 'epoch': 1.1406844106463878})

# SAVING LORA MODEL

In [None]:
# Model and tokenizer files are written side by side in one directory.
lora_output_dir = "/content/gemma_pretrained"
model.save_pretrained(lora_output_dir)
# Final expression: the tuple of written tokenizer files is shown as cell output.
tokenizer.save_pretrained(lora_output_dir)

('/content/gemma_pretrained/tokenizer_config.json',
 '/content/gemma_pretrained/special_tokens_map.json',
 '/content/gemma_pretrained/chat_template.jinja',
 '/content/gemma_pretrained/tokenizer.model',
 '/content/gemma_pretrained/added_tokens.json',
 '/content/gemma_pretrained/tokenizer.json')

In [None]:
# Recursively archive the saved model directory into gemma_pretrained.zip.
# NOTE(review): both paths are relative, while the model was saved to the
# absolute path /content/gemma_pretrained — this assumes the working directory
# is /content; confirm if the cwd has been changed.
!zip -r gemma_pretrained.zip gemma_pretrained

In [None]:
# Colab-only step: `google.colab` is imported here, so this cell presumably
# fails outside Colab — TODO confirm.
from google.colab import files
# Hands the zip archive created above to Colab's download helper.
files.download("gemma_pretrained.zip")