# Training gemma

In [1]:
import torch
import time
import pandas as pd
from datasets import Dataset


from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    BitsAndBytesConfig, 
    TrainingArguments
)
from peft import (
    LoraConfig, 
    get_peft_model, 
    prepare_model_for_kbit_training, 
    TaskType
)
from trl import SFTTrainer
from dataclasses import dataclass, field
from typing import List, Optional

In [2]:
@dataclass
class FinetuneConfig:
    # Model g·ªëc c·ªßa Google, kh√¥ng d√πng h√†ng Unsloth n·ªØa
    model_name: str = "google/gemma-2-2b" 
    max_seq_length: int = 2048
    
    # LoRA Params
    lora_rank: int = 16
    lora_alpha: int = 32
    lora_dropout: float = 0.05
    # Native PEFT c·∫ßn list target modules ch√≠nh x√°c
    target_modules: List[str] = field(default_factory=lambda: [
        "q_proj", "k_proj", "v_proj", "o_proj", 
        "gate_proj", "up_proj", "down_proj"
    ])
    
    # Train Params
    learning_rate: float = 2e-4
    batch_size: int = 2
    gradient_accumulation_steps: int = 4
    max_steps: int = 60
    warmup_steps: int = 5
    logging_steps: int = 1
    seed: int = 3407
    
    output_dir: str = "models/gemma_native_finetuned"
    dataset_path: str = "../data/question_answer/datasetdone.xlsx"

In [3]:
config = FinetuneConfig()

## 1. Model Loading

In [4]:
import torch
from peft import LoraConfig, TaskType
from transformers import BitsAndBytesConfig

### 1.1. LoRA configuration

In [5]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules="all-linear",
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

### 1.2. Quantization configuration

In [6]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,              
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

In [7]:
print(f"üìÇ Loading Model: {config.model_name}")
model = AutoModelForCausalLM.from_pretrained(
    config.model_name,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation="sdpa",
    torch_dtype=torch.bfloat16
)
model = prepare_model_for_kbit_training(model)

üìÇ Loading Model: google/gemma-2-2b


`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [8]:
print(f"üìÇ Loading Tokenizer: {config.model_name}")
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

üìÇ Loading Tokenizer: google/gemma-2-2b


In [9]:
print("üõ†Ô∏è Attaching LoRA adapters...")
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

üõ†Ô∏è Attaching LoRA adapters...
trainable params: 20,766,720 || all params: 2,635,108,608 || trainable%: 0.7881


### 1.3. Load model and QLoRA attachment

In [12]:
import torch
from trl import SFTTrainer, SFTConfig

def run_finetuning(
    config: FinetuneConfig,
    model,
    tokenizer,
):
    # --- 0. CHECK CUDA & GPU INFO ---
    print("\n" + "="*40)
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
        print(f"üî• GPU DETECTED: {gpu_name} ({vram_gb:.2f} GB VRAM)")
        print(f"   CUDA Version: {torch.version.cuda}")
    else:
        print("‚ö†Ô∏è WARNING: KH√îNG T√åM TH·∫§Y GPU! ƒêang ch·∫°y b·∫±ng CPU (s·∫Ω r·∫•t ch·∫≠m).")
    print("="*40 + "\n")

    versioning = time.strftime("%Y%m%d-%H%M%S")
    run_output_dir = f"{config.output_dir}/{versioning}"
    print(f"üöÄ Starting run: {versioning}")

    # --- 1. Load Data & Format ---
    print(f"üìö Loading Data from: {config.dataset_path}")
    try:
        df = pd.read_excel(config.dataset_path)
        if 'Column2' not in df.columns or 'Column3' not in df.columns:
             df.columns = ['Column2', 'Column3'] + list(df.columns[2:])
    except Exception as e:
        print(f"‚ùå L·ªói ƒë·ªçc Excel: {e}")
        return

    dataset = Dataset.from_pandas(df)
    def formatting_prompts_func(examples):
        texts = [f"<start_of_turn>user\n{q}<end_of_turn>\n<start_of_turn>model\n{a}<end_of_turn>" 
                 for q, a in zip(examples["Column2"], examples["Column3"])]
        return { "text" : texts }
    dataset = dataset.map(formatting_prompts_func, batched=True)

    # --- 2. Config & Trainer Setup ---
    print("‚öôÔ∏è Setting up Trainer...")
    
    sft_config = SFTConfig(
        output_dir=run_output_dir,
        
        # === DATASET & LENGTH (Quan tr·ªçng) ===
        dataset_text_field="text",
        packing=False,
        
        # === GPU & PERFORMANCE ===
        per_device_train_batch_size=config.batch_size,
        gradient_accumulation_steps=config.gradient_accumulation_steps,
        warmup_steps=config.warmup_steps,
        max_steps=config.max_steps,
        learning_rate=config.learning_rate,
        fp16=True,                # B·∫Øt bu·ªôc True ƒë·ªÉ ch·∫°y t·ªët tr√™n GPU T4/Consumer
        # bf16=True,              # B·∫≠t c√°i n√†y n·ªÅu d√πng RTX 3090/4090 ho·∫∑c A100 (t·∫Øt fp16 ƒëi)
        optim="paged_adamw_8bit", # Optimizer ti·∫øt ki·ªám VRAM
        
        # === TI·∫æN TR√åNH & LOGGING (Hi·ªÉn th·ªã thanh ch·∫°y) ===
        logging_steps=1,          # In log sau M·ªñI b∆∞·ªõc (ƒë·ªÉ th·∫•y n√≥ ch·∫°y ngay)
        logging_first_step=True,  # In ngay b∆∞·ªõc ƒë·∫ßu ti√™n
        report_to="none",         # In ra m√†n h√¨nh console (kh√¥ng g·ª≠i l√™n wandb)
        disable_tqdm=False,       # ƒê·∫£m b·∫£o thanh loading bar hi·ªán l√™n
        save_strategy="no",       # Kh√¥ng save checkpoint r√°c gi·ªØa ch·ª´ng cho nh·∫π ·ªï c·ª©ng
    )

    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        processing_class=tokenizer,
        args=sft_config,
    )

    # --- 3. RUN ---
    print("\nüî• TRAINING STARTED... (N·∫øu th·∫•y Loss gi·∫£m l√† ngon)")
    try:
        trainer.train()
    except Exception as e:
        print(f"\n‚ùå L·ªói khi train: {e}")
        print("üí° G·ª£i √Ω: N·∫øu l·ªói CUDA OOM (Out of Memory), h√£y gi·∫£m batch_size ho·∫∑c max_seq_length.")
        return

    # --- 4. Save ---
    print(f"\nüíæ Saving adapter to {run_output_dir}...")
    trainer.save_model(run_output_dir)
    tokenizer.save_pretrained(run_output_dir)
    print("‚úÖ DONE! TRAINING COMPLETED.")

In [13]:
if __name__ == "__main__":
    # Kh·ªüi t·∫°o config
    conf = FinetuneConfig()
    # Ch·∫°y
    run_finetuning(
        conf,
        model=model,
        tokenizer=tokenizer
    )


üî• GPU DETECTED: NVIDIA GeForce RTX 3050 6GB Laptop GPU (6.44 GB VRAM)
   CUDA Version: 12.1

üöÄ Starting run: 20251129-180733
üìö Loading Data from: ../data/question_answer/datasetdone.xlsx


Map:   0%|          | 0/275 [00:00<?, ? examples/s]

‚öôÔ∏è Setting up Trainer...


Adding EOS to train dataset:   0%|          | 0/275 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/275 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/275 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 1}.



üî• TRAINING STARTED... (N·∫øu th·∫•y Loss gi·∫£m l√† ngon)


Step,Training Loss
1,2.7682
2,3.346
3,2.1147
4,2.3544
5,2.3282
6,2.3111
7,2.1924
8,2.0199
9,1.7067
10,1.9912



üíæ Saving adapter to models/gemma_native_finetuned/20251129-180733...
‚úÖ DONE! TRAINING COMPLETED.
