In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
!pip install -q trl
!pip install -U bitsandbytes
!pip install -q transformers --upgrade

In [None]:
import torch
import time
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
    TaskType
)
from trl import SFTConfig, SFTTrainer
from dataclasses import dataclass, field
from typing import List, Optional

# 1. Configuration

In [None]:
@dataclass
class FinetuneConfig:
    """Settings for one LoRA fine-tuning run: model, LoRA, training loop and paths.

    ``peft_config`` is derived from the ``lora_*`` fields when an instance is
    created, so overriding e.g. ``lora_rank`` in the constructor is reflected in
    the adapter configuration. It is not a constructor argument.
    """
    model_name: str = "google/gemma-2-2b"
    # Kept small to save VRAM; can be raised again if enough VRAM is available.
    max_seq_length: int = 512

    # --- LoRA Parameters ---
    lora_rank: int = 16
    lora_alpha: int = 32
    lora_dropout: float = 0.05

    # --- PEFT LoraConfig ---
    # Built per instance in __post_init__ from the lora_* fields above.
    # init=False keeps the constructor signature unchanged; repr/compare are off
    # because the value is fully determined by the other fields.
    peft_config: LoraConfig = field(init=False, repr=False, compare=False)

    # --- Training Parameters ---
    learning_rate: float = 2e-4
    batch_size: int = 1
    gradient_accumulation_steps: int = 4
    max_steps: int = 500
    warmup_steps: int = 5
    logging_steps: int = 1
    seed: int = 3407

    output_dir: str = "/content/gemma_finetuned"
    dataset_path: str = "/content/train.csv"

    def __post_init__(self):
        """Create this instance's LoraConfig from its own LoRA hyper-parameters."""
        self.peft_config = LoraConfig(
            r=self.lora_rank,
            lora_alpha=self.lora_alpha,
            target_modules="all-linear",
            lora_dropout=self.lora_dropout,
            bias="none",
            task_type=TaskType.CAUSAL_LM,
        )


config = FinetuneConfig()

In [None]:
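# SECURITY: a Hugging Face access token was pasted here in plain text and has been removed; revoke that token. Authenticate through the HF_TOKEN environment variable or an interactive login prompt instead of committing tokens to the notebook.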

# 2. Model loading

## 2.1. Hugging Face authentication

## 2.2. Model loading

In [None]:
# Environment sanity check before the model is downloaded and placed on the GPU.
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"PyTorch Version: {torch.__version__}")
print(f"PyTorch CUDA Version: {torch.version.cuda}")

# 4-bit NF4 quantisation (QLoRA-style) for the base weights.
# prepare_model_for_kbit_training() below casts the remaining fp16/bf16 parameters
# to fp32; on an unquantised model that would upcast every weight and undo the
# memory saving that loading in float16 was meant to provide.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

print(f"üìÇ Loading Model: {config.model_name}")
# NOTE(review): transformers recommends the "eager" attention implementation when
# training Gemma 2 — confirm that "sdpa" is intended here.
model = AutoModelForCausalLM.from_pretrained(
    config.model_name,
    quantization_config=bnb_config,
    device_map="cuda",
    attn_implementation="sdpa",
    # float16 for the modules that are not quantised, to reduce memory versus FP32
    torch_dtype=torch.float16
)
# Freeze the base weights and make the quantised model ready for adapter training.
model = prepare_model_for_kbit_training(model)

In [None]:
print(f"üìÇ Loading Tokenizer: {config.model_name}")
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
print("üõ†Ô∏è Attaching LoRA adapters...")
model = get_peft_model(model, config.peft_config)
model.print_trainable_parameters()

# 3. Model finetuning

In [None]:
def run_finetuning(
    config: FinetuneConfig,
    model,
    tokenizer,
):
    """Fine-tune ``model`` on a question/answer CSV with TRL's ``SFTTrainer``.

    Prints GPU information, reads ``config.dataset_path``, formats every row as a
    Gemma-style user/model turn in a ``text`` column, trains for
    ``config.max_steps`` steps, and saves the model and tokenizer under a
    timestamped sub-directory of ``config.output_dir``.

    Args:
        config: Run settings (paths, batch size, learning rate, seed, ...).
        model: Causal LM to train. It may already be wrapped by
            ``get_peft_model``; ``config.peft_config`` is handed to the trainer
            only when it is not.
        tokenizer: Tokenizer used by the trainer and saved next to the model.

    Returns:
        None. Errors while reading the data or training are printed and end the
        run early instead of being raised.
    """
    # Local import: only needed for the "already wrapped?" check further down.
    from peft import PeftModel

    # ======================================================
    # --- CHECK CUDA & GPU INFO ---
    # ======================================================
    print("\n" + "="*40)
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
        print(f"üî• GPU DETECTED: {gpu_name} ({vram_gb:.2f} GB VRAM)")
        print(f"   CUDA Version: {torch.version.cuda}")
    else:
        print("‚ö†Ô∏è KH√îNG T√åM TH·∫§Y GPU!")
    print("="*40 + "\n")

    # Timestamp keeps the outputs of separate runs in separate directories.
    versioning = time.strftime("%Y%m%d-%H%M%S")
    run_output_dir = f"{config.output_dir}/{versioning}"
    print(f"üöÄ Starting run: {versioning}")

    # ======================================================
    # --- Load Data & Format ---
    # ======================================================
    print(f"üìö Loading Data from: {config.dataset_path}")
    try:
        df = pd.read_csv(config.dataset_path)
        # If the expected headers are missing, treat the first two columns as
        # question and answer and leave any further columns untouched.
        if 'question' not in df.columns or 'answer' not in df.columns:
            df.columns = ['question', 'answer'] + list(df.columns[2:])
    except Exception as e:
        print(f"‚ùå L·ªói ƒë·ªçc file d·ªØ li·ªáu: {e}")
        return

    dataset = Dataset.from_pandas(df)

    def formatting_prompts_func(examples):
        """Render a batch of question/answer pairs as user/model chat turns."""
        texts = [f"<start_of_turn>user\n{q}<end_of_turn>\n<start_of_turn>model\n{a}<end_of_turn>"
                  for q, a in zip(examples["question"], examples["answer"])]
        return { "text" : texts }
    dataset = dataset.map(formatting_prompts_func, batched=True)

    # ======================================================
    # --- Config & Trainer Setup ---
    # ======================================================
    print("‚öôÔ∏è Setting up Trainer...")
    sft_config = SFTConfig(
        output_dir=run_output_dir,

        # --- Dataset and Length ---
        dataset_text_field="text",
        packing=False,
        max_length=config.max_seq_length,

        # --- GPU and performance ---
        per_device_train_batch_size=config.batch_size,
        gradient_accumulation_steps=config.gradient_accumulation_steps,
        warmup_steps=config.warmup_steps,
        max_steps=config.max_steps,
        learning_rate=config.learning_rate,
        fp16=True,
        optim="paged_adamw_8bit",
        gradient_checkpointing=True,
        # Use the configured seed; previously config.seed was defined but never applied.
        seed=config.seed,

        # --- Progress and Logging ---
        logging_steps=config.logging_steps,
        logging_first_step=True,
        report_to="none",
        disable_tqdm=False,
        save_strategy="steps",
        save_steps=10,
        save_total_limit=1,
    )

    # The notebook wraps the model with get_peft_model before calling this
    # function; pass the LoRA config only for a plain model so the adapters are
    # not configured a second time.
    already_wrapped = isinstance(model, PeftModel)
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        args=sft_config,
        # Train with the caller's tokenizer (pad token, padding side) rather than
        # one the trainer would otherwise load on its own.
        processing_class=tokenizer,
        peft_config=None if already_wrapped else config.peft_config,
    )

    # --- Run and logging ---
    print("\nüî• TRAINING STARTED... (N·∫øu th·∫•y Loss gi·∫£m l√† ngon)")
    try:
        training_output = trainer.train()
        print("\n‚úÖ TRAINING COMPLETED.")

        final_loss = training_output.metrics.get('train_loss')
        if final_loss is not None:
            print(f"üìà Final Training Loss: {final_loss:.4f}")
        else:
            # Fall back to the most recent log entry; an empty history must not
            # raise here and be misreported as a training failure.
            log_history = trainer.state.log_history
            last_log = log_history[-1] if log_history else {}
            if 'loss' in last_log:
                print(f"üìà Final Training Loss (t·ª´ log): {last_log['loss']:.4f}")
            else:
                print("‚ö†Ô∏è Kh√¥ng t√¨m th·∫•y Loss cu·ªëi c√πng trong metrics hay log.")

    except Exception as e:
        print(f"\n‚ùå L·ªói khi train: {e}")
        print("üí° G·ª£i √Ω: N·∫øu l·ªói CUDA OOM (Out of Memory), h√£y gi·∫£m batch_size (th·ª≠ 1) ho·∫∑c max_length (th·ª≠ 256).")
        return

    # --- 5. Save ---
    print(f"\nüíæ Saving adapter to {run_output_dir}...")
    trainer.save_model(run_output_dir)
    tokenizer.save_pretrained(run_output_dir)
    print("‚úÖ DONE! MODEL SAVED.")

In [None]:
if __name__ == "__main__":
    # Reuse the module-level `config` that the model and LoRA adapters were built
    # from, so changes made to it in earlier cells also apply to training.
    # The name `conf` is kept for any later cell that refers to it.
    conf = config
    run_finetuning(
        conf,
        model=model,
        tokenizer=tokenizer
    )