# Gemma-3-1B Fine-Tuning: QLoRA vs GaLore Comparison

Bu notebook Gemma-3-1b-it modelini QLoRA ve GaLore teknikleri ile fine-tune eder ve karşılaştırır.

**Gereksinimler:**
- Colab Pro (GPU ve high RAM)
- HuggingFace token (Gemma model erişimi için)

**Runtime Ayarları:**
- Runtime > Change runtime type > GPU (T4 veya A100)

## 1. Setup - GitHub Clone ve Dependencies

In [None]:
# GitHub repository clone
import os

if not os.path.exists('/content/Gemma-Finetune'):
    print("Repository clone ediliyor")
    !git clone https://github.com/sendayildirim/Gemma-Finetune
    print("Clone tamamlandi")
else:
    print("Repository zaten mevcut")

# Working directory
os.chdir('/content/Gemma-Finetune')
print(f"Working directory: {os.getcwd()}")



import sys
sys.path.insert(0, '/content/Gemma-Finetune')

!pip install -q -r requirements.txt

print("Dependencies yuklendi")

In [None]:
import sys
import torch
import transformers
import datasets
import peft
import trl
import accelerate
import os
import random
import numpy as np
import pandas as pd
from huggingface_hub import login
import json
import gc

# Authenticate with the HuggingFace Hub (needed for the gated Gemma weights).
# new_session=False is passed so an already stored token can be reused
# instead of prompting again - see the huggingface_hub login() docs.
login(new_session=False)

# Disable Weights & Biases reporting for the training runs below.
os.environ["WANDB_DISABLED"] = "true"

# Report whether a CUDA device is visible and, if so, its name and capacity.
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {gpu_name}")
    # total_memory is reported in bytes; convert to GiB for display.
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"GPU Memory: {gpu_memory_gb:.2f} GB")

## 1.5 Reproducibility - Seed Initialization

Tüm random işlemlerin reproducible olması için seed=42 kullanılır.

In [None]:
# One seed shared by every random number generator the notebook touches.
SEED = 42

# Seed Python's RNG, NumPy's global RNG and PyTorch, in that order.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)

# Seed the CUDA generators as well when a GPU is present.
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

## 2. Dataset Hazırlama ve Önizleme

In [None]:
# Build the shared data configuration and the dataset preparer.
from src.data.prepare_datasets import DatasetPreparer
from src.config.data_config import get_data_config

# `config` is reused by later cells: the preprocessor is built from it and the
# evaluators read `config.model_name`.
config = get_data_config()
preparer = DatasetPreparer(config)

print("Dataset preparer hazir")

In [None]:
# Build the train/test datasets and report their sizes.
print("Dataset'ler indiriliyor ve sample'lar aliniyor")

# prepare_all_datasets() returns a (train, test) pair.
# NOTE(review): presumably downloads and samples the source datasets, as the
# message above says - confirm in src/data/prepare_datasets.py.
train_dataset, test_dataset = preparer.prepare_all_datasets()

print(f"\nToplam train samples: {len(train_dataset)}")
print(f"Toplam test samples: {len(test_dataset)}")

In [None]:
# Persist both splits through the preparer.
# NOTE(review): the output location is decided inside save_datasets() - confirm.
preparer.save_datasets(train_dataset, test_dataset)
print("Dataset'ler kaydedildi")

In [None]:
# Show the first training row of every source dataset, field by field.
max_chars = 200  # string fields longer than this are cut for display

for source in ("alpaca", "tulu", "ultrachat"):
    # Keep only the rows tagged with the current source.
    matching_rows = train_dataset.filter(lambda row: row["source_dataset"] == source)

    print(f"{source.upper()} Dataset Örnek")

    if len(matching_rows) > 0:
        for field, content in matching_rows[0].items():
            is_long_text = isinstance(content, str) and len(content) > max_chars
            shown = content[:max_chars] if is_long_text else content
            print(f"{field}: {shown}")
    # Blank line between sources, printed even when a source has no rows.
    print()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Tally how many training rows come from each source dataset.
# Counter replaces the hand-written get()/increment loop; it is a dict
# subclass, so .items() below works unchanged and keys keep first-seen order.
from collections import Counter

source_counts = Counter(sample["source_dataset"] for sample in train_dataset)

print("Dataset dagilimlari:")
for source, count in source_counts.items():
    print(f"  {source}: {count} samples")

## 3. Preprocessing - Gemma Chat Template

In [None]:
from src.data.preprocess import DatasetPreprocessor

# Build the preprocessor from the shared data config created earlier.
preprocessor = DatasetPreprocessor(config)

print("Preprocessor hazir")
# Show which tokenizer the preprocessor holds (stray trailing ';' removed).
print(f"Tokenizer: {preprocessor.tokenizer.name_or_path}")

In [None]:
# Run preprocessing; the call returns a (train, test) pair.
# NOTE(review): presumably loads the saved datasets and applies the Gemma chat
# template, as the message says - confirm in src/data/preprocess.py.
print("Dataset'ler Gemma chat template'ine cevriliyor")

train_processed, test_processed = preprocessor.load_and_preprocess()

In [None]:
# Print the "text" field of the first preprocessed training row.
print("PREPROCESSED ORNEK (Gemma Chat Format)")
print(train_processed[0]["text"])

In [None]:
# Persist both preprocessed splits through the preprocessor.
# NOTE(review): later cells read ./data/processed/test_processed.json, which is
# presumably written here - confirm.
preprocessor.save_processed_datasets(train_processed, test_processed)

print("Preprocessed dataset'ler kaydedildi")

In [None]:
# Print a short preview of the stored chat-template example for each source.
print("GEMMA CHAT TEMPLATE FORMAT ORNEKLERI")

examples_dir = "./data/processed/examples"
preview_limit = 600  # characters shown before the preview is cut

for source in ("alpaca", "tulu", "ultrachat"):
    example_file = os.path.join(examples_dir, f"{source}_example.txt")

    # Guard clause: report a missing file and move on to the next source.
    if not os.path.exists(example_file):
        print(f"{source} example dosyasi bulunamadi")
        continue

    print(f"{source.upper()} Dataset - Gemma Format:")

    with open(example_file, "r", encoding="utf-8") as handle:
        text = handle.read()

    # Long examples are cut and marked with a trailing "..." line.
    print((text[:preview_limit] + "\n...") if len(text) > preview_limit else text)

## 4. Base Model Evaluation (Baseline)

In [None]:
# Create the evaluator for the untouched base model (the baseline).
from src.evaluation.evaluate_base import BaseModelEvaluator

# The model name comes from the shared data config.
base_evaluator = BaseModelEvaluator(model_name=config.model_name)

print("Base model evaluator hazir")

In [None]:
import importlib

# Refresh the evaluator module so edits made on disk since its first import
# take effect, then rebuild the evaluator from the refreshed class.
if 'src.evaluation.evaluate_base' in sys.modules:
    # importlib.reload re-executes the module in place; this replaces the
    # manual deletion of the sys.modules entry and uses the import above.
    importlib.reload(sys.modules['src.evaluation.evaluate_base'])
    from src.evaluation.evaluate_base import BaseModelEvaluator
    base_evaluator = BaseModelEvaluator(model_name=config.model_name)
    print("Module reloaded - batch processing aktif!")

print("Base model yukleniyor")
base_evaluator.load_model()

print("Model yuklendi")

In [None]:
import time

# Score the base model on the processed test set and time the run.
print("Base model evaluation basliyor")
print("Burada GPU RAM 2.6'dan 26.1'e çıktı")

test_dataset_path = "./data/processed/test_processed.json"

start_time = time.time()
# Use the same sample budget (500) as the QLoRA and GaLore evaluations in
# section 7, so the before/after BLEU and ROUGE figures are measured with the
# same number of test samples. The baseline previously used 1000.
base_results = base_evaluator.evaluate_on_test_set(
    test_dataset_path,
    max_samples=500
)
elapsed_time = time.time() - start_time
elapsed_minutes = elapsed_time / 60

print("Base Model Results:")
print(f"BLEU-4: {base_results['bleu_4']:.4f}")
print(f"ROUGE-L: {base_results['rouge_l']:.4f}")
print(f"Evaluation completed in {elapsed_minutes:.2f} minutes")

In [None]:
# Load the stored base-model generations and print the first three.
# The encoding is explicit (as for the example files read earlier) so that
# non-ASCII text does not depend on the platform default.
with open('results/metrics/base_model_examples.json', 'r', encoding='utf-8') as f:
    examples = json.load(f)

print("BASE MODEL ORNEK GENERATION'LAR:")

for i, ex in enumerate(examples[:3], 1):
    print(f"Ornek {i}:")
    # Each field is capped at 10000 characters to bound the cell output.
    print(f"Instruction: {ex['instruction'][:10000]}")
    print(f"Expected: {ex['expected'][:10000]}")
    print(f"Generated: {ex['generated'][:10000]}")

In [None]:
# Drop the base evaluator so the memory it holds can be reclaimed before training.
try:
    del base_evaluator
except NameError:
    # Already deleted (cell re-run) or never created: nothing to free.
    pass

# Collect unreachable Python objects first, then release PyTorch's cached GPU memory.
gc.collect()
torch.cuda.empty_cache()
print("Memory temizlendi")

## 4.5 Hyperparameter Configuration Comparison

Bu hücre QLoRA ve GaLore tekniklerinin hyperparameter'larını karşılaştırır.
Report için gerekli comparison table'ı oluşturur.

In [None]:
from src.config.qlora_config import get_qlora_config
from src.config.galore_config import get_galore_config

qlora_cfg = get_qlora_config()
galore_cfg = get_galore_config()


def _shared_hyperparams(cfg):
    """Return one technique's table column, ordered like the row labels below."""
    if cfg.bf16:
        precision = 'BF16'
    elif cfg.fp16:
        precision = 'FP16'
    else:
        precision = 'FP32'

    return [
        cfg.num_train_epochs,
        cfg.per_device_train_batch_size,
        cfg.gradient_accumulation_steps,
        # Effective batch size = per-device batch size x accumulation steps.
        cfg.per_device_train_batch_size * cfg.gradient_accumulation_steps,
        cfg.learning_rate,
        cfg.weight_decay,
        cfg.warmup_steps,
        cfg.max_seq_length,
        cfg.optim,
        cfg.lr_scheduler_type,
        precision,
        'Yes' if cfg.gradient_checkpointing else 'No',
        cfg.max_grad_norm,
        cfg.seed,
    ]


# (row label, rationale) pairs; the order must match _shared_hyperparams().
table_rows = [
    ('Epochs', 'Sufficient for convergence'),
    ('Batch Size (per device)', 'Memory constraint'),
    ('Gradient Accumulation', 'Memory optimization'),
    ('Effective Batch Size', 'Effective batch size'),
    ('Learning Rate', 'Adjusted for technique'),
    ('Weight Decay', 'Regularization'),
    ('Warmup Steps', 'Learning rate warmup'),
    ('Max Sequence Length', 'Gemma context window'),
    ('Optimizer', 'Technique-specific'),
    ('LR Scheduler', 'Smooth decay'),
    ('Precision', 'Numerical stability'),
    ('Gradient Checkpointing', 'Memory optimization'),
    ('Max Grad Norm', 'Gradient clipping'),
    ('Random Seed', 'Reproducibility'),
]

comparison_data = {
    'Hyperparameter': [label for label, _ in table_rows],
    'QLoRA': _shared_hyperparams(qlora_cfg),
    'GaLore': _shared_hyperparams(galore_cfg),
    'Rationale': [rationale for _, rationale in table_rows],
}

df = pd.DataFrame(comparison_data)

print("HYPERPARAMETER COMPARISON TABLE")
print(df.to_string(index=False))

# Settings that exist for only one of the two techniques.
print("TECHNIQUE-SPECIFIC PARAMETERS:")
print("QLoRA:")
for label, value in (
    ("LoRA rank (r)", qlora_cfg.lora_r),
    ("LoRA alpha", qlora_cfg.lora_alpha),
    ("LoRA dropout", qlora_cfg.lora_dropout),
    ("Target modules", qlora_cfg.target_modules),
    ("4-bit quantization", qlora_cfg.load_in_4bit),
    ("Quantization type", qlora_cfg.bnb_4bit_quant_type),
):
    print(f"  - {label}: {value}")

print("GaLore:")
for label, value in (
    ("Rank", galore_cfg.rank),
    ("Update projection gap", galore_cfg.update_proj_gap),
    ("GaLore scale", galore_cfg.galore_scale),
    ("Projection type", galore_cfg.proj_type),
    ("Target modules", galore_cfg.target_modules_list),
):
    print(f"  - {label}: {value}")


# Save the shared-hyperparameter table for the report.
os.makedirs('results/metrics', exist_ok=True)
df.to_csv('results/metrics/hyperparameter_comparison.csv', index=False)
print("Table saved to: results/metrics/hyperparameter_comparison.csv")

## 5. QLoRA Fine-Tuning

In [None]:
from src.config.qlora_config import get_qlora_config

# Configuration object handed to the QLoRA training run below.
qlora_config = get_qlora_config()

print("QLoRA Konfigurasyonu:")
# Echo the key settings, one "label: value" line each.
for label, value in (
    ("Model", qlora_config.model_name),
    ("LoRA rank", qlora_config.lora_r),
    ("LoRA alpha", qlora_config.lora_alpha),
    ("Learning rate", qlora_config.learning_rate),
    ("Epochs", qlora_config.num_train_epochs),
    ("Batch size", qlora_config.per_device_train_batch_size),
):
    print(f"  {label}: {value}")

In [None]:
# Run QLoRA fine-tuning with the configuration built in the previous cell.
from src.training.train_qlora import train

print("QLoRA TRAINING BASLIYOR")

# NOTE(review): later cells read results/metrics/qlora_metrics.json and
# ./models/qlora/final, which are presumably produced by this call - confirm.
train(qlora_config)

In [None]:
# Load the QLoRA training metrics file and print its headline figures.
# The encoding is explicit so the read does not depend on the platform default.
with open('results/metrics/qlora_metrics.json', 'r', encoding='utf-8') as f:
    qlora_metrics = json.load(f)

print("QLoRA Training Metrikleri:")
print(f"  Peak Memory: {qlora_metrics['memory_stats']['peak_memory_allocated_gb']:.2f} GB")
print(f"  Training Time: {qlora_metrics['training_time_hours']:.2f} hours")
# The stored config is pretty-printed as indented JSON.
print(f"  Config: {json.dumps(qlora_metrics['config'], indent=2)}")

In [None]:
# Collect unreachable Python objects, then release PyTorch's cached GPU memory
# before the next stage.
gc.collect()
torch.cuda.empty_cache()
print("Memory temizlendi")

## 6. GaLore Fine-Tuning

In [None]:
from src.config.galore_config import get_galore_config

# Configuration object handed to the GaLore training run below.
galore_config = get_galore_config()

print("GaLore Konfigurasyonu:")
# Echo the key settings, one "label: value" line each.
for label, value in (
    ("Model", galore_config.model_name),
    ("Rank", galore_config.rank),
    ("Update projection gap", galore_config.update_proj_gap),
    ("Learning rate", galore_config.learning_rate),
    ("Epochs", galore_config.num_train_epochs),
    ("Batch size", galore_config.per_device_train_batch_size),
):
    print(f"  {label}: {value}")

In [None]:
# Run GaLore fine-tuning with the configuration built in the previous cell.
# The function is imported under an alias so it does not shadow the QLoRA `train`.
from src.training.train_galore import train as train_galore

print("GALORE TRAINING BASLIYOR")
print("Bu islem 2-4 saat surebilir")

# NOTE(review): later cells read results/metrics/galore_metrics.json and
# ./models/galore/final, which are presumably produced by this call - confirm.
train_galore(galore_config)

In [None]:
# Load the GaLore training metrics file and print its headline figures.
# The encoding is explicit so the read does not depend on the platform default.
with open('results/metrics/galore_metrics.json', 'r', encoding='utf-8') as f:
    galore_metrics = json.load(f)

print("GaLore Training Metrikleri:")
print(f"  Peak Memory: {galore_metrics['memory_stats']['peak_memory_allocated_gb']:.2f} GB")
print(f"  Training Time: {galore_metrics['training_time_hours']:.2f} hours")
# The stored config is pretty-printed as indented JSON.
print(f"  Config: {json.dumps(galore_metrics['config'], indent=2)}")

In [None]:
# Free memory: collect unreachable Python objects, then release PyTorch's
# cached GPU memory before evaluation.
gc.collect()
torch.cuda.empty_cache()
print("Memory temizlendi")

## 7. Fine-Tuned Models Evaluation

In [None]:
import importlib
import time

# Reload the evaluator module if an earlier cell already imported it, so code
# edited on disk (the batch-processing version) is picked up without
# restarting the runtime. importlib.reload replaces the manual deletion of
# the sys.modules entry.
_EVAL_MODULE = 'src.evaluation.evaluate_models'
if _EVAL_MODULE in sys.modules:
    importlib.reload(sys.modules[_EVAL_MODULE])

from src.evaluation.evaluate_models import FineTunedModelEvaluator

print("QLoRA model evaluation basliyor (batch processing aktif)\n")

# Evaluator for the QLoRA output stored under ./models/qlora/final.
qlora_evaluator = FineTunedModelEvaluator(
    technique="QLoRA",
    model_path="./models/qlora/final",
    base_model_name=config.model_name
)

qlora_evaluator.load_model()

# Time only the evaluation call, not model loading.
start_time = time.time()
qlora_eval_results = qlora_evaluator.evaluate_on_test_set(
    test_dataset_path="./data/processed/test_processed.json",
    max_samples=500
)
elapsed_time = time.time() - start_time
elapsed_minutes = elapsed_time / 60

print("\nQLoRA Evaluation Results:")
print(f"BLEU-4: {qlora_eval_results['bleu_4']:.4f}")
print(f"ROUGE-L: {qlora_eval_results['rouge_l']:.4f}")
print(f"Evaluation completed in {elapsed_minutes:.2f} minutes")

In [None]:
# Drop the QLoRA evaluator so the memory it holds can be reclaimed.
try:
    del qlora_evaluator
except NameError:
    # Already deleted (cell re-run) or never created: nothing to free.
    pass

# Collect unreachable Python objects first, then release PyTorch's cached GPU memory.
gc.collect()
torch.cuda.empty_cache()

In [None]:
import importlib
import time

# Reload the evaluator module if an earlier cell already imported it, so code
# edited on disk (the batch-processing version) is picked up without
# restarting the runtime. importlib.reload replaces the manual deletion of
# the sys.modules entry.
_EVAL_MODULE = 'src.evaluation.evaluate_models'
if _EVAL_MODULE in sys.modules:
    importlib.reload(sys.modules[_EVAL_MODULE])

from src.evaluation.evaluate_models import FineTunedModelEvaluator

print("GaLore model evaluation basliyor (batch processing aktif)\n")

# Evaluator for the GaLore output stored under ./models/galore/final.
galore_evaluator = FineTunedModelEvaluator(
    technique="GaLore",
    model_path="./models/galore/final",
    base_model_name=config.model_name
)

galore_evaluator.load_model()

# Time only the evaluation call, not model loading.
start_time = time.time()
galore_eval_results = galore_evaluator.evaluate_on_test_set(
    test_dataset_path="./data/processed/test_processed.json",
    max_samples=500
)
elapsed_time = time.time() - start_time
elapsed_minutes = elapsed_time / 60

print("\nGaLore Evaluation Results:")
print(f"BLEU-4: {galore_eval_results['bleu_4']:.4f}")
print(f"ROUGE-L: {galore_eval_results['rouge_l']:.4f}")
print(f"Evaluation completed in {elapsed_minutes:.2f} minutes")

In [None]:
# Drop the GaLore evaluator so the memory it holds can be reclaimed.
try:
    del galore_evaluator
except NameError:
    # Already deleted (cell re-run) or never created: nothing to free.
    pass

# Collect unreachable Python objects first, then release PyTorch's cached GPU memory.
gc.collect()
torch.cuda.empty_cache()

## 8. Results Visualization

In [None]:
# Create the results visualizer and load the stored results into it.
from src.evaluation.visualize_results import ResultsVisualizer

visualizer = ResultsVisualizer()
# NOTE(review): presumably reads the metric files under results/metrics - confirm.
visualizer.load_results()

print("Results yuklendi")

In [None]:
# Build the before/after comparison table and derive relative improvements.
comparison_df = visualizer.create_comparison_table()

print("ASSESSMENT REPORTING TABLE")
print(comparison_df.to_string(index=False))


def _percent_change(before, after):
    """Return the change from `before` to `after` in percent, or None if `before` is 0."""
    if before == 0:
        return None
    return ((after - before) / before) * 100


def _format_change(change):
    """Format a percent change with an explicit sign; None marks a zero baseline."""
    return "n/a (baseline is 0)" if change is None else f"{change:+.2f}%"


# Delta calculations
print("IMPROVEMENT ANALYSIS")

for _, row in comparison_df.iterrows():
    technique = row['Technique']

    # Only the numeric parsing is guarded: a non-numeric or missing cell means
    # the metrics for this technique have not been produced yet.
    try:
        bleu_before = float(row['BLEU-4 (Before)'])
        bleu_after = float(row['BLEU-4 (After)'])
        rouge_before = float(row['ROUGE-L (Before)'])
        rouge_after = float(row['ROUGE-L (After)'])
    except (TypeError, ValueError):
        print(f"\n{technique}: Data not available yet")
        continue

    # A zero baseline is reported as such instead of being mistaken for
    # missing data.
    bleu_improvement = _percent_change(bleu_before, bleu_after)
    rouge_improvement = _percent_change(rouge_before, rouge_after)

    print(f"\n{technique}:")
    print(f"  BLEU-4 Improvement: {_format_change(bleu_improvement)}")
    print(f"  ROUGE-L Improvement: {_format_change(rouge_improvement)}")
    print(f"  Peak Memory: {row['Peak Memory (GB)']} GB")
    print(f"  Training Time: {row['Training Time (Hrs)']} hours")


# Emit the same table as a fenced markdown block for pasting into the report.
print("MARKDOWN FORMAT (used in report.md):")
print("```")
print(comparison_df.to_markdown(index=False))
print("```")

In [None]:
# BLEU/ROUGE comparison plot
visualizer.plot_bleu_rouge_comparison()

# Show the image file inline.
# NOTE(review): the PNG is presumably written by the call above - confirm.
# `Image` and `display` are reused by the next plotting cell.
from IPython.display import Image, display
display(Image('results/plots/bleu_rouge_comparison.png'))

In [None]:
# Memory vs Performance plot
visualizer.plot_memory_vs_performance()

# Show the image file inline; `display` and `Image` come from the
# IPython.display import in the BLEU/ROUGE plotting cell.
# NOTE(review): the PNG is presumably written by the call above - confirm.
display(Image('results/plots/memory_vs_performance.png'))

In [None]:
# Summary report: generate it through the visualizer, then print the text file.
visualizer.create_summary_report()

# The encoding is explicit so the read does not depend on the platform default.
with open('results/summary_report.txt', 'r', encoding='utf-8') as f:
    print(f.read())

## 9. Side-by-Side Example Generations

In [None]:
# Load the stored example generations of the three models for the
# side-by-side listing. The encoding is explicit so that non-ASCII text in the
# generations does not depend on the platform default.
with open('results/metrics/base_model_examples.json', 'r', encoding='utf-8') as f:
    base_examples = json.load(f)

with open('results/metrics/qlora_examples.json', 'r', encoding='utf-8') as f:
    qlora_examples = json.load(f)

with open('results/metrics/galore_examples.json', 'r', encoding='utf-8') as f:
    galore_examples = json.load(f)

In [None]:
# Print up to ten examples with the base, QLoRA and GaLore generations together.
# The bound takes all three lists into account: sizing it from the base list
# alone raised IndexError whenever a fine-tuned model had fewer stored examples.
num_examples = min(10, len(base_examples), len(qlora_examples), len(galore_examples))

# NOTE(review): assumes the three files list the same prompts in the same
# order, since instruction and expected text are taken from the base file - confirm.
aligned_examples = zip(
    base_examples[:num_examples],
    qlora_examples[:num_examples],
    galore_examples[:num_examples],
)

separator = "=" * 100

for number, (base_ex, qlora_ex, galore_ex) in enumerate(aligned_examples, start=1):
    print(separator)
    print(f"EXAMPLE {number}")
    print(separator)

    print("\n[INSTRUCTION]")
    print(base_ex['instruction'])

    print("\n[EXPECTED RESPONSE]")
    print(base_ex['expected'])

    print("\n[BASE MODEL]")
    print(base_ex['generated'])

    print("\n[QLORA MODEL]")
    print(qlora_ex['generated'])

    print("\n[GALORE MODEL]")
    print(galore_ex['generated'])

    print("\n")

## 10. Final Summary

In [None]:
def _print_technique_summary(header, eval_results, train_metrics):
    """Print quality and resource figures for one fine-tuning technique."""
    print(header)
    print(f"   - BLEU-4: {eval_results['bleu_4']:.4f}")
    print(f"   - ROUGE-L: {eval_results['rouge_l']:.4f}")
    print(f"   - Peak Memory: {train_metrics['memory_stats']['peak_memory_allocated_gb']:.2f} GB")
    print(f"   - Training Time: {train_metrics['training_time_hours']:.2f} hours")


# Recap of the whole run, built from variables created by earlier cells.
print("1. DATASET:")
print(f"   - Train samples: {len(train_processed)}")
print(f"   - Test samples: {len(test_processed)}")
print("   - Sources: Alpaca, Tulu v2, Ultrachat")

print("2. BASE MODEL PERFORMANCE:")
print(f"   - BLEU-4: {base_results['bleu_4']:.4f}")
print(f"   - ROUGE-L: {base_results['rouge_l']:.4f}")

_print_technique_summary("3. QLORA:", qlora_eval_results, qlora_metrics)
_print_technique_summary("4. GALORE:", galore_eval_results, galore_metrics)

# Where the artefacts of the run can be found.
print("5. CIKTILAR:")
for output_line in (
    "   - Comparison Table: results/metrics/comparison_table.csv",
    "   - Plots: results/plots/",
    "   - Examples: results/metrics/*_examples.json",
    "   - Summary: results/summary_report.txt",
):
    print(output_line)

In [None]:
# List every file under results/, including nested folders.
print("Kaydedilen dosyalar:")
for folder, _subfolders, filenames in os.walk('results/'):
    for filename in filenames:
        print(f"  {os.path.join(folder, filename)}")