In [4]:
import torch, numpy as np
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
    TrainerCallback
)
from datasets import load_dataset
import evaluate

# -------------------------------
# Model ve cihaz ayarları
# -------------------------------
model_checkpoint = "C:/Users/ersan/Desktop/mobile/local_llama3"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on: {device}")
print("Supported quantized backends:", torch.backends.quantized.supported_engines)

# -------------------------------
# Tokenizer yükle
# -------------------------------
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

# -------------------------------
# 8-bit quantization ile modeli yükle
# -------------------------------
model = AutoModelForCausalLM.from_pretrained(
    model_checkpoint,
    load_in_8bit=True,
    device_map="auto"
)

# -------------------------------
# Test üretimi
# -------------------------------
input_text = "The future of AI is"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
attention_mask = torch.ones(input_ids.shape, device=device)
output_ids = model.generate(input_ids, attention_mask=attention_mask, max_length=50)
print("\n=== Generation Test ===")
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

# -------------------------------
# Veri yükleme ve ön işleme
# -------------------------------
data_files = {
    "train": "../data2/train_data.json",
    "validation": "../data2/eval_data.json",
    "test": "../data2/test_data.json",
}
raw_datasets = load_dataset("json", data_files=data_files)

def preprocess_function(examples):
    texts = [
        inp.strip() + "\nAnswer: " + tgt.strip() + tokenizer.eos_token
        for inp, tgt in zip(examples["input_text"], examples["target_text"])
    ]
    tokenized = tokenizer(texts, truncation=True, padding="max_length", max_length=1024)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_datasets = raw_datasets.map(
    preprocess_function, batched=True, remove_columns=raw_datasets["train"].column_names
)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# -------------------------------
# Eğitim argümanları
# -------------------------------
training_args = TrainingArguments(
    output_dir="./llama_finetuned",
    eval_strategy="epoch",
    learning_rate=3e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=10,
    save_total_limit=3,
    fp16=True,
    max_grad_norm=1.0,
    report_to="none",
)

# -------------------------------
# Gradient loglama callback'i
# -------------------------------
class GradientLoggingCallback(TrainerCallback):
    def on_step_end(self, args, state, control, model=None, **kwargs):
        total_norm = sum(
            param.grad.detach().data.norm(2).item() ** 2
            for param in model.parameters() if param.grad is not None
        ) ** 0.5
        print(f"[Step {state.global_step}] Gradient Norm: {total_norm:.4f}")

# -------------------------------
# ROUGE metrik hesaplama
# -------------------------------
rouge_metric = evaluate.load("rouge")
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    result = rouge_metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    return {key: value.mid.fmeasure * 100 for key, value in result.items()}

# -------------------------------
# Kısa gradient testi
# -------------------------------
def gradient_test():
    sample = tokenized_datasets["train"][0]
    input_ids_sample = torch.tensor(sample["input_ids"]).unsqueeze(0).to(device)
    attention_mask_sample = torch.tensor(sample["attention_mask"]).unsqueeze(0).to(device)
    labels_sample = torch.tensor(sample["labels"]).unsqueeze(0).to(device)
    model.train()
    outputs = model(input_ids=input_ids_sample, attention_mask=attention_mask_sample, labels=labels_sample)
    loss = outputs.loss
    loss.backward()
    grad_norm = sum(
        param.grad.norm().item() ** 2 for param in model.parameters() if param.grad is not None
    ) ** 0.5
    print(f"Gradient Test -> Loss: {loss.item():.4f}, Gradient Norm: {grad_norm:.4f}")

gradient_test()

# -------------------------------
# Trainer'ı tanımla ve eğitime başla
# -------------------------------
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[GradientLoggingCallback()],
)

trainer.train()

# -------------------------------
# Model ve tokenizer'ı kaydet
# -------------------------------
trainer.save_model("./llama_finetuned_final")
tokenizer.save_pretrained("./llama_finetuned_final")


Running on: cuda
Supported quantized backends: ['none', 'onednn', 'x86', 'fbgemm']


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



=== Generation Test ===
The future of AI is in the hands of the young
The future of AI is in the hands of the young
AI has the potential to transform every industry, but how do we ensure that it is developed in a way that benefits everyone?


Map: 100%|██████████| 2350/2350 [00:00<00:00, 3296.02 examples/s]


Gradient Test -> Loss: 8.5866, Gradient Norm: 170.2367


ValueError: You cannot perform fine-tuning on purely quantized models. Please attach trainable adapters on top of the quantized model to correctly perform fine-tuning. Please see: https://huggingface.co/docs/transformers/peft for more details