In [None]:
import evaluate
import numpy as np
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainerCallback,
    TrainingArguments,
)

# --- Model and device settings ---
model_checkpoint = "C:/Users/ersan/Desktop/mobile/local_llama3"
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Running on: {device}")

# --- Tokenizer ---
# use_fast=False selects the slow tokenizer; the EOS token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

# --- 8-bit quantized base model ---
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
# device_map="auto" leaves the placement of the weights to the loader.
# Disabled option kept from the original cell: llm_int8_enable_fp32_cpu_offload=True
model = AutoModelForCausalLM.from_pretrained(
    model_checkpoint,
    device_map="auto",
    quantization_config=quantization_config,
)

# PEFT / LoRA configuration
lora_config = LoraConfig(
    r=8,  # rank of the low-rank approximation
    lora_alpha=16,  # scaling factor for the LoRA layers
    lora_dropout=0.1,  # dropout for LoRA layers
    task_type="CAUSAL_LM",  # task type, for CausalLM models
)

# The base model was loaded in 8-bit, so run PEFT's k-bit preparation step before
# attaching adapters (recommended by the PEFT quantization guide for quantized
# base models; see the PEFT docs for the exact adjustments it performs).
model = prepare_model_for_kbit_training(model)

# Wrap the prepared base model with the LoRA adapters
peft_model = get_peft_model(model, lora_config)

# Smoke test: generate a short continuation to confirm the wrapped model runs
input_text = "The future of AI is"
encoding = tokenizer(input_text, return_tensors="pt")
input_ids = encoding.input_ids.to(device)
# Use the tokenizer's own attention mask rather than a hand-built float tensor of ones,
# matching how test_model_prompt prepares its inputs.
attention_mask = encoding.attention_mask.to(device)
# Keyword arguments only: works whether or not the wrapper's generate() accepts positionals.
output_ids = peft_model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_length=50,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

# Load the train / validation / test splits from local JSON files
data_files = dict(
    train="../data_less/train_data.json",
    validation="../data_less/eval_data.json",
    test="../data_less/test_data.json",
)
raw_datasets = load_dataset("json", data_files=data_files)

# Preprocessing function
def preprocess_function(examples):
    """Build fixed-length causal-LM features from a batch of prompt/answer pairs.

    Each pair is rendered as ``<input>\\nAnswer: <target><eos>`` (both parts
    stripped of surrounding whitespace), tokenized with truncation and padding
    to 1024 tokens, and given a ``labels`` entry that is a copy of ``input_ids``.

    Args:
        examples: Batch mapping with parallel ``"input_text"`` and
            ``"target_text"`` sequences of strings.

    Returns:
        The tokenizer output with an added ``"labels"`` entry.
    """
    eos = tokenizer.eos_token
    texts = []
    for prompt, answer in zip(examples["input_text"], examples["target_text"]):
        texts.append(prompt.strip() + "\nAnswer: " + answer.strip() + eos)

    tokenized = tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=1024,
    )
    # Labels start out as a copy of the inputs.
    tokenized["labels"] = list(tokenized["input_ids"])
    return tokenized

# Tokenize every split and drop the original text columns so only model features remain
tokenized_datasets = raw_datasets.map(
    preprocess_function,
    remove_columns=raw_datasets["train"].column_names,
    batched=True,
)
# Collator configured for causal language modelling (mlm=False)
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

# Training arguments
training_args = TrainingArguments(
    # Output and bookkeeping
    output_dir="./llama_finetuned",
    save_total_limit=3,
    logging_steps=10,
    report_to="none",
    # Schedule
    num_train_epochs=3,
    eval_strategy="epoch",
    # Batching: 2 samples per device x 8 accumulation steps per optimizer update
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    # Optimisation
    learning_rate=3e-5,
    weight_decay=0.01,
    max_grad_norm=1.0,
    fp16=True,
)

# Gradient logging callback
class GradientLoggingCallback(TrainerCallback):
    """Print the global L2 norm of the gradients once per optimizer step.

    Trainer clears the gradients right after ``optimizer.step()``, i.e. before
    ``on_step_end`` fires, so measuring there reports 0.0. The norm is therefore
    captured in ``on_optimizer_step`` (called after the optimizer step, before
    the gradients are zeroed) and printed from ``on_step_end``, where
    ``state.global_step`` already refers to the step that just finished.

    On transformers versions that never call ``on_optimizer_step`` the callback
    falls back to measuring inside ``on_step_end``.
    """

    def __init__(self):
        # Norm captured for the step in flight; None when nothing was captured.
        self._pending_norm = None

    @staticmethod
    def _grad_norm(model):
        """Return the L2 norm over all parameter gradients that are present."""
        squared_sum = 0.0
        for param in model.parameters():
            if param.grad is not None:
                squared_sum += param.grad.detach().norm(2).item() ** 2
        return squared_sum ** 0.5

    def on_optimizer_step(self, args, state, control, model=None, **kwargs):
        """Capture the gradient norm while the gradients still exist."""
        if model is not None:
            self._pending_norm = self._grad_norm(model)

    def on_step_end(self, args, state, control, model=None, **kwargs):
        """Print the norm captured for the step that just completed."""
        total_norm = self._pending_norm
        self._pending_norm = None
        if total_norm is None:
            # No capture happened (event not supported): fall back to whatever
            # gradients are still attached to the parameters.
            if model is None:
                return
            total_norm = self._grad_norm(model)
        print(f"[Step {state.global_step}] Gradient Norm: {total_norm:.4f}")

# ROUGE metric, loaded once and used by compute_metrics to score decoded text
rouge_metric = evaluate.load("rouge")
# Earlier draft of compute_metrics, kept for reference only (never executed).
# It decoded `predictions` as-is and read `value.mid.fmeasure` from each ROUGE entry.
# NOTE(review): presumably written for an older result format of the metric — confirm before reviving.
# def compute_metrics(eval_pred):
#     predictions, labels = eval_pred
#     labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
#     decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
#     decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
#     result = rouge_metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
#     return {key: value.mid.fmeasure * 100 for key, value in result.items()}

def compute_metrics(eval_pred):
    """Score next-token predictions against the labels with ROUGE.

    The predictions come from a single forward pass over the label sequence
    (teacher forcing), not from free-running generation, so the output at
    position ``i`` is the model's guess for the token at position ``i + 1``.
    Predictions and labels are shifted accordingly before decoding, and every
    position whose label is ``-100`` is blanked out on both sides so ignored
    positions cannot leak junk tokens into the decoded text.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` may be a
            3-D logits array, a tuple whose first element is that array, or a
            2-D array of already arg-maxed token ids (e.g. produced by a
            ``preprocess_logits_for_metrics`` hook). ``labels`` is a 2-D array
            of token ids with ``-100`` marking ignored positions.

    Returns:
        dict mapping each ROUGE key to its score multiplied by 100.
    """
    logits, labels = eval_pred
    # Some models return a tuple of outputs; the logits come first.
    if isinstance(logits, tuple):
        logits = logits[0]
    logits = np.asarray(logits)
    labels = np.asarray(labels)

    # Convert logits to token ids unless ids were supplied directly.
    predictions = logits.argmax(axis=-1) if logits.ndim == 3 else logits

    # Align: the output at position i is compared with the label at position i + 1.
    predictions = predictions[:, :-1]
    labels = labels[:, 1:]

    # Blank out ignored positions on both sides with the pad token, which
    # skip_special_tokens=True then removes while decoding.
    keep = labels != -100
    pad_id = tokenizer.pad_token_id
    predictions = np.where(keep, predictions, pad_id)
    labels = np.where(keep, labels, pad_id)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge_metric.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        use_stemmer=True,
    )
    return {key: value * 100 for key, value in result.items()}

# Build the Trainer and start fine-tuning
trainer = Trainer(
    args=training_args,
    model=peft_model,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    callbacks=[GradientLoggingCallback()],
    compute_metrics=compute_metrics,
)

trainer.train()

# Persist the trained model and the tokenizer side by side in one directory
final_output_dir = "./llama_peft_finetuned_final"
trainer.save_model(final_output_dir)
tokenizer.save_pretrained(final_output_dir)


In [None]:
import torch, numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling, TrainerCallback
from datasets import load_dataset
import evaluate
from transformers import BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Directory holding the fine-tuned artifacts (adjust based on your saved directory)
model_path = "./llama_peft_finetuned_final"

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on: {device}")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the underlying causal LM; device_map="auto" decides where the weights live.
# NOTE(review): model_path presumably contains only the LoRA adapter saved by the
# training cell, in which case this call depends on transformers resolving the base
# checkpoint from the adapter config — confirm.
base_model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")

# Load PEFT model (LoRA-adapted)
peft_model = PeftModel.from_pretrained(base_model, model_path)
# No peft_model.to(device) here: the weights were already placed by device_map="auto",
# and moving a dispatched model afterwards can undo or break that placement.
# Switch to eval mode so dropout (including the LoRA dropout) is off for inference.
peft_model.eval()

def test_model_prompt(model, tokenizer, device, prompt_text):
    print("=== Input Prompt ===")
    print(prompt_text)
    
    # Tokenize with attention mask
    encoding = tokenizer(prompt_text, return_tensors="pt")
    input_ids = encoding.input_ids.to(device)
    attention_mask = encoding.attention_mask.to(device)
    
    # Generate output with sampling enabled
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=150,
            do_sample=True,      # Enable sampling to allow creative output
            temperature=0.7,      # Adjust temperature for output diversity
            top_p=0.9,            # Use nucleus sampling
            eos_token_id=tokenizer.eos_token_id,
        )
    
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    
    print("\n=== Generated Output ===")
    print(generated_text)

# Sample prompt asking for a place description in a fixed JSON layout
prompt_text = "".join(
    [
        "I am located in Türkiye/İstanbul. I would like to learn more about a place called 'Opet'.\n",
        "Please provide the details in JSON format as per the following structure:\n",
        "{\"name\": ..., \"description\": ..., \"location\": ..., \"coordinates\": {\"latitude\": ..., \"longitude\": ...}, \"tags\": [...]}\n",
        "Answer:",
    ]
)

# Generate and print a completion for the sample prompt
test_model_prompt(
    model=peft_model,
    tokenizer=tokenizer,
    device=device,
    prompt_text=prompt_text,
)

Running on: cuda


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


=== Input Prompt ===
I am located in Türkiye/İstanbul. I would like to learn more about a place called 'Opet'.
Please provide the details in JSON format as per the following structure:
{"name": ..., "description": ..., "location": ..., "coordinates": {"latitude": ..., "longitude": ...}, "tags": [...]}
Answer:

=== Generated Output ===
I am located in Türkiye/İstanbul. I would like to learn more about a place called 'Opet'.
Please provide the details in JSON format as per the following structure:
{"name":..., "description":..., "location":..., "coordinates": {"latitude":..., "longitude":...}, "tags": [...]}
Answer: { "name": "Opet", "description": "An ancient Roman public bath and gymnasium in the city of Ephesus, Turkey", "location": "Ephesus, Turkey", "coordinates": {"latitude": 38.3396, "longitude": 27.9801}, "tags": ["Bath", "Gymnasium", "Roman", "Public", "Ancient
