In [None]:
!pip install -q transformers datasets peft accelerate bitsandbytes

import json
import torch
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
)
from peft import get_peft_model, LoraConfig, TaskType

# Base model to fine-tune
model_name = "microsoft/Phi-4-mini-instruct"

# Load the tokenizer and reuse the EOS token as the padding token
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

# 4-bit quantization config: NF4 quant type, double quantization, fp16 compute dtype
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)

# Load the quantized (lightweight) model.
# The local Hub cache is reused: forcing a fresh download on every run would
# re-fetch several gigabytes of weights each time the cell is executed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True,  # NOTE(review): permits running code shipped in the model repo — confirm it is still needed
)
# Keep the model config's padding id in sync with the tokenizer
model.config.pad_token_id = tokenizer.pad_token_id

# LoRA configuration: rank-8 adapters on the fused QKV and output projections
# of the attention blocks, trained for causal language modelling.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["self_attn.qkv_proj", "self_attn.o_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the base model with the LoRA adapters
model = get_peft_model(model, lora_config)

# Streaming option for large datasets.
# If the file is large, set this to True to read it lazily instead of loading
# every record into memory.
USE_STREAMING = False

if USE_STREAMING:
    dataset = load_dataset("json", data_files="dataset.jsonl", split="train", streaming=True)
else:
    # JSON Lines: one JSON object per line. Blank lines (for example a stray
    # empty line at the end of the file) are skipped instead of crashing
    # json.loads with a JSONDecodeError.
    with open("dataset.jsonl", "r", encoding="utf-8") as f:
        lines = [json.loads(line) for line in f if line.strip()]
    dataset = Dataset.from_list(lines)

# Tokenization function
def tokenize_function(examples, max_length=512):
    """Convert chat records into causal-LM training features.

    Each record's ``messages`` list is read as ``[user_message, answer, ...]``;
    only the ``content`` of the first two entries is used. The user text is
    wrapped in the instruction template, the answer and the EOS token are
    appended, and the whole sequence is tokenized once so that ``input_ids``
    and ``labels`` are position-aligned.

    Labels are a copy of ``input_ids`` where prompt and padding positions are
    set to -100 so that only answer tokens contribute to the loss. Padding is
    detected through ``attention_mask`` rather than by token id, because the
    pad token may be the same id as EOS and the real EOS must stay in the loss.

    Args:
        examples: Either a batch (``examples["messages"]`` is a list of
            conversations, as passed by ``map(..., batched=True)``) or a single
            record (``examples["messages"]`` is one conversation, as passed by
            a non-batched ``map``).
        max_length: Total token budget for prompt + answer. Every sequence is
            truncated/padded to this length. The default of 512 equals the sum
            of the two 256-token budgets previously used for prompt and answer.

    Returns:
        The tokenizer output with an added ``labels`` entry. For a batch the
        values are lists of per-example lists; for a single record they are
        flat per-token lists.

    Raises:
        Exception: Any tokenization or schema error is logged and re-raised, so
            a malformed record fails loudly instead of yielding empty features.
    """
    try:
        conversations = examples["messages"]
        # A non-batched map passes one conversation (a list of message dicts);
        # a batched map passes a list of conversations.
        single_example = bool(conversations) and isinstance(conversations[0], dict)
        if single_example:
            conversations = [conversations]

        inputs = [conv[0]["content"] for conv in conversations]
        outputs = [conv[1]["content"] for conv in conversations]

        # The trailing newline separates the instruction from the answer.
        prompts = [
            f"Kullanıcı girdisi: {inp}\nGörev: Kullanıcının ilgi alanlarına uygun bir kutu oyunu öner." + "\n"
            for inp in inputs
        ]
        # Appending EOS lets the model learn where an answer ends.
        eos_text = tokenizer.eos_token or ""
        full_texts = [prompt + answer + eos_text for prompt, answer in zip(prompts, outputs)]

        model_inputs = tokenizer(
            full_texts,
            max_length=max_length,
            truncation=True,
            padding="max_length"
        )
        # Token count of each prompt on its own (no padding), used as the
        # boundary between masked prompt tokens and supervised answer tokens.
        # NOTE(review): assumes the tokenizer does not append EOS by itself;
        # if it does, the boundary is off by one token.
        prompt_lengths = [
            len(ids)
            for ids in tokenizer(prompts, max_length=max_length, truncation=True)["input_ids"]
        ]

        labels = []
        for token_ids, mask, prompt_len in zip(
            model_inputs["input_ids"], model_inputs["attention_mask"], prompt_lengths
        ):
            row = []
            real_tokens_seen = 0  # counts non-padding tokens, so either padding side works
            for token_id, is_real in zip(token_ids, mask):
                if is_real and real_tokens_seen >= prompt_len:
                    row.append(token_id)
                else:
                    row.append(-100)
                real_tokens_seen += is_real
            labels.append(row)
        model_inputs["labels"] = labels

        if single_example:
            # Non-batched map expects per-example values, not a batch of one.
            return {key: values[0] for key, values in model_inputs.items()}
        return model_inputs
    except Exception as e:
        print(f"Tokenizasyon hatası: {e}")
        raise

# Tokenize the dataset (in streaming mode, map is applied lazily and one
# example at a time).
if USE_STREAMING:
    # Drop the raw columns here too: the Trainer is configured with
    # remove_unused_columns=False, so any leftover non-tensor column (such as
    # the list of message dicts) would be handed to the data collator.
    # A streaming dataset may not know its columns up front; in that case fall
    # back to the one column the tokenization function reads.
    raw_columns = dataset.column_names or ["messages"]
    tokenized_dataset = dataset.map(tokenize_function, remove_columns=raw_columns)
else:
    tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)
    tokenized_dataset.set_format(type="torch")

# Training settings: one epoch, batch size 1 with 2 gradient-accumulation
# steps, fp16 mixed precision, one checkpoint per epoch.
# NOTE(review): with USE_STREAMING=True the dataset has no length; the Trainer
# probably needs `max_steps` in that case — confirm before enabling streaming.
training_args = TrainingArguments(
    output_dir="./mistral-finetuned",
    num_train_epochs=1,
    learning_rate=2e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    fp16=True,
    logging_steps=10,
    save_strategy="epoch",
    remove_unused_columns=False,
)

# Trainer (no custom data collator is passed)
trainer = Trainer(
    args=training_args,
    model=model,
    train_dataset=tokenized_dataset,
)

# Start training
print("Eğitim başlıyor...")
trainer.train()

# Save the model first, then the tokenizer, into the same directory
for artifact in (model, tokenizer):
    artifact.save_pretrained("./mistral-finetuned")
print("Model başarıyla kaydedildi! 🚀")


tokenizer_config.json:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Directory to load the model and tokenizer from
model_path = "./mistral-finetuned"

# 8-bit quantization settings; fp32 CPU offload is enabled so modules can be
# offloaded to the CPU when GPU memory is insufficient
quant_config = BitsAndBytesConfig(
    llm_int8_enable_fp32_cpu_offload=True,
    load_in_8bit=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)

# device_map="auto": place modules on GPU/CPU/disk automatically
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    quantization_config=quant_config,
)

# Switch to evaluation mode; as the cell's last expression this also displays
# the module tree
model.eval()


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): lora.Linear8bitLt(
            (base_layer): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=4096, out_features=8, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=8, out_features=4096, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): Linear8bitLt(in_features=4096, out_features=1024, bias=False)
          (v_proj): lora.Linear8bitLt(
            (base_layer): Linear8bitLt(

In [2]:
def generate_response(prompt, max_new_tokens=100, temperature=0.7, top_p=0.9):
    """Sample a continuation for ``prompt`` and return only the newly generated text.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        prompt: Text to condition the generation on.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.

    Returns:
        The decoded continuation with special tokens removed and surrounding
        whitespace stripped. The prompt itself is not included.
    """
    # Tokenize the prompt and move the tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = inputs["input_ids"].shape[1]

    # Generate the answer without tracking gradients
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,  # used for padding if needed
        )

    # Drop the prompt by token count, not by character count: decoding does not
    # necessarily reproduce the prompt string character-for-character (special
    # tokens are skipped, whitespace may be normalised), so slicing the decoded
    # text by len(prompt) can cut into the answer or leave prompt remnants.
    generated_tokens = outputs[0][prompt_token_count:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()


In [6]:
# In-domain smoke test: a statement of board-game interests.
# NOTE(review): the prompt is sent as-is, without the instruction template that
# the tokenization function wraps around training inputs — confirm this is intended.
prompt = "I love strategy and zombie boardgames."
response = generate_response(prompt)
print(f"Model: {response}")

Model: I want to make a game with strategy and zombie theme.

I am thinking of a game where you have a city and you have to build walls to protect it. You have limited resources so you have to make decision. you can use the resources to build wall, or to build traps. or to build weapons. or to build buildings, or to build barricades. or to build towers. or to build defenses. or to build barricades in buildings.


In [5]:
# Out-of-domain smoke test: a question unrelated to board games.
prompt = "How is the weather today?"
response = generate_response(prompt)
print(f"Model: {response}")

Model: It's a common question we ask ourselves and each other as we plan our day. But knowing the current weather conditions is only half the story.

The weather forecast can also give you an indication of what to expect in the hours, days, and even weeks ahead. Understanding the weather forecast can help you plan your activities, keep your family safe, and even save you money.

In this article, we'll explore the various weather forecasts, how they're created
