In [65]:
#!pip install unsloth

In [66]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

from datasets import Dataset, DatasetDict, concatenate_datasets, load_dataset

In [67]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = torch.bfloat16 # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/llama-2-13b-bnb-4bit",
    "unsloth/codellama-34b-bnb-4bit",
    "unsloth/tinyllama-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit", # New Google 6 trillion tokens model 2.5x faster!
    "unsloth/gemma-2b-bnb-4bit",
] # More models at https://huggingface.co/unsloth



In [68]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-bnb-4bit", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B Unsloth-SmolLM2-360M-Lora
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    trust_remote_code=True,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

Are you certain you want to do remote code execution?
==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.47.1.
   \\   /|    GPU: NVIDIA GeForce RTX 3090. Max memory: 23.584 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu118. CUDA: 8.6. CUDA Toolkit: 11.8. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [69]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = True,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

In [70]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.2",
    mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
)


def questions2gptFormat(rows):
    conversations = []
    
    # Sistem promptunu ekleyelim
    system_prompt = [
        {"from": "system", "value": "Sen bir tarım uzmanısın. Kullanıcının zirai hastalıklar, bitki hastalıkları, ilaçlar ve tarımsal ekipmanlar hakkında sorularını detaylı ve teknik bir şekilde cevapla."}
    ]

    def format_row(row):
        # Kullanıcı ve asistan mesajlarını ekleme
        conversations.append(system_prompt + [{"from": "user", "value": row[0]}, {"from": "assistant", "value": row[1]}])
                
    for i in zip(rows["instruction"], rows["output"]):
        format_row(i)

    return {"conversations": conversations}

def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }
pass

In [71]:

from datasets import load_dataset
# 2. Tapaco Veri Setini Yükleyin

dataset4 = Dataset.from_csv("BKÜ Sınav Analizi - BKÜ Sınav Analizi.csv")

In [72]:
dataset4 = dataset4.filter(lambda x: x["GPT"]!=None and x["Soru"]!=None)
dataset4 = dataset4.filter(lambda x: len(x["GPT"])<max_seq_length)

In [73]:
dataset4

Dataset({
    features: ['Soru', 'Cevap', 'Doğru Cevap', 'GPT', 'inputs'],
    num_rows: 16350
})

In [74]:
dataset4 = dataset4.rename_columns({"Soru": "instruction", "GPT": "output"})

In [75]:
# dataset3 = concatenate_datasets([dataset3["train"], dataset3["test"]])
# dataset = concatenate_datasets([dataset.select(range(int(len(dataset)*0.30))), dataset2, dataset3.select(range(1000))]).remove_columns(['id', 'context', 'is_negative_response', 'number_of_articles', 'ctx_split_points', 'correct_intro_idx', 'doctor_title', 'doctor_speciality'])

In [76]:
dataset = dataset4

In [77]:
from datasets import Dataset
import numpy as np

# None içeren satırları temizleyen fonksiyon
def remove_none_rows(example):
    return example["instruction"] is not None and example["output"] is not None

# None değerleri içeren satırları filtrele
dataset = dataset.filter(remove_none_rows)

In [78]:
dataset

Dataset({
    features: ['instruction', 'Cevap', 'Doğru Cevap', 'output', 'inputs'],
    num_rows: 16350
})

In [79]:
dataset = dataset.map(questions2gptFormat, batched = True, batch_size=10000).remove_columns(['instruction', 'Cevap', 'Doğru Cevap', 'output', 'inputs'])

In [80]:
dataset[0]

{'conversations': [{'from': 'system',
   'value': 'Sen bir tarım uzmanısın. Kullanıcının zirai hastalıklar, bitki hastalıkları, ilaçlar ve tarımsal ekipmanlar hakkında sorularını detaylı ve teknik bir şekilde cevapla.'},
  {'from': 'user',
   'value': 'Bitki koruma ürünleri bayileri, sattıkları hangi grup bitki koruma ürünlerini ilgili İl Müdürlüklerine bildirirler?  A) İnsektisit B) Fungisit C) Fumigant D) Herbisit'},
  {'from': 'assistant',
   'value': 'Öncelikle, bitki koruma ürünlerinin farklı gruplara ayrıldığını bilmemiz gerekiyor. Bu gruplar, hedefledikleri zararlı organizmalara göre sınıflandırılır:\n\nİnsektisitler (A): Böcekleri öldürmek veya kontrol altına almak için kullanılır.\nFungisitler (B): Mantar hastalıklarını önlemek veya tedavi etmek için kullanılır.\nFumigantlar (C): Gaz halinde kullanılan ve genellikle kapalı alanlarda böcekleri, kemirgenleri, mantarları veya diğer zararlıları yok etmek için kullanılan kimyasallardır.\nHerbisitler (D): Yabancı otları kontrol altı

In [81]:
dataset = dataset.map(formatting_prompts_func, batched = True,)

Map:   0%|          | 0/16350 [00:00<?, ? examples/s]

In [82]:
dataset

Dataset({
    features: ['conversations', 'text'],
    num_rows: 16350
})

In [83]:
dataset = dataset.shuffle(seed=42)
dataset = dataset.shuffle(seed=41)
dataset = dataset.shuffle(seed=40)
dataset = dataset.shuffle(seed=39)

In [84]:
from unsloth.chat_templates import standardize_sharegpt
dataset = standardize_sharegpt(dataset)
dataset = dataset.map(formatting_prompts_func, batched = True,)

Standardizing format:   0%|          | 0/16350 [00:00<?, ? examples/s]

Map:   0%|          | 0/16350 [00:00<?, ? examples/s]

In [85]:
dataset

Dataset({
    features: ['conversations', 'text'],
    num_rows: 16350
})

In [86]:
print(dataset[5]["text"])

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

Sen bir tarım uzmanısın. Kullanıcının zirai hastalıklar, bitki hastalıkları, ilaçlar ve tarımsal ekipmanlar hakkında sorularını detaylı ve teknik bir şekilde cevapla.<|eot_id|><|start_header_id|>user<|end_header_id|>

Nohat bitksnde Paa Hastalığı ve Zeyton Sineği zararlısı için ne tür önlemler alnlmalıdır?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Nohut yetiştiriciliğinde Pas Hastalığı hastalığı ve Zeytin Sineği zararlısıyla mücadelede en etkili yöntemlerden biri Kültürel önlemler uygulamasıdır. Ayrıca, kimyasal mücadele gerekiyorsa Sistemik insektisit tavsiye edilir. Düzenli bitki kontrolü, ekim nöbeti ve doğru sulama da zararlıların çoğalmasını engeller.<|eot_id|>


In [87]:
train_dataset, eval_dataset = dataset.train_test_split(test_size=0.2, seed=42).values()


In [88]:
import wandb

wandb.init(project="Basic LLM Train", name="Llama 3B v3.2", resume="allow") #id="a7zeymst",id="ecibz7e4" id="dbaxrwf4"
wandb.watch(model, log="all")


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mh-osmankarabulut[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [89]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    #data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 6,
    packing = False, # Can make training 5x faster for short sequences.
    #callbacks=[wandb_callback],
    args = TrainingArguments(
       
        gradient_accumulation_steps = 1,
        num_train_epochs=30,  
        per_device_train_batch_size=128,       # GPU başına batch boyutu
        per_device_eval_batch_size=128,       # GPU başına batch boyutu
        learning_rate = 0.001,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 300,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "Unsloth-Llama-3B-v3.2-BKU-GPT",
        report_to="wandb",                    # WandB veya diğer araçlara raporlama yok
        save_total_limit=2,                  # Sadece son iki checkpoint'i sakla
        save_steps=300,
        warmup_steps=1000,           # İlk 1000 adımda LR'yi yavaş yavaş artır
        
    ),
)

Converting train dataset to ChatML (num_proc=6):   0%|          | 0/13080 [00:00<?, ? examples/s]

Applying chat template to train dataset (num_proc=6):   0%|          | 0/13080 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=6):   0%|          | 0/13080 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=6):   0%|          | 0/13080 [00:00<?, ? examples/s]

Converting eval dataset to ChatML (num_proc=6):   0%|          | 0/3270 [00:00<?, ? examples/s]

Applying chat template to eval dataset (num_proc=6):   0%|          | 0/3270 [00:00<?, ? examples/s]

Tokenizing eval dataset (num_proc=6):   0%|          | 0/3270 [00:00<?, ? examples/s]

Tokenizing eval dataset (num_proc=6):   0%|          | 0/3270 [00:00<?, ? examples/s]

In [90]:
""" from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
) """

' from unsloth.chat_templates import train_on_responses_only\ntrainer = train_on_responses_only(\n    trainer,\n    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",\n    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",\n) '

In [91]:
""" tokenizer.decode(trainer.train_dataset[5]["input_ids"]) """

' tokenizer.decode(trainer.train_dataset[5]["input_ids"]) '

In [92]:
""" 
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[5]["labels"]]) 
"""

' \nspace = tokenizer(" ", add_special_tokens = False).input_ids[0]\ntokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[5]["labels"]]) \n'

In [None]:
wandb.watch(model, log="all")
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 13,080 | Num Epochs = 30
O^O/ \_/ \    Batch size per device = 128 | Gradient Accumulation steps = 1
\        /    Total batch size = 128 | Total steps = 3,090
 "-____-"     Number of trainable parameters = 24,313,856


  0%|          | 0/3090 [00:00<?, ?it/s]

In [None]:
model.save_pretrained("Unsloth-Llama-3B-v3.2-BKU-GPT") # Local saving
tokenizer.save_pretrained("Unsloth-Llama-3B-v3.2-BKU-GPT")
# model.push_to_hub("your_name/lora_model", token = "...") # Online saving
# tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving

INFERENCE

In [None]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = torch.bfloat16 # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/llama-2-13b-bnb-4bit",
    "unsloth/codellama-34b-bnb-4bit",
    "unsloth/tinyllama-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit", # New Google 6 trillion tokens model 2.5x faster!
    "unsloth/gemma-2b-bnb-4bit",
] # More models at https://huggingface.co/unsloth



In [None]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Unsloth-Llama-3B-v3.2-BKU-GPT/checkpoint-10500",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    trust_remote_code=True,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

In [None]:
alpaca_prompt = """Senin adın Crispy. Sen bir ziraat mühendisi asistansın.Aşağıda bir görevi tanımlayan bir talimat ve daha fazla bağlam sağlayan bir girdi bulunmaktadır. Talebi uygun şekilde tamamlayan bir yanıt yazın.

### Talimat:
{}

### Giriş:
{}

### Yanıt:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["inputs"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        input = instruction.split("?")[1] if "?" in instruction else ""
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass



In [None]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 32, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = True,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

In [None]:
from unsloth.chat_templates import get_chat_template

# Tokenizer'a chat şablonunu uygula
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",  # Desteklenen formatlar: zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    mapping={
        "role": "from",
        "content": "value",
        "user": "human",
        "assistant": "gpt"
    },  # ShareGPT tarzı eşleme
    map_eos_token=True  # <|im_end|> ifadesini </s> ile eşle
)

# Modeli çıkarım (inference) için hazırla
FastLanguageModel.for_inference(model)

inputs = tokenizer(
[
    alpaca_prompt.format(
        "Crispy,naber", # instruction
        "", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")



# Modelden yanıt üret
outputs = model.generate(
    **inputs,
    max_new_tokens=2048,
    use_cache=True
)

# Yanıtları çözümle
decoded_outputs = tokenizer.batch_decode(outputs)

print(decoded_outputs)

# Çıktıyı formatlayan fonksiyon
def format_chat_output(decoded_outputs):
    formatted_text = decoded_outputs[0].replace("<|im_start|>user\n", "🗣 **Kullanıcı:**\n") \
                                      .replace("<|im_start|>assistant\n", "🤖 **Asistan:**\n") \
                                      .replace("<|im_end|>", "").strip()
    return formatted_text

# Formatlanmış çıktıyı ekrana yazdır
print(format_chat_output(decoded_outputs))


In [None]:
""" import pandas as pd
import torch
import random
from unsloth.chat_templates import get_chat_template
from difflib import SequenceMatcher
from tqdm import tqdm

# GPU kullanımı kontrol et
device = "cuda" if torch.cuda.is_available() else "cpu"

# Chat şablonunu Tokenizer'a uygula
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",
    mapping={
        "role": "from",
        "content": "value",
        "user": "human",
        "assistant": "gpt"
    },
    map_eos_token=True
)

# Modeli çıkarım (inference) için hazırla
FastLanguageModel.for_inference(model)
model.to(device)  # GPU'ya taşı

# CSV dosyasını oku
csv_file = "BKÜ Sınav Analizi.csv"
df = pd.read_csv(csv_file)

# Veriyi karıştır ve sadece %25'ini kullan
df = df.sample(frac=1, random_state=42)  # Karıştır
df = df.sample(frac=0.99, random_state=42)  # %25'ini seç

# Doğru tahminleri saymak için sayaç
correct_count = 0
total_questions = len(df)

# Benzerlik hesaplayan fonksiyon
def similarity(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Soruları tek tek modele gönder ve doğruluğu ölç
for index, row in tqdm(df.iterrows(), total=len(df)):
    question = row["Soru"]
    correct_answer = row["GPT"]

    messages = [{"from": "human", "value": question}]

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(device)

    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=512,
        use_cache=True
    )

    decoded_outputs = tokenizer.batch_decode(outputs)
    
    # Model çıktısını formatla
    model_answer = decoded_outputs[0].replace("<|im_start|>user\n", "").replace("<|im_start|>assistant\n", "").replace("<|im_end|>", "").strip()
    model_answer = model_answer[len(question):]

    # Benzerlik oranını hesapla
    match_ratio = similarity(str(model_answer), str(correct_answer))

    # %80'den büyükse doğru kabul et
    if match_ratio > 0.8:
        correct_count += 1

    #print(f"Soru: {question}")
    #print(f"Model Cevabı: {model_answer}")
    #print(f"Gerçek Cevap: {correct_answer}")
    #print(f"Benzerlik: %{match_ratio * 100:.2f}")
   # print("-" * 50)

# Doğruluk yüzdesini hesapla
accuracy = (correct_count / total_questions) * 100
print(f"Modelin doğruluk oranı: %{accuracy:.2f}")
 """