In [1]:
!pip install unsloth



In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

from datasets import Dataset, DatasetDict, concatenate_datasets, load_dataset

2025-03-10 12:52:32.066520: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-10 12:52:32.076134: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741600352.086020   16284 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741600352.089102   16284 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-10 12:52:32.100902: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [3]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = torch.bfloat16 # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/llama-2-13b-bnb-4bit",
    "unsloth/codellama-34b-bnb-4bit",
    "unsloth/tinyllama-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit", # New Google 6 trillion tokens model 2.5x faster!
    "unsloth/gemma-2b-bnb-4bit",
] # More models at https://huggingface.co/unsloth



🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [4]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Unsloth-SmolLM2-1.7B-BKU", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B Unsloth-SmolLM2-360M-Lora
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    trust_remote_code=True,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

Are you certain you want to do remote code execution?
==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.47.1.
   \\   /|    GPU: NVIDIA GeForce RTX 3090. Max memory: 23.584 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu118. CUDA: 8.6. CUDA Toolkit: 11.8. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.2.15 patched 24 layers with 24 QKV layers, 24 O layers and 24 MLP layers.


In [5]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth: Already have LoRA adapters! We shall skip this step.


In [6]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "chatml", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
    map_eos_token = True, # Maps <|im_end|> to </s> instead
)

def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }
pass


Unsloth: Will map <|im_end|> to EOS = <|im_end|>.


In [7]:

from datasets import load_dataset
# 2. Tapaco Veri Setini Yükleyin
dataset = load_dataset("turkish-nlp-suite/InstrucTurca", split="train", cache_dir="/media/hosman/Yedek/Datasets/")  # Tapaco veri setini yükle
dataset2 = load_dataset("Metin/WikiRAG-TR", split="train", cache_dir="/media/hosman/Yedek/Datasets/").rename_columns({"question": "Input", "answer": "Output"})
dataset3 = load_dataset("kayrab/patient-doctor-qa-tr-167732",  cache_dir="/media/hosman/Yedek/Datasets/").rename_columns({"question_content": "Input", "question_answer": "Output"})
dataset4 = Dataset.from_csv("cleaned_output.csv")

In [8]:
dataset4 = dataset4.rename_columns({"Soru": "Input", "Doğru Cevap": "Output"})

In [9]:
dataset3 = concatenate_datasets([dataset3["train"], dataset3["test"]])
dataset = concatenate_datasets([dataset.select(range(int(len(dataset)*0.30))), dataset2, dataset3.select(range(1000))]).remove_columns(['id', 'context', 'is_negative_response', 'number_of_articles', 'ctx_split_points', 'correct_intro_idx', 'doctor_title', 'doctor_speciality'])

In [10]:
dataset = dataset4

In [11]:
from datasets import Dataset
import numpy as np

# None içeren satırları temizleyen fonksiyon
def remove_none_rows(example):
    return example["Input"] is not None and example["Output"] is not None

# None değerleri içeren satırları filtrele
dataset = dataset.filter(remove_none_rows)

In [12]:
dataset

Dataset({
    features: ['Input', 'Cevap', 'Output'],
    num_rows: 525
})

In [13]:
def questions2gptFormat(rows):

    conversations = []
    
    def format_row(row):
        #print(row)
        return conversations.append( [{"from": "user", "value": row[0]}, {"from": "assistant", "value": row[1]}] )
                
           
    for i in zip(rows["Input"], rows["Output"]):
      
        format_row(i)

    return {"conversations":conversations}


In [14]:
dataset = dataset.map(questions2gptFormat, batched = True, batch_size=10000).remove_columns(["Input","Output"])

In [15]:
dataset

Dataset({
    features: ['Cevap', 'conversations'],
    num_rows: 525
})

In [16]:
dataset = dataset.map(formatting_prompts_func, batched = True,)

In [17]:
""" from unsloth.chat_templates import standardize_sharegpt
dataset = standardize_sharegpt(dataset)
dataset = dataset.map(formatting_prompts_func, batched = True,) """

' from unsloth.chat_templates import standardize_sharegpt\ndataset = standardize_sharegpt(dataset)\ndataset = dataset.map(formatting_prompts_func, batched = True,) '

In [18]:
dataset[5]["conversations"]

[{'from': 'user',
  'value': 'Bir bitki koruma ürününün az miktarda fakat uzun süreli alınması hâlinde sıcakkanlılarda meydana getirdiği fizyolojik düzensizliklere ne denir? A) Toksisite B) Kronik Toksisite C) Zehirlenme D) Akut Toksisite'},
 {'from': 'assistant', 'value': 'Kronik Toksisite'}]

In [19]:
print(dataset[5]["text"])

<|im_start|>system
Bir bitki koruma ürününün az miktarda fakat uzun süreli alınması hâlinde sıcakkanlılarda meydana getirdiği fizyolojik düzensizliklere ne denir? A) Toksisite B) Kronik Toksisite C) Zehirlenme D) Akut Toksisite<|im_end|>
<|im_start|>system
Kronik Toksisite<|im_end|>



In [20]:
unsloth_template = \
    "{{ bos_token }}"\
    "{{ 'You are a helpful assistant to the user\n' }}"\
    "{% for message in messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ '>>> User: ' + message['content'] + '\n' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ '>>> Assistant: ' + message['content'] + eos_token + '\n' }}"\
        "{% endif %}"\
    "{% endfor %}"\
    "{% if add_generation_prompt %}"\
        "{{ '>>> Assistant: ' }}"\
    "{% endif %}"
unsloth_eos_token = "eos_token"

if False:
    tokenizer = get_chat_template(
        tokenizer,
        chat_template = (unsloth_template, unsloth_eos_token,), # You must provide a template and EOS token
        mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
        map_eos_token = True, # Maps <|im_end|> to </s> instead
    )

In [21]:
train_dataset, eval_dataset = dataset.train_test_split(test_size=0.2, seed=42).values()


In [22]:
""" import wandb
from transformers import TrainerCallback
import torch
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel

class WandBQuestionCallback(TrainerCallback):
    def __init__(self, tokenizer, model, questions, log_interval=500,**kwargs):
        self.tokenizer = get_chat_template(
            tokenizer,
            chat_template="chatml",
            mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
            map_eos_token=True,
        )
        
        self.model = model

        self.device = "cuda"
        self.questions = questions  # List of question strings
        self.log_interval = log_interval
        
    def on_step_end(self, args, state, control, **kwargs):
        # if state.global_step % self.log_interval == 0:
        #     wandb.log({"step": state.global_step})
        #     self.log_model_responses()
        if True:
            wandb.log({"step": state.global_step})
            self.log_model_responses()

    def log_model_responses(self,**kwargs):

        

        responses = {}
        for question in self.questions:
            messages = [
                {"from": "human", "value": question},
            ]
            inputs = self.tokenizer.apply_chat_template(
                messages,
                tokenize=True,
                add_generation_prompt=True,
                return_tensors="pt"
            ).to(self.device)
            
            outputs = self.model.generate(input_ids=inputs, max_new_tokens=256, temperature=0.2, top_p=0.9, do_sample=True)
            response = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
            responses[question] = response
        
        wandb.log({"model_responses": responses})
 """

' import wandb\nfrom transformers import TrainerCallback\nimport torch\nfrom unsloth.chat_templates import get_chat_template\nfrom unsloth import FastLanguageModel\n\nclass WandBQuestionCallback(TrainerCallback):\n    def __init__(self, tokenizer, model, questions, log_interval=500,**kwargs):\n        self.tokenizer = get_chat_template(\n            tokenizer,\n            chat_template="chatml",\n            mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},\n            map_eos_token=True,\n        )\n        \n        self.model = model\n\n        self.device = "cuda"\n        self.questions = questions  # List of question strings\n        self.log_interval = log_interval\n        \n    def on_step_end(self, args, state, control, **kwargs):\n        # if state.global_step % self.log_interval == 0:\n        #     wandb.log({"step": state.global_step})\n        #     self.log_model_responses()\n        if True:\n            wandb.log({"step": state.glob

In [23]:
""" # Örnek sorular
questions = [
    "433 * b - 7420490 = -7413995 denklemini çöz.",
    "Türkiye'nin başkenti neresidir?",
    "E=mc^2 denkleminin fiziksel anlamı nedir?",
    "Merhaba.Nasılsın?",
    "Merhaba, dün diş çekimi yapıldıktan sonra bu sabah aşırı kanama ile hekime başvurdum. Pihtinin oluştuğunu, ancak kanamanın durmadığı gerekçesiyle dikiş işlemi uyguladı. Bugün herhangi bir kanama veya ağrı yok, yalnız dikiş bölgesinde mukusa benzer bir doku oluştu. Tekrar gitmem gerekir mi?",
    "Merhaba, ben 18 yaşındayım, geçen yıl elimin üst kısmı yanmıştı, şimdi iyileşti ancak elimin üstünde yanık izi kaldı. Bu iz için herhangi bir ilaç veya farklı tedavi yöntemi var mıdır?"
    "Mulan filminin hikayesi hangi kaynaktan esinlenmiştir?",
    "Kartografya günümüzde nasıl teknolojilerden faydalanıyor?"

]

# Callback'i oluştur
wandb_callback = WandBQuestionCallback(tokenizer, model, questions) """

' # Örnek sorular\nquestions = [\n    "433 * b - 7420490 = -7413995 denklemini çöz.",\n    "Türkiye\'nin başkenti neresidir?",\n    "E=mc^2 denkleminin fiziksel anlamı nedir?",\n    "Merhaba.Nasılsın?",\n    "Merhaba, dün diş çekimi yapıldıktan sonra bu sabah aşırı kanama ile hekime başvurdum. Pihtinin oluştuğunu, ancak kanamanın durmadığı gerekçesiyle dikiş işlemi uyguladı. Bugün herhangi bir kanama veya ağrı yok, yalnız dikiş bölgesinde mukusa benzer bir doku oluştu. Tekrar gitmem gerekir mi?",\n    "Merhaba, ben 18 yaşındayım, geçen yıl elimin üst kısmı yanmıştı, şimdi iyileşti ancak elimin üstünde yanık izi kaldı. Bu iz için herhangi bir ilaç veya farklı tedavi yöntemi var mıdır?"\n    "Mulan filminin hikayesi hangi kaynaktan esinlenmiştir?",\n    "Kartografya günümüzde nasıl teknolojilerden faydalanıyor?"\n\n]\n\n# Callback\'i oluştur\nwandb_callback = WandBQuestionCallback(tokenizer, model, questions) '

In [24]:
import wandb

wandb.init(project="Basic LLM Train",  resume="allow", ) #id="a7zeymst",id="ecibz7e4"
wandb.watch(model, log="all")


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mh-osmankarabulut[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [25]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    #data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 6,
    packing = False, # Can make training 5x faster for short sequences.
    #callbacks=[wandb_callback],
    args = TrainingArguments(
       
        gradient_accumulation_steps = 1,
        num_train_epochs=100,  
        per_device_train_batch_size=16,       # GPU başına batch boyutu
        per_device_eval_batch_size=16,       # GPU başına batch boyutu
        learning_rate = 0.0001,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 300,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "Unsloth-SmolLM2-1.7B",
        report_to="wandb",                    # WandB veya diğer araçlara raporlama yok
        save_total_limit=2,                  # Sadece son iki checkpoint'i sakla
        save_steps=300,
        warmup_steps=10000,           # İlk 1000 adımda LR'yi yavaş yavaş artır
        #run_name="ecibz7e4"
    ),
)

Tokenizing train dataset (num_proc=6):   0%|          | 0/420 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=6):   0%|          | 0/420 [00:00<?, ? examples/s]

Tokenizing eval dataset (num_proc=6):   0%|          | 0/105 [00:00<?, ? examples/s]

Tokenizing eval dataset (num_proc=6):   0%|          | 0/105 [00:00<?, ? examples/s]

In [26]:
""" from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
) """

' from unsloth.chat_templates import train_on_responses_only\ntrainer = train_on_responses_only(\n    trainer,\n    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",\n    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",\n) '

In [27]:
""" tokenizer.decode(trainer.train_dataset[5]["input_ids"]) """

' tokenizer.decode(trainer.train_dataset[5]["input_ids"]) '

In [28]:
""" 
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[5]["labels"]]) 
"""

' \nspace = tokenizer(" ", add_special_tokens = False).input_ids[0]\ntokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[5]["labels"]]) \n'

In [29]:
wandb.watch(model, log="all")
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 420 | Num Epochs = 100
O^O/ \_/ \    Batch size per device = 16 | Gradient Accumulation steps = 1
\        /    Total batch size = 16 | Total steps = 2,700
 "-____-"     Number of trainable parameters = 18,087,936


  0%|          | 0/2700 [00:00<?, ?it/s]

{'loss': 0.4421, 'grad_norm': 1.3181688785552979, 'learning_rate': 3e-06, 'epoch': 11.11}
{'loss': 0.3464, 'grad_norm': 1.1805654764175415, 'learning_rate': 6e-06, 'epoch': 22.22}
{'loss': 0.2147, 'grad_norm': 2.810464859008789, 'learning_rate': 9e-06, 'epoch': 33.33}
{'loss': 0.1198, 'grad_norm': 0.8986873030662537, 'learning_rate': 1.2e-05, 'epoch': 44.44}
{'loss': 0.074, 'grad_norm': 0.7190911173820496, 'learning_rate': 1.5e-05, 'epoch': 55.56}
{'loss': 0.0612, 'grad_norm': 2.523554801940918, 'learning_rate': 1.8e-05, 'epoch': 66.67}
{'loss': 0.0589, 'grad_norm': 0.6387069225311279, 'learning_rate': 2.1e-05, 'epoch': 77.78}
{'loss': 0.0584, 'grad_norm': 0.755878210067749, 'learning_rate': 2.4e-05, 'epoch': 88.89}
{'loss': 0.0578, 'grad_norm': 0.7478410005569458, 'learning_rate': 2.7000000000000002e-05, 'epoch': 100.0}
{'train_runtime': 2597.4541, 'train_samples_per_second': 16.17, 'train_steps_per_second': 1.039, 'train_loss': 0.15925174642492224, 'epoch': 100.0}


In [30]:
model.save_pretrained("Unsloth-SmolLM2-1.7B-BKU") # Local saving
tokenizer.save_pretrained("Unsloth-SmolLM2-1.7B-BKU")
# model.push_to_hub("your_name/lora_model", token = "...") # Online saving
# tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving

('Unsloth-SmolLM2-1.7B-BKU/tokenizer_config.json',
 'Unsloth-SmolLM2-1.7B-BKU/special_tokens_map.json',
 'Unsloth-SmolLM2-1.7B-BKU/vocab.json',
 'Unsloth-SmolLM2-1.7B-BKU/merges.txt',
 'Unsloth-SmolLM2-1.7B-BKU/added_tokens.json',
 'Unsloth-SmolLM2-1.7B-BKU/tokenizer.json')

INFERENCE

In [1]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = torch.bfloat16 # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/llama-2-13b-bnb-4bit",
    "unsloth/codellama-34b-bnb-4bit",
    "unsloth/tinyllama-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit", # New Google 6 trillion tokens model 2.5x faster!
    "unsloth/gemma-2b-bnb-4bit",
] # More models at https://huggingface.co/unsloth



🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-03-10 13:53:43.092769: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-10 13:53:43.102043: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741604023.111701   27168 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741604023.114390   27168 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-10 13:53:43.125790: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Unsloth-SmolLM2-1.7B-BKU",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    trust_remote_code=True,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

Are you certain you want to do remote code execution?
==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.47.1.
   \\   /|    GPU: NVIDIA GeForce RTX 3090. Max memory: 23.584 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu118. CUDA: 8.6. CUDA Toolkit: 11.8. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.2.15 patched 24 layers with 24 QKV layers, 24 O layers and 24 MLP layers.


In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth: Already have LoRA adapters! We shall skip this step.


In [4]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "chatml", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
    map_eos_token = True, # Maps <|im_end|> to </s> instead
)

def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }
pass


Unsloth: Will map <|im_end|> to EOS = <|im_end|>.


In [5]:
from unsloth.chat_templates import get_chat_template

# Tokenizer'a chat şablonunu uygula
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",  # Desteklenen formatlar: zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    mapping={
        "role": "from",
        "content": "value",
        "user": "human",
        "assistant": "gpt"
    },  # ShareGPT tarzı eşleme
    map_eos_token=True  # <|im_end|> ifadesini </s> ile eşle
)

# Modeli çıkarım (inference) için hazırla
FastLanguageModel.for_inference(model)

# Kullanıcı mesajları
messages = [
    {"from": "human", "value": 'MRL’nin birimi nedir? A) ml/kg B) ml/l C) mg/l D) mg/kg'}
]

# Mesajları tokenize et ve modele uygun hale getir
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,  # Üretim için gerekli
    return_tensors="pt"
).to("cuda")

# Modelden yanıt üret
outputs = model.generate(
    input_ids=inputs,
    max_new_tokens=2048,
    use_cache=True
)

# Yanıtları çözümle
decoded_outputs = tokenizer.batch_decode(outputs)

# Çıktıyı formatlayan fonksiyon
def format_chat_output(decoded_outputs):
    formatted_text = decoded_outputs[0].replace("<|im_start|>user\n", "🗣 **Kullanıcı:**\n") \
                                      .replace("<|im_start|>assistant\n", "🤖 **Asistan:**\n") \
                                      .replace("<|im_end|>", "").strip()
    return formatted_text

# Formatlanmış çıktıyı ekrana yazdır
print(format_chat_output(decoded_outputs))


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


🗣 **Kullanıcı:**
MRL’nin birimi nedir? A) ml/kg B) ml/l C) mg/l D) mg/kg
🤖 **Asistan:**
mg/kg


In [6]:
import pandas as pd
import torch
from unsloth.chat_templates import get_chat_template
from difflib import SequenceMatcher

# Chat şablonunu Tokenizer'a uygula
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",
    mapping={
        "role": "from",
        "content": "value",
        "user": "human",
        "assistant": "gpt"
    },
    map_eos_token=True
)

# Modeli çıkarım (inference) için hazırla
FastLanguageModel.for_inference(model)

# CSV dosyasını oku
csv_file = "cleaned_output.csv"  # CSV dosya adı
df = pd.read_csv(csv_file)

# Doğru tahminleri saymak için sayaç
correct_count = 0
total_questions = len(df)

# Benzerlik hesaplayan fonksiyon
def similarity(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Soruları tek tek modele gönder ve doğruluğu ölç
for index, row in df.iterrows():
    question = row["Soru"]
    correct_answer = row["Doğru Cevap"]

    messages = [{"from": "human", "value": question}]

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to("cuda")

    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=50,
        use_cache=True
    )

    decoded_outputs = tokenizer.batch_decode(outputs)
    
    # Model çıktısını formatla
    model_answer = decoded_outputs[0].replace("<|im_start|>user\n", "").replace("<|im_start|>assistant\n", "").replace("<|im_end|>", "").strip()
    
    model_answer = model_answer[(len(question)):]

    # Benzerlik oranını hesapla
    match_ratio = similarity(model_answer, correct_answer)

    # %80'den büyükse doğru kabul et
    if match_ratio > 0.8:
        correct_count += 1

    print(f"Soru: {question}")
    print(f"Model Cevabı: {model_answer}")
    print(f"Gerçek Cevap: {correct_answer}")
    print(f"Benzerlik: %{match_ratio * 100:.2f}")
    print("-" * 50)

# Doğruluk yüzdesini hesapla
accuracy = (correct_count / total_questions) * 100
print(f"Modelin doğruluk oranı: %{accuracy:.2f}")


Soru: Bitki koruma ürünleri bayileri, sattıkları hangi grup bitki koruma ürünlerini ilgili İl Müdürlük lerine bildirirler? A) İnsektisit B) Fungisit C) Fumigant D) Herbisit
Model Cevabı: 
İnsektisit
Gerçek Cevap: Fumigant
Benzerlik: %30.00
--------------------------------------------------
Soru: Piyasada satışa sunulan bitki koruma ürünleri hangi mevzuata göre kontrol edilmektedir? A) Zirai Mücadele İlaçları Kontrol Yönetmeliği B) Bitki Koruma Ürünleri Ruhsatlandırma Yönet meliği C) Zirai Mücadele Alet ve Makinaları Hakkında Yönetmelik D) Bitki Koruma Ürünlerinin Toptan ve Perakende Satılması ve Depolanması Hakkında Yönet melik
Model Cevabı: 
Zirai Mücadele İlaçları Kontrol Yönetmeliği
Gerçek Cevap: Zirai Mücadele İlaçları Kontrol Yönetmeliği
Benzerlik: %98.88
--------------------------------------------------
Soru: Yapılan kontroller sonucunda ihtar cezası alan bir bitki koruma ürününün sonraki iki yıl içinde tekrar ihtar cezasını gerektiren bir ceza alması durumunda söz konusu bitki 