# Fine-tune Llama 3.1 (8B) for Turkish Drug Analysis

**NOTE**: This notebook MUST be run on a GPU. Ensure you have selected **Runtime > Change runtime type > T4 GPU** if using Colab.

In [4]:
%%capture
# 1. INSTALLATION & SETUP
import sys
import shutil

# STRICT GPU CHECK (No imports required)
if not shutil.which('nvidia-smi'):
    print("\n\n" + "="*60)
    print("ðŸ›‘ STOPPING EXECUTION: NO NVIDIA GPU DETECTED")
    print("="*60)
    print("This notebook relies on 'Unsloth' which requires an NVIDIA GPU.")
    print("Your current environment does not have 'nvidia-smi' available.")
    print("\nPLEASE RUN THIS ON GOOGLE COLAB (FREE):")
    print("1. Go to https://colab.research.google.com")
    print("2. Upload this notebook")
    print("3. Runtime > Change runtime type > T4 GPU")
    print("="*60 + "\n\n")
    raise RuntimeError("No GPU detected. Please upload to Google Colab.")

print("Installing Unsloth and Dependencies...")
%pip install unsloth "unsloth[colab-new]" @ git+https://github.com/unslothai/unsloth.git
%pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

import torch
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset
from unsloth.chat_templates import get_chat_template

print("âœ… Installation Complete. Libraries Loaded.")

OSError: [WinError 126] Belirtilen modül bulunamadı. Error loading "c:\Users\Emre\Desktop\hackathon_chatbot-main\.venv\Lib\site-packages\torch\lib\caffe2_nvrtc.dll" or one of its dependencies.

In [None]:
# 2. Configuration
# These three values are forwarded unchanged to FastLanguageModel.from_pretrained
# below; max_seq_length is also reused by the trainer cell.
# NOTE(review): dtype=None presumably lets Unsloth choose the dtype itself — confirm.
max_seq_length, dtype, load_in_4bit = 2048, None, True

# 3. Load Model
# model_name is the checkpoint to load; new_model_name is the directory name
# the save cell writes the fine-tuned result to.
new_model_name = "llama-3.1-8b-turkish-drug-finetuned"
model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"

print(f"Loading {model_name}...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
)

In [None]:
# 4. Add LoRA Adapters
# Projection layers that receive adapters, grouped for readability; the
# concatenated list is identical to the original single literal.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

# Rebinds `model` to the PEFT-wrapped model returned by Unsloth.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=attention_projections + mlp_projections,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_rslora=False,
    loftq_config=None,
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

In [None]:
# 5. Load Dataset (Upload finetune_dataset.jsonl to Colab files first!)
# Reads the JSON-lines file from the current working directory as the "train" split.
dataset = load_dataset("json", data_files="finetune_dataset.jsonl", split="train")

# 6. Format Prompt (Chat Template)
# No custom `mapping` is passed: the conversations built in
# formatting_prompts_func use the standard "role"/"content" keys with
# "user"/"assistant" roles. A ShareGPT-style mapping (from/value/human/gpt)
# would make the template look up keys those messages do not contain.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

def formatting_prompts_func(examples):
    """Render a batch of instruction rows into chat-formatted training text.

    Args:
        examples: Batched mapping with parallel "instruction", "input" and
            "output" sequences (one entry per dataset row).

    Returns:
        dict with a single key "text": one rendered conversation string per
        row, produced by the module-level `tokenizer.apply_chat_template`.
    """
    # Turkish system prompt ("You are a helpful drug assistant. Always answer
    # in Turkish."). Stored as proper UTF-8 text; the previous literal was
    # mojibake and would have put garbled characters in every example.
    system_prompt = "Sen yardımcı bir ilaç asistanısın. Her zaman Türkçe yanıt ver."

    texts = []
    for instruction, input_text, output in zip(examples["instruction"], examples["input"], examples["output"]):
        # Append the optional input on its own line; skipped when empty/None.
        user_msg = instruction
        if input_text:
            user_msg += "\n" + input_text

        convo = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": output},
        ]
        # tokenize=False returns the rendered string; no generation prompt is
        # added because the assistant answer is already part of the example.
        text = tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False)
        texts.append(text)
    return { "text" : texts, }

# Batched map: formatting_prompts_func receives column-wise batches and its
# returned "text" key is merged into the dataset.
dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
)

In [None]:
# 7. Train
# Query bf16 support once; exactly one of fp16/bf16 is enabled from it.
bf16_available = torch.cuda.is_bf16_supported()

# Optimisation hyperparameters, built separately from the trainer for readability.
training_args = TrainingArguments(
    output_dir="outputs",
    seed=3407,
    max_steps=60,
    warmup_steps=5,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    weight_decay=0.01,
    optim="adamw_8bit",
    bf16=bf16_available,
    fp16=not bf16_available,
    logging_steps=1,
)

# Supervised fine-tuning on the "text" column of the formatted dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

trainer.train()

In [None]:
# 8. Save & Convert to GGUF
# Save the model first, then the tokenizer, into the same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(new_model_name)

# GGUF export is best-effort: any failure is reported and the cell still
# completes, leaving the files saved above in place.
try:
    model.save_pretrained_gguf(
        new_model_name,
        tokenizer,
        quantization_method="q4_k_m",
    )
    print(f"Model saved and converted to GGUF in {new_model_name}")
except Exception as err:
    print(f"GGUF conversion failed: {err}")