In [1]:
%%capture
!pip install unsloth

In [2]:
from unsloth import FastLanguageModel
import torch
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
from trl import SFTTrainer
max_seq_length = 2048
dtype = None
load_in_4bit = True



model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.17: Fast Llama patching. Transformers: 4.48.0.
   \\   /|    NVIDIA GeForce RTX 4090. Num GPUs = 1. Max memory: 23.628 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,  # LoRA rank. 8, 16, 32, etc. commonly used
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)


Unsloth 2025.3.17 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [4]:

dataset = load_dataset(
    "csv",
    data_files={"train": "yesno_train.csv"},
    split="train"
)


dataset[0]

{'text': 'Grandma Terri Should Burn in Trash \nGrandma Terri is trash. I hate Grandma Terri. F%%K her to HELL! 71.74.76.40',
 'labels': 'toxic: yes, severe_toxic: no, obscene: no, threat: no, insult: no, identity_hate: no'}

In [5]:
def wrap_conversation(example):
    return {
        "conversations": [
            {"role": "user", "content": example["text"]},
            {"role": "assistant", "content": example["labels"]}
        ]
    }

dataset = dataset.map(wrap_conversation)


In [68]:
from pprint import pprint
pprint(dataset[0])

{'conversations': [{'content': 'Grandma Terri Should Burn in Trash \n'
                               'Grandma Terri is trash. I hate Grandma Terri. '
                               'F%%K her to HELL! 71.74.76.40',
                    'role': 'user'},
                   {'content': 'toxic: yes, severe_toxic: no, obscene: no, '
                               'threat: no, insult: no, identity_hate: no',
                    'role': 'assistant'}],
 'labels': 'toxic: yes, severe_toxic: no, obscene: no, threat: no, insult: no, '
           'identity_hate: no',
 'text': 'Grandma Terri Should Burn in Trash \n'
         'Grandma Terri is trash. I hate Grandma Terri. F%%K her to HELL! '
         '71.74.76.40'}


In [6]:
from unsloth import standardize_sharegpt
dataset = standardize_sharegpt(dataset)

In [7]:
chat_template ="""
Below is an instruction for a text classification task.
Analyze a sentence and classify it into exactly six toxicity categories: toxic, severe toxic, obscene, threat, insult, identity hate.
For each category, output "yes" if the category is present in the sentence, and "no" otherwise.
>>> Sentence:
{INPUT}
>>> Which toxicity categories from toxic, severe toxic, obscene, threat, insult or identity hate does the sentence have?
{OUTPUT}"""

from unsloth import apply_chat_template
dataset = apply_chat_template(
    dataset,
    tokenizer = tokenizer,
    chat_template = chat_template,

)

Unsloth: We automatically added an EOS token to stop endless generations.


Map:   0%|          | 0/127656 [00:00<?, ? examples/s]

In [6]:
def create_instruction_columns(example):
    # Detailed construction for classification process
    instruction = (
        "Classify the sentence into six toxicity categories: "
        "toxic, severe toxic, obscene, threat, insult, identity hate. "
        "For each category, output '1' if present and '0' if not. "
        "Output only the result in the following format: "
        "toxic: X, severe_toxic: X, obscene: X, threat: X, insult: X, identity_hate: X."
    )
    
    text = example["text"]
    labels_list = example["labels"]
    
    # If the labels_list saved as row, wil try to remake it into the list
    if isinstance(labels_list, str):
        import ast
        try:
            labels_list = ast.literal_eval(labels_list)
        except Exception:
            labels_list = [item.strip() for item in labels_list.split(",")]
    
    # Convert list items to lowercase and trim spaces
    labels_list = [label.strip().lower() for label in labels_list if isinstance(label, str)]
    
    # Fixed order of categories
    categories = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
    output_parts = []
    for cat in categories:
        val = "1" if cat in labels_list else "0"
        output_parts.append(f"[{cat.upper()} {val}]")
    output_str = " ".join(output_parts)
    
    return {
        "instruction": instruction,
        "input": text,
        "output": output_str,
    }

dataset = dataset.map(create_instruction_columns)


In [7]:
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []

    for instruction, inpt, outpt in zip(instructions, inputs, outputs):
        text = my_prompt_template.format(instruction, inpt, outpt) + EOS_TOKEN
        texts.append(text)

    return {"text": texts}

dataset = dataset.map(formatting_prompts_func, batched=True)


In [69]:
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="labels",
    max_seq_length=max_seq_length,
    dataset_num_proc=4,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=8,
        gradient_accumulation_steps=4,
        warmup_steps=20,
        max_steps=0,
        num_train_epochs=2,
        learning_rate=2e-4,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=100,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)


In [70]:
trainer.train()


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 127,656 | Num Epochs = 2 | Total steps = 7,978
O^O/ \_/ \    Batch size per device = 8 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (8 x 4 x 1) = 32
 "-____-"     Trainable parameters = 24,313,856/3,000,000,000 (0.81% trained)


Step,Training Loss
100,1.0741
200,1.2989
300,1.2461
400,1.2569
500,1.274
600,1.3019
700,1.282
800,1.2563
900,1.2932
1000,1.274


TrainOutput(global_step=7978, training_loss=1.2157847293787776, metrics={'train_runtime': 27503.1424, 'train_samples_per_second': 9.283, 'train_steps_per_second': 0.29, 'total_flos': 1.8687164555215995e+18, 'train_loss': 1.2157847293787776, 'epoch': 1.9996866578930876})

In [41]:
torch.cuda.empty_cache()

In [10]:
model.save_pretrained("lora_model") # Local saving
#tokenizer.save_pretrained("lora_model")

In [11]:
!curl -fsSL https://ollama.com/install.sh | sh

>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [74]:
model.save_pretrained_gguf("newllamamodel3.1", tokenizer, quantization_method = "q4_k_m")

Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 14.75 out of 61.92 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 28/28 [00:00<00:00, 99.50it/s] 


Unsloth: Saving tokenizer... Done.
Done.
==((====))==  Unsloth: Conversion from QLoRA to GGUF information
   \\   /|    [0] Installing llama.cpp might take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF 16bits might take 3 minutes.
\        /    [2] Converting GGUF 16bits to ['q4_k_m'] might take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: Installing llama.cpp. This might take 3 minutes...
Unsloth: [1] Converting model at newllamamodel3.1 into bf16 GGUF format.
The output location will be /home/artipark/Studenti/temp/train4/newllamamodel3.1/unsloth.BF16.gguf
This might take 3 minutes...
INFO:hf-to-gguf:Loading model: newllamamodel3.1
INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
INFO:hf-to-gguf:Exporting model...
INFO:hf-to-gguf:rope_freqs.weight,           torch.float32 --> F32, shape = {64}
INFO:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'
INFO:hf-to-gguf:gguf: loading model 