In [1]:
from unsloth import FastLanguageModel
import torch
import pandas as pd
from unsloth.chat_templates import get_chat_template
from unsloth.chat_templates import standardize_sharegpt
from unsloth.chat_templates import train_on_responses_only

from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

from datasets import load_dataset
from tqdm import tqdm

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
print('test')

test


In [3]:
ax_seq_length = 2048
dtype = None
load_in_4bit = True
max_seq_length = 2048

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 2x faster
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",    # 4bit for 405b!
    "unsloth/Mistral-Small-Instruct-2409",     # Mistral 22b 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",           # Phi-3.5 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",            # Gemma 2x faster!

    "unsloth/Llama-3.2-1B-bnb-4bit",           # NEW! Llama 3.2 models
    "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
    "unsloth/Llama-3.2-3B-bnb-4bit",
    "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",

    "unsloth/Llama-3.3-70B-Instruct-bnb-4bit" # NEW! Llama 3.3 70B!
] # More models at https://huggingface.co/unsloth


model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct", # or choose "unsloth/Llama-3.2-1B-Instruct"
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

==((====))==  Unsloth 2025.2.4: Fast Llama patching. Transformers: 4.48.2.
   \\   /|    GPU: NVIDIA A10. Max memory: 21.975 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.2.4 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [4]:

dataset = load_dataset("csv", data_files="hf://datasets/btwitssayan/sentiment-analysis-for-mental-health/data.csv")
# Truncate the text in super long statements
for i in range(len(dataset['train'])):
    if len(dataset['train'][i]['statement']) > 4096:
        dataset['train'][i]['statement'] = dataset['train'][i]['statement'][:4096]
# Split the dataset into train and test using the split_dataset function
# Everything is stored in the 'train' partition by default
dataset = dataset['train'].train_test_split(test_size=0.02)
dataset


data.csv:   0%|          | 0.00/31.5M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', 'statement', 'status'],
        num_rows: 51627
    })
    test: Dataset({
        features: ['Unnamed: 0', 'statement', 'status'],
        num_rows: 1054
    })
})

In [5]:
status_list = []
for i in range(len(dataset['train'])):
    status_list.append(dataset['train'][i]['status'].lower())
class_names = list(set(status_list))
class_names

['anxiety',
 'bipolar',
 'depression',
 'personality disorder',
 'normal',
 'suicidal',
 'stress']

In [6]:
classes_str = ", ".join(class_names)
instruction = f"""You are a thoughtful assistant that does sentiment classification and returns one of the following classes: {classes_str}
Please only return the class name, not any other text."""
def format_chat_template(row) -> str:
    row_json = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": row["statement"]},
        {"role": "assistant", "content": row["status"]},
    ]

    # Add to the dataset inplace
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize = False, add_generation_prompt = False)
    return row

In [7]:
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

In [8]:
# Add a 'text' column to the dataset to be used in fine tuning
train_dataset = dataset['train'].map(format_chat_template, num_proc=5)
train_dataset[3]

Map (num_proc=5):   0%|          | 0/51627 [00:00<?, ? examples/s]

{'Unnamed: 0': 38770,
 'statement': 'although my colleague and bos have been really supportive and nice to me my mind tell me they secretly hate me or just won t express something they don t like of me in the fear that i m new i m socially awkward and many other thing about the job and conversation of the day echo loud in my head in repeat and i just can t think of anything else i scroll through the gram for distraction or read through my astrology happening for the day i don t know how to relax or quiet my mind so many thing bothering me at once',
 'status': 'Depression',
 'text': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\nYou are a thoughtful assistant that does sentiment classification and returns one of the following classes: anxiety, bipolar, depression, personality disorder, normal, suicidal, stress\nPlease only return the class name, not any other text.<|eot_id|><|start_header_id|>user<|end_h

### Predict on the test set before training

In [9]:
# Extract some samples from the training data to run inference on before train
test_inputs_extracted = []
for i in range(len(dataset['test'])):
    test_inputs_extracted.append(
        [
            {"role": "system", "content": instruction},
            {"role": "user", "content": dataset['test'][i]['statement']}
        ]
    )
print(len(test_inputs_extracted))
test_inputs_extracted[:2]

1054


[[{'role': 'system',
   'content': 'You are a thoughtful assistant that does sentiment classification and returns one of the following classes: anxiety, bipolar, depression, personality disorder, normal, suicidal, stress\nPlease only return the class name, not any other text.'},
  {'role': 'user',
   'content': 'Considering suicide again. Shit sucks dick and I am tired of life and being a failure. I cannot keep even a minimum wage job. cannot get my license because I need to work shit out with the sheriffs office before I can (long story). Wish I had access to a gun because my survival instinct makes jumping in front of a train too hard. And there is no bridges high enough that are near me. Just a dumb vent because I had nowhere else to post it. Hope everyone here is coping alright, take care of yourselves In a really dark spot right now, do not know if its the usual where it flares up at night and I am fine in the morning, or if I am getting bad again.'}],
 [{'role': 'system',
   'con

In [10]:
def get_response(text_output):
    response_start = "<|start_header_id|>assistant<|end_header_id|>\n\n"
    response_string = text_output[0].split(response_start)[1].replace("<|eot_id|>", "").replace("<|end_of_text|>", "")
    classification = response_string.split()[0].lower()
    return classification

In [11]:
def predict_from_messages(messages, dataset_mapped, model, batch_size=32, max_length=1024):
    predicted_outputs = []
    correct_outputs = []
    
    # Process messages in batches
    for i in tqdm(range(0, len(messages), batch_size), desc="Predicting"):
        batch_messages = messages[i:i + batch_size]
        
        # Process all messages in batch at once with padding and truncation
        batch_inputs = tokenizer.apply_chat_template(
            batch_messages,
            tokenize = True,
            add_generation_prompt = True,
            return_tensors = "pt",
            padding = True,           # Add padding
            truncation = True,        # Add truncation
            max_length = max_length,        # Set max length to match your model's config
        ).to("cuda")

        # Add attention mask to tell model which tokens to ignore
        attention_mask = (batch_inputs != tokenizer.pad_token_id).to("cuda")

        # Generate for entire batch at once
        batch_outputs = model.generate(
            input_ids = batch_inputs,
            attention_mask = attention_mask,
            max_new_tokens = 64,
            use_cache = True,
            temperature = 1.0,
            min_p = 0.2
        )
        
        # Decode all outputs in batch
        batch_text_outputs = tokenizer.batch_decode(batch_outputs)

        # Clear memory for this batch
        del batch_inputs, batch_outputs
        torch.cuda.empty_cache()
        
        # Process each output in the batch
        for j, text_output in enumerate(batch_text_outputs):
            try: 
                predicted_outputs.append(get_response([text_output]))  # Note: wrapped in list since get_response expects list
            except(Exception) as e:
                print("Error in get_response...\n")
                print(i, j, text_output)
                print(e)
                continue
            correct_output = dataset_mapped[i + j]['status']
            correct_outputs.append(correct_output.lower())

    torch.cuda.empty_cache()
    
    return predicted_outputs, correct_outputs

In [12]:
def count_correct(predicted_outputs, correct_outputs, original_dataset):
    correct_count = 0
    for i in range(len(predicted_outputs)):
        if predicted_outputs[i] == correct_outputs[i]:
            correct_count += 1
        # Print every 50th statement
        if i % 50 == 0:
            print(original_dataset[i]['statement'], ":  ", predicted_outputs[i], "  ", correct_outputs[i])
    print("Accuracy: ", correct_count / len(predicted_outputs))


In [13]:
# Run inference on some of the training data before training happens
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
test_dataset = dataset['test']
predicted_outputs, correct_outputs = predict_from_messages(test_inputs_extracted, test_dataset, model, 16, 2048)

Predicting:  14%|█▎        | 9/66 [00:31<02:42,  2.85s/it]

Error in get_response...

128 10 <|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

You are a thoughtful assistant that does sentiment classification and returns one of the following classes: anxiety, bipolar, depression, personality disorder, normal, suicidal, stress
Please only return the class name, not any other text.<|eot_id|><|start_header_id|>user<|end_header_id|>

My entire life has spontaneously combusted over the last couple of months. A 7-year relationship that I moved across the country for four years ago ended suddenly. My ex-partner, my best friend in the world, within a month of our breakup, started dating a mutual friend who has spent the entirety of the pandemic harassing, bullying, manipulating, and gaslighting our friends -- conveniently, everyone but my partner. She has been aware of this, to the extent that I have been comfortable confiding in her, because this friend has been making many of 

Predicting: 100%|██████████| 66/66 [02:46<00:00,  2.53s/it]


In [14]:
idx = 0
print(predicted_outputs[idx])
print(correct_outputs[idx])

suicidal
suicidal


In [15]:
count_correct(predicted_outputs, correct_outputs, test_dataset)

Considering suicide again. Shit sucks dick and I am tired of life and being a failure. I cannot keep even a minimum wage job. cannot get my license because I need to work shit out with the sheriffs office before I can (long story). Wish I had access to a gun because my survival instinct makes jumping in front of a train too hard. And there is no bridges high enough that are near me. Just a dumb vent because I had nowhere else to post it. Hope everyone here is coping alright, take care of yourselves In a really dark spot right now, do not know if its the usual where it flares up at night and I am fine in the morning, or if I am getting bad again. :   suicidal    suicidal
when all you can feel is you re failing at everything failure failure failure so you give up and you attempt to end it all and even fail at that how am i supposed to feel then :   suicidal    depression
This is my first time posting on here. I did not really think I was depressed before, but now I do not know. I do not 

### Run the actual training process

In [16]:


trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 4,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 16,
        gradient_accumulation_steps = 2,
        warmup_steps = 5,
        num_train_epochs = 1, # Set this for 1 full training run.
        # max_steps = 100,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 50,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

Map (num_proc=4):   0%|          | 0/51627 [00:00<?, ? examples/s]

In [17]:
# This makes the model train only on the losss from the outputs/classifications

trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)

Map:   0%|          | 0/51627 [00:00<?, ? examples/s]

In [1]:
# Check that training is correclty splitting out the instruction and response
print(tokenizer.decode(trainer.train_dataset[5]["input_ids"]))
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
print(tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[5]["labels"]]))

NameError: name 'tokenizer' is not defined

In [19]:
# Run the actual training process
# 2:21 minutes on the 4070 Ti Super
FastLanguageModel.for_training(model) # Enable native 2x faster inference
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 51,627 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 16 | Gradient Accumulation steps = 2
\        /    Total batch size = 32 | Total steps = 1,613
 "-____-"     Number of trainable parameters = 24,313,856


Step,Training Loss
50,0.4054
100,0.1955
150,0.1601
200,0.152
250,0.1434
300,0.1337
350,0.1317
400,0.1233
450,0.114
500,0.0999


In [32]:
model.save_pretrained("sentiment_tuned_llama_3.2_3b")  # Local saving
tokenizer.save_pretrained("sentiment_tuned_llama_3.2_3b")

('sentiment_tuned_llama_3.2_3b/tokenizer_config.json',
 'sentiment_tuned_llama_3.2_3b/special_tokens_map.json',
 'sentiment_tuned_llama_3.2_3b/tokenizer.json')

In [12]:
if True:
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "sentiment_tuned_llama_3.2_3b", # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = 2048,
        dtype = "float16",
        load_in_4bit = True,
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference

messages = [
    {"role": "user", "content": "Describe a tall tower in the capital of France."},
]
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
).to("cuda")

==((====))==  Unsloth 2025.1.5: Fast Llama patching. Transformers: 4.48.0.
   \\   /|    GPU: NVIDIA GeForce RTX 4070 Ti SUPER. Max memory: 15.692 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Device supports bfloat16 but you selected float16. Will change to bfloat16.


In [15]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
predicted_outputs, correct_outputs = predict_from_messages(test_inputs_extracted, test_dataset, model, 16, 2048)


Predicting:  14%|█▎        | 9/66 [00:20<03:11,  3.37s/it]

128 10 <|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

You are a thoughtful assistant that does sentiment classification and returns one of the following classes: suicidal, personality disorder, normal, anxiety, depression, stress, bipolar
Please only return the class name, not any other text.<|eot_id|><|start_header_id|>user<|end_header_id|>

My entire life has spontaneously combusted over the last couple of months. A 7-year relationship that I moved across the country for four years ago ended suddenly. My ex-partner, my best friend in the world, within a month of our breakup, started dating a mutual friend who has spent the entirety of the pandemic harassing, bullying, manipulating, and gaslighting our friends -- conveniently, everyone but my partner. She has been aware of this, to the extent that I have been comfortable confiding in her, because this friend has been making many of us feel like *garbage*. Me

Predicting: 100%|██████████| 66/66 [01:57<00:00,  1.79s/it]


In [16]:
count_correct(predicted_outputs, correct_outputs, test_dataset)

Considering suicide again. Shit sucks dick and I am tired of life and being a failure. I cannot keep even a minimum wage job. cannot get my license because I need to work shit out with the sheriffs office before I can (long story). Wish I had access to a gun because my survival instinct makes jumping in front of a train too hard. And there is no bridges high enough that are near me. Just a dumb vent because I had nowhere else to post it. Hope everyone here is coping alright, take care of yourselves In a really dark spot right now, do not know if its the usual where it flares up at night and I am fine in the morning, or if I am getting bad again. :   depression    suicidal
when all you can feel is you re failing at everything failure failure failure so you give up and you attempt to end it all and even fail at that how am i supposed to feel then :   depression    depression
This is my first time posting on here. I did not really think I was depressed before, but now I do not know. I do 