In [1]:
# Print the GPU model, driver/CUDA versions and current memory usage for this runtime.
!nvidia-smi

Sat Dec 13 12:52:45 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   46C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import torch
import gc

# Run Python garbage collection first so unreferenced objects are released,
# then ask PyTorch to release its cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()


In [3]:
## Importing necessary libraries
import os

import pandas as pd
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
from transformers.trainer_utils import get_last_checkpoint

In [4]:
## Load the final, already-cleaned dataset.
## For reproducibility it is read straight from the GitHub repo that hosts the dataset.
df = pd.read_csv(filepath_or_buffer="https://raw.githubusercontent.com/nyarderr/moodmate-data/refs/heads/main/goemotions_final.csv")
df.head()

Unnamed: 0,text,final_emotion
0,"""If you don't wear BROWN AND ORANGE...YOU DON...",anger
1,"""What do Scottish people look like?"" How I wo...",neutral
2,"### A surprise, to be sure, but a welcome one",surprise
3,"'*Pray*, v. To ask that the laws of the unive...",neutral
4,">it'll get invaded by tankie, unfortunately. ...",neutral


In [5]:
## Convert the dataset to Hugging Face Dataset format
def to_hf_dataset(row):
    """Reduce one dataframe row to the two fields used downstream.

    Args:
        row: Mapping-like row exposing the keys "text" and "final_emotion".

    Returns:
        dict with "text" (copied as-is) and "labels" (the value of
        "final_emotion").
    """
    text, emotion = row["text"], row["final_emotion"]
    return dict(text=text, labels=emotion)

# Select and rename the two needed columns in one vectorized step; a row-wise
# df.apply(axis=1) runs a Python function per row and yields the same frame.
dataset = df[["text", "final_emotion"]].rename(columns={"final_emotion": "labels"})

# preserve_index=False guarantees the pandas index never becomes an extra column.
hf_dataset = Dataset.from_pandas(dataset, preserve_index=False)

# Hold out 10% of the rows as a test split; the fixed seed keeps the split reproducible.
hf_dataset = hf_dataset.train_test_split(test_size=0.1, seed=42)
hf_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'labels'],
        num_rows: 49529
    })
    test: Dataset({
        features: ['text', 'labels'],
        num_rows: 5504
    })
})

In [6]:
## Load the Qwen tokenizer and the 4-bit quantized base model
model_name = "Qwen/Qwen1.5-1.8B"

# NOTE(review): trust_remote_code=True permits running code shipped with the
# model repo; confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

## 4-bit NF4 weights with double quantization; compute is done in float16
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

## Load the quantized weights onto device 0 and prepare the model for k-bit training
model = prepare_model_for_kbit_training(
    AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map={"": 0},
        quantization_config=bnb_config,
        trust_remote_code=True,
    )
)

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

PackageNotFoundError: No package metadata was found for bitsandbytes

In [8]:
### define tokenizer function
def tokenize(batch, max_length=256):
    """Tokenize one example into a prompt+answer sequence with answer-only loss.

    The prompt and the emotion label are joined into one string and encoded to
    a fixed length. `labels` mirrors `input_ids`, except that every position
    that is part of the prompt or is padding is set to -100 so the loss is
    computed on the answer tokens only.

    Args:
        batch: A single example (not a batch) with the keys "text" and "labels".
        max_length: Fixed sequence length used for padding and truncation.

    Returns:
        The tokenizer output for the full string with an added "labels" list
        of the same length as "input_ids".

    Notes:
        If the prompt alone fills `max_length`, the answer is truncated away
        and every label is -100 (the example contributes no loss).
    """
    ## full training string
    # NOTE(review): "Instrcution" is misspelled; it is left unchanged because
    # editing the prompt text changes what the model is trained on.
    prompt = (f"Instrcution: Identify the emotion of the following text.\n"
              f"Text:{batch['text']}\n"
              f"Emotion:")

    full_text = f"{prompt} {batch['labels']}"

    ## tokenizing full text (the attention mask tells real tokens from padding)
    tokenized = tokenizer(
        full_text,
        padding='max_length',
        truncation=True,
        max_length=max_length,
        return_attention_mask=True,
    )
    input_ids = tokenized["input_ids"]
    attention_mask = tokenized["attention_mask"]

    ## number of real prompt tokens: encoded without padding so that the count
    ## does not depend on the pad token id or on the padding side
    prompt_len = len(tokenizer(prompt, truncation=True, max_length=max_length)["input_ids"])

    ## the first real token sits at index 0 with right padding and after the
    ## pad run with left padding
    first_real = attention_mask.index(1) if 1 in attention_mask else len(input_ids)
    answer_start = first_real + prompt_len

    ## masking labels: keep only real (non-padding) tokens after the prompt
    labels = [
        token_id if (position >= answer_start and is_real) else -100
        for position, (token_id, is_real) in enumerate(zip(input_ids, attention_mask))
    ]

    tokenized["labels"] = labels
    return tokenized

## Tokenize every split one example at a time (tokenize reads a single row, not a batch)
tokenized_dataset = hf_dataset.map(function=tokenize, batched=False)


In [9]:
### It is important to note that since we are using Hugging Face's Trainer API, we need to log in to the Hugging Face Hub to access tokens and models. Since this is not possible from within a VS Code notebook, we have to log in via the terminal using the command:
### huggingface-cli login

In [10]:
# Display the tokenized DatasetDict to inspect its splits and columns.
tokenized_dataset

In [11]:
## Wrap the quantized base model with LoRA adapters
lora_config = LoraConfig(
    task_type="CAUSAL_LM",  # causal language modeling task
    target_modules=["q_proj", "v_proj"],  # adapters are attached to modules with these names
    r=8,  # rank of the adapter matrices
    lora_alpha=16,
    lora_dropout=0.1,  # regularization
    bias="none",
)

# NOTE: `model` is rebound to its wrapped version here, so re-running this cell
# would pass the already-wrapped model to get_peft_model again.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [12]:
### Work out how many optimizer steps make up one pass over the training split
num_samples = len(tokenized_dataset['train'])
batch_size = 4 * 4  # effective batch: per_device_train_batch_size * gradient_accumulation_steps
steps_per_epoch = divmod(num_samples, batch_size)[0]  # floor division; a partial last batch is not counted
steps_per_epoch  # 3095 steps per epoch

In [13]:
## Training arguments
training_args = TrainingArguments(
    output_dir="../models/qwen-lora-goemotions",  # checkpoints are written here
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # effective batch size = 4 * 4 = 16
    warmup_steps=50,
    # One pass over the training split. Uses the value computed in the previous
    # cell instead of a hard-coded 3095 so it follows the dataset size.
    max_steps=steps_per_epoch,
    learning_rate=2e-4,
    fp16=True,  # half precision training
    logging_steps=20,
    report_to="wandb",
    save_steps=200,  # write a checkpoint every 200 optimizer steps
)

In [None]:
## Build the trainer, fine-tune, then save the result.
## Saving comes last because `trainer` only exists after it is created here;
## running the save step first fails on a fresh kernel.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
)

## Resume only when a checkpoint is actually present in the output directory.
## resume_from_checkpoint=True raises when there is none (e.g. on the first run),
## whereas passing None starts training from scratch.
output_dir = training_args.output_dir
last_checkpoint = get_last_checkpoint(output_dir) if os.path.isdir(output_dir) else None
trainer.train(resume_from_checkpoint=last_checkpoint)

## save the fine-tuned model
trainer.save_model("../models/qwen-lora-goemotions")