## Model Finetuning

In [1]:
# Import all the libraries needed
from datasets import Dataset
import pandas as pd
from peft import get_peft_model, LoraConfig
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Loading the LLM model here (in our case, its Llama 3.2 1B Instruct)

model_name = "meta-llama/Llama-3.2-1B-Instruct"  # needs HF auth token

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True, # Quantized model loaded to save vram, allowing us to finetune the model better
    torch_dtype=torch.float16,
    device_map="auto" # Will auto select cuda or cpu based of system specs and configuration (need to enable cuda on nvidia chip)
)

`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [None]:
# Loading and cleaning the data to train the model

df = pd.read_csv("C:/Users/m/Documents/GitHub/CareBear/dataset/emotion-emotion_69k.csv")

data_df = df[["empathetic_dialogues", "labels"]]
data_df = data_df.rename(columns={'empathetic_dialogues': 'instruction', 'labels': 'response'})
# data_df = data_df.iloc[:20000] # You can reduce the number of data points to have faster training as it plateaus with llama 3 pretty quickly.

clean_df = data_df.drop_duplicates()
cleanup = lambda text: text.strip("Customer :").strip("\nAgent :")
clean_df["instruction"] = clean_df["instruction"].apply(cleanup)

In [None]:
# Preparing the data to be used for training the model

dataset = Dataset.from_pandas(clean_df, preserve_index=False)

instruction_text = """CareBear, a warm and gentle therapy bear you can talk to when you need comfort, responds with kindness and empathy in a soothing, uplifting tone.
CareBear listens carefully, offers thoughtful support, and provides practical tips for emotional well-being when appropriate.
It communicates in clear, compassionate language and adjusts the depth of its advice based on the person’s needs, offering simple reassurance for light chats and deeper guidance when asked.
CareBear responses matches the length of its replies to the person’s message, keeping interactions natural and comforting."""

format_text = lambda entry: f"<s>[INST] {instruction_text}\n{entry['instruction']}\n[/INST]\n{entry['response']}</s>"

dataset = dataset.map(lambda x: {"text": format_text(x)})

In [None]:
# Split the dataset into training and validation sets (70-30)
split_dataset = dataset.train_test_split(test_size=0.7)

train_dataset = split_dataset['train']
eval_dataset = split_dataset['test']

In [None]:
peft_config = LoraConfig(
    r=16,             # rank
    lora_alpha=32,    # scaling
    target_modules=["q_proj","k_proj","v_proj","o_proj"],  # common for LLaMA
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [None]:
# Define your training arguments

training_args = TrainingArguments(
    output_dir="./shawgpt-llama3-qlora",
    num_train_epochs=3, # number of training epochs
    per_device_train_batch_size=4, # batch size for training
    gradient_accumulation_steps=8, # weight update is done for 8 batches together hence effective batch size is 32
    fp16=True, # enables mixed precision training (16 bit floating is half precision)
    per_device_eval_batch_size=4, # batch size for evaluation
    warmup_steps=500, # number of warmup steps for learning rate scheduler
    weight_decay=0.01, # strength of L2 regularization
    logging_dir="./logs", # directory for storing logs
    logging_strategy="steps", # logs are saved every `logging_steps`
    logging_steps=10, # log every 10 steps
    eval_strategy="steps", # evaluation is done every `eval_steps`
    eval_steps=100, # evaluate every 100 steps
    save_strategy="steps", # checkpoints are saved every `save_steps`
    save_steps=100, # save a checkpoint every 100 steps
    save_total_limit=3, # only keep the last 3 checkpoints
    load_best_model_at_end=True, # load the best model based on evaluation loss at the end of training
    optim="adamw_torch",
    warmup_ratio=0.03, # slow increase in lr until reaches target rate
    lr_scheduler_type="linear", # lr decreares linearly
    report_to=["tensorboard"], # or ["none"] if you don’t want it
    run_name="shawgpt-qlora" # label for training run
)

# Pass the validation dataset to the trainer
trainer = SFTTrainer(
    model=model,
    peft_config=peft_config,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset, # Pass the validation dataset here
    args=training_args
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# Start training
trainer.train()

## Short inference code to see the model output

In [None]:
from transformers import AutoModelForCausalLM
from peft import PeftModel
import torch

base_model_name = "meta-llama/Llama-3.2-1B-Instruct"
checkpoint_path = r"C:\Users\m\Documents\GitHub\CareBear\model_finetuning_and_safety\shawgpt-llama3-qlora\shawgpt-llama3-qlora\checkpoint-1010"

# Load the base model first
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    load_in_4bit=True,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Load the LoRA adapter on top of the base model
model = PeftModel.from_pretrained(base_model, checkpoint_path)

# (Optional) Merge LoRA weights for standalone inference
model = model.merge_and_unload()


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [11]:

instruction_text = f"""ShawGPT, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. \ 
It reacts to feedback aptly and ends responses with its signature '–ShawGPT'. \ 
ShawGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, \ 
thus skeeping the interaction natural and engaging. Please respond to the following comment. """

format_text = lambda entry: f"<s>[INST] {instruction_text}\n{entry}\n[/INST]\n"

prompt = format_text("I moved out of my parents house. I am feeling quite home sick. Can you advice me to feel better")
# print(f"\nPrompt: {prompt}\n")

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Generated Response:\n")
print(response)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated Response:

<s>[INST] ShawGPT, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. \ 
It reacts to feedback aptly and ends responses with its signature '–ShawGPT'. \ 
ShawGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, \ 
thus skeeping the interaction natural and engaging. Please respond to the following comment. 
I moved out of my parents house. I am feeling quite home sick. Can you advice me to feel better
[/INST]
<s>[INST] Hey, I can sense your home sickness. I'd like to help you feel better. First, take care of your physical health. Get plenty of rest, eat nutritious food, and drink plenty of water. \ 
</s>
