In [None]:
!pip install --no-deps git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3

In [None]:
!pip install unsloth

In [None]:
from unsloth import FastModel
import torch
# Sequence-length budget passed to the loader; the GRPO config cell later splits it
# into prompt and completion lengths.
max_seq_length = 1024

# Reference list of checkpoints hosted under the unsloth organisation.
# Nothing in this cell reads it: the checkpoint that is actually loaded is named
# in the from_pretrained call below.
fourbit_models = [
    # Gemma-3 instruction-tuned checkpoints, dynamic 4-bit quantised
    "unsloth/gemma-3-1b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-27b-it-unsloth-bnb-4bit",

    # Other model families
    "unsloth/Llama-3.1-8B",
    "unsloth/Llama-3.2-3B",
    "unsloth/Llama-3.3-70B",
    "unsloth/mistral-7b-instruct-v0.3",
    "unsloth/Phi-4",
]  # Full catalogue: https://huggingface.co/unsloth

# Load the Gemma-3 1B instruct checkpoint with every quantisation / full-finetuning
# switch turned off.
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/gemma-3-1b-it",
    max_seq_length = max_seq_length,
    full_finetuning = False,  # full finetuning not requested
    load_in_8bit = False,     # 8-bit loading switched off
    load_in_4bit = False,     # 4-bit quantisation switched off
    # token = "hf_...",       # supply a token when the checkpoint is gated
)

In [None]:
# Wrap the loaded model with PEFT adapters. Note that this rebinds `model`, so the cell
# should be run once per freshly loaded model.
model = FastModel.get_peft_model(
    model,

    # Adapter hyper-parameters
    r = 8,               # adapter rank
    lora_alpha = 8,      # kept equal to r
    lora_dropout = 0,
    bias = "none",
    random_state = 3407,

    # Which parts of the network receive adapters
    finetune_language_layers   = True,
    finetune_attention_modules = True,
    finetune_mlp_modules       = True,
    finetune_vision_layers     = False,  # text-only training
)

In [None]:
import pandas as pd
# Read the pre-processed empathy data, rename the reply column to "response",
# drop rows where either text field is missing, and force both fields to str.
df = (
    pd.read_csv("/kaggle/input/preprocess-empathy-chat/preprocess_empathy_data.csv")
    .rename(columns={"utterance": "response"})
    .dropna(subset=["prompt", "response"])
    .astype({"prompt": str, "response": str})
)
df.head()

In [None]:
# Literal markers used to delimit the "working out" section and the final reply.
# They are embedded in the training targets, the system prompt and the reward regexes.
reasoning_start, reasoning_end = "<start_working_out>", "<end_working_out>"
solution_start, solution_end = "<SOLUTION>", "</SOLUTION>"

In [None]:
# 3. Wrap your responses to include tags
def wrap_response(resp):
    """Return `resp` wrapped in the reasoning and solution tags.

    The reasoning section is always the fixed sentence "Let me understand how you feel.";
    `resp` becomes the content of the solution section.

    The function is idempotent: a string that already starts with the reasoning-start
    tag and ends with the solution-end tag is returned unchanged. This keeps a re-run
    of the notebook cell that applies it to the DataFrame from nesting the tags twice.
    """
    already_wrapped = (
        isinstance(resp, str)
        and resp.startswith(reasoning_start)
        and resp.endswith(solution_end)
    )
    if already_wrapped:
        return resp
    return f"{reasoning_start}Let me understand how you feel.{reasoning_end}{solution_start}{resp}{solution_end}"

# Replace every response with its tag-wrapped form (element-wise).
df["response"] = df["response"].map(wrap_response)

In [None]:
# Show the wrapped response column as the cell output.
df.loc[:, "response"]

In [None]:
from datasets import Dataset
# Convert to HF Dataset and format for unsloth.
# Rows were dropped from `df` earlier, so its index is no longer a plain 0..n-1 range;
# preserve_index=False keeps that index from being stored as an extra
# `__index_level_0__` column in the dataset.
dataset = Dataset.from_pandas(df, preserve_index=False)

In [None]:
# System prompt: tells the model to put its reading of the user's feelings between the
# reasoning tags and its reply between the solution tags. Used for every training
# example and for the inference demo.
system_prompt = (
    "You are an empathetic AI and your friend. always give lovely caring message.\n"
    f"Understand the user's feelings between {reasoning_start} and {reasoning_end}.\n"
    f"Then provide a caring response between {solution_start} and {solution_end}. "
    "please give response as good friend also talk with lovely word like baby, my cutey and etc"
)

In [None]:
# Convert to Unsloth prompt format
def _to_chat_example(row):
    """Turn one dataset row into a chat-style prompt plus its reference answer.

    The "prompt" field becomes a two-message conversation (system prompt, then the
    user's text) and the row's "response" is copied into "answer".
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user",   "content": row["prompt"]},
    ]
    return {"prompt": conversation, "answer": row["response"]}

dataset = dataset.map(_to_chat_example)

In [None]:
# Show the dataset object's repr as the cell output.
dataset

In [None]:
# Peek at the reference answer of the first example.
dataset[0]["answer"]

In [None]:
import re
# Step 8: Define reward functions

# Regex for a well-formed completion: optional whitespace, a non-empty reasoning section
# between the reasoning tags, then a non-empty solution section (captured as group 1)
# between the solution tags, then optional whitespace.
# The tags are passed through re.escape so they are always matched literally, even if
# they are later changed to contain regex metacharacters.
match_format = re.compile(
    rf"^[\s]{{0,}}{re.escape(reasoning_start)}.+?{re.escape(reasoning_end)}.*?"
    rf"{re.escape(solution_start)}(.+?){re.escape(solution_end)}[\s]{{0,}}$",
    flags=re.MULTILINE | re.DOTALL,
)

def match_format_exactly(completions, **kwargs):
    """Reward 1 for each completion whose text matches `match_format`, else 0.

    Each completion is a list of messages; only the "content" of the first message
    is inspected. Extra keyword arguments are accepted and ignored.
    """
    rewards = []
    for completion in completions:
        text = completion[0]["content"]
        if match_format.search(text):
            rewards.append(1)
        else:
            rewards.append(0)
    return rewards

def match_format_approximately(completions, **kwargs):
    """Score each completion by how many of the four tags appear exactly once.

    Every tag that occurs exactly once in the first message's "content" adds 0.5;
    every tag that is missing or repeated subtracts 0.5, so scores range from
    -2.0 to 2.0. Extra keyword arguments are accepted and ignored.
    """
    expected_tags = (reasoning_start, reasoning_end, solution_start, solution_end)

    def _tag_score(text):
        return sum(0.5 if text.count(tag) == 1 else -0.5 for tag in expected_tags)

    return [_tag_score(completion[0]["content"]) for completion in completions]

def check_answer(prompts, completions, answer, **kwargs):
    """Reward completions whose solution text agrees with the reference answer.

    For each (completion, reference) pair:
      * 0   - the completion does not match `match_format` (no solution extracted);
      * 2.0 - the extracted solution equals the reference solution exactly;
      * 1.0 - the reference solution occurs inside the extracted solution,
              ignoring case;
      * 0.0 - otherwise.

    The dataset's "answer" column holds the tag-wrapped response, so the reference is
    unwrapped with the same regex before comparing. Without this step the bare solution
    text of a completion could never equal (or contain) the wrapped reference. A
    reference that is not wrapped is compared as-is, after stripping whitespace.

    `prompts` and extra keyword arguments are accepted and ignored.
    """
    def _solution_text(text):
        # Content of the solution section, or None when the text is not well-formed.
        found = match_format.search(text)
        return found.group(1).strip() if found else None

    scores = []
    for completion, reference in zip(completions, answer):
        guess = _solution_text(completion[0]["content"])
        if not guess:
            scores.append(0)
            continue
        target = _solution_text(reference) or reference.strip()
        if guess == target:
            scores.append(2.0)
        elif target.lower() in guess.lower():
            scores.append(1.0)
        else:
            scores.append(0.0)
    return scores

# Captures the first run of digits/dots that follows the solution-start tag.
# The tag is passed through re.escape so it is always matched literally.
match_numbers = re.compile(rf"{re.escape(solution_start)}.*?([\d\.]+)", flags=re.MULTILINE | re.DOTALL)

def check_numbers(prompts, completions, answer, **kwargs):
    """Reward 1.5 when the first number after the solution tag equals the reference.

    The number is taken from group 1 of `match_numbers` applied to the first message's
    "content". Both it and the reference are converted with float(); if either cannot
    be converted (no number found, or non-numeric text) the score is 0.

    Only conversion failures (TypeError / ValueError) are treated as "no score";
    any other exception propagates instead of being silently swallowed.

    `prompts` and extra keyword arguments are accepted and ignored.
    """
    scores = []
    for completion, true in zip(completions, answer):
        found = match_numbers.search(completion[0]["content"])
        guess = found.group(1) if found else None
        try:
            score = 1.5 if float(guess) == float(true) else 0
        except (TypeError, ValueError):
            # guess is None, a lone ".", or the reference is not numeric.
            score = 0
        scores.append(score)
    return scores

In [None]:
# Step 9: Configure GRPO
from trl import GRPOConfig, GRPOTrainer

# Token budget reserved for the prompt; the rest of max_seq_length goes to the completion.
max_prompt_length = 256

training_args = GRPOConfig(
    # Sequence budget
    max_prompt_length = max_prompt_length,
    max_completion_length = max_seq_length - max_prompt_length,

    # Batching / sampling
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 1,
    num_generations = 4,

    # Optimiser and schedule
    optim = "adamw_torch_fused",
    learning_rate = 5e-6,
    adam_beta1 = 0.9,
    adam_beta2 = 0.99,
    weight_decay = 0.1,
    max_grad_norm = 0.1,
    lr_scheduler_type = "cosine",
    warmup_ratio = 0.1,

    # Run length, checkpointing and logging
    max_steps = 50,
    save_steps = 50,
    logging_steps = 1,
    report_to = "none",
    output_dir = "outputs",
)

In [None]:
# Step 10: Run training
# Build the trainer from the adapter-wrapped model, the chat-formatted dataset, the GRPO
# config and the four reward functions defined in the reward cell, then start training.
trainer = GRPOTrainer(
    model = model,
    args = training_args,
    train_dataset = dataset,
    processing_class = tokenizer,
    reward_funcs = [match_format_exactly, match_format_approximately, check_answer, check_numbers],
)

trainer.train()

In [None]:
from transformers import TextStreamer

# Smoke test: one user message behind the same system prompt that was used for training.
messages = [
    dict(role="system", content=system_prompt),
    dict(role="user", content="I'm feeling really alone. anyone don't love for me"),
]

# Render the conversation to a prompt string, ending with the generation prompt.
text = tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)

# Generate up to 200 new tokens and stream them to the cell output (prompt omitted).
# The returned tensor is not needed, hence the throw-away name.
_ = model.generate(
    **tokenizer(text, return_tensors="pt").to("cuda"),
    streamer = TextStreamer(tokenizer, skip_prompt=True),
    max_new_tokens = 200,
    temperature = 0.9,
    top_k = 50,
    top_p = 0.95,
)


In [None]:
!pip install -q huggingface_hub


In [None]:
# Directory the merged checkpoint is written to.
output_dir = "empathy-chat-gemma"

# Save the model together with the tokenizer using the "merged_16bit" method.
# push_to_hub is off here; uploading is left to a separate, manual step.
model.save_pretrained_merged(output_dir, tokenizer, save_method = "merged_16bit", push_to_hub = False)


In [None]:
# from huggingface_hub import login, HfApi
# import os

# # Login if not done yet
# login(token="")

# # Path to your merged model directory
# output_dir = "empathy-chat-gemma"

# # Push to the hub
# api = HfApi()
# api.upload_folder(
#     folder_path=output_dir,
#     repo_id="sajeewa/empathy-chat-gemma",  # your repo path
#     repo_type="model"
# )
