# WhatsApp Sender Style Fine-Tuning (Hugging Face)

This brief notebook sketches a minimal, GPU-friendly fine-tuning workflow to teach an open-source model to mimic a specific sender's messaging style.

What you'll need (not included here):
- A dataset of conversations where the target sender's replies are known. See the Data section below for the expected format and where to plug it in.
- A GPU environment (e.g., Runpod) with enough VRAM for the chosen base model.

Notes:
- Uses QLoRA via `peft` and `trl` for efficient fine-tuning.
- Defaults to a 7B chat model (`HuggingFaceH4/zephyr-7b-beta`); switch to a smaller one if VRAM is limited, or a larger one if you have the resources.
- Be mindful of privacy, TOS, and consent when training on personal chats.


In [1]:
# Install the fine-tuning dependencies with `uv pip` (`-q` suppresses most output).
!uv pip install -q transformers datasets accelerate peft trl bitsandbytes einops


In [None]:
from dataclasses import dataclass
from typing import Optional
import os

import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
from trl import SFTTrainer, SFTConfig
from peft import LoraConfig, TaskType

# ---- Configuration ----
@dataclass
class TrainConfig:
    """Hyperparameters and paths for the QLoRA style fine-tuning run.

    Every field has a default, so ``TrainConfig()`` yields the demo setup.
    The field order below is also the positional order of the constructor.
    """

    # --- Model and output location ---
    # Base model identifier handed to `from_pretrained`; change if VRAM limited.
    base_model: str = "HuggingFaceH4/zephyr-7b-beta"
    # Directory that receives trainer output and the saved adapter.
    out_dir: str = "./outputs/style-adapter"

    # --- Trainer settings ---
    # True selects bfloat16; False makes the 4-bit compute dtype float16.
    bf16: bool = True
    per_device_train_batch_size: int = 2
    gradient_accumulation_steps: int = 4
    # Keep small for a demo; increase for real training.
    max_steps: int = 500
    learning_rate: float = 1e-4
    warmup_ratio: float = 0.03
    logging_steps: int = 10

    # --- QLoRA adapter settings (forwarded to LoraConfig) ---
    lora_r: int = 16
    lora_alpha: int = 32
    lora_dropout: float = 0.05

    # --- Tokenization/formatting ---
    # Upper bound on the sequence length given to the trainer.
    max_seq_len: int = 1024

# One shared configuration instance; every later cell reads from `cfg`.
cfg = TrainConfig()

# 4-bit quantization for QLoRA: NF4 weights with double quantization.
# The compute dtype follows the bf16 switch in the config.
_compute_dtype = torch.bfloat16 if cfg.bf16 else torch.float16
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=_compute_dtype,
)

# Let the loader decide where to place the model's layers.
device_map = "auto"

# Tokenizer: fall back to the EOS token for padding when the model ships
# without a dedicated pad token.
tokenizer = AutoTokenizer.from_pretrained(cfg.base_model, use_fast=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Quantized base model that the LoRA adapter will be trained on top of.
# NOTE(review): trust_remote_code=True allows the model repository to run its
# own Python code at load time — confirm the chosen base model needs it.
model = AutoModelForCausalLM.from_pretrained(
    cfg.base_model,
    quantization_config=bnb_cfg,
    device_map=device_map,
    trust_remote_code=True,
)

# LoRA adapter definition. Target names follow Llama/Mistral-style layer
# naming; adjust them if the base model names its projections differently.
_attn_targets = ["q_proj", "k_proj", "v_proj", "o_proj"]
_mlp_targets = ["gate_proj", "up_proj", "down_proj"]
peft_cfg = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=cfg.lora_r,
    lora_alpha=cfg.lora_alpha,
    lora_dropout=cfg.lora_dropout,
    target_modules=_attn_targets + _mlp_targets,
)



## Data placeholder (bring your own)

You need a dataset of conversations where the target sender's replies are present.
A minimal single-turn schema for supervised fine-tuning:

- `prompt`: the conversation context and the latest user message, formatted as a single string.
- `response`: the exact reply written by the target sender.

Example (JSONL) rows you will construct elsewhere:
```json
{"prompt": "[Alex]: hey are we still on for 7?", "response": "yeh see u then"}
{"prompt": "[Sam]: send me the doc pls", "response": "sending now"}
```

In multi-turn settings, you can concatenate the earlier turns into `prompt`; keep each formatted example (prompt plus response) under `max_seq_len` tokens.


In [None]:
# TODO: Replace this with real data loading.
# Whatever you load must provide two string fields per record:
#   "prompt"   - the incoming context/message
#   "response" - the target sender's reply

# Sequence delimiters used when building the training text.
BOS = ""  # adjust to your model's chat template if needed
EOS = tokenizer.eos_token

# Two toy rows so the rest of the notebook has something to run on.
examples = [
    {
        "prompt": "[Friend]: wanna grab coffee?",
        "response": "down, where?",
    },
    {
        "prompt": "[Teammate]: can you review PR #42?",
        "response": "on it",
    },
]

dset = Dataset.from_list(examples)

def format_example(rec):
    """Turn one prompt/response record into a single training string.

    Args:
        rec: Mapping with "prompt" and "response" entries.

    Returns:
        A dict with one key, "text", holding
        BOS + prompt + newline + response + EOS.
    """
    context, reply = rec["prompt"], rec["response"]
    # Basic supervised layout: the reply follows the prompt on the next line.
    return {"text": "{}{}\n{}{}".format(BOS, context, reply, EOS)}

# Swap the original columns for the single "text" column built by format_example.
_source_columns = dset.column_names
train_dset = dset.map(format_example, remove_columns=_source_columns)

# Sanity check: show at most the first 200 characters of the first example.
_first_text = train_dset[0]["text"]
print(_first_text[:200])


In [None]:
# Build the SFT configuration and trainer, then run fine-tuning.
#
# The install cell does not pin `trl`, and two argument names used here were
# renamed across TRL releases (`max_seq_length` -> `max_length` on SFTConfig,
# `tokenizer` -> `processing_class` on SFTTrainer). Pick whichever name the
# installed version exposes so the cell runs on both older and newer releases.
import inspect
from dataclasses import fields

_sft_config_fields = {f.name for f in fields(SFTConfig)}
_seq_len_arg = "max_length" if "max_length" in _sft_config_fields else "max_seq_length"

_sft_trainer_params = inspect.signature(SFTTrainer.__init__).parameters
_tokenizer_arg = "processing_class" if "processing_class" in _sft_trainer_params else "tokenizer"

training_args = SFTConfig(
    output_dir=cfg.out_dir,
    bf16=cfg.bf16,
    per_device_train_batch_size=cfg.per_device_train_batch_size,
    gradient_accumulation_steps=cfg.gradient_accumulation_steps,
    max_steps=cfg.max_steps,
    learning_rate=cfg.learning_rate,
    warmup_ratio=cfg.warmup_ratio,
    logging_steps=cfg.logging_steps,
    # No intermediate checkpoints: the adapter is saved explicitly after
    # training. Stated via the strategy rather than relying on save_steps=0.
    save_strategy="no",
    eval_strategy="no",
    dataset_text_field="text",
    # Packing concatenates several short examples into one sequence.
    # NOTE(review): with the two-row placeholder dataset some TRL versions may
    # not be able to build a single packed sequence — confirm with real data
    # or set packing=False for a smoke test.
    packing=True,
    **{_seq_len_arg: cfg.max_seq_len},
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dset,
    peft_config=peft_cfg,
    args=training_args,
    **{_tokenizer_arg: tokenizer},
)

trainer.train()


In [None]:
# Save the LoRA adapter, with the tokenizer next to it in the same directory.
os.makedirs(cfg.out_dir, exist_ok=True)
trainer.model.save_pretrained(cfg.out_dir)
tokenizer.save_pretrained(cfg.out_dir)
print(f"Saved adapter to {cfg.out_dir}")

# Quick inference demo: reload the quantized base model and attach the saved
# adapter to it (the adapter is applied at runtime, not merged into the base
# weights).
# NOTE(review): this loads a second copy of the base model while the trained
# one is still referenced by `trainer`; on a small GPU, free the training
# objects first.
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    cfg.base_model,
    device_map=device_map,
    quantization_config=bnb_cfg,
    trust_remote_code=True,
)
adapted = PeftModel.from_pretrained(base, cfg.out_dir)
adapted.eval()

prompt = "[Alex]: do you want to play tennis later?"
# Training text is "<prompt>\n<response><EOS>" (see format_example), so the
# inference prompt must end with the same newline separator; otherwise the
# model is asked to continue from a position it never saw during training.
inputs = tokenizer(f"{prompt}\n", return_tensors="pt").to(adapted.device)
with torch.no_grad():
    out = adapted.generate(
        **inputs,
        max_new_tokens=40,
        do_sample=True,
        temperature=0.7,
        # Pass the pad token explicitly instead of leaving it for generate()
        # to infer.
        pad_token_id=tokenizer.pad_token_id,
    )
# Prints the prompt followed by the generated reply.
print(tokenizer.decode(out[0], skip_special_tokens=True))
