# Qwen Technical Support Agent — GSPO Fine-tuning

This notebook fine-tunes a Qwen model for **technical support** using **GSPO + LoRA**.

Notes:
- Start with a smaller model (3B) to validate your setup.
- If you scale to 7B, you’ll likely want `use_4bit=True`.
- This GSPO implementation takes two inputs: a prompt set (built from the environment's scenarios) and a reward model.


In [None]:
import os

# Always force TOKENIZERS_PARALLELISM to "false" for this session, overriding
# any value inherited from the shell.
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

# Default WANDB_MODE to "disabled", but respect a value that is already set.
if "WANDB_MODE" not in os.environ:
    os.environ["WANDB_MODE"] = "disabled"


In [None]:
# Directory handed to the training config as its output_dir.
OUTPUT_DIR = "./outputs/notebooks/qwen_support_gspo"

# Base model to fine-tune. The 3B checkpoint is the default; if you switch to
# a 7B checkpoint, also try use_4bit=True in the training config.
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"


In [None]:
from stateset_agents import MultiTurnAgent
from stateset_agents.core.agent import AgentConfig
from stateset_agents.core.environment import ConversationEnvironment, CONVERSATION_CONFIGS
from stateset_agents.rewards import create_domain_reward
from stateset_agents.training import GSPOConfig, train_with_gspo

# Build a small prompt set from environment scenarios.
# Start from the packaged "technical_support" preset and work on a copy so the
# top-level keys of the shared preset are not overwritten.
env_config = CONVERSATION_CONFIGS["technical_support"].copy()

# Notebook-specific scenarios appended after the preset's own scenarios.
extra_scenarios = [
    dict(
        topic="install_error",
        user_goal="Fix installation failure",
        context="The installer fails with an error code. Provide step-by-step troubleshooting.",
    ),
    dict(
        topic="update_regression",
        user_goal="Recover after a bad update",
        context="After updating, the app won’t open. Suggest rollback/reinstall and log collection.",
    ),
    dict(
        topic="login_issue",
        user_goal="Fix login failures",
        context="User can’t log in and sees an authentication error. Provide diagnosis steps.",
    ),
    dict(
        topic="performance_issue",
        user_goal="Improve slow performance",
        context="The system is slow. Provide a prioritized checklist (resources, drivers, background apps).",
    ),
    dict(
        topic="network_troubleshooting",
        user_goal="Restore network connectivity",
        context="Wi-Fi connects but there’s no internet. Provide OS/network troubleshooting steps.",
    ),
]

# Concatenate into a NEW list instead of extending in place: the copy above is
# shallow, so an in-place extend would also grow the preset's scenario list.
env_config["scenarios"] = env_config["scenarios"] + extra_scenarios

environment = ConversationEnvironment(**env_config)
reward_model = create_domain_reward("technical_support")

# System prompt for the agent, written one fragment per entry and joined with
# single spaces into the final prompt string.
system_prompt = " ".join(
    (
        "You are Qwen, a knowledgeable technical support specialist.",
        "Ask concise clarifying questions, then provide step-by-step fixes,",
        "including verification steps and safe fallback options.",
    )
)

# Generation settings for the agent; the model name and system prompt come from
# the earlier cells.
agent_config = AgentConfig(
    model_name=MODEL_NAME,
    system_prompt=system_prompt,
    max_new_tokens=384,
    temperature=0.7,
)

# Do NOT call agent.initialize() here: GSPO will load the model once and attach it.
agent = MultiTurnAgent(agent_config, memory_window=8)


In [None]:
# Training config tuned for a laptop GPU.
# Increase num_outer_iterations and add more scenarios for real training.
#
# The settings are collected in a dict, grouped by purpose, and then passed to
# GSPOConfig as keyword arguments in the order listed here.
gspo_settings = {
    # run identity / reporting
    "model_name": MODEL_NAME,
    "output_dir": OUTPUT_DIR,
    "report_to": "none",
    # "iterations" in this GSPO implementation
    "num_outer_iterations": 5,
    "num_iterations": 1,
    # set to the number of scenarios currently held by the environment
    "generations_per_iteration": len(environment.scenarios),
    # group size (responses per prompt)
    "num_generations": 4,
    # LoRA adapter settings
    "use_lora": True,
    "lora_r": 32,
    "lora_alpha": 64,
    "lora_dropout": 0.05,
    # memory/perf knobs
    "gradient_checkpointing": True,
    "use_4bit": False,
    "use_8bit": False,
    "bf16": True,
    # batching and optimizer step size
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "learning_rate": 8e-6,
    # prompt/completion length limits and sampling
    "max_prompt_length": 512,
    "max_completion_length": 384,
    "temperature": 0.7,
    "top_p": 0.9,
    # logging / checkpoint cadence
    "logging_steps": 1,
    "save_steps": 2,
}

config = GSPOConfig(**gspo_settings)


In [None]:
trained_agent = await train_with_gspo(
    config=config,
    agent=agent,
    environment=environment,
    reward_model=reward_model,
)


In [None]:
# Quick sanity check
test_messages = [
    {
        "role": "user",
        "content": "My app crashes on launch after the latest update. What should I do?",
    }
]

print(await trained_agent.generate_response(test_messages))


## Artifacts

- Checkpoints: `OUTPUT_DIR/checkpoint-*`
- Final save: `OUTPUT_DIR/final_model`

If `use_lora=True`, the saved folder is a **LoRA adapter** (not the full base model).
