# LLM Tuning with LionAGI's RL System

This notebook demonstrates how to use LionAGI's reinforcement learning (RL) capabilities to improve LLM performance through:
1. Parameter tuning
2. Synthetic data generation
3. Hybrid optimization

In [None]:
import sys

sys.path.append("../..")

from lionagi import Branch, iModel
from lionagi.operations.rl import OptimizationMode, TuningConfig, RL

## 1. Setup Training Data

Let's create some example training and evaluation data for a sample task: converting loosely structured text into JSON.

In [None]:
# Training examples: each entry pairs a free-text prompt with the exact JSON
# string the model is expected to produce for it.
training_data = [
    {"prompt": prompt, "target": target}
    for prompt, target in (
        (
            "Convert this to JSON: name: John Smith, age: 30, city: New York",
            '{"name": "John Smith", "age": 30, "city": "New York"}',
        ),
        (
            "Create JSON for: title: Software Engineer, company: TechCo, years: 5",
            '{"title": "Software Engineer", "company": "TechCo", "years": 5}',
        ),
    )
]

# Evaluation example, same prompt/target layout, not part of training_data.
eval_data = [
    dict(
        prompt="Make JSON from: product: Laptop, price: 999.99, brand: TechBrand",
        target='{"product": "Laptop", "price": 999.99, "brand": "TechBrand"}',
    )
]

## 2. Create Branch and Model

In [None]:
# Settings for the chat model; kept in one place so they are easy to tweak.
model_settings = {"provider": "openai", "model": "gpt-4o", "temperature": 0.7}
model = iModel(**model_settings)

# Instruction given to the branch as its system message.
SYSTEM_PROMPT = "You are an expert at converting text to valid JSON format."

# The branch ties the system prompt to the chat model configured above.
branch = Branch(name="JsonGenerator", system=SYSTEM_PROMPT, chat_model=model)

## 3. Custom Reward Function

Let's define a reward function that checks that the response is valid JSON and scores its similarity to the target output.

In [None]:
import json
from difflib import SequenceMatcher


async def json_reward(response: str, target: str) -> float:
    """Score a model response by JSON validity and similarity to the target.

    Args:
        response: Raw text produced by the model.
        target: Reference output to compare against.

    Returns:
        0.0 when ``response`` cannot be parsed as JSON (including when it is
        not a string at all, e.g. ``None``). Otherwise 0.5 for being valid
        JSON plus up to 0.5 scaled by the ``SequenceMatcher`` ratio between
        response and target, giving a value in [0.5, 1.0].

    When ``target`` is itself valid JSON, both sides are re-serialized with
    sorted keys before being compared, so differences in whitespace or key
    order alone do not lower the score. If ``target`` is not valid JSON the
    raw strings are compared instead.
    """
    try:
        parsed_response = json.loads(response)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers non-string responses such as None.
        return 0.0

    try:
        parsed_target = json.loads(target)
    except json.JSONDecodeError:
        # Target is not JSON: fall back to comparing the raw text.
        candidate, reference = response, target
    else:
        # Canonical form removes formatting noise from the comparison.
        candidate = json.dumps(parsed_response, sort_keys=True)
        reference = json.dumps(parsed_target, sort_keys=True)

    similarity = SequenceMatcher(None, candidate, reference).ratio()

    # 0.5 for valid JSON, up to 0.5 more for similarity.
    return 0.5 + 0.5 * similarity

## 4. Configure and Run Training

In [None]:
# Training configuration
config = {
    "max_steps": 100,
    "batch_size": 8,
    "learning_rate": 1e-5,
    "target_reward": 0.95,
    "eval_interval": 10,
}

# Run training with synthetic data generation and parameter tuning
results = await RL(
    branch=branch,
    training_data=training_data,
    eval_data=eval_data,
    mode=OptimizationMode.HYBRID,
    tuning_config=config,
    reward_fn=json_reward,
    verbose=True,
)

## 5. Analyze Results

In [None]:
import matplotlib.pyplot as plt

# Pull the reward out of every evaluation record returned by the run.
eval_rewards = [record["metrics"]["reward"] for record in results["eval_results"]]

# x-axis: the k-th evaluation is placed at step k * eval_interval.
# NOTE(review): this puts the first evaluation at step 0; confirm against RL.
interval = config["eval_interval"]
eval_steps = [index * interval for index, _ in enumerate(eval_rewards)]

# Draw the evaluation reward curve on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(eval_steps, eval_rewards, marker="o")
ax.set_title("Training Progress")
ax.set_xlabel("Steps")
ax.set_ylabel("Evaluation Reward")
ax.grid(True)
plt.show()

## 6. Test Improved Model

In [None]:
# Test with new example
test_prompt = (
    "Create JSON for: user: Alice Cooper, email: alice@email.com, active: true"
)

response = await branch.communicate(test_prompt)
print("Generated JSON:")
print(response)