In [None]:
# %% [markdown]
# === Install & imports ===
# (Run this cell first whenever you start from a fresh environment.)

# %%capture
%pip install -q dspy-ai pandas numpy

In [None]:


import os
import random
import json
import pandas as pd
import numpy as np
import dspy

# ==============================
# CONFIG
# ==============================
# Path to your input CSV:
INPUT_CSV  = "training_examples.csv"
# Where to save the results:
OUTPUT_CSV = "optimized_prompt_with_inputs.csv"
# Optional: set your LLM (change to what you use; requires proper credentials)
# Examples:
#   dspy.LM("openai/gpt-4o-mini")  -> needs OPENAI_API_KEY
#   dspy.LM("anthropic/claude-3-5-sonnet") -> needs ANTHROPIC_API_KEY
#   dspy.LM("ollama/llama3") -> if you run Ollama locally
MODEL_NAME = "openai/gpt-4o-mini"

# Fixed random seed for reproducibility of example selection
SEED = 13
random.seed(SEED)
np.random.seed(SEED)

# ==============================
# READ DATA
# ==============================
# Read every cell as literal text:
#   - dtype=str keeps numeric-looking cells as strings, so later .strip()/.lower()
#     calls never hit an int/float.
#   - keep_default_na=False stops pandas from turning legitimate text such as
#     "NA", "null" or "None" into NaN (which fillna would then blank out).
df = pd.read_csv(INPUT_CSV, dtype=str, keep_default_na=False).fillna("")

# Normalize column names (trim stray whitespace, lowercase) BEFORE validating,
# so the check and the later column lookups agree on the exact same names.
colmap = {c: str(c).strip().lower() for c in df.columns}
df = df.rename(columns=colmap)

required_cols = {"initial_prompt", "input_text", "ideal_output"}
missing = required_cols - set(df.columns)
if missing:
    raise ValueError(
        f"Your CSV must contain columns: {sorted(required_cols)}; missing: {sorted(missing)}"
    )

# Fail with a clear message instead of an IndexError on df.iloc[0] below.
if df.empty:
    raise ValueError(
        f"{INPUT_CSV} contains no data rows; add at least one labeled example."
    )

if len(df) < 4:
    print(
        "⚠️ Tip: Add more rows for better optimization. "
        "DSPy works best with a handful to dozens of labeled examples."
    )

# Use the first row's initial_prompt as the instruction seed
instruction_text = str(df.iloc[0]["initial_prompt"]).strip()
if not instruction_text:
    instruction_text = "Perform the task described using the input and produce the ideal output."

# ==============================
# CONFIGURE DSPy
# ==============================
# Make sure your API key is available if you use a hosted model (e.g., OpenAI/Anthropic).
# os.environ["OPENAI_API_KEY"] = "sk-..."  # uncomment and set if needed

dspy.settings.configure(lm=dspy.LM(MODEL_NAME, max_tokens=512))

# ------------------------------
# Define a Signature and Module
# ------------------------------
# Signature for the task: one text input (`input_text`) mapped to one text
# output (`prediction`).
class TaskSignature(dspy.Signature):
    """{instruction}"""
    # NOTE: the docstring above is only a placeholder; the script assigns the
    # real instruction text to `TaskSignature.__doc__` right after this class.
    # Do not edit it expecting the literal text to reach the model.

    # The raw task input taken from the CSV's `input_text` column.
    input_text: str = dspy.InputField(desc="The input for the task.")
    # The model's answer; compared against the CSV's `ideal_output` by the metric.
    prediction: str = dspy.OutputField(desc="The ideal output for the task.")

# Inject the instruction into the signature docstring dynamically:
TaskSignature.__doc__ = instruction_text or TaskSignature.__doc__

# A simple module that maps input_text -> prediction
task_module = dspy.Predict(TaskSignature)

# ==============================
# BUILD TRAIN/DEV SETS
# ==============================
# Convert rows to DSPy Examples. Iterating the two columns with zip() keeps each
# cell's own value; iterrows() builds a Series per row, which is slow and can
# upcast values when column dtypes differ.
examples = [
    dspy.Example(input_text=input_text, prediction=ideal_output).with_inputs("input_text")
    for input_text, ideal_output in zip(df["input_text"], df["ideal_output"])
]

# Shuffle with a dedicated, seeded RNG before splitting. Without this the split
# simply follows CSV row order (biased if the file is sorted/grouped) and SEED
# would have no effect on which examples land in train vs. dev.
random.Random(SEED).shuffle(examples)

# Small, simple split (80/20); for tiny datasets, DSPy can still run but with variance.
n_examples = len(examples)
if n_examples >= 5:
    split = int(0.8 * n_examples)
else:
    # Keep at least one training example; hold out the last one when possible.
    split = max(1, n_examples - 1)

trainset = examples[:split]
# With a single example there is nothing left to hold out, so reuse it for dev.
devset   = examples[split:] if (n_examples - split) > 0 else examples[:1]

# ==============================
# DEFINE A METRIC
# ==============================
def token_f1(prediction: str, gold: str) -> float:
    """Simple, model-agnostic F1 over unique, lowercased whitespace tokens.

    Both texts are lowercased, split on whitespace and reduced to sets, so
    repeated tokens count once and token order is ignored.

    Args:
        prediction: Model output. ``None`` is treated as empty text and
            non-string values are converted with ``str()``.
        gold: Reference text, handled the same way as ``prediction``.

    Returns:
        A score in ``[0.0, 1.0]``: ``1.0`` when both token sets are equal
        (including both empty), ``0.0`` when exactly one side is empty or the
        sets do not overlap, otherwise the set-based F1.
    """
    def _tokens(text) -> set:
        # Tolerate None / numeric cells instead of crashing on .lower().
        return set() if text is None else set(str(text).lower().split())

    p_set, g_set = _tokens(prediction), _tokens(gold)
    if not p_set and not g_set:
        return 1.0
    if not p_set or not g_set:
        return 0.0

    inter = len(p_set & g_set)
    if inter == 0:
        return 0.0

    # F1 = 2PR / (P + R) with P = inter/|p| and R = inter/|g| simplifies to the
    # expression below. Both sets are non-empty here, so no epsilon is needed
    # and an exact match yields exactly 1.0.
    return 2 * inter / (len(p_set) + len(g_set))

def metric_fn(example, pred, trace=None) -> float:
    """DSPy metric: higher is better. Compare predicted text to ideal_output.

    Args:
        example: Gold example; its ``prediction`` field holds the ideal output.
        pred: Program output; its ``prediction`` field holds the model's text.
            A missing or ``None`` field is scored as empty text.
        trace: Optional trace object. DSPy optimizers pass it as a third
            positional argument while bootstrapping, so the parameter must
            exist even though the score does not depend on it.

    Returns:
        The token-level F1 between predicted and gold text, in ``[0.0, 1.0]``.
    """
    predicted = getattr(pred, "prediction", None)
    gold = getattr(example, "prediction", None)
    return token_f1(
        "" if predicted is None else str(predicted),
        "" if gold is None else str(gold),
    )

# ==============================
# OPTIMIZE THE PROMPT WITH DSPy
# ==============================
# BootstrapFewShot selects and arranges few‑shot examples to improve the prompt
# You can tune these knobs for your dataset size/time budget:
optimizer = dspy.BootstrapFewShot(
    metric=metric_fn,
    max_bootstrapped_demos=4,  # synthetic demos
    max_labeled_demos=16,      # uses your CSV examples
    max_rounds=3,
)

# NOTE: BootstrapFewShot.compile() accepts only student / teacher / trainset;
# passing `valset` is rejected as an unexpected keyword. The held-out `devset`
# is therefore not passed here; keep it for a separate evaluation of `compiled`
# (e.g. with dspy.Evaluate).
compiled = optimizer.compile(
    student=task_module,
    trainset=trainset,
)

# ==============================
# EXTRACT THE "IMPROVED PROMPT"
# ==============================
# DSPy does not expose a single raw "prompt string" API because it abstracts prompts,
# but we can reconstruct a faithful, deployable template from:
#   - the (possibly refined) instruction
#   - the selected few‑shot demos (compiled.demos)
#   - a canonical input/output format
#
# This string is what you can reuse elsewhere if you want a plain prompt.

def make_prompt_string(program: "dspy.Module") -> str:
    """Render a compiled DSPy program as a plain, reusable prompt template.

    The template has three parts: the instruction text, the few-shot demos
    attached to the program, and a final slot containing the literal
    placeholder ``{input_text}``. Demo text is inserted verbatim, so fill the
    placeholder with ``str.replace`` rather than ``str.format`` if demos may
    contain curly braces.

    Args:
        program: An object exposing ``signature`` and/or ``demos`` (such as the
            compiled predictor), or a composite module exposing a
            ``predictors()`` method, in which case its first predictor is used.

    Returns:
        The prompt template as a single stripped string.
    """
    def _as_text(value) -> str:
        # Demo fields may be None or non-string (e.g. numeric CSV cells).
        return "" if value is None else str(value).strip()

    # Locate the object that actually carries the signature and demos. A
    # composite module has neither attribute itself, so fall back to its first
    # sub-predictor instead of silently emitting a demo-less prompt.
    predictor = program
    if not (hasattr(program, "signature") or hasattr(program, "demos")):
        list_predictors = getattr(program, "predictors", None)
        sub_predictors = list(list_predictors()) if callable(list_predictors) else []
        if sub_predictors:
            predictor = sub_predictors[0]

    # Instruction (signature instructions can be refined by some optimizers).
    # Prefer the `instructions` attribute; keep the older lookups as fallbacks.
    signature = getattr(predictor, "signature", None)
    instr_text = ""
    if signature is not None:
        instr_text = _as_text(
            getattr(signature, "instructions", "")
            or getattr(signature, "doc", "")
            or getattr(signature, "__doc__", "")
            or ""
        )

    # Demos (few-shot examples chosen/bootstrapped by the optimizer).
    # They may be dspy.Example objects or dicts (after save/load); `demos`
    # itself may be missing or None.
    demos = []
    for demo in getattr(predictor, "demos", None) or []:
        if hasattr(demo, "toDict"):
            d = demo.toDict()
        elif isinstance(demo, dict):
            d = demo
        else:
            # best-effort fallback
            d = {"input_text": getattr(demo, "input_text", ""), "prediction": getattr(demo, "prediction", "")}
        demos.append(d)

    lines = []
    lines.append("### Instruction")
    lines.append(instr_text or "Use the input_text to produce the best possible prediction.\n")
    if demos:
        lines.append("\n### Few‑Shot Examples")
        for i, d in enumerate(demos, 1):
            lines.append(f"\n# Example {i}")
            lines.append(f"Input:\n{_as_text(d.get('input_text', ''))}")
            lines.append(f"Output:\n{_as_text(d.get('prediction', ''))}")

    # Canonical I/O template
    lines.append("\n### Now answer for this new input")
    lines.append("Input:\n{input_text}")
    lines.append("Output:")
    return "\n".join(lines).strip()

# Flatten the compiled program into a single reusable prompt template.
optimized_prompt_str = make_prompt_string(compiled)

# ==============================
# SAVE: final improved prompt + inputs
# ==============================
# One output row per input row; the same prompt template is repeated on each.
out = pd.DataFrame(
    {
        "optimized_prompt": [optimized_prompt_str for _ in range(len(df))],
        "input_text": df["input_text"].tolist(),
        "ideal_output": df["ideal_output"].tolist(),
    }
)
out.to_csv(OUTPUT_CSV, index=False)

# Show at most the first 2000 characters of the prompt, flagging any cut-off.
preview = optimized_prompt_str[:2000]
if len(optimized_prompt_str) > 2000:
    preview += "\n...[truncated]..."

print(
    "✅ Done!",
    f"- Seed instruction (from CSV):\n{instruction_text}\n",
    f"- Optimized prompt written to: {OUTPUT_CSV}",
    "\n--- Preview of the optimized prompt ---\n",
    preview,
    sep="\n",
)
