# Experiment 10: Predictive Framework for Prompt Effectiveness

**Goal:** Build a model to predict prompt effectiveness without full evaluation.

**Key Questions:**
- Can we predict effectiveness from prompt features alone?
- Which features are most predictive?
- Can we build a simple scoring heuristic?

In [None]:
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error

from src.model_utils import load_model
from src.visualization import set_style

# Apply the project's plotting style helper (imported from src.visualization)
# once, before any figure in this notebook is created.
# NOTE(review): presumably this configures matplotlib/seaborn defaults — confirm
# in src/visualization.
set_style()

In [None]:
# Load the language model whose behaviour is being measured. The returned
# object is used below through get_sequence_log_probs() and
# get_next_token_distribution().
model = load_model("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

## 1. Generate Training Data

We need a diverse set of prompts, each labeled with its measured effectiveness (here, the log-probability the model assigns to the expected answer).

In [None]:
# Building blocks that are combined into prompt variants.
# An empty string means "leave this component out".
PERSONAS = [
    "",
    "You are an expert.",
    "You are a helpful assistant.",
    "You are a teacher.",
    "As a specialist,",
]
INSTRUCTIONS = [
    "",
    "Answer accurately.",
    "Be precise.",
    "Think carefully.",
    "Respond concisely.",
]
COT_TRIGGERS = [
    "",
    "Let's think step by step.",
    "Let's work through this.",
    "Step by step:",
]

# Layout templates; "{q}" is the placeholder for the question text.
FORMATS = [
    "{q}",
    "Question: {q}\nAnswer:",
    "Q: {q}\nA:",
    "### Question\n{q}\n### Answer",
]

# Evaluation questions ("q") paired with the expected answer string ("a").
QUESTIONS = [
    dict(q="What is 25 + 37?", a="62"),
    dict(q="What is the capital of Italy?", a="Rome"),
    dict(q="Is 'wonderful' positive or negative?", a="positive"),
    dict(q="What is 8 * 9?", a="72"),
    dict(q="What color is the sky?", a="blue"),
]

In [None]:
def generate_prompt(persona, instruction, cot, fmt, question):
    """Assemble one prompt string from its optional components.

    Non-empty ``persona`` and ``instruction`` are joined with a single space
    and placed above the formatted question, separated from it by a blank
    line. ``fmt`` is a template containing ``{q}``, which is replaced by
    ``question``. A non-empty ``cot`` trigger is appended after the question,
    again separated by a blank line.
    """
    header = " ".join(filter(None, (persona, instruction)))
    body = fmt.format(q=question)

    if header:
        body = f"{header}\n\n{body}"

    return f"{body}\n\n{cot}" if cot else body

In [None]:
# Generate diverse prompts and measure effectiveness
import itertools
import random
random.seed(42)

n_samples = 200  # Sample from the combinatorial space

# Enumerate every component combination and draw WITHOUT replacement.
# Independent random.choice draws can repeat a combination; a repeated prompt
# can then land in both the training and the test split further down, which
# leaks test targets into training and inflates the reported scores.
all_combinations = list(
    itertools.product(PERSONAS, INSTRUCTIONS, COT_TRIGGERS, FORMATS, QUESTIONS)
)
sampled_combinations = random.sample(
    all_combinations, min(n_samples, len(all_combinations))
)

training_data = []
for persona, instruction, cot, fmt, qa in tqdm(sampled_combinations, desc="Generating samples"):
    prompt = generate_prompt(persona, instruction, cot, fmt, qa["q"])

    # Measure effectiveness: score the expected answer (with a leading space)
    # as a continuation of the prompt, and read the entropy reported for the
    # model's next-token distribution after the prompt.
    seq_probs = model.get_sequence_log_probs(prompt, " " + qa["a"])
    dist = model.get_next_token_distribution(prompt)

    training_data.append({
        "prompt": prompt,
        "persona": persona,
        "instruction": instruction,
        "cot": cot,
        "format": fmt,
        "question": qa["q"],
        "target_log_prob": seq_probs["total_log_prob"],
        "entropy": dist["entropy"]
    })

df = pd.DataFrame(training_data)
print(f"Generated {len(df)} training samples")

## 2. Feature Engineering

In [None]:
def extract_features(row):
    """Compute surface-level features for one prompt sample.

    ``row`` must support key lookup for "prompt", "persona", "instruction"
    and "cot" (a DataFrame row or a plain dict both work). Returns a dict
    mapping feature name to a numeric value; binary features are 0/1.
    """
    text = row["prompt"]
    lowered = text.lower()
    tokens = text.split()
    trimmed = text.strip()
    newline_total = text.count("\n")

    def flag(condition):
        # Encode any truthy/falsy value as an integer 1/0 indicator.
        return 1 if condition else 0

    return {
        # Size of the prompt
        "prompt_length": len(text),
        "word_count": len(tokens),
        "line_count": newline_total + 1,

        # Which optional components were used to build the prompt
        "has_persona": flag(row["persona"]),
        "has_instruction": flag(row["instruction"]),
        "has_cot": flag(row["cot"]),

        # Case-insensitive keyword indicators
        "has_expert": flag("expert" in lowered),
        "has_step": flag("step" in lowered),
        "has_think": flag("think" in lowered),
        "has_careful": flag("careful" in lowered),
        "has_accurate": flag("accura" in lowered),
        "has_precise": flag("precise" in lowered),

        # Layout indicators (note: the colon and newline entries are counts)
        "has_qa_format": flag("Question:" in text or "Q:" in text),
        "has_markdown": flag("###" in text),
        "has_colon": text.count(":"),
        "has_newlines": newline_total,

        # Punctuation, ignoring surrounding whitespace for the endings
        "ends_with_colon": flag(trimmed.endswith(":")),
        "ends_with_period": flag(trimmed.endswith(".")),
        "question_marks": text.count("?"),

        # Vocabulary statistics over whitespace-separated tokens
        "avg_word_length": np.mean([len(tok) for tok in tokens]) if tokens else 0,
        "unique_words": len(set(lowered.split())),
    }

In [None]:
# Build the modelling table: one dict of features per sample, with the
# measured log-probability attached under "target" as the last column.
feature_rows = [
    {**extract_features(sample), "target": sample["target_log_prob"]}
    for _, sample in df.iterrows()
]

feature_df = pd.DataFrame(feature_rows)
n_features = len(feature_df.columns) - 1  # every column except "target"
print(f"Extracted {n_features} features")
print(feature_df.head())

## 3. Train Predictive Models

In [None]:
# Separate the predictors from the target column
feature_cols = [col for col in feature_df.columns if col != "target"]
X = feature_df[feature_cols].to_numpy()
y = feature_df["target"].to_numpy()

# Hold out 20% of the samples for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize with statistics fitted on the training split only, then apply
# the same transformation to the held-out split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training set: {len(X_train)}, Test set: {len(X_test)}")

In [None]:
# Candidate regressors to compare
models_to_test = {
    "Ridge": Ridge(alpha=1.0),
    "Lasso": Lasso(alpha=0.1),
    "RandomForest": RandomForestRegressor(n_estimators=50, max_depth=10, random_state=42),
}

results = {}
for name, model_obj in models_to_test.items():
    # The random forest is trained on the raw features; the linear models
    # are trained on the standardized ones.
    if name == "RandomForest":
        train_inputs, test_inputs = X_train, X_test
    else:
        train_inputs, test_inputs = X_train_scaled, X_test_scaled

    model_obj.fit(train_inputs, y_train)
    y_pred = model_obj.predict(test_inputs)

    # Score on the held-out split
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    results[name] = dict(model=model_obj, r2=r2, mae=mae, y_pred=y_pred)
    print(f"{name}: R² = {r2:.4f}, MAE = {mae:.4f}")

In [None]:
# Visualize predictions vs actual
import os
os.makedirs('../results', exist_ok=True)

# One panel per fitted model, so adding or removing entries in `results`
# neither raises IndexError nor leaves empty panels. squeeze=False keeps
# `axes` two-dimensional even when there is a single panel.
n_panels = max(len(results), 1)
fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 4), squeeze=False)

# Endpoints of the "perfect prediction" diagonal, shared by every panel
actual_min, actual_max = y_test.min(), y_test.max()

for ax, (name, res) in zip(axes[0], results.items()):
    ax.scatter(y_test, res["y_pred"], alpha=0.5)
    ax.plot([actual_min, actual_max], [actual_min, actual_max], 'r--', lw=2)
    ax.set_xlabel('Actual Log Probability')
    ax.set_ylabel('Predicted Log Probability')
    ax.set_title(f'{name}\nR² = {res["r2"]:.3f}')

plt.tight_layout()
plt.savefig('../results/exp10_predictions.png', dpi=150, bbox_inches='tight')
plt.show()

## 4. Feature Importance Analysis

In [None]:
# Derive one importance Series per model, indexed by feature name and sorted
# from most to least important.

# Ridge: magnitude of each coefficient
ridge_model = results["Ridge"]["model"]
ridge_importance = (
    pd.Series(np.abs(ridge_model.coef_), index=feature_cols)
    .sort_values(ascending=False)
)

# Lasso: magnitude of each coefficient (sparse)
lasso_model = results["Lasso"]["model"]
lasso_importance = (
    pd.Series(np.abs(lasso_model.coef_), index=feature_cols)
    .sort_values(ascending=False)
)

# Random forest: the estimator's own feature_importances_ attribute
rf_model = results["RandomForest"]["model"]
rf_importance = (
    pd.Series(rf_model.feature_importances_, index=feature_cols)
    .sort_values(ascending=False)
)

importances = {
    "Ridge": ridge_importance,
    "Lasso": lasso_importance,
    "RandomForest": rf_importance,
}

In [None]:
# Visualize feature importances
# One panel per entry in `importances`, so the figure follows the set of
# models instead of assuming exactly three. squeeze=False keeps `axes`
# two-dimensional even when there is a single panel. The width scales so that
# three panels still give the original 16x6 figure.
n_panels = max(len(importances), 1)
fig, axes = plt.subplots(1, n_panels, figsize=(16 * n_panels / 3, 6), squeeze=False)

for ax, (name, imp) in zip(axes[0], importances.items()):
    top_10 = imp.head(10)
    positions = range(len(top_10))
    ax.barh(positions, top_10.values, color='steelblue', alpha=0.7)
    ax.set_yticks(positions)
    ax.set_yticklabels(top_10.index)
    ax.set_xlabel('Importance')
    ax.set_title(f'{name} - Top 10 Features')
    # Put the most important feature at the top of the chart
    ax.invert_yaxis()

plt.tight_layout()
plt.savefig('../results/exp10_feature_importance.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Agreement between models on which features matter
print("=== Feature Importance Consensus ===")

# Within each model, rank 1 is the most important feature. The per-model
# ranks are then averaged per feature, and the smallest average comes first.
per_model_ranks = {}
for name, imp in importances.items():
    per_model_ranks[name] = imp.rank(ascending=False)
ranks = pd.DataFrame(per_model_ranks)
avg_rank = ranks.mean(axis=1).sort_values()

print("\nTop 10 features (by average rank across models):")
top_consensus = avg_rank.head(10)
for feat, rank in top_consensus.items():
    print(f"  {feat:25s}: avg rank = {rank:.1f}")

## 5. Build Simple Scoring Heuristic

In [None]:
# Let Lasso act as an automatic feature selector: keep only the features
# whose importance value exceeds the 0.01 cut-off.
above_cutoff = lasso_importance > 0.01
lasso_nonzero = lasso_importance[above_cutoff]
print("=== Features Selected by Lasso ===")
print(lasso_nonzero)

In [None]:
# Hand-written scoring rule built from the most informative features
def simple_prompt_score(prompt):
    """
    Return an integer heuristic score for a prompt; higher is meant to be better.

    The score is the sum of fixed point values for each rule that applies
    (adjust the weights based on your findings).
    """
    lowered = prompt.lower()
    word_total = len(prompt.split())

    # (points, rule applies?) pairs; the two length rules cannot both hold.
    rules = [
        (2, "step" in lowered),                        # step-by-step wording
        (1, "expert" in lowered),                      # expert persona wording
        (1, "Question:" in prompt or "Q:" in prompt),  # Q/A layout
        (1, prompt.strip().endswith(":")),             # ends on a colon
        (1, 10 <= word_total <= 50),                   # moderate length is good
        (-1, word_total > 100),                        # very long prompts are penalized
        (1, prompt.count("\n") >= 2),                  # structure helps
    ]

    return sum(points for points, applies in rules if applies)

In [None]:
# Score every generated prompt with the heuristic and check how well the
# score tracks the measured log-probability.
heuristic_scores = df["prompt"].apply(simple_prompt_score)
df["heuristic_score"] = heuristic_scores

corr = df["heuristic_score"].corr(df["target_log_prob"])
print(f"Correlation between heuristic score and actual performance: {corr:.4f}")

In [None]:
# Bucket the heuristic scores: Low is <= 1, Medium is 2-3, High is above 3
group_labels = ["Low", "Medium", "High"]
df["score_group"] = pd.cut(
    df["heuristic_score"],
    bins=[-np.inf, 1, 3, np.inf],
    labels=group_labels,
)

print("\n=== Performance by Heuristic Score Group ===")
for group in group_labels:
    in_group = df["score_group"] == group
    group_log_probs = df.loc[in_group, "target_log_prob"]
    print(f"  {group}: n={len(group_log_probs)}, mean log_prob={group_log_probs.mean():.3f}")

## 6. Key Findings

In [None]:
# Print a recap of the experiment's headline numbers
banner = "=" * 60
print(banner)
print("EXPERIMENT 10 SUMMARY: Predictive Framework")
print(banner)

print("\n1. Model Performance:")
# The (name, result) pair with the highest held-out R²
best_model = max(results.items(), key=lambda item: item[1]["r2"])
best_name, best_stats = best_model
print(f"   Best model: {best_name} (R² = {best_stats['r2']:.3f})")

print("\n2. Most Predictive Features:")
top_five = avg_rank.head(5).index
for feat in top_five:
    print(f"   - {feat}")

print("\n3. Simple Heuristic:")
print(f"   Correlation with actual: {corr:.3f}")
print("   Key factors: step-by-step, Q/A format, structured layout")

print("\n4. Practical Guidelines:")
print("   - [Fill after running: What predicts good prompts?]")

In [None]:
# Persist the headline numbers as JSON
import json

# Keep only the scalar metrics per model (the fitted estimators and the
# prediction arrays stored in `results` are left out).
performance_summary = {}
for name, res in results.items():
    performance_summary[name] = {"r2": res["r2"], "mae": res["mae"]}

save_data = {
    "model_performance": performance_summary,
    "top_features": avg_rank.head(10).to_dict(),
    "heuristic_correlation": float(corr),
}

with open('../results/exp10_predictive_framework.json', 'w') as f:
    json.dump(save_data, f, indent=2)

print("Results saved.")