# Experiment 5: The Minimal Effective Prompt

**Goal:** Find the smallest prompt modification that produces a given performance gain.

**Key Questions:**
- What is the minimal change needed to improve performance?
- Are there universal "magic words" that always help?
- Can we predict prompt effectiveness from simple features?

In [None]:
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from itertools import combinations

from src.model_utils import load_model
from src.metrics import ExperimentResults
from src.visualization import set_style

set_style()

In [None]:
# Load the model under test. The returned object is queried in the cells below
# through its get_sequence_log_probs(prompt, continuation) method.
model = load_model("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

## 1. Define Candidate Additions

In [None]:
# Candidate prompt prefixes, keyed by a short label used in result tables.
# Each value is prepended verbatim to the question (no separator is inserted),
# which is why every entry carries its own trailing space.
CANDIDATE_ADDITIONS = {
    "please": "Please ",
    "carefully": "Carefully ",
    "correctly": "Correctly ",
    "step_by_step": "Step by step, ",
    "think_carefully": "Think carefully. ",
    "lets_think": "Let's think. ",
    "be_precise": "Be precise. ",
    "expert": "As an expert, ",
    "assistant": "As a helpful assistant, ",
    "answer_colon": "Answer: ",
    "important": "Important: ",
    "remember": "Remember: "
}

# Evaluation items: "q" is the question used as the base prompt and "a" is the
# expected answer whose log-probability is scored (with a leading space added
# by the scoring functions).
TEST_QUESTIONS = [
    {"q": "What is 15 + 28?", "a": "43"},
    {"q": "What is the capital of Japan?", "a": "Tokyo"},
    {"q": "Is 'happy' a positive or negative word?", "a": "positive"},
]

## 2. Single Addition Testing

In [None]:
def test_single_additions(model, question, expected, additions):
    """Test impact of adding each candidate to the base prompt."""
    base_prompt = question
    baseline_probs = model.get_sequence_log_probs(base_prompt, " " + expected)
    baseline = baseline_probs["total_log_prob"]
    
    results = [{"addition": "baseline", "prompt": base_prompt, "log_prob": baseline, "improvement": 0}]
    
    for name, addition in additions.items():
        prefix_prompt = addition + base_prompt
        prefix_probs = model.get_sequence_log_probs(prefix_prompt, " " + expected)
        
        results.append({
            "addition": name,
            "prompt": prefix_prompt,
            "log_prob": prefix_probs["total_log_prob"],
            "improvement": prefix_probs["total_log_prob"] - baseline
        })
    
    return results, baseline

In [None]:
# Run the single-addition sweep for every test question and collect one flat
# list of result rows, each tagged with its question and that question's
# baseline log-probability.
all_addition_results = []

for test in tqdm(TEST_QUESTIONS, desc="Testing questions"):
    results, baseline = test_single_additions(
        model, test["q"], test["a"], CANDIDATE_ADDITIONS
    )
    for r in results:
        r.update(question=test["q"], baseline=baseline)
    all_addition_results += results

df = pd.DataFrame(all_addition_results)

In [None]:
print("=== Average Improvement by Addition ===")
# Mean/std of the improvement per addition across questions, best first.
# Baseline rows are excluded because their improvement is 0 by construction.
non_baseline = df.loc[df['addition'] != 'baseline']
improvements = (
    non_baseline
    .groupby('addition')['improvement']
    .agg(['mean', 'std'])
    .sort_values('mean', ascending=False)
)

for heading, subset in (
    ("\nTop 5:", improvements.head(5)),
    ("\nWorst 3:", improvements.tail(3)),
):
    print(heading)
    for idx, row in subset.iterrows():
        print(f"  {idx:20s}: mean={row['mean']:+.4f}")

In [None]:
import os

# Make sure the output directory exists before the figure is written.
os.makedirs('../results', exist_ok=True)

# Horizontal bar chart of the mean improvement per addition, in the same
# order as the `improvements` table; positive bars green, the rest red.
additions = list(improvements.index)
means = improvements['mean'].values
colors = ['green' if m > 0 else 'red' for m in means]
bar_positions = range(len(additions))

fig, ax = plt.subplots(figsize=(12, 8))
ax.barh(bar_positions, means, color=colors, alpha=0.7)
ax.set_yticks(bar_positions)
ax.set_yticklabels(additions)
ax.set(
    xlabel='Improvement in Log Probability',
    title='Impact of Single-Word/Phrase Additions',
)
# Thin reference line at zero improvement.
ax.axvline(x=0, color='gray', linestyle='-', linewidth=0.5)

plt.tight_layout()
# Save before show() so the written file contains the rendered figure.
plt.savefig('../results/exp5_single_additions.png', dpi=150, bbox_inches='tight')
plt.show()

## 3. Combination Search

In [None]:
def test_addition_combinations(model, question, expected, additions, max_combo_size=2):
    baseline = model.get_sequence_log_probs(question, " " + expected)["total_log_prob"]
    results = []
    addition_items = list(additions.items())
    
    for combo in combinations(addition_items, max_combo_size):
        names = [c[0] for c in combo]
        texts = [c[1] for c in combo]
        prompt = "".join(texts) + question
        probs = model.get_sequence_log_probs(prompt, " " + expected)
        results.append({
            "combination": "+".join(names),
            "log_prob": probs["total_log_prob"],
            "improvement": probs["total_log_prob"] - baseline
        })
    return results, baseline

In [None]:
test = TEST_QUESTIONS[0]
# Restrict the (quadratic) combination search to the six additions with the
# highest mean improvement. `improvements` is already sorted best-first, so
# head(6) gives the actual top performers; slicing CANDIDATE_ADDITIONS would
# only give the first six in definition order.
top_additions = {
    name: CANDIDATE_ADDITIONS[name] for name in improvements.head(6).index
}

print(f"Testing combinations for: {test['q']}")
combo_results, baseline = test_addition_combinations(model, test["q"], test["a"], top_additions)
# Best combinations first.
combo_results = sorted(combo_results, key=lambda x: x["improvement"], reverse=True)

print("\nTop 10 combinations:")
for r in combo_results[:10]:
    print(f"  {r['combination']:35s}: {r['improvement']:+.4f}")

## 4. Minimal Substring Analysis

In [None]:
def find_minimal_effective_substring(model, question, expected, effective_addition):
    """Score every contiguous word window of a prefix against the full prefix.

    The prefix is split on whitespace. Each contiguous run of words, from the
    longest down to single words, is re-joined with single spaces, given a
    trailing space, prepended to the question and scored.

    Args:
        model: Object exposing ``get_sequence_log_probs(prompt, target)`` whose
            result has a ``"total_log_prob"`` entry.
        question: Base prompt text.
        expected: Expected answer text (scored with a leading space).
        effective_addition: Prefix whose word windows are evaluated.

    Returns:
        ``(results, full_improvement)``. ``results`` holds one dict per window
        with the keys ``substring``, ``length`` (in words), ``improvement``
        (log-probability gain over the bare question) and ``pct_of_full``
        (that gain as a percentage of ``full_improvement``, or 0 when
        ``full_improvement`` is 0). ``full_improvement`` is the gain of the
        unmodified ``effective_addition``.
    """
    target = " " + expected

    def log_prob_with(prefix):
        return model.get_sequence_log_probs(prefix + question, target)["total_log_prob"]

    baseline = log_prob_with("")
    full_improvement = log_prob_with(effective_addition) - baseline

    words = effective_addition.split()
    word_count = len(words)
    results = []

    # Longest windows first, sliding left to right within each length.
    for length in reversed(range(1, word_count + 1)):
        for start in range(word_count - length + 1):
            window = " ".join(words[start:start + length])
            gain = log_prob_with(window + " ") - baseline
            if full_improvement != 0:
                share = gain / full_improvement * 100
            else:
                share = 0
            results.append({
                "substring": window,
                "length": length,
                "improvement": gain,
                "pct_of_full": share,
            })
    return results, full_improvement

In [None]:
# Break a multi-word phrase into its contiguous word windows and check how
# much of the full phrase's gain each window keeps, on the first test question.
test = TEST_QUESTIONS[0]
effective_phrase = "Let's think step by step. "

substring_results, full_improvement = find_minimal_effective_substring(
    model, test["q"], test["a"], effective_phrase
)
print(f"Full phrase improvement: {full_improvement:.4f}\n")

# Rank a copy so substring_results keeps its original (longest-first) order.
sorted_results = list(substring_results)
sorted_results.sort(key=lambda entry: entry["improvement"], reverse=True)

for r in sorted_results[:10]:
    print(f"  '{r['substring']:20s}' (len={r['length']}): {r['improvement']:+.4f} ({r['pct_of_full']:.1f}%)")

## 5. Feature Analysis

In [None]:
def extract_prompt_features(prompt):
    """Return simple surface features of ``prompt`` as a dict.

    The result contains the character count, the whitespace-separated word
    count, whether the prompt contains a colon, and case-insensitive substring
    flags for "please", "step", "think" and "expert".
    """
    lowered = prompt.lower()
    features = {
        "length_chars": len(prompt),
        "length_words": len(prompt.split()),
        "has_colon": ":" in prompt,
    }
    # Keyword flags are substring matches, so "steps" also sets has_step.
    for key, keyword in (
        ("has_please", "please"),
        ("has_step", "step"),
        ("has_think", "think"),
        ("has_expert", "expert"),
    ):
        features[key] = keyword in lowered
    return features

# One feature row per scored prompt in `df` (baseline rows included), paired
# with the log-probability that prompt obtained.
# NOTE(review): this correlates features with the raw log_prob pooled across
# questions, so differences between questions are mixed in; correlating with
# the per-question 'improvement' column may be what is intended - confirm.
features_list = [
    {**extract_prompt_features(prompt), "log_prob": log_prob}
    for prompt, log_prob in zip(df["prompt"], df["log_prob"])
]

feature_df = pd.DataFrame(features_list)
# Boolean features are cast to float so they can be correlated numerically.
correlations = {col: feature_df[col].astype(float).corr(feature_df['log_prob'])
                for col in feature_df.columns if col != 'log_prob'}

print("=== Feature Correlations ===")
# Drop NaN correlations *before* sorting: NaN compares false against
# everything, so sorting with NaN keys can leave the real values misordered.
valid_correlations = {
    name: value for name, value in correlations.items() if not np.isnan(value)
}
for f, c in sorted(valid_correlations.items(), key=lambda x: abs(x[1]), reverse=True):
    print(f"  {f:20s}: {c:+.4f}")

## 6. Summary

In [None]:
# Console summary: header banner, the three best additions by mean improvement,
# and placeholder findings to be filled in by hand after a run.
banner = "="*60
print(banner)
print("EXPERIMENT 5 SUMMARY")
print(banner)

print("\n1. Best Single Additions:")
for idx, row in improvements.iloc[:3].iterrows():
    print(f"   '{idx}': {row['mean']:+.4f}")

print("\n2. Key Findings:")
for finding in (
    "   - [Fill after running: Which additions are most effective?]",
    "   - [Fill after running: Are effects consistent across questions?]",
    "   - [Fill after running: What features predict success?]",
):
    print(finding)

In [None]:
import json
import os

# Persist the per-addition statistics (nested as column -> addition -> value)
# and the feature correlations; NaN correlations are left out.
save_data = {
    "single_additions": improvements.to_dict(),
    "feature_correlations": {k: v for k, v in correlations.items() if not np.isnan(v)}
}

# Create the output directory here too, so this cell does not depend on the
# plotting cell having been run first.
os.makedirs('../results', exist_ok=True)

# default=float converts values json cannot serialize natively (such as
# NumPy scalar types) into plain floats.
with open('../results/exp5_minimal_prompt_results.json', 'w', encoding='utf-8') as f:
    json.dump(save_data, f, indent=2, default=float)

print("Results saved.")