In [5]:
import sys
from pathlib import Path
import random
import os
import json
from reformat import PromptReformatter
from reformat import PromptImprover
from reformat.rules import (
    SeparatorRule,
    CasingRule,
    ItemFormattingRule,
    EnumerationRule,
)

import logging

# Raise noisy loggers to WARNING so per-request INFO records do not flood the
# cell output (this filters INFO/DEBUG; warnings and errors still show).
# The improver logger is listed under both dotted names because recorded runs
# of this notebook emitted INFO records as "reformat.improver" and as
# "reformat.reformat.improver".
for _noisy_logger in ("httpx", "reformat.improver", "reformat.reformat.improver"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)


In [6]:
# Sample prompts keyed by the kind of structure they contain. Insertion order
# is significant: a later cell iterates over this dict and prints in this order.
test_prompts = {
    # Single line, no structure at all.
    "simple": "This is a test prompt",
    # Plain numbered list.
    "enumerated": "\n".join(
        [
            "1. First item",
            "2. Second item",
            "3. Third item",
        ]
    ),
    # "Key: value" fields, one per line.
    "fields": "\n".join(
        [
            "Title: Test Document",
            "Author: John Doe",
            "Date: 2024-03-15",
            "Content: This is a test document with multiple fields.",
        ]
    ),
    # Fields mixed with a numbered option list.
    "complex": "\n".join(
        [
            "TASK: Classify the sentiment",
            'INPUT: "I absolutely love this product! Best purchase ever."',
            "OPTIONS:",
            "1. Positive",
            "2. Negative",
            "3. Neutral",
            "ANSWER:",
        ]
    ),
    # Free-text instructions, a numbered rule list and a Q/A tail.
    "instruction": "\n".join(
        [
            "You are a helpful AI assistant.",
            "Follow these rules:",
            "1. Be concise",
            "2. Be accurate",
            "3. Be helpful",
            "Now answer the user's question:",
            "Q: What is the capital of France?",
            "A:",
        ]
    ),
}

In [8]:
def test_response_improvement(prompt: str, model: str = "llama-3.3-70b-versatile"):
    """Test how formatting affects response quality."""
    print("=== Testing Response Improvement ===")
    print("\nOriginal Prompt:")
    print(prompt)

    improver = PromptImprover(model=model)
    result = improver.improve(
        prompt,
        num_candidates=5,  # Small number for testing
        num_iterations=5,
    )

    print("\nOriginal Response:")
    print(result["original_response"])
    print("\nImproved Response:")
    print(result["improved_response"])
    print(f"\nImprovement Score: {result['improvement_score']:.3f}")
    print("\nBest Format Used:")
    print(json.dumps(result["best_format"], indent=2))

    return result


# Run the response-improvement check on the "complex" prompt, preceded by two
# blank lines and a two-row banner of 50 "=" characters.
prompt = test_prompts["complex"]
print(f"\n\n{'=' * 50}", "=" * 50, sep="\n")
result = test_response_improvement(prompt=prompt)



=== Testing Response Improvement ===

Original Prompt:
TASK: Classify the sentiment
INPUT: "I absolutely love this product! Best purchase ever."
OPTIONS:
1. Positive
2. Negative
3. Neutral
ANSWER:

Original Response:
The sentiment of the input statement is clearly expressing enthusiasm and satisfaction with the product. 

ANSWER: 1. Positive

Improved Response:
The sentiment of the input statement is clearly expressing a strong positive emotion. The use of words like "ABSOLUTELY LOVE" and "BEST PURCHASE EVER" emphasizes the customer's satisfaction and enthusiasm for the product.

ANSWER: <I>. POSITIVE

Improvement Score: 0.800

Best Format Used:
{
  "separator_rule": "Double Colon",
  "casing_rule": "Upper",
  "item_formatting_rule": "Angle Brackets",
  "enumeration_rule": "Roman",
  "score": 0.8
}


In [5]:
# Reformatter built with no explicit rule lists.
reformatter = PromptReformatter()

# Show each sample prompt next to its reformatted version. The header lines
# are printed before format() is called.
for prompt_type, prompt in test_prompts.items():
    print(
        f"\n=== Testing {prompt_type} prompt ===",
        "\nOriginal:",
        prompt,
        "\nFormatted:",
        sep="\n",
    )
    print(reformatter.format(prompt))


=== Testing simple prompt ===

Original:
This is a test prompt

Formatted:
This is a test prompt

=== Testing enumerated prompt ===

Original:
1. First item
2. Second item
3. Third item

Formatted:
(1). First item
(2). Second item
(3). Third item

=== Testing fields prompt ===

Original:
Title: Test Document
Author: John Doe
Date: 2024-03-15
Content: This is a test document with multiple fields.

Formatted:
Title: Test Document
Author: John Doe
Date: (2)0(2)(4)-0(3)-(1)(5)
Content: This is a test document with multiple fields.

=== Testing complex prompt ===

Original:
TASK: Classify the sentiment
INPUT: "I absolutely love this product! Best purchase ever."
OPTIONS:
1. Positive
2. Negative
3. Neutral
ANSWER:

Formatted:
TASK: Classify the sentiment
INPUT: "I absolutely love this product! Best purchase ever."
OPTIONS:
(1). Positive
(2). Negative
(3). Neutral
ANSWER:

=== Testing instruction prompt ===

Original:
You are a helpful AI assistant.
Follow these rules:
1. Be concise
2. Be ac

In [6]:
# Two separators and two casings to cross against each other.
separator_rules = [
    SeparatorRule(*spec)
    for spec in (
        ("Double Newline", "Use double newline", "\n\n"),
        ("Dash", "Use dash separator", " - "),
    )
]

casing_rules = [
    CasingRule(*spec)
    for spec in (
        ("Title", "Use title case"),
        ("Upper", "Use uppercase"),
    )
]

# Format the "fields" prompt once per separator/casing pairing, with the
# separator varying slowest.
for separator, casing in (
    (sep_rule, case_rule) for sep_rule in separator_rules for case_rule in casing_rules
):
    print(f"\n=== Testing {separator.name} + {casing.name} ===")
    reformatter = PromptReformatter(
        separator_rules=[separator],
        casing_rules=[casing],
    )
    formatted = reformatter.format(test_prompts["fields"])
    print(formatted)


=== Testing Double Newline + Title ===
Title: Test Document

Author: John Doe

Date: (2)0(2)(4)-0(3)-(1)(5)

Content: This Is A Test Document With Multiple Fields.

=== Testing Double Newline + Upper ===
TITLE: TEST DOCUMENT

AUTHOR: JOHN DOE

DATE: (2)0(2)(4)-0(3)-(1)(5)

CONTENT: THIS IS A TEST DOCUMENT WITH MULTIPLE FIELDS.

=== Testing Dash + Title ===
Title: Test Document - Author: John Doe - Date: (2)0(2)(4)-0(3)-(1)(5) - Content: This Is A Test Document With Multiple Fields.

=== Testing Dash + Upper ===
TITLE: TEST DOCUMENT - AUTHOR: JOHN DOE - DATE: (2)0(2)(4)-0(3)-(1)(5) - CONTENT: THIS IS A TEST DOCUMENT WITH MULTIPLE FIELDS.


In [9]:
# Groq model IDs to compare. Groq publishes its 8B "instant" model in the
# Llama 3.1 family; "llama-3.3-8b-instant" is not a Groq model ID.
target_models = [
    "llama-3.3-70b-versatile",  # High quality but slower
    "llama-3.1-8b-instant",  # Faster but lower quality
]

test_prompt = test_prompts["complex"]  # Use the complex prompt for testing

# NOTE(review): the recorded run of this cell failed with a
# `model_decommissioned` error naming `llama-3.1-70b-versatile`, which is not
# requested here - presumably a default inside PromptImprover; confirm and
# override it there.
for model in target_models:
    print(f"\n=== Testing with {model} ===")

    # The API key is read from the environment; os.getenv yields None if unset.
    improver = PromptImprover(model=model, api_key=os.getenv("GROQ_API_KEY"))

    result = improver.improve(
        test_prompt,
        num_candidates=3,  # Small number for testing
        num_iterations=1,  # Single iteration for testing
        temperature=0.1,
    )

    # NOTE(review): test_response_improvement reads "original_response" and
    # "improved_response" from the same kind of result; confirm that
    # "improved_prompt" and "model_response" are also present.
    print("\nOriginal Prompt:")
    print(test_prompt)
    print("\nBest Formatted Prompt:")
    print(result["improved_prompt"])
    print("\nModel Response:")
    print(result["model_response"])
    print(f"\nFormat Score: {result['improvement_score']:.3f}")
    print("\nBest Format:")
    print(json.dumps(result["best_format"], indent=2))

INFO:reformat.improver:Starting iteration 1/1



=== Testing with llama-3.3-70b-versatile ===


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 400 Bad Request"


BadRequestError: Error code: 400 - {'error': {'message': 'The model `llama-3.1-70b-versatile` has been decommissioned and is no longer supported. Please refer to https://console.groq.com/docs/deprecations for a recommendation on which model to use instead.', 'type': 'invalid_request_error', 'code': 'model_decommissioned'}}

In [6]:
improver = PromptImprover()
prompt = test_prompts["simple"]


def _improve_and_report(label):
    """Print ``label``, run one small improve() pass, and report its candidate count."""
    print(label)
    outcome = improver.improve(prompt, num_iterations=1, num_candidates=2)
    print(f"Candidates evaluated: {outcome['num_candidates_evaluated']}")
    return outcome


# Two identical runs on the same improver, to compare the reported candidate
# counts between the first and the second call.
result1 = _improve_and_report("First run (no cache):")
result2 = _improve_and_report("\nSecond run (should use cache):")

INFO:reformat.reformat.improver:Starting iteration 1/1
INFO:reformat.reformat.improver:New best score: 0.587
INFO:reformat.reformat.improver:New best score: 0.723
INFO:reformat.reformat.improver:Starting iteration 1/1
INFO:reformat.reformat.improver:New best score: 0.326
INFO:reformat.reformat.improver:New best score: 0.688


First run (no cache):
Candidates evaluated: 2

Second run (should use cache):
Candidates evaluated: 2


In [7]:
def run_format_analysis(prompt, num_trials=50):
    """Score ``num_trials`` sampled formats for ``prompt`` and print the five best.

    Each trial samples a candidate from a fresh ``PromptImprover``, formats the
    prompt with it, and evaluates the formatted prompt. Results are ranked by
    score, highest first, and the top five are printed with their format
    rendered as indented JSON. Nothing is returned.
    """
    improver = PromptImprover()

    def _run_trial():
        # One sample -> format -> evaluate round, packaged as a record.
        candidate = improver.sample_candidate()
        formatted = improver.format_prompt(prompt, candidate)
        score = improver.evaluate_format(prompt, formatted)
        return {"format": candidate.to_dict(), "score": score}

    trials = [_run_trial() for _ in range(num_trials)]

    # Highest score first; equal scores keep their sampling order.
    ranked = sorted(trials, key=lambda entry: entry["score"], reverse=True)

    print("Top 5 performing formats:")
    for rank, entry in enumerate(ranked[:5], start=1):
        print(f"\n{rank}. Score: {entry['score']:.3f}")
        print(json.dumps(entry["format"], indent=2))


# Rank 20 sampled formats for the "complex" prompt.
print("Analyzing format performance on complex prompt:")
run_format_analysis(prompt=test_prompts["complex"], num_trials=20)

Analyzing format performance on complex prompt:
Top 5 performing formats:

1. Score: 0.954
{
  "separator_rule": "Double Colon",
  "casing_rule": "Title",
  "item_formatting_rule": "Brackets",
  "enumeration_rule": "Roman",
  "score": null
}

2. Score: 0.785
{
  "separator_rule": "Dash",
  "casing_rule": "Lower",
  "item_formatting_rule": "Parentheses",
  "enumeration_rule": "Roman",
  "score": null
}

3. Score: 0.739
{
  "separator_rule": "Space",
  "casing_rule": "Title",
  "item_formatting_rule": "Angle Brackets",
  "enumeration_rule": "Numeric",
  "score": null
}

4. Score: 0.725
{
  "separator_rule": "Double Colon",
  "casing_rule": "No Change",
  "item_formatting_rule": "Parentheses",
  "enumeration_rule": "Alpha",
  "score": null
}

5. Score: 0.719
{
  "separator_rule": "Newline",
  "casing_rule": "No Change",
  "item_formatting_rule": "Parentheses",
  "enumeration_rule": "Numeric",
  "score": null
}


In [None]:
def interactive_format_test(prompt, separator_index=0, casing_index=0):
    """Interactive function to test different format combinations.

    Lists the names of the default separator, casing, item-formatting and
    enumeration rules, then formats ``prompt`` with one chosen separator rule
    and one chosen casing rule and prints the result.

    Args:
        prompt: Prompt text to format.
        separator_index: Position in ``SeparatorRule.get_default_rules()`` of
            the separator rule to apply. Defaults to 0 (the first rule).
        casing_index: Position in ``CasingRule.get_default_rules()`` of the
            casing rule to apply. Defaults to 0 (the first rule).

    Raises:
        IndexError: If either index is outside its list of default rules.
    """
    # Get all available rules
    separator_rules = SeparatorRule.get_default_rules()
    casing_rules = CasingRule.get_default_rules()
    item_rules = ItemFormattingRule.get_default_rules()
    enum_rules = EnumerationRule.get_default_rules()

    # Print available rules
    print("Available Rules:")
    print("\nSeparator Rules:", [r.name for r in separator_rules])
    print("Casing Rules:", [r.name for r in casing_rules])
    print("Item Formatting Rules:", [r.name for r in item_rules])
    print("Enumeration Rules:", [r.name for r in enum_rules])

    # Format with selected rules
    selected_separator = separator_rules[separator_index]
    selected_casing = casing_rules[casing_index]

    # Only one reformatter is built, restricted to the selected pair of rules.
    reformatter = PromptReformatter(
        separator_rules=[selected_separator], casing_rules=[selected_casing]
    )

    print("\nOriginal Prompt:")
    print(prompt)
    print(f"\nFormatted with {selected_separator.name} + {selected_casing.name}:")
    print(reformatter.format(prompt))


# Try the interactive formatter on the "instruction" prompt.
test_prompt = test_prompts["instruction"]
interactive_format_test(prompt=test_prompt)