# Prompt Optimization


In [None]:
!pip3 install google-generativeai google-api-core --quiet

import google.generativeai as genai
import json
import re
import time
import random
from typing import Dict, Any, Optional
from google.api_core.exceptions import TooManyRequests

API_KEY = "AI..."
genai.configure(api_key=API_KEY)

thinking_model = genai.GenerativeModel('models/gemini-2.5-flash')
lite_model = genai.GenerativeModel('models/gemini-2.0-flash-lite')


You should consider upgrading via the '/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [44]:
for m in genai.list_models():
    if 'generateContent' in m.supported_generation_methods:
        print(f'{m.name}')


models/gemini-2.5-flash
models/gemini-2.5-pro
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.5-flash-preview-tts
models/gemini-2.5-pro-preview-tts
models/gemma-3-1b-it
models/gemma-3-4b-it
models/gemma-3-12b-it
models/gemma-3-27b-it
models/gemma-3n-e4b-it
models/gemma-3n-e2b-it
models/gemini-flash-latest
models/gemini-flash-lite-latest
models/gemini-pro-latest
models/gemini-2.5-flash-lite
models/gemini-2.5-flash-image-preview
models/gemini-2.5-flash-image
models/gemini-2.5-flash-preview-09-2025
models/gemini-2.5-flash-lite-preview-09-2025
models/gemini-3-pro-preview
models/gemini-3-pro-image-preview
models/nano-banana-pro-preview
models/gemini-robotics-er-1.5-preview
models/gemini-2.5-computer-use-preview-10-2025


In [45]:
def call_with_retry(func, max_retries=5):
    for attempt in range(max_retries):
        try:
            return func()
        except TooManyRequests as e:
            retry_after = getattr(e, "retry_after", None)
            if retry_after is None:
                retry_after = 2 ** attempt + random.random()
            if attempt < max_retries - 1:
                time.sleep(min(retry_after, 60.0))
            else:
                raise
        except Exception:
            raise
    raise Exception("Max retries exceeded")


In [46]:
def generate_with_lite(prompt: str, task_input: Optional[str] = None) -> str:
    if task_input and "{user_input}" in prompt:
        full_prompt = prompt.replace("{user_input}", task_input)
    elif task_input:
        full_prompt = f"{prompt}\n\n{task_input}"
    else:
        full_prompt = prompt
    
    def _call():
        response = lite_model.generate_content(full_prompt)
        return getattr(response, "text", str(response)).strip()
    
    return call_with_retry(_call)


In [47]:
def get_feedback_and_optimized_prompt(
    lite_model_prompt: str,
    lite_model_output: str,
    task_input: Optional[str] = None,
    task_expected_output: Optional[str] = None,
    task_description: str = "text generation"
) -> Dict[str, str]:
    evaluation_context = ""
    if task_input:
        evaluation_context += f"\nTask Input: \"{task_input}\""
    if task_expected_output:
        evaluation_context += f"\nExpected Output: \"{task_expected_output}\""
    
    optimization_prompt = f"""
You are a prompt optimizer for a small text-generation model ("lite model").

Your job:
- Analyze the original prompt and the lite model's output.
- Evaluate the output quality (accuracy, format, completeness, etc.).
- Suggest an improved prompt that will help the lite model produce better results.
- Explain what was weak about the output and why the new prompt should help.

Task: {task_description}
{evaluation_context}

Rules:
- Keep the same basic goal as the original prompt.
- Focus on: clarity, style instructions, format constraints, examples, etc.
- Be practical and concise.
- If the output is already perfect, suggest minor improvements for robustness.

Return ONLY valid JSON with exactly these keys:
{{
  "optimized_prompt": "...",
  "feedback_text": "...",
  "rationale": "...",
  "grade": "Perfect" | "Good" | "Needs Improvement" | "Poor"
}}

Do NOT include anything before or after the JSON. Do NOT use markdown code blocks.

ORIGINAL_PROMPT:
{lite_model_prompt}

LITE_MODEL_OUTPUT:
{lite_model_output}
"""
    
    def _call():
        response = thinking_model.generate_content(optimization_prompt)
        return getattr(response, "text", str(response)).strip()
    
    raw_text = call_with_retry(_call)
    
    json_match = re.search(r"```json\s*([\s\S]*?)\s*```", raw_text)
    if json_match:
        json_str = json_match.group(1).strip()
    else:
        json_match = re.search(r"\{[\s\S]*\}", raw_text)
        if json_match:
            json_str = json_match.group(0)
        else:
            json_str = raw_text
    
    try:
        data = json.loads(json_str)
        return {
            "optimized_prompt": data.get("optimized_prompt", lite_model_prompt),
            "feedback_text": data.get("feedback_text", ""),
            "rationale": data.get("rationale", ""),
            "grade": data.get("grade", "Unknown"),
        }
    except json.JSONDecodeError:
        return {
            "optimized_prompt": lite_model_prompt,
            "feedback_text": "JSON parsing failed",
            "rationale": "Could not parse response",
            "grade": "Error",
        }


In [48]:
def run_optimization_loop(
    initial_prompt: str,
    num_turns: int = 5,
    task_input: Optional[str] = None,
    task_expected_output: Optional[str] = None,
    task_description: str = "text generation"
) -> Dict[str, Any]:
    current_prompt = initial_prompt
    history = []
    
    for turn in range(1, num_turns + 1):
        print(f"Turn {turn}/{num_turns}")
        try:
            lite_output = generate_with_lite(current_prompt, task_input)
            print(f"Output: {lite_output}")
        except Exception as e:
            lite_output = f"Error: {str(e)}"
            print(f"Error: {e}")
        
        try:
            feedback = get_feedback_and_optimized_prompt(
                lite_model_prompt=current_prompt,
                lite_model_output=lite_output,
                task_input=task_input,
                task_expected_output=task_expected_output,
                task_description=task_description
            )
            
            print(f"Grade: {feedback['grade']}")
            print(f"Feedback: {feedback['feedback_text']}")
            
            history.append({
                "turn": turn,
                "prompt": current_prompt,
                "output": lite_output,
                "feedback": feedback["feedback_text"],
                "rationale": feedback["rationale"],
                "grade": feedback["grade"],
                "optimized_prompt": feedback["optimized_prompt"]
            })
            
            current_prompt = feedback["optimized_prompt"]
            
        except Exception as e:
            print(f"Error getting feedback: {e}")
            history.append({
                "turn": turn,
                "prompt": current_prompt,
                "output": lite_output,
                "error": str(e)
            })
            break
    
    print(f"\nFinal prompt: {current_prompt}")
    return {
        "final_prompt": current_prompt,
        "history": history,
        "initial_prompt": initial_prompt
    }


In [49]:
initial_prompt = "Translate the following Croatian technical sentence into English. Do not include any commentary, explanations, or prefixes. Just output the translation."
task_input = "Sustav generira izvještaje o performansama u stvarnom vremenu."
task_expected_output = "The system generates real-time performance reports."

results = run_optimization_loop(
    initial_prompt=initial_prompt,
    num_turns=5,
    task_input=task_input,
    task_expected_output=task_expected_output,
    task_description="Croatian to English Technical Translation"
)


Turn 1/5
Error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.0-flash-lite
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.0-flash-lite
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.0-flash-lite
Please retry in 23.748492706s. [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputToken

KeyboardInterrupt: 