In [1]:
# import libraries
import google.generativeai as genai
import json
import time
import random
import re

from google.api_core.exceptions import TooManyRequests
from google.colab import userdata

# Read the API key from the Colab user-data store (entry named 'api_new')
# so the key itself never appears in the notebook source.
GOOGLE_API_KEY = userdata.get('api_new')

# Configure the Gemini client library with that key; every model created
# below uses this global configuration.
genai.configure(api_key=GOOGLE_API_KEY)

# Define the two models used by the optimization loop:
# - thinking_model: bigger model that critiques outputs and rewrites prompts
# - lite_model: smaller model whose prompt we want to improve
thinking_model = genai.GenerativeModel('gemini-2.5-pro')
lite_model = genai.GenerativeModel('gemini-2.0-flash-lite')

In [2]:
# simple retry helper for handling 429 (TooManyRequests) errors
def call_with_retry(func, max_retries=5):
    """Call ``func`` and retry with backoff while the API reports a quota error.

    Args:
        func: Zero-argument callable that performs the API request.
        max_retries: Maximum number of attempts (the first call included).

    Returns:
        Whatever ``func`` returns on the first attempt that succeeds.

    Raises:
        RuntimeError: If every attempt raised ``TooManyRequests`` (or
            ``max_retries`` is not positive). The last quota error, if any,
            is chained as ``__cause__`` so the root cause stays visible.
        Exception: Any other exception raised by ``func`` propagates
            unchanged; only ``TooManyRequests`` is retried.
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            return func()
        except TooManyRequests as e:
            last_error = e
            # No further attempt will follow the last one, so sleeping here
            # would only delay the failure.
            if attempt == max_retries - 1:
                break
            # if API suggests retry_after, use it; otherwise exponential backoff
            # with a little jitter so parallel callers do not retry in lockstep
            retry_after = getattr(e, "retry_after", None)
            if retry_after is None:
                retry_after = 2 ** attempt + random.random()
            print(f"Quota hit, retrying in {retry_after:.2f} seconds...")
            time.sleep(retry_after)
    raise RuntimeError("Max retries exceeded") from last_error


# call lite model (small model) and get text output
def generate_with_lite(prompt: str) -> str:
    """Send ``prompt`` to the lite model and return its textual answer.

    The request goes through ``call_with_retry``. When the response object
    has no ``text`` attribute, its string representation is returned instead.
    """
    def _request_text():
        reply = lite_model.generate_content(prompt)
        fallback = str(reply)
        return getattr(reply, "text", fallback)

    answer = call_with_retry(_request_text)
    return answer


# use thinking model to give feedback and a better prompt
def get_feedback_and_optimized_prompt(lite_model_prompt: str,
                                      lite_model_output: str) -> dict:
    """
    Use the thinking_model to:
    - Evaluate the lite model's output
    - Suggest a better prompt
    - Explain the feedback and reasoning

    Args:
        lite_model_prompt: The prompt that was sent to the lite model.
        lite_model_output: The text the lite model produced for that prompt.

    Returns a dict with:
      - optimized_prompt
      - feedback_text
      - rationale

    If the thinking model's reply does not contain a JSON object, the
    original prompt is returned unchanged together with an explanatory
    feedback_text / rationale. The original prompt is also kept when the
    reply's "optimized_prompt" is missing, empty, or not a string, so the
    next turn never runs with an unusable prompt.
    """

    optimization_prompt = f"""
You are a prompt optimizer for a small text-generation model ("lite model").

Your job:
- Look at the original prompt and the lite model's output.
- Improve the prompt so the lite model can give a better answer next time.
- Explain what was weak about the answer and why the new prompt should help.

Rules:
- Keep the same basic goal as the original prompt.
- Focus on: clarity, style instructions, format, constraints, etc.
- Be practical and concise.

Return ONLY valid JSON with exactly these keys:
{{
  "optimized_prompt": "...",
  "feedback_text": "...",
  "rationale": "..."
}}

Do NOT include anything before or after the JSON.

ORIGINAL_PROMPT:
{lite_model_prompt}

LITE_MODEL_OUTPUT:
{lite_model_output}
"""

    def _call():
        response = thinking_model.generate_content(optimization_prompt)
        return getattr(response, "text", str(response))

    raw_text = call_with_retry(_call).strip()

    data = _parse_json_object(raw_text)
    if data is None:
        # if parsing fails, keep the original prompt
        print("Could not parse JSON from thinking model response.")
        print("Raw thinking model response:\n", raw_text)
        return {
            "optimized_prompt": lite_model_prompt,
            "feedback_text": (
                "Could not parse JSON from thinking model. "
                "Keeping the original prompt."
            ),
            "rationale": (
                "JSON parsing failed, so the prompt was not changed in this iteration."
            ),
        }

    # Only accept a usable (non-empty string) replacement prompt.
    optimized_prompt = data.get("optimized_prompt")
    if not isinstance(optimized_prompt, str) or not optimized_prompt.strip():
        optimized_prompt = lite_model_prompt

    return {
        "optimized_prompt": optimized_prompt,
        "feedback_text": data.get("feedback_text", ""),
        "rationale": data.get("rationale", ""),
    }


def _parse_json_object(raw_text: str):
    """Return the first JSON object found in ``raw_text``, or None.

    Candidates are tried in order: the whole text, the contents of a
    fenced code block (with or without a ``json`` tag), and finally the
    span from the first ``{`` to the last ``}`` (covers JSON wrapped in
    prose). A candidate that parses to anything other than a dict is skipped.
    """
    candidates = [raw_text]

    fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_text, re.IGNORECASE)
    if fenced:
        candidates.append(fenced.group(1).strip())

    start, end = raw_text.find("{"), raw_text.rfind("}")
    if start != -1 and end > start:
        candidates.append(raw_text[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


In [3]:
def run_optimization_loop(initial_prompt: str, num_turns: int = 5):
    """
    Iteratively refine a prompt for the lite model.

    Each of the ``num_turns`` rounds:
      1. sends the current prompt to the lite model and prints its output,
      2. asks the thinking model for feedback, a rationale, and an
         improved prompt, printing all three,
      3. carries the improved prompt into the next round.

    Progress and the final prompt are printed; nothing is returned.
    """
    rule = "----------------------------------------"

    def show(heading, body):
        # Print a heading followed by the body framed between two rules.
        print(heading)
        print(rule)
        print(body)
        print(rule)

    active_prompt = initial_prompt

    print("=== START PROMPT OPTIMIZATION (healthy recipes) ===\n")

    for turn in range(1, num_turns + 1):
        print(f"\n--- TURN {turn} ---")

        # prompt going into this round
        show("\nCurrent prompt for lite model:", active_prompt)

        # step 1: lite model answer
        print("\n[1] Calling lite_model...")
        answer = generate_with_lite(active_prompt)
        show("Lite model output:", answer)

        # step 2: critique + rewritten prompt from the thinking model
        print("\n[2] Getting feedback and optimized prompt from thinking_model...")
        review = get_feedback_and_optimized_prompt(active_prompt, answer)

        show("\nFeedback:", review["feedback_text"])
        show("\nRationale:", review["rationale"])
        show("\nNew optimized prompt for next turn:", review["optimized_prompt"])

        # the rewritten prompt becomes the input of the next round
        active_prompt = review["optimized_prompt"]

    print("\n=== END OF OPTIMIZATION ===")
    show("\nFinal optimized prompt:", active_prompt)

# Starting prompt given to the lite model on the first turn; later turns use
# the rewritten prompt produced by the thinking model.
initial_prompt = "Suggest a healthy dinner recipe that is high in protein."

# Number of optimization cycles; each cycle makes one lite-model request and
# one thinking-model request.
num_iterations = 5

# Run the loop; progress and the final prompt are printed to the cell output.
run_optimization_loop(initial_prompt, num_iterations)

=== START PROMPT OPTIMIZATION (healthy recipes) ===


--- TURN 1 ---

Current prompt for lite model:
----------------------------------------
Suggest a healthy dinner recipe that is high in protein.
----------------------------------------

[1] Calling lite_model...
Lite model output:
----------------------------------------
## Grilled Salmon with Quinoa & Roasted Asparagus

This recipe is packed with protein from the salmon and quinoa, and also provides healthy fats, fiber, and vitamins from the vegetables.

**Yields:** 2 servings
**Prep time:** 15 minutes
**Cook time:** 20 minutes

**Ingredients:**

*   **For the Salmon:**
    *   2 salmon fillets (4-6 oz each), skin on or off, your preference
    *   1 tbsp olive oil
    *   1 tbsp lemon juice
    *   1 clove garlic, minced
    *   Salt and black pepper to taste
    *   Optional: Fresh dill or parsley for garnish
*   **For the Quinoa:**
    *   1 cup quinoa, rinsed
    *   2 cups water or vegetable broth
    *   Salt to taste
*   **



BadRequest: 400 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?%24alt=json%3Benum-encoding%3Dint: API key expired. Please renew the API key.