In [4]:
# Fetch the Gemini API key stored under the name 'GOOGLE_API_KEY' via
# google.colab's `userdata` helper (this cell therefore expects to run in Colab).
from google.colab import userdata
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')
# Hand the key to the google.generativeai client before any model is created.
import google.generativeai as genai
genai.configure(api_key=GOOGLE_API_KEY)

import json
import re
import time
from typing import Dict, Any

from tenacity import (
    retry,
    wait_exponential,
    stop_after_attempt,
    retry_if_exception_type,
)
import google.generativeai.types as genai_types
from google.api_core.exceptions import TooManyRequests


# --- MODEL SETUP --------------------------------------------------------------
# Both models are module-level globals; the functions below reference them by
# name rather than receiving them as arguments.

# Judge/"thinking" model ('gemini-2.5-pro'): called by
# get_feedback_and_optimized_prompt to grade the output and rewrite the prompt.
gemini_model_thinking = genai.GenerativeModel('gemini-2.5-pro')

# Student/"lite" model ('gemini-2.0-flash-lite'): called by
# run_optimization_loop to perform the actual translation each turn.
gemini_model_lite = genai.GenerativeModel('gemini-2.0-flash-lite')
# ------------------------------------------------------------------------------


# ==============================
# Prompt optimization function
# ==============================

def _parse_optimizer_json(raw_text: str) -> Dict[str, Any]:
    """Decode the JSON object contained in the thinking model's reply.

    Candidates are tried in this order, and the first one that decodes to a
    JSON object (a ``dict``) wins:

    1. the contents of the first Markdown code fence, with or without a
       ``json`` language tag and regardless of LF/CRLF line endings;
    2. the reply as a whole;
    3. the span from the first ``{`` to the last ``}``, which covers replies
       that wrap the object in extra prose.

    Args:
        raw_text: The text returned by the model.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If no candidate decodes to a JSON object.
    """
    candidates = []

    fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_text, re.IGNORECASE)
    if fenced:
        candidates.append(fenced.group(1))

    candidates.append(raw_text)

    first_brace = raw_text.find("{")
    last_brace = raw_text.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(raw_text[first_brace:last_brace + 1])

    last_error = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError as exc:
            last_error = exc
            continue
        # A top-level list/string/number is valid JSON but not the keyed
        # object the caller needs, so keep looking.
        if isinstance(parsed, dict):
            return parsed

    raise ValueError(
        "Model response did not contain a valid JSON object."
    ) from last_error


def get_feedback_and_optimized_prompt(
    lite_model_prompt: str,
    lite_model_output: str,
    task_input: str,
    task_expected_output: str
) -> Dict[str, str]:
    """
    Ask gemini_model_thinking to grade the lite model's output and propose a
    better prompt.

    Args:
        lite_model_prompt: The instruction prompt that was given to the lite
            model (without the task input appended).
        lite_model_output: The text the lite model produced.
        task_input: The Croatian source sentence.
        task_expected_output: The reference English translation.

    Returns:
        A dict with the keys ``optimized_prompt``, ``feedback_text``,
        ``rationale`` and ``grade``, all strings. A key that is missing from
        the model's JSON, or whose value is not a non-blank string, is
        replaced by a fallback (the original prompt for ``optimized_prompt``).
        If the call or the parsing fails for any reason, the error is printed
        and a dict carrying the original prompt and grade ``"Error"`` is
        returned instead of raising.
    """

    optimization_instruction = f"""
    You are a prompt optimizer (Gemini 2.5 Pro). Your task is to evaluate a
    `lite_model_output` (from Gemini 2.0 Flash-Lite) against a given task.

    Task: Croatian to English Technical Translation.
    Evaluation Criteria:
    1. Accuracy (Correct translation of meaning).
    2. Terminology Preservation (e.g., 'funkcija' MUST be translated as 'function').
    3. Output Format (ONLY the English translation text should be returned, no extra commentary).

    Based on this evaluation, provide constructive feedback and suggest an
    `optimized_prompt` for the Lite model to produce a better output.

    Your output MUST be in JSON format with the following keys:
    - `optimized_prompt`: The new, improved prompt for the lite model.
    - `feedback_text`: Detailed feedback (1-3 sentences) on the Lite model's output and how the new prompt addresses the issues.
    - `rationale`: Explanation of the reasoning behind the optimization, focusing on why the changes improve the chance of success with a smaller model.
    - `grade`: A simple qualitative assessment: 'Perfect', 'Good', 'Needs Improvement'.

    --- Input for Evaluation ---
    Task Input (Croatian): "{task_input}"
    Task Expected Output (English): "{task_expected_output}"
    Original Prompt: "{lite_model_prompt}"
    Lite Model Output: "{lite_model_output}"

    Please provide your optimized prompt, feedback, and rationale in JSON format.
    """

    def _extract_retry_after_seconds(exc: Exception) -> float | None:
        """Return the delay from a 'Please retry in <N>s' message, if present."""
        message = str(exc)
        match = re.search(r"Please retry in ([0-9.]+)s", message)
        if match:
            try:
                return float(match.group(1))
            except ValueError:
                # The character class also matches strings such as "1.2.3".
                return None
        return None

    # Up to 5 attempts with exponential backoff clamped to 4-10 seconds; the
    # final failure is re-raised to the caller below.
    # NOTE(review): BlockedPromptException is retried as well; a blocked prompt
    # may fail identically on every attempt - confirm this retry is useful.
    @retry(
        wait=wait_exponential(multiplier=1, min=4, max=10),
        stop=stop_after_attempt(5),
        retry=retry_if_exception_type(
            (genai_types.BlockedPromptException, TooManyRequests)
        ),
        reraise=True,
    )
    def call_gemini_model_thinking_with_retry(instruction: str):
        try:
            return gemini_model_thinking.generate_content(instruction)
        except genai_types.BlockedPromptException as e:
            print(f"Caught BlockedPromptException: {e}. Retrying...", flush=True)
            raise
        except TooManyRequests as e:
            # Honour the delay named in the error message (capped at 60 s)
            # before re-raising; tenacity then adds its own backoff on top.
            retry_after = _extract_retry_after_seconds(e)
            if retry_after is not None:
                capped_delay = min(retry_after, 60.0)
                time.sleep(capped_delay)
            raise

    try:
        response = call_gemini_model_thinking_with_retry(optimization_instruction)
        raw_text = getattr(response, "text", str(response))
        optimization_results = _parse_optimizer_json(raw_text)
    except Exception as e:
        # Deliberate best-effort boundary: a failed optimization step must not
        # abort the surrounding loop, so report it and keep the current prompt.
        print(f"An error occurred during optimization: {e}", flush=True)
        return {
            "optimized_prompt": lite_model_prompt,
            "feedback_text": f"Optimization failed: {e}",
            "rationale": "Optimization step skipped due to error.",
            "grade": "Error",
        }

    def _text_or_default(key: str, default: str) -> str:
        # JSON null, non-string values and blank strings would otherwise flow
        # into the next turn's prompt and the caller's string slicing.
        value = optimization_results.get(key)
        if isinstance(value, str) and value.strip():
            return value
        return default

    return {
        "optimized_prompt": _text_or_default("optimized_prompt", lite_model_prompt),
        "feedback_text": _text_or_default("feedback_text", "Could not parse feedback."),
        "rationale": _text_or_default("rationale", "Could not parse rationale."),
        "grade": _text_or_default("grade", "Error"),
    }


# ==============================
# Streaming optimization loop
# (Adapted to include task-specific input)
# ==============================

def run_optimization_loop(
    initial_prompt: str,
    task_input: str,
    task_expected_output: str,
    num_turns: int = 5,
):
    """
    Alternate between the lite model and the thinking model for `num_turns` turns.

    Each turn sends the current prompt plus the Croatian text to
    gemini_model_lite, passes the reply to get_feedback_and_optimized_prompt,
    prints the grade/feedback/rationale, and adopts the returned prompt for the
    following turn. A one-line-per-turn summary is printed at the end.
    Nothing is returned; all results are reported on stdout.
    """
    divider = "------------------------------------------------"

    def say(*parts):
        # Every progress line is flushed immediately so notebook output streams.
        print(*parts, flush=True)

    prompt_in_use = initial_prompt
    turn_log = []

    for offset in range(num_turns):
        turn_number = offset + 1
        say(f"--- Optimization Turn {turn_number} ---")

        # The lite model sees the instruction prompt followed by the quoted input.
        lite_request = f'{prompt_in_use}\n\nCroatian Text: "{task_input}"'

        # Step 1: show exactly what is sent to the lite model.
        say("Lite Model's Full Input Prompt:")
        say(divider)
        say(lite_request)
        say(divider)

        # Step 2: obtain and show the lite model's answer.
        lite_reply = gemini_model_lite.generate_content(lite_request)
        lite_answer = getattr(lite_reply, "text", "").strip()

        say("\nLite Model Output:")
        say(lite_answer)
        print()

        # Step 3: have the thinking model review the answer; only the
        # instruction part of the prompt is passed, not the appended input.
        review = get_feedback_and_optimized_prompt(
            lite_model_prompt=prompt_in_use,
            lite_model_output=lite_answer,
            task_input=task_input,
            task_expected_output=task_expected_output,
        )

        verdict = review.get("grade", "N/A")
        critique = review.get("feedback_text", "")
        reasoning = review.get("rationale", "")
        next_prompt = review.get("optimized_prompt", prompt_in_use)

        say("--- Feedback from Gemini 2.5 Pro ---")
        say(f"Grade: {verdict}")
        say("Feedback:")
        say(critique)
        say("Rationale:")
        say(reasoning)
        say("New Optimized Prompt (for next turn):", next_prompt)
        say(divider)

        # Record the prompt that produced this turn's output, then move on
        # to the suggested replacement.
        turn_log.append(
            {
                "turn": turn_number,
                "prompt": prompt_in_use,
                "output": lite_answer,
                "grade": verdict,
                "feedback": critique,
            }
        )
        prompt_in_use = next_prompt

    say("\n--- OPTIMIZATION SUMMARY ---")
    for entry in turn_log:
        say(f"Turn {entry['turn']} | Grade: {entry['grade']} | Prompt: {entry['prompt'][:70]}...")

# -------------------------------------------------------------
# EXECUTION SETUP
# -------------------------------------------------------------

# Step 1 - the task: a Croatian source sentence and its reference translation.
# NOTE(review): "u funkcija" looks ungrammatical (the locative singular would
# be "u funkciji"); confirm whether the base form is used here on purpose.
TASK_INPUT_CROATIAN = "Nadzorna ploča prikazuje trenutno opterećenje sustava i greške u funkcija obrade podataka."
TASK_EXPECTED_ENGLISH = "The dashboard shows the current system load and errors in the data processing function."

# Step 2 - the starting instruction prompt given to the lite model on turn 1.
INITIAL_PROMPT = (
    "Translate the following Croatian technical sentence into English. "
    "Do not include any commentary, explanations, or prefixes. Just output the translation."
)

# Step 3 - how many lite-model/thinking-model rounds to run.
NUM_ITERATIONS = 5

# Step 4 - start the loop; progress and the final summary are printed to stdout.
run_optimization_loop(
    INITIAL_PROMPT,
    TASK_INPUT_CROATIAN,
    TASK_EXPECTED_ENGLISH,
    num_turns=NUM_ITERATIONS,
)

--- Optimization Turn 1 ---
Lite Model's Full Input Prompt:
------------------------------------------------
Translate the following Croatian technical sentence into English. Do not include any commentary, explanations, or prefixes. Just output the translation.

Croatian Text: "Nadzorna ploča prikazuje trenutno opterećenje sustava i greške u funkcija obrade podataka."
------------------------------------------------

Lite Model Output:
The dashboard displays the current system load and errors in the data processing functions.

--- Feedback from Gemini 2.5 Pro ---
Grade: Good
Feedback:
The lite model's translation was accurate in meaning but incorrectly pluralized the key term 'function' to 'functions'. The optimized prompt adds an explicit rule and a one-shot example to guide the model in correctly preserving the grammatical number of specific technical terms. This direct guidance significantly increases the chance of a precise translation.
Rationale:
Lite models benefit from highly ex