In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(root_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/multilingual-translation/Translation Data Files/part_5.jsonl
/kaggle/input/multilingual-translation/Translation Data Files/part_2.jsonl
/kaggle/input/multilingual-translation/Translation Data Files/part_4.jsonl
/kaggle/input/multilingual-translation/Translation Data Files/part_6.jsonl
/kaggle/input/multilingual-translation/Translation Data Files/part_3.jsonl
/kaggle/input/multilingual-translation/Translation Data Files/part_1.jsonl
/kaggle/input/multilingual-translation/Translation Data Files/api_keys/part_6_api_key.txt
/kaggle/input/multilingual-translation/Translation Data Files/api_keys/part_2_api_key.txt
/kaggle/input/multilingual-translation/Translation Data Files/api_keys/part_1_api_key.txt
/kaggle/input/multilingual-translation/Translation Data Files/api_keys/part_5_api_key.txt
/kaggle/input/multilingual-translation/Translation Data Files/api_keys/part_4_api_key.txt
/kaggle/input/multilingual-translation/Translation Data Files/api_keys/part_3_api_key.txt


In [None]:
# Enhanced Mathematical Translation System using Gemini API
# Translates math problems from English to French, Kazakh, Finnish, Lithuanian, and Turkish
# With improved prompts and Exact Answer translation

import os
import re
import time
import json
import queue
import random
import logging
import threading
import traceback
from tqdm import tqdm
from collections import deque
from datetime import datetime, timezone
from google import genai
from typing import Optional, Dict, Any, List, Tuple

# ---------------------------
# Logging Configuration
# ---------------------------
# Send log records to both a log file and the notebook output.
# force=True replaces any handlers already attached to the root logger; without
# it basicConfig() silently does nothing when the root logger is already
# configured, and neither the file nor the format below would take effect.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8 so non-ASCII text in log messages is written regardless
        # of the platform's default encoding.
        logging.FileHandler("math_translation_system.log", encoding="utf-8"),
        logging.StreamHandler()
    ],
    force=True
)

# ---------------------------
# Load API Keys
# ---------------------------
# Read the API key material for this data part and publish it through the
# GEMINI_API_KEYS environment variable. utf-8-sig decodes plain UTF-8 and also
# drops a leading byte-order mark, which would otherwise become part of the key.
with open('/kaggle/input/multilingual-translation/Translation Data Files/api_keys/part_2_api_key.txt', 'r', encoding='utf-8-sig') as fh:
    keys = fh.read().strip()

if keys:
    os.environ['GEMINI_API_KEYS'] = keys
    logging.info("Loaded API keys successfully")
else:
    # Make the empty-file case visible instead of silently leaving the
    # environment variable unset.
    logging.warning("API key file is empty; GEMINI_API_KEYS was not set")

# ---------------------------
# Gemini API Manager (reusing from original code with modifications)
# ---------------------------
class GeminiTranslationApiManager:
    """
    Manages multiple Gemini API keys with rotation and rate limiting for translation tasks.

    All API calls go through one background worker thread: generate_content()
    enqueues a request and blocks until the worker posts the outcome on a
    per-request result queue. After each call the worker sleeps for
    ``rate_limit_delay`` seconds, and it rotates to another key once the active
    key has reached ``calls_per_day``.

    Usage counters live in memory only and are never reset by this class, so the
    "daily" budget is effectively a budget for the lifetime of the instance.
    """

    def __init__(self, api_keys, calls_per_day=1000, rate_limit_delay=5):
        """
        Create the manager and start its worker thread.

        Args:
            api_keys: Iterable of API key strings, or a single string holding one
                or more keys separated by commas and/or whitespace.
            calls_per_day: Number of successful calls allowed per key.
            rate_limit_delay: Seconds the worker sleeps after each API call.

        Raises:
            ValueError: If no usable key is supplied.
        """
        # Materialise the keys once: a bare string would otherwise be iterated
        # character by character, and a generator would be exhausted by deque()
        # before usage_count is built.
        api_keys = self._normalize_keys(api_keys)
        if not api_keys:
            raise ValueError("api_keys must contain at least one key")

        self.api_keys = deque(api_keys)
        self.calls_per_day = calls_per_day
        self.rate_limit_delay = rate_limit_delay

        self.usage_count = {key: 0 for key in api_keys}
        self.current_key = self.api_keys[0]
        self.client = genai.Client(api_key=self.current_key)

        self.lock = threading.Lock()
        self.call_queue = queue.Queue()
        self.worker_thread = threading.Thread(target=self._process_queue, name="GeminiWorker")
        self.worker_thread.daemon = True
        self.worker_thread.start()

        logging.info(f"Translation API Manager initialized with {len(api_keys)} keys")

    @staticmethod
    def _normalize_keys(api_keys):
        """Return the keys as a list: split if given as one string, stripped, de-duplicated in order."""
        if api_keys is None:
            return []
        if isinstance(api_keys, str):
            candidates = re.split(r"[,\s]+", api_keys)
        else:
            candidates = list(api_keys)

        cleaned = []
        for key in candidates:
            if isinstance(key, str):
                key = key.strip()
            # Duplicates are dropped so total_available is not overstated.
            if key and key not in cleaned:
                cleaned.append(key)
        return cleaned

    def _rotate_key(self):
        """Rotate to the next available API key."""
        with self.lock:
            self.api_keys.rotate(1)
            self.current_key = self.api_keys[0]
            self.client = genai.Client(api_key=self.current_key)
            usage = self.usage_count.get(self.current_key, 0)
        logging.info(f"Rotated to new API key (usage: {usage})")

    def _find_available_key(self):
        """
        Find an API key that hasn't reached the daily limit.

        Returns:
            True if the active key (possibly after rotating) is below
            ``calls_per_day``; False if every key has reached it.
        """
        with self.lock:
            if self.usage_count.get(self.current_key, 0) < self.calls_per_day:
                return True

        initial_key = self.current_key
        for _ in range(len(self.api_keys)):
            self._rotate_key()
            with self.lock:
                if self.usage_count.get(self.current_key, 0) < self.calls_per_day:
                    return True
            # Back at the starting key: every key has been checked.
            if self.current_key == initial_key:
                return False
        return False

    def _serve_request(self, args, kwargs, result_queue):
        """
        Run one queued API call and post its outcome on ``result_queue``.

        Returns:
            Seconds the worker should sleep before taking the next request.
        """
        if not self._find_available_key():
            result_queue.put({"error": "All API keys have reached their daily limit"})
            return 10

        try:
            response = self.client.models.generate_content(*args, **kwargs)
        except Exception as api_exc:
            msg = str(api_exc).lower()
            if 'quota' in msg or 'rate limit' in msg:
                # Mark the key as used up so the next request rotates away from it.
                with self.lock:
                    self.usage_count[self.current_key] = self.calls_per_day
                logging.warning(f"API key reached quota/rate-limit: {api_exc}")
            result_queue.put({"error": str(api_exc)})
        else:
            # Count the call before handing the response back, so statistics read
            # by the caller right after the call already include it.
            with self.lock:
                self.usage_count[self.current_key] = self.usage_count.get(self.current_key, 0) + 1
            result_queue.put({"response": response})

        return self.rate_limit_delay

    def _process_queue(self):
        """Process the queue of API calls (runs forever on the worker thread)."""
        while True:
            request = self.call_queue.get()
            result_queue = None
            pause = 1
            try:
                args, kwargs, result_queue = request
                pause = self._serve_request(args, kwargs, result_queue)
            except Exception as e:
                logging.error(f"Queue processing error: {e}\n{traceback.format_exc()}")
                # Always answer the caller; otherwise generate_content() would
                # block until its timeout with no indication of what went wrong.
                if result_queue is not None:
                    result_queue.put({"error": f"Queue processing error: {e}"})
            finally:
                # Exactly one task_done() per get(), whatever happened above.
                self.call_queue.task_done()
            time.sleep(pause)

    def generate_content(self, *args, timeout=300, **kwargs):
        """
        Make an API call to generate content.

        Positional and keyword arguments are forwarded unchanged to the client's
        ``models.generate_content``.

        Args:
            timeout: Seconds to wait for the worker to post a result.

        Returns:
            The response object returned by the client.

        Raises:
            TimeoutError: If no result arrives within ``timeout`` seconds.
            Exception: If the worker reported an error for this request.
        """
        result_queue = queue.Queue()
        self.call_queue.put((args, kwargs, result_queue))

        try:
            result = result_queue.get(timeout=timeout)
        except queue.Empty:
            raise TimeoutError("Timed out waiting for API response")

        if "error" in result:
            raise Exception(result["error"])
        return result["response"]

    def get_usage_stats(self):
        """
        Get usage statistics for all keys.

        Returns:
            Dict with ``per_key`` (copy of the counters), ``total_used``,
            ``total_available`` and ``percent_used``.
        """
        with self.lock:
            per_key = dict(self.usage_count)
        total_used = sum(per_key.values())
        total_available = len(self.api_keys) * self.calls_per_day
        return {
            "per_key": per_key,
            "total_used": total_used,
            "total_available": total_available,
            "percent_used": (total_used / total_available) * 100 if total_available > 0 else 0
        }

# ---------------------------
# Enhanced Translation Prompts with More Guidance
# ---------------------------

# Prompt templates for the translation step, indexed as
# TRANSLATION_PROMPTS[text_type][language]. Every template contains exactly one
# placeholder, {text}, which translate_text() fills in with str.format().
# Because the templates go through str.format(), any literal brace added to a
# template must be doubled ({{ / }}).
# NOTE(review): the "question" and "exact_answer" templates tell the model both
# to keep numbers unchanged and to switch decimals to a comma (every language
# except Kazakh, which has no decimal rule) — confirm which rule should win.
TRANSLATION_PROMPTS = {
    # Problem statements: notation rules, a per-language vocabulary list,
    # formatting rules and quality checks.
    "question": {
        "French": """You are an expert French mathematics educator with extensive experience in translating academic mathematical content. Your task is to translate the following mathematical question from English to French with the highest professional standards.

TRANSLATION GUIDELINES:
1. MATHEMATICAL ELEMENTS:
   - Preserve ALL mathematical notation exactly: numbers, variables, equations, symbols (∑, ∫, ∂, √, etc.)
   - Keep mathematical expressions in their original LaTeX/ASCII format if present
   - Maintain the exact same mathematical structure and relationships

2. FRENCH MATHEMATICAL CONVENTIONS:
   - Use French decimal notation (comma instead of period): 3,14 instead of 3.14
   - Use proper French mathematical vocabulary:
     • "soit" for "let"
     • "tel que" for "such that"
     • "ensemble" for "set"
     • "fonction" for "function"
     • "équation" for "equation"
     • "résoudre" for "solve"
     • "calculer" for "calculate"
     • "déterminer" for "determine"
     • "montrer que" for "show that"
     • "démontrer" for "prove"

3. FORMATTING AND STRUCTURE:
   - Preserve the exact question structure (multiple choice options, parts a), b), c), etc.)
   - Keep the same level of mathematical formality
   - Maintain any emphasis (bold, italic) through appropriate French equivalents

4. QUALITY CHECKS:
   - Ensure the translation reads naturally in French
   - Verify no mathematical information is lost or altered
   - Confirm the difficulty level remains identical

English Question: {text}

French Translation:""",

        "Kazakh": """You are an expert Kazakh mathematics educator with extensive experience in translating academic mathematical content. Your task is to translate the following mathematical question from English to Kazakh with the highest professional standards.

TRANSLATION GUIDELINES:
1. MATHEMATICAL ELEMENTS:
   - Preserve ALL mathematical notation exactly: numbers, variables, equations, symbols (∑, ∫, ∂, √, etc.)
   - Keep mathematical expressions in their original LaTeX/ASCII format if present
   - Maintain the exact same mathematical structure and relationships

2. KAZAKH MATHEMATICAL CONVENTIONS:
   - Use proper Kazakh mathematical terminology:
     • "болсын" for "let"
     • "функция" for "function"
     • "теңдеу" for "equation"
     • "шешу" for "solve"
     • "есептеу" for "calculate"
     • "анықтау" for "determine"
     • "дәлелдеу" for "prove"
     • "көрсету" for "show"
     • "жиын" for "set"
     • "сан" for "number"

3. FORMATTING AND STRUCTURE:
   - Preserve the exact question structure (multiple choice options, parts a), b), c), etc.)
   - Keep the same level of mathematical formality
   - Use appropriate Kazakh sentence structure while maintaining clarity

4. QUALITY CHECKS:
   - Ensure the translation uses standard academic Kazakh
   - Verify no mathematical information is lost or altered
   - Confirm the difficulty level remains identical

English Question: {text}

Kazakh Translation:""",

        "Finnish": """You are an expert Finnish mathematics educator with extensive experience in translating academic mathematical content. Your task is to translate the following mathematical question from English to Finnish with the highest professional standards.

TRANSLATION GUIDELINES:
1. MATHEMATICAL ELEMENTS:
   - Preserve ALL mathematical notation exactly: numbers, variables, equations, symbols (∑, ∫, ∂, √, etc.)
   - Keep mathematical expressions in their original LaTeX/ASCII format if present
   - Maintain the exact same mathematical structure and relationships

2. FINNISH MATHEMATICAL CONVENTIONS:
   - Use Finnish decimal notation (comma instead of period): 3,14 instead of 3.14
   - Use proper Finnish mathematical vocabulary:
     • "olkoon" for "let"
     • "siten että" for "such that"
     • "joukko" for "set"
     • "funktio" for "function"
     • "yhtälö" for "equation"
     • "ratkaista" for "solve"
     • "laskea" for "calculate"
     • "määrittää" for "determine"
     • "osoittaa" for "show that"
     • "todistaa" for "prove"

3. FORMATTING AND STRUCTURE:
   - Preserve the exact question structure (multiple choice options, parts a), b), c), etc.)
   - Keep the same level of mathematical formality
   - Use appropriate Finnish partitive and genitive cases where needed

4. QUALITY CHECKS:
   - Ensure the translation follows Finnish academic writing standards
   - Verify no mathematical information is lost or altered
   - Confirm the difficulty level remains identical

English Question: {text}

Finnish Translation:""",

        "Lithuanian": """You are an expert Lithuanian mathematics educator with extensive experience in translating academic mathematical content. Your task is to translate the following mathematical question from English to Lithuanian with the highest professional standards.

TRANSLATION GUIDELINES:
1. MATHEMATICAL ELEMENTS:
   - Preserve ALL mathematical notation exactly: numbers, variables, equations, symbols (∑, ∫, ∂, √, etc.)
   - Keep mathematical expressions in their original LaTeX/ASCII format if present
   - Maintain the exact same mathematical structure and relationships

2. LITHUANIAN MATHEMATICAL CONVENTIONS:
   - Use Lithuanian decimal notation (comma instead of period): 3,14 instead of 3.14
   - Use proper Lithuanian mathematical vocabulary:
     • "tegu" or "tegul" for "let"
     • "toks kad" for "such that"
     • "aibė" for "set"
     • "funkcija" for "function"
     • "lygtis" for "equation"
     • "spręsti" for "solve"
     • "apskaičiuoti" for "calculate"
     • "nustatyti" for "determine"
     • "parodyti" for "show that"
     • "įrodyti" for "prove"

3. FORMATTING AND STRUCTURE:
   - Preserve the exact question structure (multiple choice options, parts a), b), c), etc.)
   - Keep the same level of mathematical formality
   - Use appropriate Lithuanian grammatical cases

4. QUALITY CHECKS:
   - Ensure the translation uses standard academic Lithuanian
   - Verify no mathematical information is lost or altered
   - Confirm the difficulty level remains identical

English Question: {text}

Lithuanian Translation:""",

        "Turkish": """You are an expert Turkish mathematics educator with extensive experience in translating academic mathematical content. Your task is to translate the following mathematical question from English to Turkish with the highest professional standards.

TRANSLATION GUIDELINES:
1. MATHEMATICAL ELEMENTS:
   - Preserve ALL mathematical notation exactly: numbers, variables, equations, symbols (∑, ∫, ∂, √, etc.)
   - Keep mathematical expressions in their original LaTeX/ASCII format if present
   - Maintain the exact same mathematical structure and relationships

2. TURKISH MATHEMATICAL CONVENTIONS:
   - Use Turkish decimal notation (comma instead of period): 3,14 instead of 3.14
   - Use proper Turkish mathematical vocabulary:
     • "olsun" for "let"
     • "öyle ki" for "such that"
     • "küme" for "set"
     • "fonksiyon" for "function"
     • "denklem" for "equation"
     • "çözmek" for "solve"
     • "hesaplamak" for "calculate"
     • "belirlemek" for "determine"
     • "göstermek" or "gösteriniz" for "show that"
     • "ispatlamak" or "kanıtlamak" for "prove"

3. FORMATTING AND STRUCTURE:
   - Preserve the exact question structure (multiple choice options, parts a), b), c), etc.)
   - Keep the same level of mathematical formality
   - Use appropriate Turkish suffixes and postpositions

4. QUALITY CHECKS:
   - Ensure the translation follows Turkish academic writing standards
   - Verify no mathematical information is lost or altered
   - Confirm the difficulty level remains identical

English Question: {text}

Turkish Translation:"""
    },

    # Worked solutions: same overall layout, plus a list of transitional phrases
    # for each language.
    # NOTE(review): the French template maps "We have" to "Nous avons" but also
    # asks for "on" instead of "nous" — confirm the intended style.
    "solution": {
        "French": """You are an expert French mathematics educator specializing in translating detailed mathematical solutions. Your task is to translate the following mathematical solution from English to French while maintaining complete accuracy and clarity.

TRANSLATION GUIDELINES:
1. MATHEMATICAL CONTENT:
   - Preserve ALL mathematical notation, formulas, and equations EXACTLY as written
   - Keep all numerical calculations unchanged (except decimal notation)
   - Maintain all intermediate steps and working

2. SOLUTION STRUCTURE:
   - Keep the exact same logical flow and step numbering
   - Translate transitional phrases appropriately:
     • "First," → "Premièrement," or "D'abord,"
     • "Next," → "Ensuite,"
     • "Therefore," → "Donc," or "Par conséquent,"
     • "Hence," → "D'où," or "Ainsi,"
     • "We have" → "Nous avons"
     • "It follows that" → "Il s'ensuit que"
     • "By substitution" → "Par substitution"
     • "Simplifying" → "En simplifiant"

3. FRENCH MATHEMATICAL STYLE:
   - Use "on" instead of "nous" for general mathematical statements
   - Use French decimal notation: 3,14 instead of 3.14
   - Properly translate mathematical reasoning phrases:
     • "Q.E.D." → "C.Q.F.D." (Ce qu'il fallait démontrer)
     • "which gives" → "ce qui donne"
     • "let us consider" → "considérons"

4. FORMATTING:
   - Maintain all formatting (bold, italics, underlining)
   - Keep equation numbering and references
   - Preserve any boxed or highlighted final answers

English Solution: {text}

French Translation:""",

        "Kazakh": """You are an expert Kazakh mathematics educator specializing in translating detailed mathematical solutions. Your task is to translate the following mathematical solution from English to Kazakh while maintaining complete accuracy and clarity.

TRANSLATION GUIDELINES:
1. MATHEMATICAL CONTENT:
   - Preserve ALL mathematical notation, formulas, and equations EXACTLY as written
   - Keep all numerical calculations unchanged
   - Maintain all intermediate steps and working

2. SOLUTION STRUCTURE:
   - Keep the exact same logical flow and step numbering
   - Translate transitional phrases appropriately:
     • "First," → "Біріншіден," or "Алдымен,"
     • "Next," → "Келесі,"
     • "Therefore," → "Сондықтан,"
     • "Hence," → "Демек,"
     • "We have" → "Бізде бар"
     • "It follows that" → "Бұдан шығады"
     • "By substitution" → "Ауыстыру арқылы"
     • "Simplifying" → "Ықшамдап"

3. KAZAKH MATHEMATICAL STYLE:
   - Use appropriate Kazakh mathematical language
   - Properly translate mathematical reasoning phrases
   - Maintain formal academic tone

4. FORMATTING:
   - Maintain all formatting (bold, italics, underlining)
   - Keep equation numbering and references
   - Preserve any boxed or highlighted final answers

English Solution: {text}

Kazakh Translation:""",

        "Finnish": """You are an expert Finnish mathematics educator specializing in translating detailed mathematical solutions. Your task is to translate the following mathematical solution from English to Finnish while maintaining complete accuracy and clarity.

TRANSLATION GUIDELINES:
1. MATHEMATICAL CONTENT:
   - Preserve ALL mathematical notation, formulas, and equations EXACTLY as written
   - Keep all numerical calculations unchanged (except decimal notation)
   - Maintain all intermediate steps and working

2. SOLUTION STRUCTURE:
   - Keep the exact same logical flow and step numbering
   - Translate transitional phrases appropriately:
     • "First," → "Ensiksi," or "Aluksi,"
     • "Next," → "Seuraavaksi,"
     • "Therefore," → "Siksi," or "Näin ollen,"
     • "Hence," → "Täten,"
     • "We have" → "Meillä on"
     • "It follows that" → "Tästä seuraa"
     • "By substitution" → "Sijoittamalla"
     • "Simplifying" → "Sieventämällä"

3. FINNISH MATHEMATICAL STYLE:
   - Use Finnish decimal notation: 3,14 instead of 3.14
   - Use passive voice where appropriate in Finnish
   - Properly translate mathematical reasoning phrases

4. FORMATTING:
   - Maintain all formatting (bold, italics, underlining)
   - Keep equation numbering and references
   - Preserve any boxed or highlighted final answers

English Solution: {text}

Finnish Translation:""",

        "Lithuanian": """You are an expert Lithuanian mathematics educator specializing in translating detailed mathematical solutions. Your task is to translate the following mathematical solution from English to Lithuanian while maintaining complete accuracy and clarity.

TRANSLATION GUIDELINES:
1. MATHEMATICAL CONTENT:
   - Preserve ALL mathematical notation, formulas, and equations EXACTLY as written
   - Keep all numerical calculations unchanged (except decimal notation)
   - Maintain all intermediate steps and working

2. SOLUTION STRUCTURE:
   - Keep the exact same logical flow and step numbering
   - Translate transitional phrases appropriately:
     • "First," → "Pirma," or "Pirmiausia,"
     • "Next," → "Toliau,"
     • "Therefore," → "Todėl,"
     • "Hence," → "Taigi,"
     • "We have" → "Turime"
     • "It follows that" → "Iš to seka"
     • "By substitution" → "Pakeitę"
     • "Simplifying" → "Suprastinę"

3. LITHUANIAN MATHEMATICAL STYLE:
   - Use Lithuanian decimal notation: 3,14 instead of 3.14
   - Use appropriate Lithuanian grammatical cases
   - Properly translate mathematical reasoning phrases

4. FORMATTING:
   - Maintain all formatting (bold, italics, underlining)
   - Keep equation numbering and references
   - Preserve any boxed or highlighted final answers

English Solution: {text}

Lithuanian Translation:""",

        "Turkish": """You are an expert Turkish mathematics educator specializing in translating detailed mathematical solutions. Your task is to translate the following mathematical solution from English to Turkish while maintaining complete accuracy and clarity.

TRANSLATION GUIDELINES:
1. MATHEMATICAL CONTENT:
   - Preserve ALL mathematical notation, formulas, and equations EXACTLY as written
   - Keep all numerical calculations unchanged (except decimal notation)
   - Maintain all intermediate steps and working

2. SOLUTION STRUCTURE:
   - Keep the exact same logical flow and step numbering
   - Translate transitional phrases appropriately:
     • "First," → "İlk olarak," or "Öncelikle,"
     • "Next," → "Sonra,"
     • "Therefore," → "Bu nedenle," or "Dolayısıyla,"
     • "Hence," → "Böylece,"
     • "We have" → "Elimizde"
     • "It follows that" → "Buradan çıkar ki"
     • "By substitution" → "Yerine koyarak"
     • "Simplifying" → "Sadeleştirerek"

3. TURKISH MATHEMATICAL STYLE:
   - Use Turkish decimal notation: 3,14 instead of 3.14
   - Use appropriate Turkish suffixes and verb forms
   - Properly translate mathematical reasoning phrases

4. FORMATTING:
   - Maintain all formatting (bold, italics, underlining)
   - Keep equation numbering and references
   - Preserve any boxed or highlighted final answers

English Solution: {text}

Turkish Translation:"""
    },

    # Short final answers: a compact rule list instead of the full guideline
    # sections used above.
    "exact_answer": {
        "French": """Translate the following mathematical answer to French. This is typically a number, expression, or short answer.

RULES:
- If it's a pure number or mathematical expression, keep it unchanged
- If it contains units, translate the units (e.g., "meters" → "mètres")
- If it contains words like "Yes/No", "True/False", translate them
- Use comma for decimals: 3.14 → 3,14
- Translate common mathematical terms if present

English Answer: {text}

French Answer:""",

        "Kazakh": """Translate the following mathematical answer to Kazakh. This is typically a number, expression, or short answer.

RULES:
- If it's a pure number or mathematical expression, keep it unchanged
- If it contains units, translate the units to Kazakh
- If it contains words like "Yes/No", "True/False", translate them ("Иә/Жоқ", "Дұрыс/Бұрыс")
- Translate common mathematical terms if present

English Answer: {text}

Kazakh Answer:""",

        "Finnish": """Translate the following mathematical answer to Finnish. This is typically a number, expression, or short answer.

RULES:
- If it's a pure number or mathematical expression, keep it unchanged
- If it contains units, translate the units (e.g., "meters" → "metriä")
- If it contains words like "Yes/No", "True/False", translate them ("Kyllä/Ei", "Tosi/Epätosi")
- Use comma for decimals: 3.14 → 3,14
- Translate common mathematical terms if present

English Answer: {text}

Finnish Answer:""",

        "Lithuanian": """Translate the following mathematical answer to Lithuanian. This is typically a number, expression, or short answer.

RULES:
- If it's a pure number or mathematical expression, keep it unchanged
- If it contains units, translate the units (e.g., "meters" → "metrai")
- If it contains words like "Yes/No", "True/False", translate them ("Taip/Ne", "Tiesa/Netiesa")
- Use comma for decimals: 3.14 → 3,14
- Translate common mathematical terms if present

English Answer: {text}

Lithuanian Answer:""",

        "Turkish": """Translate the following mathematical answer to Turkish. This is typically a number, expression, or short answer.

RULES:
- If it's a pure number or mathematical expression, keep it unchanged
- If it contains units, translate the units (e.g., "meters" → "metre")
- If it contains words like "Yes/No", "True/False", translate them ("Evet/Hayır", "Doğru/Yanlış")
- Use comma for decimals: 3.14 → 3,14
- Translate common mathematical terms if present

English Answer: {text}

Turkish Answer:"""
    }
}

# ---------------------------
# Enhanced Verification Prompts
# ---------------------------

# Prompt templates for the verification step, indexed by text type. Each
# template takes three str.format() placeholders: {language}, {original} and
# {translation}. The model is told to reply with the single word "APPROVED" or
# with a complete corrected translation; verify_translation() looks for that
# marker in the reply. Only "question" and "solution" have an entry here.
VERIFICATION_PROMPTS = {
    # Review of a translated problem statement.
    "question": """You are an expert bilingual mathematics educator fluent in both English and {language}.

Review this translation for accuracy and proficiency:

Original English: {original}

{language} Translation: {translation}

EVALUATION CRITERIA:
1. Mathematical Accuracy [Critical]:
   - Are ALL numbers, variables, and symbols preserved exactly?
   - Are mathematical relationships maintained?
   - Is the problem's difficulty unchanged?

2. Terminology [Important]:
   - Is standard {language} mathematical vocabulary used?
   - Are technical terms correctly translated?

3. Clarity [Important]:
   - Is the translation as clear as the original?
   - Does it flow naturally in {language}?

4. Completeness [Critical]:
   - Is ALL information from the original present?
   - Are there any additions or omissions?

5. Conventions [Important]:
   - Does it follow {language} mathematical writing conventions?
   - Is decimal notation appropriate for {language}?

RESPONSE INSTRUCTIONS:
- If the translation is PERFECT in all aspects, respond with ONLY: "APPROVED"
- If ANY corrections are needed, provide ONLY the complete corrected translation without any explanation

Your response:""",

    # Review of a translated worked solution; adds logical-flow criteria.
    "solution": """You are an expert bilingual mathematics educator fluent in both English and {language}.

Review this solution translation for accuracy and proficiency:

Original English: {original}

{language} Translation: {translation}

EVALUATION CRITERIA:
1. Mathematical Accuracy [Critical]:
   - Are ALL equations and calculations preserved exactly?
   - Are all steps in the correct order?
   - Are numerical results unchanged (except decimal notation)?

2. Logical Flow [Critical]:
   - Is the reasoning sequence maintained?
   - Are all cause-effect relationships preserved?
   - Are transitions properly translated?

3. Terminology [Important]:
   - Is standard {language} mathematical vocabulary used?
   - Are proof/solution phrases correctly translated?

4. Completeness [Critical]:
   - Are ALL steps from the original present?
   - Is the final answer clearly indicated?
   - Are all intermediate results included?

5. Style [Important]:
   - Does it follow {language} mathematical solution conventions?
   - Is the formal tone appropriate?

RESPONSE INSTRUCTIONS:
- If the translation is PERFECT in all aspects, respond with ONLY: "APPROVED"
- If ANY corrections are needed, provide ONLY the complete corrected translation without any explanation

Your response:"""
}

# ---------------------------
# Translation Functions
# ---------------------------

def translate_text(api_manager: GeminiTranslationApiManager,
                  text: str,
                  text_type: str,
                  language: str,
                  max_retries: int = 3) -> Tuple[bool, str]:
    """
    Translate text to the specified language.

    Args:
        api_manager: Manager used to send the request.
        text: English source text, inserted into the prompt template.
        text_type: Top-level key of TRANSLATION_PROMPTS selecting the template family.
        language: Target language; must be a key under that text type.
        max_retries: Maximum number of API attempts.

    Returns: (success, translated_text)
        On failure, the second element is a short error description instead of
        a translation.
    """
    if text_type not in TRANSLATION_PROMPTS:
        return False, f"Unknown text type: {text_type}"

    if language not in TRANSLATION_PROMPTS[text_type]:
        return False, f"Unknown language: {language}"

    prompt = TRANSLATION_PROMPTS[text_type][language].format(text=text)

    for attempt in range(max_retries):
        try:
            response = api_manager.generate_content(
                model="gemini-2.5-flash-lite",
                contents=prompt,
                timeout=300
            )

            # Extract response text
            if hasattr(response, 'text'):
                response_text = response.text
            elif isinstance(response, dict) and 'text' in response:
                response_text = response['text']
            else:
                response_text = str(response)

            # The text may be None (NOTE(review): the SDK reportedly returns None
            # when a response has no text part — confirm); treat that as an empty
            # reply rather than letting .strip() raise AttributeError.
            response_text = str(response_text).strip() if response_text else ""

            if response_text:
                logging.info(f"Translation successful for {language} {text_type} (attempt {attempt + 1})")
                return True, response_text

            # Leave a trace for empty replies; they were previously retried silently.
            logging.warning(f"Empty translation response for {language} {text_type} (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                time.sleep(2 + attempt)

        except Exception as e:
            logging.error(f"Translation error for {language} {text_type}: {e}")
            # Back off a little longer after each failed attempt.
            if attempt < max_retries - 1:
                time.sleep(5 + attempt * 2)

    return False, f"Translation failed after {max_retries} attempts"

def verify_translation(api_manager: GeminiTranslationApiManager,
                      original: str,
                      translation: str,
                      text_type: str,
                      language: str,
                      max_retries: int = 2) -> Tuple[bool, str]:
    """
    Verify and potentially correct a translation.

    Args:
        api_manager: Manager used to send the request.
        original: English source text.
        translation: Candidate translation to review.
        text_type: Key of VERIFICATION_PROMPTS selecting the review template.
        language: Target language name inserted into the prompt.
        max_retries: Maximum number of API attempts.

    Returns: (is_approved, final_translation)
        The flag is True when the reviewer answered at all: either it approved
        the translation (returned unchanged) or it supplied a replacement
        (returned instead). It is False when no usable answer was obtained or
        the text type has no review template; the input translation is then
        returned unchanged.
    """
    template = VERIFICATION_PROMPTS.get(text_type)
    if template is None:
        # Mirror translate_text(): an unknown text type is reported, not raised
        # as a KeyError from the prompt lookup.
        logging.warning(f"No verification prompt for text type: {text_type}; using original translation")
        return False, translation

    prompt = template.format(
        language=language,
        original=original,
        translation=translation
    )

    for attempt in range(max_retries):
        try:
            response = api_manager.generate_content(
                model="gemini-2.5-flash-lite",
                contents=prompt,
                timeout=300
            )

            # Extract response text
            if hasattr(response, 'text'):
                response_text = response.text
            elif isinstance(response, dict) and 'text' in response:
                response_text = response['text']
            else:
                response_text = str(response)

            # The text may be None (NOTE(review): the SDK reportedly returns None
            # when a response has no text part — confirm); treat that as an empty
            # reply rather than letting .strip() raise AttributeError.
            response_text = str(response_text).strip() if response_text else ""

            if response_text:
                if "APPROVED" in response_text:
                    logging.info(f"Translation approved for {language} {text_type}")
                    return True, translation
                else:
                    # Use the corrected version
                    logging.info(f"Translation corrected for {language} {text_type}")
                    return True, response_text

            logging.warning(f"Empty verification response for {language} {text_type} (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                time.sleep(2 + attempt)

        except Exception as e:
            logging.error(f"Verification error for {language} {text_type}: {e}")
            if attempt < max_retries - 1:
                time.sleep(3 + attempt * 2)

    # If verification fails, return the original translation
    logging.warning(f"Verification failed for {language} {text_type}, using original translation")
    return False, translation

def process_single_item(api_manager: GeminiTranslationApiManager,
                       item: Dict[str, Any],
                       languages: List[str]) -> Dict[str, Any]:
    """
    Add translations of an item's question, solution and exact answer for each language.

    The item is modified in place and also returned. For every language the keys
    '<language> Question', '<language> Solution', '<language> Exact Answer' and
    '<language>_translation_metadata' are written. Items lacking a question or a
    solution are returned untouched after a warning.
    """
    # Source fields may appear under several spellings; take the first non-empty one.
    src_question = item.get('Question', '') or item.get('question', '')
    src_solution = item.get('Solution', '') or item.get('solution', '')
    src_answer = item.get('Exact Answer', '') or item.get('ExactAnswer', '') or item.get('exact_answer', '')

    if not (src_question and src_solution):
        logging.warning("Missing question or solution in item")
        return item

    def _translate_and_verify(text, kind, target):
        """Translate then verify; returns (value to store, verified flag)."""
        ok, candidate = translate_text(api_manager, text, kind, target)
        if not ok:
            # On failure `candidate` carries whatever translate_text reported
            return f"[Translation failed: {candidate}]", False
        approved, checked_text = verify_translation(api_manager, text, candidate, kind, target)
        return checked_text, approved

    for language in languages:
        logging.info(f"Processing {language} translations...")

        # Question, followed by a short pause between API calls
        question_value, question_verified = _translate_and_verify(src_question, "question", language)
        item[f'{language} Question'] = question_value
        time.sleep(1)

        # Solution, followed by a short pause between API calls
        solution_value, solution_verified = _translate_and_verify(src_solution, "solution", language)
        item[f'{language} Solution'] = solution_value
        time.sleep(1)

        # Exact answer: translated without a verification pass
        answer_key = f'{language} Exact Answer'
        answer_ok = None  # stays None when the item has no exact answer
        if not src_answer:
            item[answer_key] = ""
        else:
            answer_ok, answer_value = translate_text(api_manager, src_answer, "exact_answer", language)
            if answer_ok:
                item[answer_key] = answer_value
                logging.info(f"Exact answer translated for {language}: {src_answer} -> {answer_value}")
            else:
                # Keep the source answer when translation fails
                item[answer_key] = src_answer
                logging.warning(f"Exact answer translation failed for {language}, keeping original")

        # Per-language bookkeeping
        item[f'{language}_translation_metadata'] = {
            'question_verified': question_verified,
            'solution_verified': solution_verified,
            'exact_answer_translated': answer_ok,
            'timestamp': datetime.now(timezone.utc).isoformat()
        }

        # Pause before moving on to the next language
        time.sleep(1)

    return item

def process_jsonl_file(api_manager: GeminiTranslationApiManager,
                      input_file_path: str,
                      output_file_path: str,
                      languages: List[str] = None):
    """
    Translate every record of a JSONL file and write the results to a new JSONL file.

    Each non-blank input line is parsed as JSON, passed to process_single_item and
    written to the output as one JSON line. If a record cannot be processed or
    written, its original line is copied to the output instead, so every input
    record yields exactly one output line. Blank lines are skipped.

    Args:
        api_manager: Passed through to process_single_item; its get_usage_stats()
            is read for progress logging.
        input_file_path: Path of the UTF-8 JSONL file to read.
        output_file_path: Path of the UTF-8 JSONL file to (over)write.
        languages: Target languages; defaults to French, Kazakh, Finnish,
            Lithuanian and Turkish.

    Returns:
        None. Progress goes to the logging module and a summary is printed.
    """
    if languages is None:
        languages = ["French", "Kazakh", "Finnish", "Lithuanian", "Turkish"]

    logging.info(f"Starting translation process for: {input_file_path}")
    logging.info(f"Target languages: {', '.join(languages)}")

    # One pass to size the progress bar (all lines) and count real records (non-blank)
    total_lines = 0
    total_objects = 0
    with open(input_file_path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            total_lines += 1
            if raw_line.strip():
                total_objects += 1

    logging.info(f"Found {total_objects} objects to process")

    # Processing statistics
    processed_count = 0
    successful_translations = {
        lang: {'questions': 0, 'solutions': 0, 'exact_answers': 0}
        for lang in languages
    }

    def _translated_ok(record, key):
        """True when `key` is present and does not hold a failure placeholder."""
        # str() mirrors analyze_translations and tolerates non-string values
        return key in record and not str(record[key]).startswith('[Translation failed')

    with open(input_file_path, 'r', encoding='utf-8') as input_file, \
         open(output_file_path, 'w', encoding='utf-8') as output_file:

        for line_num, line in enumerate(tqdm(input_file, total=total_lines, desc="Translating Math Problems"), 1):
            if not line.strip():
                # Blank lines are not records; do not report them as JSON errors
                continue

            written = False  # becomes True once this record's output line exists
            try:
                obj = json.loads(line.strip())

                # Process translations
                obj = process_single_item(api_manager, obj, languages)

                # Write the result first so later bookkeeping cannot lose it
                output_file.write(json.dumps(obj, ensure_ascii=False) + '\n')
                written = True
                processed_count += 1
                # Each record is slow to produce; flush so it reaches the file promptly
                output_file.flush()

                # Update statistics
                for lang in languages:
                    if _translated_ok(obj, f'{lang} Question'):
                        successful_translations[lang]['questions'] += 1
                    if _translated_ok(obj, f'{lang} Solution'):
                        successful_translations[lang]['solutions'] += 1
                    # NOTE: an answer kept in its original form after a failed
                    # translation is non-empty and therefore also counted here.
                    if f'{lang} Exact Answer' in obj and obj[f'{lang} Exact Answer']:
                        successful_translations[lang]['exact_answers'] += 1

                # Progress logging
                if processed_count % 5 == 0:
                    stats = api_manager.get_usage_stats()
                    logging.info(f"Progress: {processed_count}/{total_objects}")
                    logging.info(f"API Usage: {stats['total_used']}/{stats['total_available']} ({stats['percent_used']:.1f}%)")

                    # Log translation success rates
                    for lang in languages:
                        q_rate = (successful_translations[lang]['questions'] / processed_count) * 100
                        s_rate = (successful_translations[lang]['solutions'] / processed_count) * 100
                        a_rate = (successful_translations[lang]['exact_answers'] / processed_count) * 100
                        logging.info(f"{lang}: Questions {q_rate:.1f}%, Solutions {s_rate:.1f}%, Answers {a_rate:.1f}%")

            except Exception as e:
                logging.error(f"Line {line_num}: Processing error: {e}")
                traceback.print_exc()
                # Copy the original record only if nothing was written for it yet;
                # otherwise a failure in statistics/logging would duplicate the record.
                if not written:
                    try:
                        output_file.write(line if line.endswith('\n') else line + '\n')
                    except Exception as write_err:
                        logging.error(f"Line {line_num}: Could not write original record: {write_err}")
                continue

    # Final statistics
    print("\n" + "="*60)
    print("TRANSLATION PROCESS COMPLETED")
    print("="*60)
    print(f"Total processed: {processed_count}")
    print("\nTranslation Success Rates:")
    for lang in languages:
        q_rate = (successful_translations[lang]['questions'] / processed_count) * 100 if processed_count > 0 else 0
        s_rate = (successful_translations[lang]['solutions'] / processed_count) * 100 if processed_count > 0 else 0
        a_rate = (successful_translations[lang]['exact_answers'] / processed_count) * 100 if processed_count > 0 else 0
        print(f"{lang}:")
        print(f"  Questions: {successful_translations[lang]['questions']}/{processed_count} ({q_rate:.1f}%)")
        print(f"  Solutions: {successful_translations[lang]['solutions']}/{processed_count} ({s_rate:.1f}%)")
        print(f"  Exact Answers: {successful_translations[lang]['exact_answers']}/{processed_count} ({a_rate:.1f}%)")
    print(f"\nOutput saved to: {output_file_path}")

# ---------------------------
# Enhanced Analysis Functions
# ---------------------------

def analyze_translations(jsonl_file_path: str, languages: List[str] = None):
    """
    Print summary statistics and one sample per language for a translated JSONL file.

    For each language the function counts records whose '<lang> Question' and
    '<lang> Solution' do not start with '[Translation failed', records with a
    non-empty '<lang> Exact Answer', and records whose
    '<lang>_translation_metadata' marks both question and solution as verified.

    Args:
        jsonl_file_path: Path of the UTF-8 JSONL file to analyze.
        languages: Languages to report on; defaults to French, Kazakh, Finnish,
            Lithuanian and Turkish.

    Returns:
        None. Results are printed. A missing file is reported and the function
        returns early. Blank lines are ignored; lines that cannot be analyzed are
        reported and skipped.
    """
    if languages is None:
        languages = ["French", "Kazakh", "Finnish", "Lithuanian", "Turkish"]

    print(f"\nAnalyzing translations from: {jsonl_file_path}")

    if not os.path.exists(jsonl_file_path):
        print(f"File not found: {jsonl_file_path}")
        return

    stats = {
        lang: {'questions': 0, 'solutions': 0, 'exact_answers': 0, 'verified': 0}
        for lang in languages
    }
    total_items = 0
    sample_translations = {lang: [] for lang in languages}

    with open(jsonl_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                # Blank lines carry no record; skip instead of reporting a JSON error
                continue
            try:
                obj = json.loads(line.strip())
                total_items += 1

                for lang in languages:
                    # Check question
                    q_key = f'{lang} Question'
                    if q_key in obj and not str(obj[q_key]).startswith('[Translation failed'):
                        stats[lang]['questions'] += 1

                        # Collect sample for display
                        if len(sample_translations[lang]) < 2:
                            # Same first-non-empty lookup as process_single_item, so an
                            # empty 'Question' does not hide a populated 'question'.
                            original_q = obj.get('Question', '') or obj.get('question', '') or ''
                            original_a = (obj.get('Exact Answer', '')
                                          or obj.get('ExactAnswer', '')
                                          or obj.get('exact_answer', '')
                                          or '')
                            sample_translations[lang].append({
                                # str() keeps slicing safe for non-string values
                                'original_q': str(original_q)[:100],
                                'translated_q': str(obj[q_key])[:100],
                                'original_a': original_a,
                                'translated_a': obj.get(f'{lang} Exact Answer', '')
                            })

                    # Check solution
                    s_key = f'{lang} Solution'
                    if s_key in obj and not str(obj[s_key]).startswith('[Translation failed'):
                        stats[lang]['solutions'] += 1

                    # Check exact answer
                    a_key = f'{lang} Exact Answer'
                    if a_key in obj and obj[a_key]:
                        stats[lang]['exact_answers'] += 1

                    # Check verification status (ignore metadata that is not a dict)
                    metadata = obj.get(f'{lang}_translation_metadata') if isinstance(obj, dict) else None
                    if isinstance(metadata, dict):
                        if metadata.get('question_verified') and metadata.get('solution_verified'):
                            stats[lang]['verified'] += 1

            except Exception as e:
                print(f"Error analyzing line: {e}")
                continue

    print("\n" + "="*60)
    print("TRANSLATION ANALYSIS RESULTS")
    print("="*60)
    print(f"Total items: {total_items}")

    for lang in languages:
        print(f"\n{lang}:")
        q_rate = (stats[lang]['questions'] / total_items * 100) if total_items > 0 else 0
        s_rate = (stats[lang]['solutions'] / total_items * 100) if total_items > 0 else 0
        a_rate = (stats[lang]['exact_answers'] / total_items * 100) if total_items > 0 else 0
        v_rate = (stats[lang]['verified'] / total_items * 100) if total_items > 0 else 0

        print(f"  Successful question translations: {stats[lang]['questions']}/{total_items} ({q_rate:.1f}%)")
        print(f"  Successful solution translations: {stats[lang]['solutions']}/{total_items} ({s_rate:.1f}%)")
        print(f"  Successful exact answer translations: {stats[lang]['exact_answers']}/{total_items} ({a_rate:.1f}%)")
        print(f"  Fully verified translations: {stats[lang]['verified']}/{total_items} ({v_rate:.1f}%)")

        # Show sample translations (only the first collected sample is displayed)
        if sample_translations[lang]:
            print(f"\n  Sample translations:")
            for i, sample in enumerate(sample_translations[lang][:1], 1):
                print(f"    Example {i}:")
                print(f"      Question (EN): {sample['original_q']}...")
                print(f"      Question ({lang}): {sample['translated_q']}...")
                if sample['original_a']:
                    print(f"      Answer (EN): {sample['original_a']}")
                    print(f"      Answer ({lang}): {sample['translated_a']}")

# ---------------------------
# Main Function
# ---------------------------

def main():
    """
    Entry point: resolve API keys, translate the configured input file, analyze
    the output and print an API usage summary.

    Keys come from a module-level ``keys`` string when one is defined and
    non-empty, otherwise from the GEMINI_API_KEYS environment variable; both are
    comma-separated. Raises SystemExit when no key is found.
    """

    def _split_keys(raw):
        """Split a comma-separated string into stripped, non-empty keys."""
        return [part.strip() for part in raw.split(',') if part.strip()]

    # --- API keys: notebook global first, environment second ---
    api_keys = None
    notebook_keys = globals().get('keys')
    if notebook_keys:
        api_keys = _split_keys(notebook_keys)

    if not api_keys:
        env_val = os.environ.get('GEMINI_API_KEYS', '').strip()
        if env_val:
            api_keys = _split_keys(env_val)

    if not api_keys:
        err_msg = "ERROR: No API keys found. Please set up single_key.txt or GEMINI_API_KEYS environment variable."
        print(err_msg)
        raise SystemExit(err_msg)

    print(f"Initialized with {len(api_keys)} API keys")

    # --- API manager ---
    api_manager = GeminiTranslationApiManager(
        api_keys=api_keys,
        calls_per_day=1000,
        rate_limit_delay=5  # Shorter delay since translations are simpler than perturbations
    )

    # --- Run configuration ---
    input_jsonl_path = "/kaggle/input/multilingual-translation/Translation Data Files/part_2.jsonl"
    output_jsonl_path = "/kaggle/working/Multilingual_Translations_Part_2.jsonl"
    target_languages = ["French", "Kazakh", "Finnish", "Lithuanian", "Turkish"]

    if not os.path.exists(input_jsonl_path):
        print(f"Input file not found: {input_jsonl_path}")
        return

    # --- Banner ---
    separator = "="*60
    banner_head = [
        "\nStarting Enhanced Mathematical Translation System",
        separator,
        "Target Languages:",
    ]
    banner_tail = [
        "\nProcess Overview:",
        "  1. Translate question and solution with enhanced prompts",
        "  2. Verify each translation with LLM-as-judge",
        "  3. Translate exact answers (no verification needed)",
        "  4. Apply corrections if needed",
        "  5. Save all translations with metadata",
        "\nEnhancements:",
        "  • More detailed and guided prompts for better quality",
        "  • Language-specific mathematical conventions",
        "  • Exact answer translation support",
        "  • Comprehensive translation metadata",
        separator,
    ]
    for text in banner_head:
        print(text)
    for lang in target_languages:
        print(f"  • {lang}")
    for text in banner_tail:
        print(text)

    # --- Translate, analyze, summarize ---
    try:
        process_jsonl_file(api_manager, input_jsonl_path, output_jsonl_path, target_languages)

        # Analysis only makes sense if an output file was produced
        if os.path.exists(output_jsonl_path):
            analyze_translations(output_jsonl_path, target_languages)

        usage = api_manager.get_usage_stats()
        print("\n" + separator)
        print("FINAL API USAGE SUMMARY")
        print(separator)
        print(f"Total API calls made: {usage['total_used']}")
        print(f"Total API calls available: {usage['total_available']}")
        print(f"Utilization: {usage['percent_used']:.1f}%")

    except Exception as exc:
        # Report on stdout and in the log, then show the traceback
        print(f"Error during translation process: {exc}")
        logging.error(f"Error during translation process: {exc}")
        traceback.print_exc()

# Run the pipeline only when this code executes as the top-level program
# (script run or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()