# **Bangla**

In [None]:
import json
import os
import re
from datetime import datetime
from tqdm import tqdm

# Ollama client
try:
    import ollama
except Exception as e:
    raise ImportError("The 'ollama' package is required. Install it and make sure the Ollama daemon is running.") from e

# -------------------------
# Answer type normalization
# -------------------------
# Canonical answer types understood by the rest of the pipeline.
_CANONICAL_TYPES = {"symbolic", "numerical", "proof"}

# Noisy dataset labels (English + Bengali) mapped to their canonical type.
# Insertion order matters: normalize_answer_type() scans the keys in order when
# looking for a substring match, so the first key found inside a label wins.
_ANSWER_TYPE_MAP_SIMPLE = {
    # Proof variants (English, then Bengali)
    "proof": "proof", "prove": "proof",
    "প্রমাণ": "proof", "প্রমাণ করুন": "proof", "প্রমাণ করা": "proof", "সিদ্ধ": "proof", "প্রমাণিত": "proof",

    # Numerical variants
    "numerical": "numerical", "numeric": "numerical", "number": "numerical", "calculation": "numerical",
    "সংখ্যাগত": "numerical", "সংখ্যা": "numerical", "গণনা": "numerical", "হিসাব": "numerical", "মান": "numerical", "মানটিপান": "numerical",

    # Symbolic variants
    "symbolic": "symbolic", "symbol": "symbolic", "equation": "symbolic", "algebraic": "symbolic",
    "চিহ্নাত্মক": "symbolic", "প্রতীকী": "symbolic", "সমীকরণ": "symbolic", "বীজগণিত": "symbolic",
}

# Bengali letters and combining signs, leaving out the digits U+09E6-U+09EF so
# that a unit abbreviation may directly follow a number.
_BN_LETTERS_AND_SIGNS = r'\u0980-\u09E5\u09F0-\u09FF'


def _bn_keyword_re(*keywords):
    """Compile a pattern that finds any of ``keywords`` as a word.

    The regex word-boundary escape is unreliable for Bengali: vowel signs and
    the virama are combining marks, which Python's ``re`` does not count as
    word characters. A keyword ending in a vowel sign therefore never matches
    at the end of a word. Requiring "no word character on either side" behaves
    identically for keywords that start and end with a letter and fixes the
    ones that end with a vowel sign.
    """
    alternatives = '|'.join(re.escape(word) for word in keywords)
    return re.compile(r'(?<!\w)(?:' + alternatives + r')(?!\w)')


def _bn_token_re(*tokens):
    """Compile a pattern that finds any of ``tokens`` standing alone.

    Used for short unit abbreviations, which must not be matched inside a
    longer Bengali word: no Bengali letter or combining sign may touch the
    token on either side (digits are allowed).
    """
    alternatives = '|'.join(re.escape(token) for token in tokens)
    edge = '[' + _BN_LETTERS_AND_SIGNS + ']'
    return re.compile(r'(?<!' + edge + r')(?:' + alternatives + r')(?!' + edge + r')')


_BN_PROOF_RE = _bn_keyword_re("প্রমাণ", "সিদ্ধ", "প্রমাণ করুন", "প্রমাণ করা", "দেখান")
_BN_LOG_RE = _bn_keyword_re("লঘুগুণ", "লগ", "লগারিথম")
_BN_SET_RE = _bn_keyword_re("সেট", "উপসেট", "একীকরণ", "ছেদ", "ইন্টারসেকশন", "ইউনিয়ন")
_BN_UNIT_WORD_RE = _bn_keyword_re("মিটার", "লিটার")
_BN_UNIT_ABBR_RE = _bn_token_re("মি", "সেমি", "কেজি", "কিমি")
_BN_EQUATION_RE = _bn_keyword_re("সমীকরণ", "সমাধান", "সমাধান করুন", "হল", "খুঁজুন")
_BN_GEOMETRY_RE = _bn_keyword_re("ট্রিগ", "সাইন", "কসাইন", "ট্যান", "জ্যামিতি", "ত্রিভুজ", "বৃত্ত", "চক্র")


def _as_text(value) -> str:
    """Return ``value`` as text; ``None`` becomes the empty string."""
    return "" if value is None else str(value)


def normalize_answer_type(raw_label: str, question_text: str = "", exact_answer: str = "") -> str:
    """
    Normalize a dataset label to one of: 'symbolic', 'numerical', 'proof'.

    Resolution order:
      1. exact lookup of the cleaned label in _ANSWER_TYPE_MAP_SIMPLE;
      2. first map key that occurs as a substring of the cleaned label;
      3. keyword heuristics (English + Bengali) on the question and the
         expected answer, checked in this order: proof, logarithm, set theory,
         units, equation solving, trigonometry/geometry, numeric cues,
         shape of the expected answer;
      4. fallback 'symbolic'.

    Args:
        raw_label: Answer-type label as found in the dataset (may be noisy).
        question_text: Problem statement, used only by the heuristics.
        exact_answer: Expected answer, used only by the heuristics.

    All three arguments are converted to text first, so ``None`` and numeric
    JSON values (e.g. an integer gold answer) are accepted.

    Returns:
        One of 'symbolic', 'numerical' or 'proof'.
    """
    label = _as_text(raw_label).strip().lower()
    # Keep Latin/Bengali letters, digits, whitespace and hyphens only.
    label = re.sub(r'[^0-9\w\s\u0980-\u09FF\-]', ' ', label, flags=re.UNICODE)
    label = re.sub(r'\s+', ' ', label).strip()

    # Direct mapping, then substring mapping (first key in dict order wins).
    if label in _ANSWER_TYPE_MAP_SIMPLE:
        return _ANSWER_TYPE_MAP_SIMPLE[label]
    for key, canonical in _ANSWER_TYPE_MAP_SIMPLE.items():
        if key in label:
            return canonical

    q = _as_text(question_text).lower()
    a = _as_text(exact_answer).lower()
    q_and_a = q + " " + a
    answer_has_digit = re.search(r'\d', a) is not None

    # Proof indicators.
    if re.search(r'\b(prove|show that|prove that|proof)\b', q) or _BN_PROOF_RE.search(q):
        return "proof"

    # Logarithms: numerical when the expected answer contains a digit.
    if re.search(r'\b(log|ln|logarithm)\b', q) or _BN_LOG_RE.search(q) or re.search(r'\blog\b', a):
        return "numerical" if answer_has_digit else "symbolic"

    # Set theory.
    if re.search(r'\b(set|subset|union|intersection)\b', q) or _BN_SET_RE.search(q):
        return "symbolic"

    # Units of measurement in the question or the expected answer.
    # NOTE(review): the single-letter English units 'm' and 'l' also match
    # stand-alone variable names; kept as in the original heuristic.
    if (re.search(r'\bmeter\b|\bm\b|\bcm\b|\bkg\b|\bliter\b|\bl\b|\bkm\b|\bmile\b', q_and_a)
            or _BN_UNIT_WORD_RE.search(q_and_a)
            or _BN_UNIT_ABBR_RE.search(q_and_a)
            or re.search(r'\blic\b', q_and_a)):
        return "numerical"

    # Equation solving.
    if re.search(r'\bequation\b|solve for|solve|= x|x\s*=', q) or _BN_EQUATION_RE.search(q):
        return "symbolic"

    # Trigonometry / geometry: numerical when the expected answer has a digit.
    if re.search(r'\b(trig|sin|cos|tan|geometry|triangle|circle)\b', q) or _BN_GEOMETRY_RE.search(q):
        return "numerical" if answer_has_digit else "symbolic"

    # Numeric cues in the question.
    if (re.search(r'\d', q)
            or re.search(r'find the value|compute|calculate|evaluate', q)
            or re.search(r'মান বের|মানটি|মানটি বের|গণনা করুন|হিসাব করুন|পাওয়া', q)):
        return "numerical"

    # Shape of the expected answer: LaTeX markup means symbolic, bare digits numerical.
    if re.search(r'[0-9]|\\frac|\\sqrt', a):
        if re.search(r'\\frac|\\sqrt|\{|\\', a):
            return "symbolic"
        return "numerical"

    return "symbolic"


# -------------------------
# EnhancedAnswerExtractor (adapted to behave like SimplifiedAnswerExtractor)
# and now handles Bengali markers as well
# -------------------------
class EnhancedAnswerExtractor:
    """
    Extracts a concise final answer from free-form model output.

    Recognizes English and Bengali (Bangla) answer/conclusion markers.
    Public entry points:
      - extract_final_answer(text) -> str
      - extract_all_final_answers(generated_solution) -> list
      - extract_final_answer_simple(text) -> str
    Internal helpers clean a candidate (_clean_answer) and decide whether it
    is usable (_is_valid_answer).
    """

    # Bengali block without the visarga (U+0983), which is commonly typed as a
    # colon after a label and is therefore treated as a separator below.
    _BN_WORD_CHARS = r'\u0980-\u0982\u0984-\u09FF'

    _FINAL_TAG_RE = re.compile(r'<final>(.*?)</final>', re.DOTALL | re.IGNORECASE)

    # Opening of a boxed wrapper: optional "$", up to three backslashes, "boxed{".
    _BOXED_OPEN_RE = re.compile(r'(?:\$\s*)?\\{0,3}boxed\{', re.IGNORECASE)
    # A "$" that closes a "$\boxed{...}$" wrapper.
    _BOXED_TAIL_RE = re.compile(r'\s*\$')

    # Innermost (brace-free) LaTeX fractions and roots; applied repeatedly.
    _FRAC_RE = re.compile(r'\\frac\{([^{}]*)\}\{([^{}]*)\}')
    _SQRT_RE = re.compile(r'\\sqrt\{([^{}]*)\}')

    _BOLD_RE = re.compile(r'\*\*([^*]+)\*\*')
    # Markdown italics only: the opening "*" must not follow an operand and
    # must hug its content, so products such as 2*3*4 are left alone.
    _ITALIC_RE = re.compile(r'(?<![\w)\]}*])\*(?!\s)([^*]+?)(?<!\s)\*(?![\w(\[{*])')

    # Leading labels/conclusion words. English conclusion words need a word
    # boundary ("So" must not eat the start of "Solution"); Bengali labels must
    # not be followed by further Bengali letters, and longer labels come first.
    _PREFIX_RE = re.compile(
        r'^(?:'
        r'Final Answer:\s*'
        r'|Answer:\s*'
        r'|The answer is\s*'
        r'|(?:Therefore|Thus|Hence|So)\b,?\s*'
        r'|∴\s*'
        r'|(?:চূড়ান্ত উত্তর|উত্তরটি|উত্তর|চূড়ান্ত|ফল|তাহলে|অতএব|সুতরাং|তাই|উপসংহার)'
        r'(?![' + _BN_WORD_CHARS + r'])[:,\u0983\s]*'
        r')',
        re.IGNORECASE,
    )

    _TRAILING_WORD_RE = re.compile(
        r'\b(proved|completed|finished|the answer|প্রমাণিত|প্রমাণ|উত্তর|ফল)\b[.\s]*$',
        re.IGNORECASE,
    )

    # Candidates that end in a bare conclusion word are not answers.
    _DANGLING_RE = re.compile(
        r'(?:\b(?:therefore|thus|hence|so|we get|we have)'
        r'|(?<![\u0980-\u09FF])(?:অতএব|সুতরাং|তাই))$',
        re.IGNORECASE,
    )

    # Marker patterns tried in order by _extract_with_patterns. Conclusion
    # markers stop at a sentence-ending period, not at a decimal point.
    _ANSWER_PATTERNS = tuple(
        re.compile(pattern, re.MULTILINE | re.DOTALL | re.IGNORECASE)
        for pattern in (
            r'\*\*Final Answer:\*\*\s*(.+?)(?:\n|$)',
            r'Final Answer:\s*(.+?)(?:\n|$)',
            r'Final Answer\s*[:\-]\s*(.+?)(?:\n|$)',

            # Bengali patterns
            r'\*\*উত্তর:\*\*\s*(.+?)(?:\n|$)',
            r'উত্তর[:\s]*(.+?)(?:\n|$)',
            r'ফল[:\s]*(.+?)(?:\n|$)',
            r'চূড়ান্ত উত্তর[:\s]*(.+?)(?:\n|$)',
            r'উপসংহার[:\s]*(.+?)(?:\n|$)',

            # English conclusion markers
            r'Therefore[,:\s]*(.+?)(?:\.(?!\d)|$|\n)',
            r'Hence[,:\s]*(.+?)(?:\.(?!\d)|$|\n)',
            r'Thus[,:\s]*(.+?)(?:\.(?!\d)|$|\n)',
            r'∴\s*(.+?)(?:\.(?!\d)|$|\n)',

            # Bengali conclusion markers
            r'অতএব[,:\s]*(.+?)(?:\.(?!\d)|$|\n)',
            r'সুতরাং[,:\s]*(.+?)(?:\.(?!\d)|$|\n)',
            r'তাই[,:\s]*(.+?)(?:\.(?!\d)|$|\n)',
            r'উপসংহার[,:\s]*(.+?)(?:\.(?!\d)|$|\n)',

            r'Answer[:\s]*(.+?)(?:\n|$)',
            r'Result[:\s]*(.+?)(?:\n|$)',
            r'Solution[:\s]*(.+?)(?:\n|$)',
        )
    )

    @staticmethod
    def _find_boxed(text: str) -> list:
        """
        Locate boxed wrappers with brace-balanced content.

        Returns a list of (start, end, content) tuples in text order, where
        text[start:end] is the whole wrapper (including an adjacent "$" on
        either side) and content is what sits between the outer braces.
        Scanning stops at the first wrapper whose braces never close.
        """
        cls = EnhancedAnswerExtractor
        spans = []
        pos = 0
        while True:
            opener = cls._BOXED_OPEN_RE.search(text, pos)
            if opener is None:
                break
            depth = 1
            i = opener.end()
            while i < len(text) and depth:
                ch = text[i]
                if ch == '\\':
                    # Skip the escaped character so literal braces are not counted.
                    i += 2
                    continue
                if ch == '{':
                    depth += 1
                elif ch == '}':
                    depth -= 1
                i += 1
            if depth:
                break
            content = text[opener.end():i - 1]
            tail = cls._BOXED_TAIL_RE.match(text, i)
            end = tail.end() if tail else i
            spans.append((opener.start(), end, content))
            pos = end
        return spans

    @staticmethod
    def _unbox(text: str) -> str:
        """Replace every boxed wrapper in text by its (recursively unboxed) content."""
        cls = EnhancedAnswerExtractor
        pieces = []
        last = 0
        for start, end, content in cls._find_boxed(text):
            pieces.append(text[last:start])
            pieces.append(cls._unbox(content))
            last = end
        pieces.append(text[last:])
        return ''.join(pieces)

    @staticmethod
    def _latex_to_text(text: str) -> str:
        """Rewrite LaTeX fractions as (a)/(b) and roots as √(a), innermost first."""
        cls = EnhancedAnswerExtractor
        while True:
            converted = cls._FRAC_RE.sub(r'(\1)/(\2)', text)
            converted = cls._SQRT_RE.sub(r'√(\1)', converted)
            if converted == text:
                return text
            text = converted

    @staticmethod
    def _strip_prefixes(text: str) -> str:
        """Remove stacked leading labels such as 'Therefore, the answer is'."""
        cls = EnhancedAnswerExtractor
        while True:
            stripped = cls._PREFIX_RE.sub('', text, count=1)
            if stripped == text:
                return text
            text = stripped

    @staticmethod
    def _clean_answer(answer: str) -> str:
        """
        Reduce a raw candidate to a bare answer string.

        Steps, in order: collapse whitespace, drop math delimiters wrapping the
        whole string, strip leading labels, unwrap boxed expressions, convert
        fractions/roots to readable text, drop markdown emphasis, strip labels
        again (they may have been hidden inside emphasis), trim trailing
        punctuation and trailing filler words.

        Returns "" for empty input.
        """
        if not answer:
            return ""
        cls = EnhancedAnswerExtractor

        a = re.sub(r'\s+', ' ', answer.strip())

        # Remove outer $$...$$ / $...$ when the whole string is wrapped, then
        # any stray leading/trailing "$" and spaces.
        a = re.sub(r'^\$\$(.*)\$\$$', r'\1', a, flags=re.DOTALL)
        a = re.sub(r'^\$(.*)\$$', r'\1', a, flags=re.DOTALL)
        a = a.strip('$ ')

        a = cls._strip_prefixes(a)

        # e.g. $$\boxed{...}$$, $\boxed{...}$, \boxed{...} (nested braces allowed)
        a = cls._unbox(a)
        a = cls._latex_to_text(a)

        a = cls._BOLD_RE.sub(r'\1', a)
        a = cls._ITALIC_RE.sub(r'\1', a)

        a = re.sub(r'\s+', ' ', a).strip()
        # A label such as "**Final Answer:**" only becomes visible now.
        a = cls._strip_prefixes(a)

        a = a.rstrip(' \t\n.,;:')
        a = cls._TRAILING_WORD_RE.sub('', a).strip()
        return a

    @staticmethod
    def _is_valid_answer(answer: str) -> bool:
        """
        Decide whether a cleaned candidate can be reported as an answer.

        Rejects empty strings, strings made only of punctuation, strings with
        no Latin/Bengali letter, digit or backslash, strings longer than 1000
        characters, and strings that end in a bare conclusion word.
        """
        if not answer:
            return False
        if re.match(r'^[\W_]+$', answer):
            return False
        if not re.search(r'[0-9A-Za-z\u0980-\u09FF\\]', answer):
            return False
        if len(answer) > 1000:
            return False
        if EnhancedAnswerExtractor._DANGLING_RE.search(answer.strip()):
            return False
        return True

    @staticmethod
    def _final_tag_answers(text: str) -> list:
        """Return the cleaned, valid contents of every <final>...</final> tag, in order."""
        cls = EnhancedAnswerExtractor
        cleaned = (cls._clean_answer(raw) for raw in cls._FINAL_TAG_RE.findall(text))
        return [candidate for candidate in cleaned if cls._is_valid_answer(candidate)]

    @staticmethod
    def extract_final_answer_simple(text: str) -> str:
        """
        Extract a final answer from the last lines of text.

        Tries, in order, the last two non-empty lines joined by a space, the
        last line alone, and the last three lines (accepted only when the
        cleaned result is shorter than 500 characters). If none is valid,
        falls back to marker-based extraction.
        """
        if not text:
            return ""
        cls = EnhancedAnswerExtractor

        lines = [line.strip() for line in text.strip().split('\n') if line.strip()]

        # (number of trailing lines, exclusive length limit or None)
        for count, limit in ((2, None), (1, None), (3, 500)):
            if len(lines) < count:
                continue
            candidate = cls._clean_answer(' '.join(lines[-count:]))
            if cls._is_valid_answer(candidate) and (limit is None or len(candidate) < limit):
                return candidate

        return cls._extract_with_patterns(text)

    @staticmethod
    def _extract_with_patterns(text: str) -> str:
        """
        Marker-based extraction used as a fallback.

        Order: first valid <final> tag; then each answer/conclusion pattern
        (English and Bengali), using the last occurrence of the first pattern
        that yields a valid answer; then the first boxed expression with valid
        content. Returns "" when nothing qualifies.
        """
        cls = EnhancedAnswerExtractor

        tagged = cls._final_tag_answers(text)
        if tagged:
            return tagged[0]

        for pattern in cls._ANSWER_PATTERNS:
            found = pattern.findall(text)
            if found:
                candidate = cls._clean_answer(found[-1].strip())
                if cls._is_valid_answer(candidate):
                    return candidate

        for _start, _end, content in cls._find_boxed(text):
            candidate = cls._clean_answer(content)
            if cls._is_valid_answer(candidate):
                return candidate

        return ""

    @staticmethod
    def extract_all_final_answers(generated_solution: str) -> list:
        """
        Return every final answer found in generated_solution.

        The list holds the cleaned contents of all valid <final>...</final>
        tags; when there are none it holds the single result of
        extract_final_answer_simple, or is empty if that finds nothing.
        """
        if not generated_solution:
            return []
        cls = EnhancedAnswerExtractor

        tagged = cls._final_tag_answers(generated_solution)
        if tagged:
            return tagged

        fallback = cls.extract_final_answer_simple(generated_solution)
        return [fallback] if fallback else []

    @staticmethod
    def extract_final_answer(generated_solution: str) -> str:
        """
        Return the final answer as a single string.

        One answer is returned as-is; several <final> tags are returned as a
        JSON array string; no answer yields "".
        """
        answers = EnhancedAnswerExtractor.extract_all_final_answers(generated_solution)
        if not answers:
            return ""
        if len(answers) == 1:
            return answers[0]
        return json.dumps(answers, ensure_ascii=False)


# -------------------------
# Ollama-based Math Solver (ZERO-SHOT) — PROMPTS IN ENGLISH (problem is Bengali)
# -------------------------
class OllamaZeroShotMathSolver:
    """
    Single-pass, zero-shot math solver backed by an Ollama chat model.

    The instructions sent to the model are in English; they state that the
    problem is written in Bengali (Bangla), ask for a concise final answer in
    Bengali without step-by-step reasoning, and require a closing
    <final>...</final> tag.
    """

    def __init__(self, model_name="qwen3:8b"):
        """Remember the Ollama model reference (e.g. "qwen3:8b") and create the client."""
        self.model_name = model_name
        self.client = self._load_client()

    def _load_client(self):
        """Announce the model in use and return a new ollama.Client."""
        print(f"Initializing Ollama client for model: {self.model_name}")
        return ollama.Client()

    def cleanup(self):
        """Drop the client attribute; does nothing if it is already gone."""
        try:
            del self.client
        except AttributeError:
            pass

    def _get_format_instructions(self, answer_type):
        """
        Build the English formatting rules for the given answer type.

        answer_type is lower-cased and stripped; anything outside the
        canonical set (including None/empty) is treated as "symbolic".
        The result is the shared rules followed by the type-specific section.
        """
        kind = (answer_type or "symbolic").strip().lower()
        if kind not in _CANONICAL_TYPES:
            kind = "symbolic"

        shared_rules = "".join([
            "CRITICAL FORMATTING REQUIREMENTS (ZERO-SHOT):\n",
            "- The math problem you will receive is written in BENGALI (Bangla). Provide your concise FINAL ANSWER in BENGALI.\n",
            "- DO NOT provide chain-of-thought or step-by-step internal reasoning. Do NOT reveal private chain-of-thought.\n",
            "- If a very short justification is necessary, include a single-line 'Explanation:' with at most one sentence (in Bengali).\n",
            "- Always end with a machine-readable final tag <final>...</final> containing ONLY the concise final answer in BENGALI (no extra reasoning inside the tag).\n",
        ])

        sections = {
            "proof": "".join([
                "FOR PROOFS (zero-shot):\n",
                "- Provide a concise conclusion or a one-sentence proof sketch (in BENGALI) labeled 'উপসংহার:' or 'প্রমাণ সংক্ষেপ:' if needed. Do NOT provide a full step-by-step proof.\n",
                "Format example (English instructions):\n",
                "Final Answer (in Bengali):\n",
                "[Concise Bengali conclusion or one-sentence sketch]\n\n",
                "<final>[Concise Bengali conclusion]</final>\n",
            ]),
            "numerical": "".join([
                "FOR NUMERICAL RESULTS:\n",
                "- Provide the numeric result in exact form if available (fractions/radicals). Otherwise provide a decimal rounded to 4 decimal places.\n",
                "Format example (English instructions):\n",
                "Final Answer (in Bengali):\n",
                "[Numeric result]\n\n",
                "<final>[Numeric result]</final>\n",
            ]),
            "symbolic": "".join([
                "FOR SYMBOLIC RESULTS:\n",
                "- Provide the final symbolic expression (LaTeX allowed) in a concise form. The expression may be LaTeX but any wording should be in Bengali if used.\n",
                "Format example (English instructions):\n",
                "Final Answer (in Bengali):\n",
                "[Final symbolic expression]\n\n",
                "<final>[LaTeX expression or concise symbolic expression — in Bengali if words are used]</final>\n",
            ]),
        }
        return shared_rules + sections[kind]

    def _create_prompt(self, question, answer_type="symbolic"):
        """Assemble the full prompt: role line, the problem, and the formatting rules."""
        rules = self._get_format_instructions(answer_type)

        # English instructions that name the problem language and the required answer language.
        prompt_lines = [
            "You are an expert mathematician. The following math problem is written in BENGALI (Bangla).",
            "Provide a concise final answer in BENGALI. DO NOT produce chain-of-thought or step-by-step internal reasoning.",
            "If you include a justification, it must be one short sentence and labeled 'Explanation:' (in Bengali).",
            "",
            "MATH PROBLEM (in Bengali):",
            f"{question}",
            "",
            f"{rules}",
            "",
            "Begin your concise answer (in Bengali) now:",
            "",
        ]
        return "\n".join(prompt_lines)

    def _generate_once(self, prompt_text: str, enable_thinking: bool = False) -> str:
        """
        Send one user message to the model and return the reply text, stripped.

        No temperature or token limit is passed. If the keyword-style call
        raises TypeError, the model name is passed positionally instead, and
        as a last resort the `think` argument is dropped. The reply may be a
        dict (with 'message' or 'choices') or an object exposing
        `.message.content`; anything else is stringified. Bytes are decoded
        as UTF-8, ignoring errors.
        """
        conversation = [{"role": "user", "content": prompt_text}]

        try:
            reply = self.client.chat(model=self.model_name, messages=conversation, think=enable_thinking)
        except TypeError:
            # Some client versions have different signatures.
            try:
                reply = self.client.chat(self.model_name, messages=conversation, think=enable_thinking)
            except Exception:
                reply = self.client.chat(self.model_name, conversation)

        if not isinstance(reply, dict):
            # Object-style response: expect .message.content
            try:
                text = reply.message.content
            except Exception:
                text = str(reply)
        elif 'message' in reply and isinstance(reply['message'], dict) and 'content' in reply['message']:
            text = reply['message']['content']
        elif 'choices' in reply and isinstance(reply['choices'], (list, tuple)) and reply['choices']:
            first = reply['choices'][0]
            has_message = isinstance(first, dict) and 'message' in first and isinstance(first['message'], dict)
            text = first['message'].get('content', '') if has_message else str(first)
        else:
            text = str(reply)

        if isinstance(text, bytes):
            text = text.decode('utf-8', errors='ignore')
        return (text or "").strip()

    def solve_problem(self, question, answer_type="symbolic"):
        """
        Solve one problem in a single zero-shot pass.

        Returns a dict with the raw model output ('generated_answer'), the
        answer extracted from it ('extracted_final_answer'), and two fields
        that are always empty in this mode ('thinking_content',
        'final_tag_output').
        """
        raw_output = self._generate_once(self._create_prompt(question, answer_type), enable_thinking=False)
        answer = EnhancedAnswerExtractor.extract_final_answer(raw_output)

        return {
            "thinking_content": "",   # thinking is disabled in zero-shot mode
            "generated_answer": raw_output,
            "final_tag_output": "",   # no second pass
            "extracted_final_answer": answer,
        }


# -------------------------
# Dataset Processor
# -------------------------
class DatasetProcessor:
    """
    Runs a solver over a JSONL dataset and writes results to disk.

    For every problem it infers a canonical answer type, asks the solver for
    an answer, extracts the final answer(s), and records one result entry.
    Results are saved as intermediate snapshots, a final JSON file and a
    summary JSON file; problems with no extractable answer are additionally
    dumped to `failed_folder` for inspection.
    """

    def __init__(self, solver: "OllamaZeroShotMathSolver", failed_folder=None):
        """
        Args:
            solver: Object exposing solve_problem(question, answer_type=...).
            failed_folder: Directory for failed-extraction dumps; defaults to
                "failed_extractions". It is created if missing.
        """
        self.solver = solver
        self.extractor = EnhancedAnswerExtractor()
        self.failed_folder = failed_folder or "failed_extractions"
        os.makedirs(self.failed_folder, exist_ok=True)

    def process_dataset(self, dataset_path, output_base_path, start_idx=0, end_idx=None,
                        folder_name=None, create_timestamped_folder=True):
        """
        Process problems [start_idx, end_idx) of the dataset.

        end_idx defaults to, and is capped at, the dataset length so that the
        folder name, file names and summary describe what was really processed.
        A problem that raises is recorded as an error entry; every problem
        yields exactly one entry.

        Returns:
            (results, output_folder)
        """
        dataset = self._load_dataset(dataset_path)
        end_idx = len(dataset) if end_idx is None else min(end_idx, len(dataset))

        output_folder = self._create_output_folder(output_base_path, folder_name, start_idx, end_idx, create_timestamped_folder)
        results = []

        print(f"Processing problems {start_idx} to {end_idx-1} ({end_idx-start_idx} total)")
        print(f"Output will be saved in: {output_folder}")

        for idx in tqdm(range(start_idx, end_idx)):
            problem = dataset[idx]
            entry = None
            try:
                entry = self._process_single_problem(idx, problem)
                self._print_progress(idx, entry)
            except Exception as e:
                print(f"Error processing problem {idx+1}: {str(e)}")
                # Keep a successfully computed entry if only the progress
                # report failed; otherwise record the error.
                if entry is None:
                    entry = self._create_error_entry(idx, problem, str(e))
            results.append(entry)

            processed = idx - start_idx + 1
            if processed % 10 == 0:
                # Snapshots are best-effort: a failed write must not abort the run.
                try:
                    self._save_intermediate_results(results, output_folder, processed)
                except OSError as e:
                    print(f"Could not save intermediate results: {e}")

        self._save_final_results(results, output_folder, start_idx, end_idx)
        self._create_summary_file(results, output_folder, dataset_path, start_idx, end_idx)
        return results, output_folder

    def _create_output_folder(self, base_path, folder_name, start_idx, end_idx, add_timestamp):
        """Create and return the output folder; the name defaults to the index range."""
        if folder_name is None:
            folder_name = f"results_{start_idx}_to_{end_idx-1}"
        if add_timestamp:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            folder_name = f"{folder_name}_{timestamp}"
        output_folder = os.path.join(base_path, folder_name)
        os.makedirs(output_folder, exist_ok=True)
        return output_folder

    def _load_dataset(self, dataset_path):
        """
        Read a JSONL file into a list, skipping blank lines.

        The file is decoded as 'utf-8-sig' so a leading byte-order mark, if
        present, does not break parsing of the first record.
        """
        with open(dataset_path, 'r', encoding='utf-8-sig') as f:
            return [json.loads(line) for line in f if line.strip()]

    @staticmethod
    def _join_answers(answers):
        """Fold a list of answers into one string: "", the single answer, or a JSON array string."""
        if not answers:
            return ""
        if len(answers) == 1:
            return answers[0]
        return json.dumps(answers, ensure_ascii=False)

    def _process_single_problem(self, idx, problem):
        """
        Solve one problem and build its result entry.

        The answer type is inferred from the dataset label, the question and
        the expected answer; an expected answer that contains a digit and no
        LaTeX fraction/root/box forces 'numerical'. Final answers are looked
        for in the generated answer, then in the solver's final-tag output,
        then in all solver text combined.
        """
        language = problem.get("Language", "")
        chapter_num = problem.get("Chapter Number", "")
        example_num = problem.get("Example Number", "")
        question = problem.get("Question", "")
        exact_answer = problem.get("Exact Answer", "")
        raw_answer_type = problem.get("Answer Type", "") or ""

        # JSON may deliver numbers or null here; the heuristics need text.
        question_text = "" if question is None else str(question)
        exact_text = "" if exact_answer is None else str(exact_answer)

        # Normalize/infer canonical answer type: 'symbolic', 'numerical', 'proof'
        canonical_type = normalize_answer_type(str(raw_answer_type), question_text=question_text, exact_answer=exact_text)

        # A plain numeric expected answer overrides the inferred type; LaTeX
        # expressions (\frac, \sqrt, \boxed) keep it.
        if re.search(r'\d', exact_text) and not re.search(r'\\frac|\\sqrt|\\boxed', exact_text):
            canonical_type = "numerical"

        print(f"\nProcessing Problem {idx+1}: Chapter {chapter_num}, Example {example_num}")
        print(f"Raw Answer Type: '{raw_answer_type}'  --> canonical: '{canonical_type}'")

        # Generate solution (use canonical_type) -- zero-shot, single pass
        solution_result = self.solver.solve_problem(question, answer_type=canonical_type)
        generated_answer = solution_result.get('generated_answer', '')
        thinking_content = solution_result.get('thinking_content', '')
        final_tag_output = solution_result.get('final_tag_output', '')

        # Try each text source in turn until one yields answers.
        combined = "\n".join([thinking_content or "", generated_answer or "", final_tag_output or ""])
        extracted_final_answers = []
        for source in (generated_answer, final_tag_output, combined):
            if source:
                extracted_final_answers = EnhancedAnswerExtractor.extract_all_final_answers(source)
            if extracted_final_answers:
                break
        extracted_final_answer = self._join_answers(extracted_final_answers)

        # Nothing extracted: save the example for inspection.
        if not extracted_final_answer:
            fname = f"failed_{idx}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            fpath = os.path.join(self.failed_folder, fname)
            with open(fpath, 'w', encoding='utf-8') as f:
                json.dump({
                    "index": idx,
                    "question": question,
                    "generated_answer": generated_answer,
                    "thinking_content": thinking_content,
                    "final_tag_output": final_tag_output,
                    "exact_answer": exact_answer,
                    "canonical_type": canonical_type,
                    "extracted_final_answer": extracted_final_answer,
                    "extracted_final_answers": extracted_final_answers
                }, f, ensure_ascii=False, indent=2)
            print(f"Saved failed extraction example to {fpath}")

        return {
            "problem_index": idx,
            "language": language,
            "chapter_number": chapter_num,
            "example_number": example_num,
            "question": question,
            "generated_answer": generated_answer,
            "thinking_content": thinking_content,
            "final_tag_output": final_tag_output,
            "extracted_final_answer": extracted_final_answer,       # string (or JSON array string)
            "extracted_final_answers": extracted_final_answers,     # list (empty / single / many)
            "exact_answer": exact_answer,
            "raw_answer_type": raw_answer_type,
            "canonical_answer_type": canonical_type,
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _create_error_entry(self, idx, problem, error_msg):
        """Build a result entry for a problem that raised; the message is stored as 'ERROR: ...'."""
        return {
            "problem_index": idx,
            "language": problem.get("Language", ""),
            "chapter_number": problem.get("Chapter Number", ""),
            "example_number": problem.get("Example Number", ""),
            "question": problem.get("Question", ""),
            "generated_answer": f"ERROR: {error_msg}",
            "thinking_content": "",
            "final_tag_output": "",
            "extracted_final_answer": "",
            "extracted_final_answers": [],
            "exact_answer": problem.get("Exact Answer", ""),
            "raw_answer_type": problem.get("Answer Type", ""),
            "canonical_answer_type": "",
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _print_progress(self, idx, result_entry):
        """Print the generated length, the extracted answer(s) and the expected answer."""
        print(f"Generated answer length: {len(result_entry['generated_answer']) if result_entry['generated_answer'] else 0}")
        print(f"Extracted final answer: '{result_entry['extracted_final_answer']}'")
        print(f"Extracted final answers (list): {result_entry.get('extracted_final_answers', [])}")
        print(f"Expected answer: '{result_entry['exact_answer']}'")

    def _save_intermediate_results(self, results, output_folder, count):
        """Write all results so far to intermediate_results_<count>.json."""
        temp_filename = f'intermediate_results_{count}.json'
        temp_output_path = os.path.join(output_folder, temp_filename)
        with open(temp_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"Saved intermediate results to {temp_output_path}")

    def _save_final_results(self, results, output_folder, start_idx, end_idx):
        """Write all results to final_results_<start>_to_<end-1>.json and return its path."""
        final_filename = f'final_results_{start_idx}_to_{end_idx-1}.json'
        final_output_path = os.path.join(output_folder, final_filename)
        with open(final_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"\nProcessing complete. Results saved to {final_output_path}")
        print(f"Total problems processed: {len(results)}")
        return final_output_path

    def _create_summary_file(self, results, output_folder, dataset_path, start_idx, end_idx):
        """
        Write processing_summary.json with run metadata and statistics.

        Distinct chapters and answer types are listed in a stable order
        (sorted by their text form) so repeated runs produce identical files.
        """
        total = len(results)
        errored = sum(1 for r in results if r['generated_answer'].startswith('ERROR:'))
        successful_extractions = sum(1 for r in results if r.get('extracted_final_answer', '').strip())

        def distinct(field):
            # Truthy values only, de-duplicated, in deterministic order.
            return sorted({r[field] for r in results if r.get(field)}, key=str)

        summary_data = {
            "processing_info": {
                "dataset_path": dataset_path,
                "start_index": start_idx,
                "end_index": end_idx - 1,
                "total_processed": total,
                "processing_timestamp": datetime.now().isoformat(),
                "output_folder": output_folder
            },
            "statistics": {
                "successful_problems": total - errored,
                "failed_problems": errored,
                "successful_extractions": successful_extractions,
                "extraction_success_rate": f"{(successful_extractions/total*100):.1f}%" if results else "0%",
                "average_answer_length": sum(len(r['generated_answer']) for r in results) / total if results else 0,
                "chapters_processed": distinct('chapter_number'),
                "raw_answer_types": distinct('raw_answer_type'),
                "canonical_answer_types": distinct('canonical_answer_type')
            }
        }
        summary_path = os.path.join(output_folder, 'processing_summary.json')
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary_data, f, ensure_ascii=False, indent=2)
        print(f"Processing summary saved to {summary_path}")
        print(f"Answer extraction success rate: {summary_data['statistics']['extraction_success_rate']}")


# -------------------------
# Main (example usage)
# -------------------------
def main():
    """
    Example run: solve the Bengali corpus with the zero-shot Ollama solver.

    Both paths are Kaggle defaults; change them to match your environment, and
    make sure the dataset's "Question" fields are written in Bengali (Bangla).
    """
    corpus_file = "/kaggle/input/nctb-dataset/Bengali_Final_Corpus.jsonl"
    work_dir = "/kaggle/working/"

    # The model named here must already be available to the Ollama daemon.
    processor = DatasetProcessor(
        OllamaZeroShotMathSolver(model_name="qwen3:8b"),
        failed_folder=os.path.join(work_dir, "failed_extractions"),
    )

    # Small quick-test window: start_idx=0, end_idx=100.
    _, saved_to = processor.process_dataset(corpus_file, work_dir, start_idx=0, end_idx=100)
    print("Done. Results saved to:", saved_to)


# Run the example only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


# **English**

In [None]:
import json
import os
import re
from datetime import datetime
from tqdm import tqdm

# Ollama client
try:
    import ollama
except Exception as e:
    raise ImportError("The 'ollama' package is required. Install it and make sure the Ollama daemon is running.") from e

# -------------------------
# Answer type normalization
# -------------------------
# Canonical answer types: every dataset label is normalized to one of these.
_CANONICAL_TYPES = {"symbolic", "numerical", "proof"}

# Known noisy labels -> canonical type. Keys are lowercase because labels are
# lowercased before lookup. Insertion order matters: the partial-match step in
# normalize_answer_type returns the first key found inside the cleaned label.
_ANSWER_TYPE_MAP_SIMPLE = {
    # Proof variants
    "proof": "proof", "prove": "proof",
    # Numerical variants
    "numerical": "numerical", "numeric": "numerical", "number": "numerical", "calculation": "numerical",
    # Symbolic variants
    "symbolic": "symbolic", "symbol": "symbolic", "equation": "symbolic", "algebraic": "symbolic",
}

# Measurement units as standalone tokens in lowercased text. The lookarounds
# reject units embedded in ordinary words ("sum", "all", "perimeter", "smile")
# while still accepting a unit glued to a number ("5m", "12cm").
_UNIT_TOKEN_RE = re.compile(
    r'(?<![a-z])(?:(?:kilo|centi|milli)?met(?:er|re)s?|(?:milli)?lit(?:er|re)s?'
    r'|m|cm|km|kg|l|units?|miles?)(?![a-z])'
)


def normalize_answer_type(raw_label: str, question_text: str = "", exact_answer: str = "") -> str:
    """
    Normalize a dataset label to one of: 'symbolic', 'numerical', 'proof'.

    Resolution order:
      1. Exact match of the cleaned label in _ANSWER_TYPE_MAP_SIMPLE.
      2. First map key that occurs inside the cleaned label.
      3. Keyword heuristics on the question and the expected answer.
      4. Fallback: 'symbolic'.

    Args:
        raw_label: the dataset's answer-type label (may be empty or None).
        question_text: the problem statement, used only by the heuristics.
        exact_answer: the expected answer, used only by the heuristics.
            Non-string values (e.g. a JSON number) are converted with str().

    Returns:
        One of 'symbolic', 'numerical', 'proof'.
    """
    def _as_text(value) -> str:
        # JSON fields may hold numbers or null; never call str methods on them directly.
        return "" if value is None else str(value)

    # Clean the label: lowercase, keep only ASCII letters/digits, collapse whitespace.
    s = re.sub(r'[^0-9a-z\s]', ' ', _as_text(raw_label).strip().lower())
    s = re.sub(r'\s+', ' ', s).strip()

    # direct mapping
    if s in _ANSWER_TYPE_MAP_SIMPLE:
        return _ANSWER_TYPE_MAP_SIMPLE[s]

    # partial matches (this also covers any label that merely contains "proof")
    for k, v in _ANSWER_TYPE_MAP_SIMPLE.items():
        if k in s:
            return v

    # heuristics using question or exact_answer
    q = _as_text(question_text).lower()
    a = _as_text(exact_answer).lower()

    if re.search(r'prove|show that', q):
        return "proof"

    if re.search(r'\b(log|ln|logarithm)\b', q) or re.search(r'\blog\b', a):
        # default to symbolic but prefer numerical if exact answer contains digits
        if re.search(r'\d', a):
            return "numerical"
        return "symbolic"

    if re.search(r'set|subset|\bunion\b|\bintersection\b', q):
        return "symbolic"

    # A measurement unit in the question or the answer signals a numeric result.
    if _UNIT_TOKEN_RE.search(q + " " + a):
        return "numerical"

    # equation-solving heuristics
    if re.search(r'equation|solve for|solve|= x|x\s*=', q) or re.search(r'= x|= \d', a):
        return "symbolic"

    if re.search(r'trig|sin|cos|tan|geometry|triangle|circle', q):
        if re.search(r'\d', a):
            return "numerical"
        return "symbolic"

    # numeric indicators -> numerical
    if re.search(r'\d', q) or re.search(r'find the value|compute|calculate|evaluate', q):
        return "numerical"

    # If exact_answer looks numeric, prefer numerical (LaTeX structure keeps it symbolic)
    if re.search(r'[0-9]|\\frac|\\sqrt', a):
        if re.search(r'\\frac|\\sqrt|\{|\\', a):
            return "symbolic"
        return "numerical"

    # fallback
    return "symbolic"

# -------------------------
# EnhancedAnswerExtractor (adapted to behave like SimplifiedAnswerExtractor)
# -------------------------
class EnhancedAnswerExtractor:
    """
    Extracts the final answer from a model's free-text solution.

    Public entry points:
      - extract_final_answer(text) -> str
      - extract_all_final_answers(text) -> list
      - extract_final_answer_simple(text) -> str
    Explicit <final>...</final> tags are preferred; otherwise the last lines of
    the text and a set of "Final Answer:" / "Therefore" / \\boxed{} patterns are tried.
    Every candidate goes through _clean_answer and _is_valid_answer.
    """

    # <final>...</final> tags (case-insensitive, may span lines).
    _FINAL_TAG_RE = re.compile(r'<final>(.*?)</final>', re.DOTALL | re.IGNORECASE)
    # Opening of a boxed wrapper: optional '$', optional spaces, 0-3 backslashes, 'boxed{'.
    _BOXED_OPEN_RE = re.compile(r'\$?\s*(?:\\){0,3}boxed\{', re.IGNORECASE)
    # What may trail the closing brace of a boxed wrapper: spaces and an optional '$'.
    _BOXED_TAIL_RE = re.compile(r'\s*\$?')
    # Leading lead-in words. Word boundaries keep "So" from eating "Solution";
    # the outer '+' strips stacked lead-ins such as "Therefore, the answer is".
    _PREFIX_RE = re.compile(
        r'^(?:(?:Final Answer:|Answer:|The answer is\b|Therefore\b,?|Thus\b,?|Hence\b,?|So\b,?|∴)\s*)+',
        re.IGNORECASE,
    )
    # Markdown italics: the asterisks must hug the text and must not touch an
    # operand, so products such as "2*3*4" are left alone.
    _ITALIC_RE = re.compile(r'(?<![\w)])\*(?=\S)([^*]+?)(?<=\S)\*(?![\w(])')
    # Candidates that end in a dangling connective are sentence fragments, not answers.
    _DANGLING_END_RE = re.compile(r'\b(?:therefore|thus|hence|so|we get|we have)$', re.IGNORECASE)

    @staticmethod
    def _matching_brace(text: str, start: int) -> int:
        """
        Return the index of the '}' that closes a '{' located just before `start`,
        or -1 when the braces are unbalanced. Backslash-escaped characters
        (e.g. '\\{' and '\\}') are skipped and do not affect nesting depth.
        """
        depth = 1
        i = start
        n = len(text)
        while i < n:
            ch = text[i]
            if ch == '\\':
                i += 2  # skip the backslash and the character it escapes
                continue
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    return i
            i += 1
        return -1

    @staticmethod
    def _iter_boxed(text: str):
        """
        Yield (span_start, span_end, inner) for every brace-balanced boxed
        wrapper in `text`, in order of appearance. The span covers the optional
        surrounding '$' and whitespace; `inner` is the content between the braces.
        Wrappers without a matching closing brace are skipped.
        """
        cls = EnhancedAnswerExtractor
        pos = 0
        while True:
            opener = cls._BOXED_OPEN_RE.search(text, pos)
            if opener is None:
                return
            close = cls._matching_brace(text, opener.end())
            if close < 0:
                pos = opener.end()
                continue
            span_end = cls._BOXED_TAIL_RE.match(text, close + 1).end()
            yield opener.start(), span_end, text[opener.end():close]
            pos = span_end

    @staticmethod
    def _unwrap_boxed(text: str) -> str:
        """Replace every balanced boxed wrapper in `text` with its inner content."""
        pieces = []
        cursor = 0
        for start, end, inner in EnhancedAnswerExtractor._iter_boxed(text):
            pieces.append(text[cursor:start])
            pieces.append(inner)
            cursor = end
        pieces.append(text[cursor:])
        return ''.join(pieces)

    @staticmethod
    def _tagged_answers(text: str) -> list:
        """Return the cleaned, valid contents of all <final>...</final> tags in `text`."""
        cls = EnhancedAnswerExtractor
        found = []
        for raw in cls._FINAL_TAG_RE.findall(text):
            candidate = cls._clean_answer(raw)
            if cls._is_valid_answer(candidate):
                found.append(candidate)
        return found

    @staticmethod
    def _clean_answer(answer: str) -> str:
        """
        Strip presentation noise from a candidate answer.

        Removes surrounding '$'/'$$', lead-in words ("Final Answer:", "Therefore", ...),
        boxed wrappers (nested braces are handled), markdown bold/italic markers,
        trailing punctuation and trailing filler words, and rewrites simple
        \\frac{a}{b} and \\sqrt{a} as (a)/(b) and √(a). Returns "" for empty input.
        """
        if not answer:
            return ""
        cls = EnhancedAnswerExtractor

        # Trim and collapse whitespace first so the anchored patterns below see one line.
        a = re.sub(r'\s+', ' ', answer.strip())

        # remove outer $$...$$, then an outer $...$, then any stray edge '$' / spaces
        a = re.sub(r'^\$\$(.*)\$\$$', r'\1', a, flags=re.DOTALL)
        a = re.sub(r'^\$(.*)\$$', r'\1', a, flags=re.DOTALL)
        a = a.strip('$ ')

        # Lead-ins are removed after the '$' stripping to catch "$Final Answer: ...$".
        a = cls._PREFIX_RE.sub('', a)

        # e.g. $$\boxed{...}$$, $\boxed{...}$, \boxed{\frac{1}{2}}
        a = cls._unwrap_boxed(a)

        # convert common LaTeX to readable forms
        a = re.sub(r'\\frac\{([^}]*)\}\{([^}]*)\}', r'(\1)/(\2)', a)
        a = re.sub(r'\\sqrt\{([^}]*)\}', r'√(\1)', a)

        # remove bold/italic wrappers
        a = re.sub(r'\*\*([^*]+)\*\*', r'\1', a)
        a = cls._ITALIC_RE.sub(r'\1', a)

        # A lead-in that was hidden behind markdown ("**Final Answer:** 5") is visible now.
        a = cls._PREFIX_RE.sub('', a)

        # collapse multiple spaces again (in case replacements introduced them)
        a = re.sub(r'\s+', ' ', a).strip()

        # trim trailing punctuation
        a = a.rstrip(' \t\n.,;:')

        # remove trailing words like "proved" or "the answer"
        a = re.sub(r'\b(proved|completed|finished|the answer)\b[.\s]*$', '', a, flags=re.IGNORECASE).strip()

        return a

    @staticmethod
    def _is_valid_answer(answer: str) -> bool:
        """
        Return True when `answer` looks like a usable final answer: non-empty,
        not pure punctuation, contains an ASCII letter, digit or backslash,
        at most 1000 characters, and does not end in a dangling connective.
        """
        if not answer:
            return False
        if re.match(r'^[\W_]+$', answer):
            return False
        if not re.search(r'[0-9A-Za-z\\]', answer):
            return False
        if len(answer) > 1000:
            return False
        if EnhancedAnswerExtractor._DANGLING_END_RE.search(answer.strip()):
            return False
        return True

    @staticmethod
    def extract_final_answer_simple(text: str) -> str:
        """
        Extract a final answer from the closing lines of `text`, falling back
        to pattern-based extraction. Returns "" when nothing valid is found.
        """
        if not text:
            return ""
        cls = EnhancedAnswerExtractor

        # Non-empty, stripped lines of the text
        lines = [line.strip() for line in text.strip().split('\n') if line.strip()]

        # Strategy 1: last two lines combined (covers "Final Answer:" followed by the value)
        if len(lines) >= 2:
            cleaned = cls._clean_answer(' '.join(lines[-2:]))
            if cls._is_valid_answer(cleaned):
                return cleaned

        # Strategy 2: last line only
        if lines:
            cleaned = cls._clean_answer(lines[-1])
            if cls._is_valid_answer(cleaned):
                return cleaned

        # Strategy 3: last three lines, accepted only when the result stays short
        if len(lines) >= 3:
            cleaned = cls._clean_answer(' '.join(lines[-3:]))
            if cls._is_valid_answer(cleaned) and len(cleaned) < 500:
                return cleaned

        # Strategy 4: pattern-based extraction over the whole text
        return cls._extract_with_patterns(text)

    @staticmethod
    def _extract_with_patterns(text: str) -> str:
        """
        Pattern-based fallback: <final> tags, then "Final Answer:" / "Therefore" /
        "Answer" style phrases (last occurrence of each), then boxed expressions
        (first valid one). Returns "" when nothing valid is found.
        """
        cls = EnhancedAnswerExtractor

        # Check for <final> tags
        tagged = cls._tagged_answers(text)
        if tagged:
            return tagged[0]

        # Try common answer patterns
        patterns = [
            r'\*\*Final Answer:\*\*\s*(.+?)(?:\n|$)',
            r'Final Answer:\s*(.+?)(?:\n|$)',
            r'Therefore[,:\s]*(.+?)(?:\.|$|\n)',
            r'Hence[,:\s]*(.+?)(?:\.|$|\n)',
            r'Thus[,:\s]*(.+?)(?:\.|$|\n)',
            r'Answer[:\s]*(.+?)(?:\n|$)',
            r'∴\s*(.+?)(?:\.|$|\n)',
        ]
        for pat in patterns:
            matches = re.findall(pat, text, re.MULTILINE | re.DOTALL | re.IGNORECASE)
            if matches:
                cleaned = cls._clean_answer(matches[-1].strip())
                if cls._is_valid_answer(cleaned):
                    return cleaned

        # Try boxed math expressions; brace matching keeps nested LaTeX intact.
        for _, _, inner in cls._iter_boxed(text):
            cand = cls._clean_answer(inner)
            if cls._is_valid_answer(cand):
                return cand

        return ""

    @staticmethod
    def extract_all_final_answers(generated_solution: str) -> list:
        """
        Return the cleaned answers found in all <final>...</final> tags.
        When no tag yields a valid answer, fall back to the single simplified
        extraction and return it as a one-element list; return [] when nothing is found.
        """
        if not generated_solution:
            return []
        cls = EnhancedAnswerExtractor

        tagged = cls._tagged_answers(generated_solution)
        if tagged:
            return tagged

        simple_answer = cls.extract_final_answer_simple(generated_solution)
        return [simple_answer] if simple_answer else []

    @staticmethod
    def extract_final_answer(generated_solution: str) -> str:
        """
        Return the final answer as a single string.
        One valid <final> tag -> its cleaned content; several -> a JSON array
        string of the cleaned contents; none -> the simplified extraction.
        """
        if not generated_solution:
            return ""
        cls = EnhancedAnswerExtractor

        tagged = cls._tagged_answers(generated_solution)
        if not tagged:
            # No tags, or none with usable content: use the simplified extraction.
            return cls.extract_final_answer_simple(generated_solution)
        if len(tagged) == 1:
            return tagged[0]
        try:
            return json.dumps(tagged, ensure_ascii=False)
        except (TypeError, ValueError):
            return " ||| ".join(tagged)

# -------------------------
# Ollama-based English Math Solver (ZERO-SHOT)
# -------------------------
class OllamaZeroShotMathSolver:
    """
    Single-pass, zero-shot math solver that talks to an Ollama chat model.

    The prompt asks for a concise answer without chain-of-thought and for a
    closing <final>...</final> tag; the reply is then handed to
    EnhancedAnswerExtractor to pull out the final answer.
    """

    def __init__(self, model_name="qwen3:8b"):
        """Store the Ollama model reference (e.g. "qwen3:8b") and create the client."""
        self.model_name = model_name
        self.client = self._load_client()

    def _load_client(self):
        """Print which model is in use and return a new ollama.Client()."""
        print(f"Initializing Ollama client for model: {self.model_name}")
        return ollama.Client()

    def cleanup(self):
        """Drop the client attribute when it is present."""
        if hasattr(self, 'client'):
            del self.client

    def _get_format_instructions(self, answer_type):
        """
        Build the answer-formatting section of the prompt for `answer_type`.
        Missing or non-canonical types are treated as "symbolic".
        """
        kind = (answer_type or "symbolic").strip().lower()
        if kind not in _CANONICAL_TYPES:
            kind = "symbolic"

        preamble = (
            "CRITICAL ANSWER FORMATTING REQUIREMENTS (ZERO-SHOT):\n"
            "You MUST NOT provide chain-of-thought or step-by-step internal reasoning. "
            "Do NOT reveal private chain-of-thought. Provide a concise answer only. "
            "If a very short justification is necessary, include a single-line 'Explanation:' "
            "with at most one sentence. Always end with a machine-readable final tag <final>...</final> "
            "containing only the concise final answer (no extra reasoning inside the tag).\n"
        )

        # Type-specific tail appended to the shared preamble.
        tails = {
            "proof": (
                "FOR PROOFS (zero-shot):\n"
                "- Provide a concise conclusion or a one-sentence proof sketch labeled 'Conclusion:' or 'Proof sketch:' "
                "if needed (one sentence only). Do NOT provide a full step-by-step proof or chain-of-thought.\n"
                "Format:\n"
                "Final Answer:\n"
                "[Concise English conclusion or one-sentence sketch]\n\n"
                "<final>[Concise English conclusion]</final>\n"
            ),
            "numerical": (
                "FOR NUMERICAL RESULTS:\n"
                "- Provide the numeric result in exact form if available (fractions/radicals). Otherwise provide a decimal rounded to 4 decimal places.\n"
                "Format:\n"
                "Final Answer:\n"
                "[Numeric result]\n\n"
                "<final>[Numeric result]</final>\n"
            ),
            "symbolic": (
                "FOR SYMBOLIC RESULTS:\n"
                "- Provide the final symbolic expression (LaTeX is allowed) in a concise form.\n"
                "Format:\n"
                "Final Answer:\n"
                "[Final symbolic expression]\n\n"
                "<final>[LaTeX expression or concise symbolic expression]</final>\n"
            ),
        }
        # Anything that is neither proof nor numerical gets the symbolic tail.
        return preamble + tails.get(kind, tails["symbolic"])

    def _create_prompt(self, question, answer_type="General"):
        """Return the full prompt: role line, the problem, and the formatting rules."""
        format_instructions = self._get_format_instructions(answer_type)
        return f"""You are an expert mathematician. Provide a concise answer to the problem below.
DO NOT provide chain-of-thought, step-by-step reasoning, or internal deliberation. If you include any justification, it must be a single short sentence preceded by 'Explanation:'.

MATHEMATICAL PROBLEM:
{question}

{format_instructions}

Begin your concise answer now:
"""

    @staticmethod
    def _reply_text(resp):
        """
        Pull the reply text out of a chat response, whichever shape it has:
        {'message': {'content': ...}}, {'choices': [...]}, an object exposing
        .message.content, or anything else (stringified as a last resort).
        """
        if not isinstance(resp, dict):
            try:
                return resp.message.content
            except Exception:
                return str(resp)

        if 'message' in resp and isinstance(resp['message'], dict) and 'content' in resp['message']:
            return resp['message']['content']

        if 'choices' in resp and isinstance(resp['choices'], (list, tuple)) and resp['choices']:
            first = resp['choices'][0]
            if isinstance(first, dict) and 'message' in first and isinstance(first['message'], dict):
                return first['message'].get('content', '')
            return str(first)

        return str(resp)

    def _generate_once(self, prompt_text: str, enable_thinking: bool = False) -> str:
        """
        Send one user message to the model (no temperature / max_tokens) and
        return the stripped reply text. Falls back to older call signatures
        when the client rejects the keyword form with a TypeError.
        """
        conversation = [{"role": "user", "content": prompt_text}]

        try:
            reply = self.client.chat(model=self.model_name, messages=conversation, think=enable_thinking)
        except TypeError:
            # Some client versions have different signatures
            try:
                reply = self.client.chat(self.model_name, messages=conversation, think=enable_thinking)
            except Exception:
                reply = self.client.chat(self.model_name, conversation)

        text = self._reply_text(reply)
        if isinstance(text, bytes):
            text = text.decode('utf-8', errors='ignore')
        return (text or "").strip()

    def solve_problem(self, question, answer_type="symbolic"):
        """
        Solve `question` in a single zero-shot pass.

        Returns a dict with 'thinking_content' and 'final_tag_output' (both
        always empty in this mode), 'generated_answer' (the raw reply) and
        'extracted_final_answer'.
        """
        reply = self._generate_once(self._create_prompt(question, answer_type), enable_thinking=False)
        return {
            "thinking_content": "",      # no thinking parsing in zero-shot mode
            "generated_answer": reply,
            "final_tag_output": "",      # no second pass
            "extracted_final_answer": EnhancedAnswerExtractor.extract_final_answer(reply),
        }

# -------------------------
# Dataset Processor
# -------------------------
class DatasetProcessor:
    """
    Runs a solver over a JSONL dataset and writes results, periodic checkpoints,
    failed-extraction dumps and a summary file to an output folder.
    """

    def __init__(self, solver: "OllamaZeroShotMathSolver", failed_folder=None):
        """
        solver: object exposing solve_problem(question, answer_type=...) -> dict.
        failed_folder: directory for failed-extraction dumps
                       (default "failed_extractions"); created if missing.
        """
        self.solver = solver
        self.extractor = EnhancedAnswerExtractor()
        self.failed_folder = failed_folder or "failed_extractions"
        os.makedirs(self.failed_folder, exist_ok=True)

    @staticmethod
    def _resolve_end_idx(start_idx, end_idx, total):
        """
        Return the exclusive end index that will actually be processed:
        `end_idx` limited to `total` (None means "to the end") and never
        below `start_idx`, so an empty range reports zero problems.
        """
        if end_idx is None or end_idx > total:
            end_idx = total
        return max(start_idx, end_idx)

    def process_dataset(self, dataset_path, output_base_path, start_idx=0, end_idx=None,
                        folder_name=None, create_timestamped_folder=True):
        """
        Solve problems [start_idx, end_idx) of the JSONL file at dataset_path.

        end_idx is limited to the dataset size, so the progress text, folder
        and file names and the summary describe the range that is really processed.
        A checkpoint is written after every 10 collected entries.

        Returns (results, output_folder): the list of result/error entries and
        the folder that received the output files.
        """
        dataset = self._load_dataset(dataset_path)
        end_idx = self._resolve_end_idx(start_idx, end_idx, len(dataset))

        output_folder = self._create_output_folder(output_base_path, folder_name, start_idx, end_idx, create_timestamped_folder)
        results = []

        print(f"Processing problems {start_idx} to {end_idx-1} ({end_idx-start_idx} total)")
        print(f"Output will be saved in: {output_folder}")

        for idx in tqdm(range(start_idx, end_idx)):
            problem = dataset[idx]
            entry = None
            try:
                entry = self._process_single_problem(idx, problem)
                self._print_progress(idx, entry)
            except Exception as e:
                print(f"Error processing problem {idx+1}: {str(e)}")
                # Keep a solved entry if only the progress printing failed.
                if entry is None:
                    entry = self._create_error_entry(idx, problem, str(e))
            results.append(entry)

            # Checkpoint on every 10th entry, whether it succeeded or failed.
            if len(results) % 10 == 0:
                try:
                    self._save_intermediate_results(results, output_folder, len(results))
                except OSError as e:
                    # A failed checkpoint must not abort a long run.
                    print(f"Could not save intermediate results: {e}")

        self._save_final_results(results, output_folder, start_idx, end_idx)
        self._create_summary_file(results, output_folder, dataset_path, start_idx, end_idx)
        return results, output_folder

    def _create_output_folder(self, base_path, folder_name, start_idx, end_idx, add_timestamp):
        """
        Create (if needed) and return the output folder under base_path.
        Default name: results_<start>_to_<end-1>, optionally suffixed with a timestamp.
        """
        if folder_name is None:
            folder_name = f"results_{start_idx}_to_{end_idx-1}"
        if add_timestamp:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            folder_name = f"{folder_name}_{timestamp}"
        output_folder = os.path.join(base_path, folder_name)
        os.makedirs(output_folder, exist_ok=True)
        return output_folder

    def _load_dataset(self, dataset_path):
        """Read a JSONL file into a list of dicts, skipping blank lines."""
        with open(dataset_path, 'r', encoding='utf-8') as f:
            return [json.loads(line) for line in f if line.strip()]

    @staticmethod
    def _collect_final_answers(generated_answer, thinking_content, final_tag_output):
        """
        Return (single, answers): the extracted final answer as one string
        (a JSON array string when there are several) and as a list.
        Sources are tried in order: the generated answer, the final-tag output,
        then all three texts joined together.
        """
        all_finals = EnhancedAnswerExtractor.extract_all_final_answers(generated_answer)
        if not all_finals and final_tag_output:
            all_finals = EnhancedAnswerExtractor.extract_all_final_answers(final_tag_output)

        if all_finals:
            if len(all_finals) == 1:
                return all_finals[0], all_finals
            # store a machine-readable concatenation: JSON array string
            try:
                return json.dumps(all_finals, ensure_ascii=False), all_finals
            except (TypeError, ValueError):
                return " ||| ".join(all_finals), all_finals

        single = EnhancedAnswerExtractor.extract_final_answer(generated_answer)
        if not single:
            # try whole combined text (thinking + generated + final_tag)
            combined = "\n".join([thinking_content or "", generated_answer or "", final_tag_output or ""])
            single = EnhancedAnswerExtractor.extract_final_answer(combined)
        if single:
            return single, [single]
        return "", []

    def _process_single_problem(self, idx, problem):
        """
        Solve one dataset record and return its result entry (a dict).
        When no final answer can be extracted, the raw material is also dumped
        to a JSON file in self.failed_folder for inspection.
        """
        language = problem.get("Language", "")
        chapter_num = problem.get("Chapter Number", "")
        example_num = problem.get("Example Number", "")
        question = problem.get("Question", "")
        exact_answer = problem.get("Exact Answer", "")
        raw_answer_type = problem.get("Answer Type", "") or ""

        # Normalize/infer canonical answer type: 'symbolic', 'numerical', 'proof'
        canonical_type = normalize_answer_type(raw_answer_type, question_text=question, exact_answer=exact_answer)

        # A plain digit in the expected answer forces 'numerical', unless the
        # answer carries LaTeX structure (\frac, \sqrt, \boxed).
        if exact_answer and re.search(r'\d', str(exact_answer)):
            if not re.search(r'\\frac|\\sqrt|\\boxed', str(exact_answer)):
                canonical_type = "numerical"

        print(f"\nProcessing Problem {idx+1}: Chapter {chapter_num}, Example {example_num}")
        print(f"Raw Answer Type: '{raw_answer_type}'  --> canonical: '{canonical_type}'")

        # Generate solution (use canonical_type) -- zero-shot, single pass
        solution_result = self.solver.solve_problem(question, answer_type=canonical_type)
        generated_answer = solution_result.get('generated_answer', '')
        thinking_content = solution_result.get('thinking_content', '')
        final_tag_output = solution_result.get('final_tag_output', '')

        extracted_final_answer, extracted_final_answers = self._collect_final_answers(
            generated_answer, thinking_content, final_tag_output
        )

        # If still empty, save a failed extraction example for inspection
        if not extracted_final_answer:
            fname = f"failed_{idx}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            fpath = os.path.join(self.failed_folder, fname)
            with open(fpath, 'w', encoding='utf-8') as f:
                json.dump({
                    "index": idx,
                    "question": question,
                    "generated_answer": generated_answer,
                    "thinking_content": thinking_content,
                    "final_tag_output": final_tag_output,
                    "exact_answer": exact_answer,
                    "canonical_type": canonical_type,
                    "extracted_final_answer": extracted_final_answer,
                    "extracted_final_answers": extracted_final_answers
                }, f, ensure_ascii=False, indent=2)
            print(f"Saved failed extraction example to {fpath}")

        return {
            "problem_index": idx,
            "language": language,
            "chapter_number": chapter_num,
            "example_number": example_num,
            "question": question,
            "generated_answer": generated_answer,
            "thinking_content": thinking_content,
            "final_tag_output": final_tag_output,
            "extracted_final_answer": extracted_final_answer,       # string (or JSON array string)
            "extracted_final_answers": extracted_final_answers,     # list (empty / single / many)
            "exact_answer": exact_answer,
            "raw_answer_type": raw_answer_type,
            "canonical_answer_type": canonical_type,
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _create_error_entry(self, idx, problem, error_msg):
        """Return a result entry for a problem that failed; the answer is 'ERROR: <msg>'."""
        return {
            "problem_index": idx,
            "language": problem.get("Language", ""),
            "chapter_number": problem.get("Chapter Number", ""),
            "example_number": problem.get("Example Number", ""),
            "question": problem.get("Question", ""),
            "generated_answer": f"ERROR: {error_msg}",
            "thinking_content": "",
            "final_tag_output": "",
            "extracted_final_answer": "",
            "extracted_final_answers": [],
            "exact_answer": problem.get("Exact Answer", ""),
            "raw_answer_type": problem.get("Answer Type", ""),
            "canonical_answer_type": "",
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _print_progress(self, idx, result_entry):
        """Print the answer length, the extracted answer(s) and the expected answer."""
        print(f"Generated answer length: {len(result_entry['generated_answer']) if result_entry['generated_answer'] else 0}")
        print(f"Extracted final answer: '{result_entry['extracted_final_answer']}'")
        print(f"Extracted final answers (list): {result_entry.get('extracted_final_answers', [])}")
        print(f"Expected answer: '{result_entry['exact_answer']}'")

    def _save_intermediate_results(self, results, output_folder, count):
        """Write all entries collected so far to intermediate_results_<count>.json."""
        temp_filename = f'intermediate_results_{count}.json'
        temp_output_path = os.path.join(output_folder, temp_filename)
        with open(temp_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"Saved intermediate results to {temp_output_path}")

    def _save_final_results(self, results, output_folder, start_idx, end_idx):
        """Write all entries to final_results_<start>_to_<end-1>.json and return its path."""
        final_filename = f'final_results_{start_idx}_to_{end_idx-1}.json'
        final_output_path = os.path.join(output_folder, final_filename)
        with open(final_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"\nProcessing complete. Results saved to {final_output_path}")
        print(f"Total problems processed: {len(results)}")
        return final_output_path

    def _create_summary_file(self, results, output_folder, dataset_path, start_idx, end_idx):
        """
        Write processing_summary.json (run info plus statistics) into output_folder
        and print the extraction success rate. Entries whose generated answer
        starts with 'ERROR:' are counted as failed problems.
        """
        total = len(results)
        failed_problems = sum(1 for r in results if r['generated_answer'].startswith('ERROR:'))
        successful_extractions = sum(1 for r in results if r.get('extracted_final_answer', '').strip())
        summary_data = {
            "processing_info": {
                "dataset_path": dataset_path,
                "start_index": start_idx,
                "end_index": end_idx - 1,
                "total_processed": total,
                "processing_timestamp": datetime.now().isoformat(),
                "output_folder": output_folder
            },
            "statistics": {
                "successful_problems": total - failed_problems,
                "failed_problems": failed_problems,
                "successful_extractions": successful_extractions,
                "extraction_success_rate": f"{(successful_extractions/total*100):.1f}%" if results else "0%",
                "average_answer_length": sum(len(r['generated_answer']) for r in results) / total if results else 0,
                "chapters_processed": list(set(r['chapter_number'] for r in results if r['chapter_number'])),
                "raw_answer_types": list(set(r['raw_answer_type'] for r in results if r.get('raw_answer_type'))),
                "canonical_answer_types": list(set(r['canonical_answer_type'] for r in results if r.get('canonical_answer_type')))
            }
        }
        summary_path = os.path.join(output_folder, 'processing_summary.json')
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary_data, f, ensure_ascii=False, indent=2)
        print(f"Processing summary saved to {summary_path}")
        print(f"Answer extraction success rate: {summary_data['statistics']['extraction_success_rate']}")

# -------------------------
# Main (example usage)
# -------------------------
def main():
    """
    Example run: solve the English corpus with the zero-shot Ollama solver.

    Both paths are Kaggle defaults; change them to match your environment.
    """
    corpus_file = "/kaggle/input/nctb-dataset/English_Final_Corpus.jsonl"  # questions are in English
    work_dir = "/kaggle/working/"

    # The model named here must already be available to the Ollama daemon.
    processor = DatasetProcessor(
        OllamaZeroShotMathSolver(model_name="qwen3:8b"),
        failed_folder=os.path.join(work_dir, "failed_extractions"),
    )

    # Requested window: start_idx=0, end_idx=1445. Lower end_idx for a quick test.
    _, saved_to = processor.process_dataset(corpus_file, work_dir, start_idx=0, end_idx=1445)
    print("Done. Results saved to:", saved_to)

# Run the example only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


# **French**

In [None]:
import json
import os
import re
from datetime import datetime
from tqdm import tqdm

# Ollama client
try:
    import ollama
except Exception as e:
    raise ImportError("The 'ollama' package is required. Install it and make sure the Ollama daemon is running.") from e

# -------------------------
# Answer type normalization
# -------------------------
# Canonical set
_CANONICAL_TYPES = {"symbolic", "numerical", "proof"}

# Noisy dataset labels (English + French, accented and unaccented) -> canonical type.
# Insertion order matters: the substring fallback in normalize_answer_type scans the
# keys in this order and returns the first one contained in the label.
_ANSWER_TYPE_MAP_SIMPLE = {
    # Proof variants
    "proof": "proof", "prove": "proof", "proove": "proof",
    "preuve": "proof", "prouver": "proof", "démontrer": "proof", "demonstration": "proof",
    "démonstration": "proof",

    # Numerical variants
    "numerical": "numerical", "numeric": "numerical", "number": "numerical", "calculation": "numerical",
    "numérique": "numerical", "numerique": "numerical", "nombre": "numerical", "calcul": "numerical",

    # Symbolic variants
    "symbolic": "symbolic", "symbol": "symbolic", "equation": "symbolic", "algebraic": "symbolic",
    "symbolique": "symbolic", "algébrique": "symbolic", "algebrique": "symbolic", "équation": "symbolic",
    "equations": "symbolic",
}


def normalize_answer_type(raw_label: str, question_text: str = "", exact_answer: str = "") -> str:
    """
    Normalize a dataset label to one of: 'symbolic', 'numerical', 'proof'.

    Resolution order:
      1. exact match of the cleaned label in _ANSWER_TYPE_MAP_SIMPLE;
      2. first map key that occurs as a substring of the cleaned label;
      3. keyword heuristics on the question / exact answer (English + French);
      4. fallback to 'symbolic'.

    Args:
        raw_label: the dataset's answer-type label (may be empty or None).
        question_text: the problem statement, used only by the heuristics.
        exact_answer: the reference answer, used only by the heuristics. Non-string
            values (e.g. a JSON number) are converted with str().

    Returns:
        One of 'symbolic', 'numerical', 'proof'.
    """
    def _as_text(value) -> str:
        # Dataset fields are not guaranteed to be strings; never call .lower() on a raw value.
        return "" if value is None else str(value)

    def _clean_label(lbl) -> str:
        s = _as_text(lbl).strip().lower()
        if not s:
            return ""
        # \w is Unicode-aware for str patterns, so accented letters survive.
        s = re.sub(r'[^\w\s-]', ' ', s)
        return re.sub(r'\s+', ' ', s).strip()

    s = _clean_label(raw_label)

    # direct mapping
    if s in _ANSWER_TYPE_MAP_SIMPLE:
        return _ANSWER_TYPE_MAP_SIMPLE[s]

    # partial matches (a known keyword appears somewhere inside the label)
    if s:
        for k, v in _ANSWER_TYPE_MAP_SIMPLE.items():
            if k in s:
                return v

    # heuristics using question or exact_answer (both English & French checks)
    q = _as_text(question_text).lower()
    a = _as_text(exact_answer).lower()

    # Proof indicators; French includes the imperative forms used in exercise statements.
    if re.search(r'\b(prove|show that|prove that|proof)\b', q) or \
       re.search(r'\b(prouver|prouvez|démontrer|démontrez|demontrer|demontrez|preuve|montrer que|montrez que)\b', q):
        return "proof"

    # logarithm related: English/French
    if re.search(r'\b(log|ln|logarithm|logarithme)\b', q) or re.search(r'\blog\b', a):
        # prefer numerical if exact answer contains digits
        if re.search(r'\d', a):
            return "numerical"
        return "symbolic"

    # set theory indicators: English / French
    if re.search(r'\b(set|subset|union|intersection|ensemble|sous-ensemble)\b', q):
        return "symbolic"

    # Units. Abbreviations only count when they follow a number: a bare \bl\b or \bm\b
    # also matches French elisions ("l'équation", "m'") and would label almost every
    # French question as numerical. Spelled-out unit words still count on their own.
    unit_pattern = (
        r'\d\s*(?:km|cm|kg)\b'
        r'|\d\s+[ml]\b'
        r'|\b(?:meter|mile|liter|mètre|metre|litre)s?\b'
    )
    if re.search(unit_pattern, q + " " + a):
        return "numerical"

    # equation-solving heuristics (English + French)
    if re.search(r'\b(?:eq|[eé]quations?)\b|solve|r[ée]soudre|= x|x\s*=', q):
        return "symbolic"

    # trig / geometry indicators. The "trigonom..." stem is matched as a prefix because a
    # trailing \b can never match in the middle of "trigonométrie" / "trigonométrique".
    if re.search(r'\b(?:trig|sin|cos|tan|geometry|triangle|circle|géométrie|cercle)\b|\btrigonom[ée]tr', q):
        if re.search(r'\d', a):
            return "numerical"
        return "symbolic"

    # numeric indicators (English + French) -> numerical
    if re.search(r'\d', q) or re.search(r'find the value|compute|calculate|evaluate', q) or \
       re.search(r'trouver la valeur|calculer|évaluer|evaluer', q):
        return "numerical"

    # If exact_answer looks numeric, prefer numerical (but treat LaTeX as symbolic)
    if re.search(r'[0-9]|\\frac|\\sqrt', a):
        if re.search(r'\\frac|\\sqrt|\{|\\', a):
            return "symbolic"
        return "numerical"

    # fallback
    return "symbolic"

# -------------------------
# EnhancedAnswerExtractor (adapted to behave like SimplifiedAnswerExtractor)
# and now handles French markers as well
# -------------------------
class EnhancedAnswerExtractor:
    """
    Heuristic extractor for the final answer of a model-generated solution.
    Recognizes English and French answer/conclusion markers.

    Public entry points:
      - extract_final_answer(text) -> str
      - extract_all_final_answers(generated_solution) -> list
      - extract_final_answer_simple(text) -> str
    Internal helpers: _clean_answer, _is_valid_answer, _extract_with_patterns,
    _boxed_spans, _unwrap_boxed.
    """

    # Lead-in phrases removed from the START of a candidate answer. Word-like entries end
    # with \b so that "So" does not eat the beginning of "Soit"/"Solution" and the
    # accent-less "Dou" does not eat the beginning of "Double".
    _PREFIX_RES = tuple(
        re.compile('^' + body, re.IGNORECASE)
        for body in (
            # English
            r'Final Answer:\s*', r'Answer:\s*', r'The answer is\b\s*',
            r'Therefore\b,?\s*', r'Thus\b,?\s*', r'Hence\b,?\s*', r'So\b,?\s*', r'∴\s*',
            # French
            r'Réponse finale\b[:\s]*', r'Reponse finale\b[:\s]*',
            r'Réponse\b[:\s]*', r'Reponse\b[:\s]*',
            r'La réponse est\b\s*', r'La reponse est\b\s*',
            r'Par conséquent\b,?\s*', r'Par consequent\b,?\s*',
            r'Donc\b,?\s*', r'Ainsi\b,?\s*',
            # "D'où" with straight or typographic apostrophe, plus the accent/apostrophe-less spellings
            r"D['\u2019]?o[uù]\b\s*",
        )
    )

    # Markdown emphasis. The markers must not touch a word character or another '*' on the
    # outside, otherwise arithmetic such as 2*3*4 or x**2 + y**2 would lose its operators.
    _BOLD_RE = re.compile(r'(?<![\w*])\*\*(?=\S)([^*]+?)(?<=\S)\*\*(?![\w*])')
    _ITALIC_RE = re.compile(r'(?<![\w*])\*(?=[^\s*])([^*]+?)(?<=\S)\*(?![\w*])')

    # Opening of a boxed wrapper, with zero to three leading backslashes.
    _BOXED_OPEN_RE = re.compile(r'\\{0,3}boxed\{', re.IGNORECASE)

    # A candidate that merely ends on a connective is an unfinished sentence, not an answer.
    _DANGLING_RE = re.compile(
        r'\b(?:therefore|thus|hence|so|we get|we have|donc|ainsi|par conséquent)$',
        re.IGNORECASE,
    )

    @staticmethod
    def _boxed_spans(text: str) -> list:
        """
        Locate every boxed{...} wrapper in *text*, honouring nested braces.

        Returns a list of (start, end, inner) tuples where text[start:end] is the whole
        wrapper (leading backslashes included) and inner is the content between the
        outermost braces. Wrappers whose braces never balance are skipped.
        """
        spans = []
        pos = 0
        while True:
            opener = EnhancedAnswerExtractor._BOXED_OPEN_RE.search(text, pos)
            if opener is None:
                break
            depth = 0
            close = -1
            # opener.end() - 1 is the index of the wrapper's own '{'
            for i in range(opener.end() - 1, len(text)):
                ch = text[i]
                if ch == '{':
                    depth += 1
                elif ch == '}':
                    depth -= 1
                    if depth == 0:
                        close = i
                        break
            if close < 0:
                pos = opener.end()
                continue
            spans.append((opener.start(), close + 1, text[opener.end():close]))
            pos = close + 1
        return spans

    @staticmethod
    def _unwrap_boxed(text: str) -> str:
        """Replace each boxed{...} wrapper by its content, dropping '$' that hug the wrapper."""
        spans = EnhancedAnswerExtractor._boxed_spans(text)
        if not spans:
            return text
        pieces = []
        cursor = 0
        for start, end, inner in spans:
            # one or two '$' directly before / after the wrapper belong to it
            pieces.append(re.sub(r'\${1,2}$', '', text[cursor:start]))
            pieces.append(inner)
            trailing = re.match(r'\${1,2}', text[end:])
            cursor = end + (trailing.end() if trailing else 0)
        pieces.append(text[cursor:])
        return ''.join(pieces)

    @staticmethod
    def _clean_answer(answer: str) -> str:
        """
        Normalize a raw candidate answer.

        Steps, in order: collapse whitespace, drop surrounding '$'/'$$', drop markdown
        emphasis, drop a leading answer/conclusion phrase, unwrap boxed{...}, rewrite
        simple frac/sqrt LaTeX to readable text, trim trailing punctuation and trailing
        filler words. Returns "" for empty input.
        """
        if not answer:
            return ""
        cls = EnhancedAnswerExtractor

        # trim and collapse whitespace
        a = re.sub(r'\s+', ' ', answer.strip())

        # remove outer $$...$$ or $...$ when the whole string is wrapped
        a = re.sub(r'^\$\$(.*)\$\$$', r'\1', a, flags=re.DOTALL)
        a = re.sub(r'^\$(.*)\$$', r'\1', a, flags=re.DOTALL)

        # strip standalone leading/trailing $ characters and spaces
        a = a.strip('$ ')

        # Emphasis goes before the prefixes so that "**Final Answer:** 42" exposes its prefix.
        a = cls._BOLD_RE.sub(r'\1', a)
        a = cls._ITALIC_RE.sub(r'\1', a)
        a = a.strip()

        # each lead-in phrase is removed at most once, in list order
        for prefix_re in cls._PREFIX_RES:
            a = prefix_re.sub('', a)

        # brace-aware unwrapping keeps nested LaTeX such as \frac{1}{2} intact
        a = cls._unwrap_boxed(a)

        # convert common LaTeX to readable forms
        a = re.sub(r'\\frac\{([^}]*)\}\{([^}]*)\}', r'(\1)/(\2)', a)
        a = re.sub(r'\\sqrt\{([^}]*)\}', r'√(\1)', a)

        # collapse multiple spaces again (in case replacements introduced them)
        a = re.sub(r'\s+', ' ', a).strip()

        # trim trailing punctuation
        a = a.rstrip(' \t\n.,;:')

        # remove trailing words like "proved" or "the answer" (English & French)
        a = re.sub(r'\b(proved|completed|finished|the answer|prouvé|prouve|la réponse)\b[.\s]*$', '', a,
                   flags=re.IGNORECASE).strip()

        return a

    @staticmethod
    def _is_valid_answer(answer: str) -> bool:
        """
        Return True when *answer* looks like a usable final answer: non-empty, at most
        1000 characters, contains a letter/digit in any script (or the symbols ∞ / ∅),
        and does not end on a dangling connective such as "donc" or "therefore".
        """
        if not answer:
            return False
        # Needs real content; str.isalnum is Unicode-aware, so "π" counts as a letter.
        if not any(ch.isalnum() or ch in '∞∅' for ch in answer):
            return False
        # length sanity
        if len(answer) > 1000:
            return False
        if EnhancedAnswerExtractor._DANGLING_RE.search(answer.strip()):
            return False
        return True

    @staticmethod
    def extract_final_answer_simple(text: str) -> str:
        """
        Primary method: take the answer from the last lines of *text*, falling back to
        pattern-based extraction.

        Tries, in order, the last two non-empty lines joined, the last line alone, then
        the last three lines joined (accepted only if shorter than 500 characters).
        Returns "" when nothing usable is found.
        """
        if not text:
            return ""

        lines = [line.strip() for line in text.strip().split('\n') if line.strip()]

        # (number of trailing lines, exclusive length cap or None)
        for window, max_len in ((2, None), (1, None), (3, 500)):
            if len(lines) < window:
                continue
            cleaned = EnhancedAnswerExtractor._clean_answer(' '.join(lines[-window:]))
            if not EnhancedAnswerExtractor._is_valid_answer(cleaned):
                continue
            if max_len is None or len(cleaned) < max_len:
                return cleaned

        return EnhancedAnswerExtractor._extract_with_patterns(text)

    @staticmethod
    def _extract_with_patterns(text: str) -> str:
        """
        Pattern-based extraction used as the fallback method.

        Order: <final> tags, labelled answers and conclusion sentences (English and
        French; the LAST occurrence of each pattern wins), then boxed{...} expressions
        (last one first). Returns "" when nothing valid is found.
        """
        cls = EnhancedAnswerExtractor

        # Check for <final> tags
        for m in re.findall(r'<final>(.*?)</final>', text, re.DOTALL | re.IGNORECASE):
            c = cls._clean_answer(m)
            if cls._is_valid_answer(c):
                return c

        line_end = r'(?:\n|$)'
        # A sentence ends at a period followed by whitespace/end of line, NOT at the
        # decimal point of "3.5".
        sentence_end = r'(?:\.(?=\s|$)|\n|$)'
        patterns = [
            r'\*\*Final Answer:\*\*\s*(.+?)' + line_end,
            r'Final Answer:\s*(.+?)' + line_end,
            r'Final Answer\s*[:\-]\s*(.+?)' + line_end,
            r'\*\*Réponse finale:\*\*\s*(.+?)' + line_end,
            r'Réponse finale[:\s]*(.+?)' + line_end,
            r'Reponse finale[:\s]*(.+?)' + line_end,
            r'Reponse[:\s]*(.+?)' + line_end,
            r'Réponse[:\s]*(.+?)' + line_end,
            r'\bTherefore\b[,:\s]*(.+?)' + sentence_end,
            r'\bHence\b[,:\s]*(.+?)' + sentence_end,
            r'\bThus\b[,:\s]*(.+?)' + sentence_end,
            r'∴\s*(.+?)' + sentence_end,
            r'\bPar conséquent\b[,:\s]*(.+?)' + sentence_end,
            r'\bPar consequent\b[,:\s]*(.+?)' + sentence_end,
            r'\bDonc\b[,:\s]*(.+?)' + sentence_end,
            r'\bAinsi\b[,:\s]*(.+?)' + sentence_end,
            r"\bD['\u2019]?o[uù]\b[,:\s]*(.+?)" + sentence_end,
            r'Answer[:\s]*(.+?)' + line_end,
            r'Result[:\s]*(.+?)' + line_end,
            r'Solution[:\s]*(.+?)' + line_end,
        ]

        for pat in patterns:
            matches = re.findall(pat, text, re.MULTILINE | re.DOTALL | re.IGNORECASE)
            if matches:
                cleaned = cls._clean_answer(matches[-1].strip())
                if cls._is_valid_answer(cleaned):
                    return cleaned

        # Boxed math expressions, last one first (consistent with the patterns above).
        for _start, _end, inner in reversed(cls._boxed_spans(text)):
            cand = cls._clean_answer(inner)
            if cls._is_valid_answer(cand):
                return cand

        return ""

    @staticmethod
    def extract_all_final_answers(generated_solution: str) -> list:
        """
        Return the cleaned, valid contents of every <final>...</final> tag.
        If there is none, fall back to extract_final_answer_simple and return its result
        as a one-element list, or [] when that finds nothing either.
        """
        if not generated_solution:
            return []

        # Find all <final>...</final> (non-greedy)
        raw_matches = re.findall(r'<final>(.*?)</final>', generated_solution, re.DOTALL | re.IGNORECASE)
        candidates = (EnhancedAnswerExtractor._clean_answer(m) for m in raw_matches)
        cleaned = [c for c in candidates if EnhancedAnswerExtractor._is_valid_answer(c)]
        if cleaned:
            return cleaned

        simple_answer = EnhancedAnswerExtractor.extract_final_answer_simple(generated_solution)
        return [simple_answer] if simple_answer else []

    @staticmethod
    def extract_final_answer(generated_solution: str) -> str:
        """
        Backwards-compatible single-string extractor.

        Returns "" when nothing is found, the answer itself when exactly one is found,
        and a JSON array string when several <final> tags yield valid answers.
        """
        answers = EnhancedAnswerExtractor.extract_all_final_answers(generated_solution)
        if not answers:
            return ""
        if len(answers) == 1:
            return answers[0]
        return json.dumps(answers, ensure_ascii=False)

# -------------------------
# Ollama-based Math Solver (ZERO-SHOT) — PROMPTS IN ENGLISH (but problem is French)
# -------------------------
class OllamaZeroShotMathSolver:
    """
    Zero-shot math solver that talks to the Ollama daemon through the Python client.

    Prompts/instructions are written in English, but the model is told that the problem
    text is in French and that the concise final answer must be given in French.
    - Zero-shot prompt: forbids chain-of-thought and requests a concise <final> tag.
    - Single pass only.
    """

    def __init__(self, model_name="gpt-oss:20b"):
        """
        model_name: the Ollama model reference (e.g., "gpt-oss:20b")
        """
        self.model_name = model_name
        self.client = self._load_client()

    def _load_client(self):
        """Create and return an ollama.Client with default settings."""
        print(f"Initializing Ollama client for model: {self.model_name}")
        return ollama.Client()

    def cleanup(self):
        """Drop the reference to the client, if one was created."""
        if hasattr(self, 'client'):
            del self.client

    def _get_format_instructions(self, answer_type):
        """
        Build the formatting instructions (written in English) for *answer_type*.

        answer_type is lower-cased and stripped; anything outside _CANONICAL_TYPES
        (including None/empty) is treated as 'symbolic'. The optional justification label
        is 'Explication:' here as well, matching the wording used by _create_prompt.
        """
        t = (answer_type or "symbolic").strip().lower()
        if t not in _CANONICAL_TYPES:
            t = "symbolic"

        base = (
            "CRITICAL FORMATTING REQUIREMENTS (ZERO-SHOT):\n"
            "- The math problem you will receive is written in FRENCH. Provide your concise FINAL ANSWER in FRENCH.\n"
            "- DO NOT provide chain-of-thought or step-by-step internal reasoning. Do NOT reveal private chain-of-thought.\n"
            "- If a very short justification is necessary, include a single-line 'Explication:' with at most one sentence (in French).\n"
            "- Always end with a machine-readable final tag <final>...</final> containing ONLY the concise final answer in FRENCH (no extra reasoning inside the tag).\n"
        )

        if t == "proof":
            return base + (
                "FOR PROOFS (zero-shot):\n"
                "- Provide a concise conclusion or a one-sentence proof sketch (in FRENCH) labeled 'Conclusion:' or 'Esquisse de preuve:' if needed. Do NOT provide a full step-by-step proof.\n"
                "Format example (English instructions):\n"
                "Final Answer (in French):\n"
                "[Concise French conclusion or one-sentence sketch]\n\n"
                "<final>[Concise French conclusion]</final>\n"
            )
        if t == "numerical":
            return base + (
                "FOR NUMERICAL RESULTS:\n"
                "- Provide the numeric result in exact form if available (fractions/radicals). Otherwise provide a decimal rounded to 4 decimal places.\n"
                "Format example (English instructions):\n"
                "Final Answer (in French):\n"
                "[Numeric result in French notation if needed]\n\n"
                "<final>[Numeric result]</final>\n"
            )
        # symbolic
        return base + (
            "FOR SYMBOLIC RESULTS:\n"
            "- Provide the final symbolic expression (LaTeX allowed) in a concise form. The expression may be LaTeX but the surrounding explanation should be in French if any.\n"
            "Format example (English instructions):\n"
            "Final Answer (in French):\n"
            "[Final symbolic expression]\n\n"
            "<final>[LaTeX expression or concise symbolic expression — in French if words are used]</final>\n"
        )

    def _create_prompt(self, question, answer_type="symbolic"):
        """Assemble the full user prompt: role line, the French problem, then the format rules."""
        format_instructions = self._get_format_instructions(answer_type)

        # The instruction itself stays in English; it states the problem language and the required answer language.
        prompt = f"""You are an expert mathematician. The following math problem is written in FRENCH.
Provide a concise final answer in FRENCH. DO NOT produce chain-of-thought or step-by-step internal reasoning.
If you include a justification, it must be one short sentence and labeled 'Explication:' (in French).

MATH PROBLEM (in French):
{question}

{format_instructions}

Begin your concise answer (in French) now:
"""
        return prompt

    @staticmethod
    def _response_text(resp) -> str:
        """
        Reduce a chat response to its stripped text content.

        Handles a dict with message.content, a dict with an OpenAI-style 'choices' list,
        and objects exposing .message.content. Any other shape is rendered with str().
        Byte content is decoded as UTF-8 (undecodable bytes ignored); a missing/None
        content yields "".
        """
        if isinstance(resp, dict):
            message = resp.get('message')
            choices = resp.get('choices')
            if isinstance(message, dict) and 'content' in message:
                text = message['content']
            elif isinstance(choices, (list, tuple)) and choices:
                first = choices[0]
                if isinstance(first, dict) and isinstance(first.get('message'), dict):
                    text = first['message'].get('content', '')
                else:
                    text = str(first)
            else:
                text = str(resp)
        else:
            # resp might be an object with .message.content
            try:
                text = resp.message.content
            except (AttributeError, TypeError):
                text = str(resp)

        if isinstance(text, bytes):
            text = text.decode('utf-8', errors='ignore')
        return (text or "").strip()

    def _generate_once(self, prompt_text: str, enable_thinking: bool = False) -> str:
        """
        Send *prompt_text* as a single user message and return the reply text.

        No temperature/max_tokens are passed. Only a TypeError (the client rejecting the
        call signature) triggers the alternative call forms; any other exception from
        the client propagates to the caller instead of being retried blindly.
        """
        messages = [{"role": "user", "content": prompt_text}]

        try:
            resp = self.client.chat(model=self.model_name, messages=messages, think=enable_thinking)
        except TypeError:
            # Some client versions have different signatures
            try:
                resp = self.client.chat(self.model_name, messages=messages, think=enable_thinking)
            except TypeError:
                # last resort: a client that does not know the 'think' argument
                resp = self.client.chat(self.model_name, messages)

        return self._response_text(resp)

    def solve_problem(self, question, answer_type="symbolic"):
        """
        Zero-shot single-pass solve via Ollama.

        Returns a dict with 'generated_answer' (the raw reply), 'extracted_final_answer'
        (from EnhancedAnswerExtractor.extract_final_answer) and the always-empty
        'thinking_content' / 'final_tag_output' (there is no thinking parse and no
        second pass in zero-shot mode).
        """
        prompt = self._create_prompt(question, answer_type)
        generated_answer = self._generate_once(prompt, enable_thinking=False)
        extracted_final_answer = EnhancedAnswerExtractor.extract_final_answer(generated_answer)

        return {
            "thinking_content": "",
            "generated_answer": generated_answer,
            "final_tag_output": "",
            "extracted_final_answer": extracted_final_answer
        }

# -------------------------
# Dataset Processor
# -------------------------
class DatasetProcessor:
    """
    Runs a solver over a JSONL dataset of math problems and writes the results.

    For every processed problem a result dict is collected; results are checkpointed
    every 10 problems, written to a final JSON file, and summarized in
    processing_summary.json. Generations from which no final answer could be extracted
    are additionally dumped into *failed_folder* for inspection.
    """

    def __init__(self, solver: "OllamaZeroShotMathSolver", failed_folder=None):
        """
        solver: object exposing solve_problem(question, answer_type=...) -> dict.
        failed_folder: directory for failed-extraction dumps (default "failed_extractions");
            created immediately if missing.
        """
        self.solver = solver
        self.extractor = EnhancedAnswerExtractor()
        self.failed_folder = failed_folder or "failed_extractions"
        os.makedirs(self.failed_folder, exist_ok=True)

    @staticmethod
    def _resolve_range(start_idx, end_idx, total):
        """
        Clamp the requested half-open range [start_idx, end_idx) to a dataset of *total*
        items. end_idx=None means "to the end". Returns (start, end) with
        0 <= start <= end and end <= max(total, start).
        """
        start = max(0, start_idx)
        end = total if end_idx is None else min(end_idx, total)
        return start, max(end, start)

    def process_dataset(self, dataset_path, output_base_path, start_idx=0, end_idx=None,
                        folder_name=None, create_timestamped_folder=True):
        """
        Solve problems [start_idx, end_idx) of the JSONL file at *dataset_path*.

        The range is clamped to the dataset size first, so the log output, the folder
        name, the result file name and the summary all describe the problems that were
        actually processed. A failure on one problem is recorded as an error entry and
        does not stop the run.

        Returns:
            (results, output_folder): the list of result dicts and the folder written to.
        """
        dataset = self._load_dataset(dataset_path)
        start_idx, end_idx = self._resolve_range(start_idx, end_idx, len(dataset))

        output_folder = self._create_output_folder(output_base_path, folder_name, start_idx, end_idx, create_timestamped_folder)
        results = []

        print(f"Processing problems {start_idx} to {end_idx-1} ({end_idx-start_idx} total)")
        print(f"Output will be saved in: {output_folder}")

        for idx in tqdm(range(start_idx, end_idx)):
            problem = dataset[idx]
            try:
                entry = self._process_single_problem(idx, problem)
                self._print_progress(idx, entry)
            except Exception as e:
                # Boundary handler: keep the batch going and record what went wrong.
                print(f"Error processing problem {idx+1}: {str(e)}")
                entry = self._create_error_entry(idx, problem, str(e))
            results.append(entry)
            # Checkpoint every 10 problems, whether or not the 10th one succeeded.
            if len(results) % 10 == 0:
                self._save_intermediate_results(results, output_folder, len(results))

        self._save_final_results(results, output_folder, start_idx, end_idx)
        self._create_summary_file(results, output_folder, dataset_path, start_idx, end_idx)
        return results, output_folder

    def _create_output_folder(self, base_path, folder_name, start_idx, end_idx, add_timestamp):
        """Create (if needed) and return the output folder, optionally suffixed with a timestamp."""
        if folder_name is None:
            folder_name = f"results_{start_idx}_to_{end_idx-1}"
        if add_timestamp:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            folder_name = f"{folder_name}_{timestamp}"
        output_folder = os.path.join(base_path, folder_name)
        os.makedirs(output_folder, exist_ok=True)
        return output_folder

    def _load_dataset(self, dataset_path):
        """Read a UTF-8 JSONL file into a list of objects, skipping blank lines."""
        with open(dataset_path, 'r', encoding='utf-8') as f:
            return [json.loads(line) for line in f if line.strip()]

    def _process_single_problem(self, idx, problem):
        """
        Solve one problem and build its result entry.

        Determines the canonical answer type, asks the solver, extracts the final
        answer(s), and writes a dump into self.failed_folder when nothing could be
        extracted. Returns the result dict.
        """
        language = problem.get("Language", "")
        chapter_num = problem.get("Chapter Number", "")
        example_num = problem.get("Example Number", "")
        question = problem.get("Question", "")
        exact_answer = problem.get("Exact Answer", "")
        raw_answer_type = problem.get("Answer Type", "") or ""

        # The dataset may store the reference answer as a JSON number; the text
        # heuristics below need a string.
        exact_text = "" if exact_answer is None else str(exact_answer)

        # Normalize/infer canonical answer type: 'symbolic', 'numerical', 'proof'
        canonical_type = normalize_answer_type(raw_answer_type, question_text=question, exact_answer=exact_text)

        # A reference answer with digits and no LaTeX markup overrides the label.
        # NOTE(review): this also overrides an explicit 'proof'/'symbolic' label — confirm that is intended.
        if re.search(r'\d', exact_text) and not re.search(r'\\frac|\\sqrt|\\boxed', exact_text):
            canonical_type = "numerical"

        print(f"\nProcessing Problem {idx+1}: Chapter {chapter_num}, Example {example_num}")
        print(f"Raw Answer Type: '{raw_answer_type}'  --> canonical: '{canonical_type}'")

        # Generate solution (use canonical_type) -- zero-shot, single pass
        solution_result = self.solver.solve_problem(question, answer_type=canonical_type)
        generated_answer = solution_result.get('generated_answer', '')
        thinking_content = solution_result.get('thinking_content', '')
        final_tag_output = solution_result.get('final_tag_output', '')

        # Keep both forms: a list of answers and a single string.
        # extract_all_final_answers already falls back to the single-answer heuristics,
        # so an empty list means the generated text yielded nothing at all.
        all_finals = EnhancedAnswerExtractor.extract_all_final_answers(generated_answer)
        if not all_finals and final_tag_output:
            all_finals = EnhancedAnswerExtractor.extract_all_final_answers(final_tag_output)
        if not all_finals and (thinking_content or final_tag_output):
            # last resort: everything the solver returned, concatenated
            combined = "\n".join([thinking_content or "", generated_answer or "", final_tag_output or ""])
            all_finals = EnhancedAnswerExtractor.extract_all_final_answers(combined)

        extracted_final_answers = all_finals
        if not all_finals:
            extracted_final_answer = ""
        elif len(all_finals) == 1:
            extracted_final_answer = all_finals[0]
        else:
            # several answers: store a machine-readable JSON array string
            extracted_final_answer = json.dumps(all_finals, ensure_ascii=False)

        # If still empty, save a failed extraction example for inspection
        if not extracted_final_answer:
            fname = f"failed_{idx}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            fpath = os.path.join(self.failed_folder, fname)
            with open(fpath, 'w', encoding='utf-8') as f:
                json.dump({
                    "index": idx,
                    "question": question,
                    "generated_answer": generated_answer,
                    "thinking_content": thinking_content,
                    "final_tag_output": final_tag_output,
                    "exact_answer": exact_answer,
                    "canonical_type": canonical_type,
                    "extracted_final_answer": extracted_final_answer,
                    "extracted_final_answers": extracted_final_answers
                }, f, ensure_ascii=False, indent=2)
            print(f"Saved failed extraction example to {fpath}")

        return {
            "problem_index": idx,
            "language": language,
            "chapter_number": chapter_num,
            "example_number": example_num,
            "question": question,
            "generated_answer": generated_answer,
            "thinking_content": thinking_content,
            "final_tag_output": final_tag_output,
            "extracted_final_answer": extracted_final_answer,       # string (or JSON array string)
            "extracted_final_answers": extracted_final_answers,     # list (empty / single / many)
            "exact_answer": exact_answer,
            "raw_answer_type": raw_answer_type,
            "canonical_answer_type": canonical_type,
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _create_error_entry(self, idx, problem, error_msg):
        """Build the result entry for a problem whose processing raised; the answer is 'ERROR: <msg>'."""
        return {
            "problem_index": idx,
            "language": problem.get("Language", ""),
            "chapter_number": problem.get("Chapter Number", ""),
            "example_number": problem.get("Example Number", ""),
            "question": problem.get("Question", ""),
            "generated_answer": f"ERROR: {error_msg}",
            "thinking_content": "",
            "final_tag_output": "",
            "extracted_final_answer": "",
            "extracted_final_answers": [],
            "exact_answer": problem.get("Exact Answer", ""),
            "raw_answer_type": problem.get("Answer Type", ""),
            "canonical_answer_type": "",
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _print_progress(self, idx, result_entry):
        """Print a short per-problem report (answer length, extracted vs expected answer)."""
        print(f"Generated answer length: {len(result_entry['generated_answer']) if result_entry['generated_answer'] else 0}")
        print(f"Extracted final answer: '{result_entry['extracted_final_answer']}'")
        print(f"Extracted final answers (list): {result_entry.get('extracted_final_answers', [])}")
        print(f"Expected answer: '{result_entry['exact_answer']}'")

    def _save_intermediate_results(self, results, output_folder, count):
        """Write all results collected so far to intermediate_results_<count>.json."""
        temp_filename = f'intermediate_results_{count}.json'
        temp_output_path = os.path.join(output_folder, temp_filename)
        with open(temp_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"Saved intermediate results to {temp_output_path}")

    def _save_final_results(self, results, output_folder, start_idx, end_idx):
        """Write the complete result list to final_results_<start>_to_<end-1>.json and return its path."""
        final_filename = f'final_results_{start_idx}_to_{end_idx-1}.json'
        final_output_path = os.path.join(output_folder, final_filename)
        with open(final_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"\nProcessing complete. Results saved to {final_output_path}")
        print(f"Total problems processed: {len(results)}")
        return final_output_path

    @staticmethod
    def _build_summary(results, output_folder, dataset_path, start_idx, end_idx):
        """
        Compute the summary dict for a run (no I/O).

        An entry counts as failed when its generated answer starts with 'ERROR:' and as a
        successful extraction when its extracted_final_answer is non-blank. The three
        value lists are de-duplicated and sorted by their string form, so repeated runs
        produce identical files.
        """
        total = len(results)
        failed = sum(1 for r in results if r['generated_answer'].startswith('ERROR:'))
        extracted = sum(1 for r in results if r.get('extracted_final_answer', '').strip())

        def _distinct(values):
            return sorted(set(values), key=str)

        return {
            "processing_info": {
                "dataset_path": dataset_path,
                "start_index": start_idx,
                "end_index": end_idx - 1,
                "total_processed": total,
                "processing_timestamp": datetime.now().isoformat(),
                "output_folder": output_folder
            },
            "statistics": {
                "successful_problems": total - failed,
                "failed_problems": failed,
                "successful_extractions": extracted,
                "extraction_success_rate": f"{(extracted/total*100):.1f}%" if results else "0%",
                "average_answer_length": sum(len(r['generated_answer']) for r in results) / total if results else 0,
                "chapters_processed": _distinct(r['chapter_number'] for r in results if r['chapter_number']),
                "raw_answer_types": _distinct(r['raw_answer_type'] for r in results if r.get('raw_answer_type')),
                "canonical_answer_types": _distinct(r['canonical_answer_type'] for r in results if r.get('canonical_answer_type'))
            }
        }

    def _create_summary_file(self, results, output_folder, dataset_path, start_idx, end_idx):
        """Write processing_summary.json into *output_folder* and print the extraction success rate."""
        summary_data = self._build_summary(results, output_folder, dataset_path, start_idx, end_idx)
        summary_path = os.path.join(output_folder, 'processing_summary.json')
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary_data, f, ensure_ascii=False, indent=2)
        print(f"Processing summary saved to {summary_path}")
        print(f"Answer extraction success rate: {summary_data['statistics']['extraction_success_rate']}")

# -------------------------
# Main (example usage)
# -------------------------
def main():
    """
    Example entry point: run the zero-shot solver over the French corpus.

    Paths and the model name are environment-specific; edit them before
    running. The dataset's "Question" fields are expected to be in French.
    """
    corpus_file = "/content/French_150_Corpus.jsonl"
    output_root = "/content/"
    failed_dir = os.path.join(output_root, "failed_extractions")

    # The named model must already be available to the local Ollama daemon.
    math_solver = OllamaZeroShotMathSolver(model_name="gpt-oss:20b")
    runner = DatasetProcessor(math_solver, failed_folder=failed_dir)

    # Index window to process; end_idx is exclusive. Lower it for a quick test.
    _, destination = runner.process_dataset(
        corpus_file,
        output_root,
        start_idx=0,
        end_idx=150,
    )
    print("Done. Results saved to:", destination)

# Run the example pipeline only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()

# **Kazakh**

In [None]:
import json
import os
import re
from datetime import datetime
from tqdm import tqdm

# Ollama client
try:
    import ollama
except Exception as e:
    raise ImportError("The 'ollama' package is required. Install it and make sure the Ollama daemon is running.") from e

# -------------------------
# Answer type normalization
# -------------------------
# Canonical set
_CANONICAL_TYPES = {"symbolic", "numerical", "proof"}

# Mapping from common noisy labels (English + Kazakh variants) to a canonical type.
# Insertion order matters: normalize_answer_type() also scans these keys as
# substrings of the cleaned label and returns the value of the first key found.
_ANSWER_TYPE_MAP_SIMPLE = {
    # Proof variants (English -> Kazakh)
    "proof": "proof", "prove": "proof", "proove": "proof",
    "дәлел": "proof", "дәлелдеу": "proof", "дәлелдену": "proof", "көрсету": "proof",

    # Numerical variants
    "numerical": "numerical", "numeric": "numerical", "number": "numerical", "calculation": "numerical",
    "сандық": "numerical", "сан": "numerical", "есептеу": "numerical", "мәнін": "numerical", "таб": "numerical",

    # Symbolic variants
    "symbolic": "symbolic", "symbol": "symbolic", "equation": "symbolic", "algebraic": "symbolic",
    "теңдеу": "symbolic", "символдық": "symbolic", "алгебралық": "symbolic", "теңдеулер": "symbolic",
}


def normalize_answer_type(raw_label: str, question_text: str = "", exact_answer: str = "") -> str:
    """
    Normalize a dataset label to one of: 'symbolic', 'numerical', 'proof'.

    Resolution order:
      1. exact match of the cleaned label in _ANSWER_TYPE_MAP_SIMPLE;
      2. first map key that occurs as a substring of the cleaned label;
      3. keyword heuristics over the question and exact answer (English and
         Kazakh), evaluated top to bottom -- the first rule that fires wins;
      4. fallback 'symbolic'.

    Args:
        raw_label: the dataset's answer-type label (may be empty or None).
        question_text: the problem statement, used only by the heuristics.
        exact_answer: the reference answer, used only by the heuristics.

    Non-string values (e.g. a JSON number stored as the exact answer) are
    converted with str() instead of raising AttributeError, so an answer of 0
    is still seen as the text "0".

    Returns:
        One of 'symbolic', 'numerical', 'proof'.
    """
    def _as_text(value) -> str:
        # None means "missing"; anything else is rendered as text.
        if value is None:
            return ""
        return value if isinstance(value, str) else str(value)

    # Helper to clean label
    def _clean_label(lbl) -> str:
        if not lbl:
            return ""
        s = _as_text(lbl).strip().lower()
        # keep unicode letters (Cyrillic, Latin accents) and digits and spaces
        s = re.sub(r'[^0-9\w\s\u0400-\u04FF\u0500-\u052F-]', ' ', s, flags=re.UNICODE)
        s = re.sub(r'\s+', ' ', s).strip()
        return s

    s = _clean_label(raw_label)

    # direct mapping
    if s in _ANSWER_TYPE_MAP_SIMPLE:
        return _ANSWER_TYPE_MAP_SIMPLE[s]

    # partial matches (allow English or Kazakh keywords appearing)
    for k, v in _ANSWER_TYPE_MAP_SIMPLE.items():
        if k in s:
            return v

    # heuristics using question or exact_answer (both English & Kazakh checks)
    q = _as_text(question_text).lower()
    a = _as_text(exact_answer).lower()

    # Proof indicators (English + Kazakh)
    if re.search(r'\b(prove|show that|prove that|proof)\b', q) or re.search(r'\b(дәлелдеу|дәлел|көрсету|көрсетіңіз|дәлелдеңіз)\b', q, flags=re.IGNORECASE):
        return "proof"

    # logarithm related: English/Kazakh
    if re.search(r'\b(log|ln|logarithm)\b', q) or re.search(r'\b(log|ln|логарифм)\b', q) or re.search(r'\blog\b', a):
        # prefer numerical if exact answer contains digits
        if re.search(r'\d', a):
            return "numerical"
        return "symbolic"

    # set theory indicators: English / Kazakh (use 'жиын' = set)
    if re.search(r'\b(set|subset|union|intersection)\b', q) or re.search(r'\b(жиын|асты жиын|қосынды|қиылысу|қисық)\b', q, flags=re.IGNORECASE):
        return "symbolic"

    # units (English + Kazakh), searched in question and answer together
    if re.search(r'\bmeter\b|\bm\b|\bcm\b|\bkg\b|\bliter\b|\bl\b|\bkm\b|\bmile\b', q + " " + a) or \
       re.search(r'\bметр\b|\bсм\b|\bкм\b|\bкг\b|\bлитр\b', q + " " + a, flags=re.IGNORECASE):
        return "numerical"

    # equation-solving heuristics (English + Kazakh)
    if re.search(r'\bequation\b|solve for|solve|= x|x\s*=', q) or re.search(r'\bтеңдеу\b|шешу|шешіңдер|шешіңіз|шешу\b', q, flags=re.IGNORECASE):
        return "symbolic"

    # trig / geometry indicators (English + Kazakh)
    if re.search(r'\b(trig|sin|cos|tan|geometry|triangle|circle)\b', q) or re.search(r'\b(триг|sin|cos|tan|геометрия|треугольник|шеңбер|үшбұрыш)\b', q, flags=re.IGNORECASE):
        if re.search(r'\d', a):
            return "numerical"
        return "symbolic"

    # numeric indicators (English + Kazakh) -> numerical
    # NOTE: any digit in the question triggers this rule.
    if re.search(r'\d', q) or re.search(r'find the value|compute|calculate|evaluate', q) or \
       re.search(r'мәнін таб|таб(ыңыз|ыңызшы)|есептеу|шығар', q, flags=re.IGNORECASE):
        return "numerical"

    # If exact_answer looks numeric, prefer numerical (but check for LaTeX)
    if re.search(r'[0-9]|\\frac|\\sqrt', a):
        if re.search(r'\\frac|\\sqrt|\{|\\', a):
            return "symbolic"
        return "numerical"

    # fallback
    return "symbolic"

# -------------------------
# EnhancedAnswerExtractor (adapted to behave like SimplifiedAnswerExtractor)
# and now handles Kazakh markers as well
# -------------------------
class EnhancedAnswerExtractor:
    """
    Extracts and cleans a model's final answer from generated text.

    Recognizes English and Kazakh answer/conclusion markers, explicit
    <final>...</final> tags and LaTeX \\boxed{...} wrappers.

    Public API (all static):
      - extract_final_answer(text) -> str
      - extract_all_final_answers(generated_solution) -> list
      - extract_final_answer_simple(text) -> str
    Internal helpers: _clean_answer, _is_valid_answer, _extract_with_patterns,
    _unwrap_boxed, _find_closing_brace, _collect_final_tags.
    """

    # Non-greedy so that several <final> tags in one text are found separately.
    _FINAL_TAG_RE = re.compile(r'<final>(.*?)</final>', re.DOTALL | re.IGNORECASE)

    # Leading markers stripped by _clean_answer (applied once each, in order).
    # Markers that do not end in an explicit ':' carry a \b so they only match
    # whole words: without it 'So' would eat the start of 'Solutions' and
    # 'Нәтиже' the stem of 'Нәтижесі'.
    _PREFIX_PATTERNS = (
        # English
        r'Final Answer:\s*', r'Answer:\s*', r'The answer is\b\s*',
        r'Therefore\b,?\s*', r'Thus\b,?\s*', r'Hence\b,?\s*', r'So\b,?\s*', r'∴\s*',

        # Kazakh (Cyrillic common markers)
        r'Жауап\b[:\s]*', r'Соңғы жауап\b[:\s]*', r'Қорытынды\b[:\s]*', r'Шешімі\b[:\s]*',
        r'Нәтиже\b[:\s]*', r'Сондықтан\b,?\s*', r'Демек\b,?\s*', r'Осындайша\b,?\s*',
        r'Осылайша\b,?\s*', r'Осыдан\b,?\s*',
    )

    @staticmethod
    def _find_closing_brace(text: str, start: int) -> int:
        """
        Return the index of the '}' that closes a '{' located just before
        `start`, counting nested braces; return -1 if it is never closed.
        """
        depth = 1
        for i in range(start, len(text)):
            ch = text[i]
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    return i
        return -1

    @staticmethod
    def _unwrap_boxed(text: str) -> str:
        """
        Replace every boxed{...} wrapper (0-3 leading backslashes, optional
        surrounding '$') by its content.

        Braces are matched by depth so nested groups such as
        \\boxed{\\frac{1}{2}} survive intact. An unbalanced wrapper is left
        untouched.
        """
        opener = re.compile(r'\$?\s*(?:\\){0,3}boxed\{', re.IGNORECASE)
        trailer = re.compile(r'\s*\$?')
        pieces = []
        pos = 0
        while True:
            m = opener.search(text, pos)
            if m is None:
                break
            close = EnhancedAnswerExtractor._find_closing_brace(text, m.end())
            if close == -1:
                break
            pieces.append(text[pos:m.start()])
            pieces.append(text[m.end():close])
            # trailer can match the empty string, so .match() never returns None.
            pos = trailer.match(text, close + 1).end()
        pieces.append(text[pos:])
        return ''.join(pieces)

    @staticmethod
    def _clean_answer(answer: str) -> str:
        """
        Normalize a raw answer fragment: collapse whitespace, drop math
        delimiters, leading answer markers, boxed wrappers and markdown
        emphasis, rewrite simple \\frac / \\sqrt, and trim trailing
        punctuation and filler words. Returns "" for empty input.
        """
        if not answer:
            return ""
        # Trim and collapse whitespace
        a = re.sub(r'\s+', ' ', answer.strip())

        # remove outer $$ if present (multiline)
        a = re.sub(r'^\$\$(.*)\$\$$', r'\1', a, flags=re.DOTALL)
        # remove surrounding single $ if the whole string is wrapped
        a = re.sub(r'^\$(.*)\$$', r'\1', a, flags=re.DOTALL)

        # strip standalone leading/trailing $ characters and spaces
        a = a.strip('$ ')

        # Remove common prefixes (after stripping $ to catch "$Final Answer: ...$")
        for prefix in EnhancedAnswerExtractor._PREFIX_PATTERNS:
            a = re.sub(f'^{prefix}', '', a, flags=re.IGNORECASE)

        # unwrap $$\boxed{...}$$, $\boxed{...}$, \boxed{...} (nesting-aware)
        a = EnhancedAnswerExtractor._unwrap_boxed(a)

        # convert common (non-nested) LaTeX to readable forms
        a = re.sub(r'\\frac\{([^}]*)\}\{([^}]*)\}', r'(\1)/(\2)', a)
        a = re.sub(r'\\sqrt\{([^}]*)\}', r'√(\1)', a)

        # remove bold wrappers
        a = re.sub(r'\*\*([^*]+)\*\*', r'\1', a)
        # remove italic wrappers; the lookarounds keep '*' used as a
        # multiplication sign between operands ("2*3*4") from being deleted
        a = re.sub(r'(?<![\w)])\*([^*\s][^*]*?)\*(?![\w(])', r'\1', a)

        # collapse multiple spaces again (in case replacements introduced them)
        a = re.sub(r'\s+', ' ', a).strip()

        # trim trailing punctuation
        a = a.rstrip(' \t\n.,;:')

        # remove trailing words like "proved" or "the answer" (English, French & Kazakh)
        a = re.sub(r'\b(proved|completed|finished|the answer|prouvé|prouve|la réponse|дәлелденді|дәлел|жауап)\b[.\s]*$', '', a, flags=re.IGNORECASE).strip()

        return a

    @staticmethod
    def _is_valid_answer(answer: str) -> bool:
        """
        Return True when `answer` is non-empty, contains a digit, a Latin or
        Cyrillic letter or a backslash, is at most 1000 characters long and
        does not end in a bare concluding word.
        """
        if not answer:
            return False
        # not only punctuation
        if re.match(r'^[\W_]+$', answer):
            return False
        # contains at least some alphanumeric characters (or a LaTeX backslash)
        if not re.search(r'[0-9A-Za-z\u0400-\u04FF\\]', answer):
            return False
        # length sanity
        if len(answer) > 1000:
            return False
        # avoid answers that end with only concluding words (English & Kazakh);
        # \b keeps ordinary words that merely end in "so" etc. acceptable
        blacklist = [r'\btherefore$', r'\bthus$', r'\bhence$', r'\bso$', r'\bwe get$', r'\bwe have$',
                     r'\bсондықтан$', r'\bдемек$', r'\bосылайша$']
        trimmed = answer.strip()
        return not any(re.search(b, trimmed, flags=re.IGNORECASE) for b in blacklist)

    @staticmethod
    def extract_final_answer_simple(text: str) -> str:
        """
        Primary method: extract the final answer from the last non-empty
        lines, falling back to pattern-based extraction.

        Tries, in order: last two lines joined, last line alone, last three
        lines joined (only if shorter than 500 chars), then
        _extract_with_patterns. Returns "" when nothing valid is found.
        """
        if not text:
            return ""

        clean = EnhancedAnswerExtractor._clean_answer
        valid = EnhancedAnswerExtractor._is_valid_answer

        # Clean the text and split into non-empty lines
        lines = [line.strip() for line in text.strip().split('\n') if line.strip()]

        # Strategy 1: Try last two lines combined
        if len(lines) >= 2:
            cleaned = clean(' '.join(lines[-2:]))
            if valid(cleaned):
                return cleaned

        # Strategy 2: Try last line only
        if lines:
            cleaned = clean(lines[-1])
            if valid(cleaned):
                return cleaned

        # Strategy 3: Try last 3 lines (sometimes answers span multiple lines)
        if len(lines) >= 3:
            cleaned = clean(' '.join(lines[-3:]))
            if valid(cleaned) and len(cleaned) < 500:
                return cleaned

        # Strategy 4: Fallback to pattern-based extraction
        return EnhancedAnswerExtractor._extract_with_patterns(text)

    @staticmethod
    def _extract_with_patterns(text: str) -> str:
        """
        Pattern-based extraction as fallback method.

        Checks <final> tags, then English/Kazakh answer and conclusion
        markers (last occurrence of each), then boxed LaTeX expressions.
        Returns the first candidate that cleans to a valid answer, else "".
        """
        clean = EnhancedAnswerExtractor._clean_answer
        valid = EnhancedAnswerExtractor._is_valid_answer

        # Check for <final> tags
        for m in EnhancedAnswerExtractor._FINAL_TAG_RE.findall(text):
            c = clean(m)
            if valid(c):
                return c

        # A sentence-ending '.' must be followed by whitespace or end of line,
        # so decimals such as 3.14 are not cut at the decimal point.
        sentence_end = r'(?:\.(?=\s|$)|\n|$)'

        # Try common answer patterns (English + Kazakh)
        patterns = [
            r'\*\*Final Answer:\*\*\s*(.+?)(?:\n|$)',
            r'Final Answer:\s*(.+?)(?:\n|$)',
            r'Final Answer\s*[:\-]\s*(.+?)(?:\n|$)',

            # Kazakh patterns
            r'\*\*Соңғы жауап:\*\*\s*(.+?)(?:\n|$)',
            r'Соңғы жауап[:\s]*(.+?)(?:\n|$)',
            r'Жауап[:\s]*(.+?)(?:\n|$)',
            r'Қорытынды[:\s]*(.+?)(?:\n|$)',
            r'Нәтиже[:\s]*(.+?)(?:\n|$)',

            # English conclusion markers
            r'Therefore[,:\s]*(.+?)' + sentence_end,
            r'Hence[,:\s]*(.+?)' + sentence_end,
            r'Thus[,:\s]*(.+?)' + sentence_end,
            r'∴\s*(.+?)' + sentence_end,

            # Kazakh conclusion markers
            r'Сондықтан[,:\s]*(.+?)' + sentence_end,
            r'Демек[,:\s]*(.+?)' + sentence_end,
            r'Осыдан[,:\s]*(.+?)' + sentence_end,
            r'Осындайша[,:\s]*(.+?)' + sentence_end,
            r'Осылайша[,:\s]*(.+?)' + sentence_end,

            r'Answer[:\s]*(.+?)(?:\n|$)',
            r'Result[:\s]*(.+?)(?:\n|$)',
            r'Solution[:\s]*(.+?)(?:\n|$)',
        ]

        for pat in patterns:
            matches = re.findall(pat, text, re.MULTILINE | re.DOTALL | re.IGNORECASE)
            if matches:
                # the last occurrence is taken as the conclusion
                cleaned = clean(matches[-1].strip())
                if valid(cleaned):
                    return cleaned

        # Try boxed math expressions, most specific wrapper first:
        # $$\boxed{..}$$, then $\boxed{..}$, then bare \boxed{..}.
        boxed_wrappers = (
            (r'\$\$\s*', r'\s*\$\$'),
            (r'\$\s*', r'\s*\$'),
            (r'', r''),
        )
        for lead, trail in boxed_wrappers:
            opener = re.compile(lead + r'(?:\\){0,3}boxed\{', re.IGNORECASE)
            closer = re.compile(trail)
            for m in opener.finditer(text):
                close = EnhancedAnswerExtractor._find_closing_brace(text, m.end())
                if close == -1 or not closer.match(text, close + 1):
                    continue
                cand = clean(text[m.end():close])
                if valid(cand):
                    return cand

        return ""

    @staticmethod
    def _collect_final_tags(text: str) -> list:
        """Return the cleaned, valid contents of every <final>...</final> tag in `text`."""
        cleaned = []
        for m in EnhancedAnswerExtractor._FINAL_TAG_RE.findall(text):
            c = EnhancedAnswerExtractor._clean_answer(m)
            if EnhancedAnswerExtractor._is_valid_answer(c):
                cleaned.append(c)
        return cleaned

    @staticmethod
    def extract_all_final_answers(generated_solution: str) -> list:
        """
        Return a list (possibly empty) of cleaned answers found inside all
        <final>...</final> tags. If no tag yields a valid answer, fall back to
        extract_final_answer_simple and return its result as a one-item list.
        """
        if not generated_solution:
            return []

        cleaned = EnhancedAnswerExtractor._collect_final_tags(generated_solution)
        if cleaned:
            return cleaned

        # Fallback: try to extract a single final using the simpler logic
        simple_answer = EnhancedAnswerExtractor.extract_final_answer_simple(generated_solution)
        return [simple_answer] if simple_answer else []

    @staticmethod
    def extract_final_answer(generated_solution: str) -> str:
        """
        Backwards-compatible single-string extractor.

        With one valid <final> tag its cleaned content is returned; with
        several, a JSON array string of the cleaned answers. Without any valid
        tag the result of extract_final_answer_simple is returned.
        """
        if not generated_solution:
            return ""

        # Prefer explicit <final> tags (can be multiple)
        cleaned = EnhancedAnswerExtractor._collect_final_tags(generated_solution)
        if not cleaned:
            # No usable tags: use simplified single extraction
            return EnhancedAnswerExtractor.extract_final_answer_simple(generated_solution)
        if len(cleaned) == 1:
            return cleaned[0]
        return json.dumps(cleaned, ensure_ascii=False)

# -------------------------
# Ollama-based Math Solver (ZERO-SHOT) — PROMPTS IN ENGLISH (problem is Kazakh)
# -------------------------
class OllamaZeroShotMathSolver:
    """
    Zero-shot math solver that uses the Ollama daemon via the Python client.

    Prompts/instructions are written in English, but the model is told that
    the problem text is in KAZAKH and is instructed to give a concise final
    answer in KAZAKH.
    - Zero-shot prompt: forbids chain-of-thought and requests a concise
      <final>...</final> tag.
    - Single-pass only.
    """

    def __init__(self, model_name="qwen3:8b"):
        """
        model_name: the Ollama model reference (e.g., "qwen3:8b")
        """
        self.model_name = model_name
        self.client = self._load_client()

    def _load_client(self):
        """Create and return an ollama.Client with default settings."""
        print(f"Initializing Ollama client for model: {self.model_name}")
        client = ollama.Client()
        return client

    def cleanup(self):
        """Drop the client reference, if one was created."""
        if hasattr(self, 'client'):
            del self.client

    def _get_format_instructions(self, answer_type):
        """
        Return the zero-shot formatting instructions (written in English) for
        `answer_type`.

        Unknown or empty types fall back to 'symbolic'. The text tells the
        model that the problem is in KAZAKH and that the <final> tag must
        contain only the concise Kazakh answer.
        """
        t = (answer_type or "symbolic").strip().lower()
        if t not in _CANONICAL_TYPES:
            t = "symbolic"

        base = (
            "CRITICAL FORMATTING REQUIREMENTS (ZERO-SHOT):\n"
            "- The math problem you will receive is written in KAZAKH. Provide your concise FINAL ANSWER in KAZAKH.\n"
            "- DO NOT provide chain-of-thought or step-by-step internal reasoning. Do NOT reveal private chain-of-thought.\n"
            "- If a very short justification is necessary, include a single-line 'Explanation:' with at most one sentence (in Kazakh).\n"
            "- Always end with a machine-readable final tag <final>...</final> containing ONLY the concise final answer in KAZAKH (no extra reasoning inside the tag).\n"
        )

        if t == "proof":
            # The alternative label is Kazakh ('Дәлел' = proof); a French
            # label here would contradict the "answer in KAZAKH" requirement.
            return base + (
                "FOR PROOFS (zero-shot):\n"
                "- Provide a concise conclusion or a one-sentence proof sketch (in KAZAKH) labeled 'Қорытынды:' or 'Дәлел:' if needed. Do NOT provide a full step-by-step proof.\n"
                "Format example (English instructions):\n"
                "Final Answer (in Kazakh):\n"
                "[Concise Kazakh conclusion or one-sentence sketch]\n\n"
                "<final>[Concise Kazakh conclusion]</final>\n"
            )
        elif t == "numerical":
            return base + (
                "FOR NUMERICAL RESULTS:\n"
                "- Provide the numeric result in exact form if available (fractions/radicals). Otherwise provide a decimal rounded to 4 decimal places.\n"
                "Format example (English instructions):\n"
                "Final Answer (in Kazakh):\n"
                "[Numeric result]\n\n"
                "<final>[Numeric result]</final>\n"
            )
        else:  # symbolic
            return base + (
                "FOR SYMBOLIC RESULTS:\n"
                "- Provide the final symbolic expression (LaTeX allowed) in a concise form. The expression may be LaTeX but any wording should be in Kazakh if used.\n"
                "Format example (English instructions):\n"
                "Final Answer (in Kazakh):\n"
                "[Final symbolic expression]\n\n"
                "<final>[LaTeX expression or concise symbolic expression — in Kazakh if words are used]</final>\n"
            )

    def _create_prompt(self, question, answer_type="symbolic"):
        """Build the full user prompt: role line, the problem text, and the format instructions."""
        format_instructions = self._get_format_instructions(answer_type)

        # The main instruction is in English, but it states the problem language and required answer language.
        prompt = f"""You are an expert mathematician. The following math problem is written in KAZAKH.
Provide a concise final answer in KAZAKH. DO NOT produce chain-of-thought or step-by-step internal reasoning.
If you include a justification, it must be one short sentence and labeled 'Explanation:' (in Kazakh).

MATH PROBLEM (in Kazakh):
{question}

{format_instructions}

Begin your concise answer (in Kazakh) now:
"""
        return prompt

    @staticmethod
    def _response_to_text(resp):
        """
        Pull the message content out of a chat response.

        Handles a dict with message.content, a dict with a 'choices' list,
        and an object exposing .message.content; anything else is rendered
        with str().
        """
        if isinstance(resp, dict):
            message = resp.get('message')
            if isinstance(message, dict) and 'content' in message:
                return message['content']
            choices = resp.get('choices')
            if isinstance(choices, (list, tuple)) and choices:
                choice = choices[0]
                if isinstance(choice, dict) and isinstance(choice.get('message'), dict):
                    return choice['message'].get('content', '')
                return str(choice)
            return str(resp)
        # resp might be an object with .message.content
        try:
            return resp.message.content
        except Exception:
            # deliberate best-effort: fall back to the object's string form
            return str(resp)

    def _generate_once(self, prompt_text: str, enable_thinking: bool = False) -> str:
        """
        Send one user message through self.client.chat (no temperature or
        max_tokens arguments) and return the reply text, stripped.

        If the keyword call raises TypeError, a positional call is tried, and
        finally a positional call without the `think` argument.
        """
        messages = [{"role": "user", "content": prompt_text}]

        try:
            resp = self.client.chat(model=self.model_name, messages=messages, think=enable_thinking)
        except TypeError:
            # Some client versions have different signatures
            try:
                resp = self.client.chat(self.model_name, messages=messages, think=enable_thinking)
            except Exception:
                resp = self.client.chat(self.model_name, messages)

        # Normalize response into a string
        full_output = self._response_to_text(resp)
        if isinstance(full_output, bytes):
            full_output = full_output.decode('utf-8', errors='ignore')
        return (full_output or "").strip()

    def solve_problem(self, question, answer_type="symbolic"):
        """
        Zero-shot single-pass solve via Ollama.

        Returns a dict with keys 'thinking_content' (always ""),
        'generated_answer' (the raw model reply), 'final_tag_output'
        (always "", there is no second pass) and 'extracted_final_answer'.
        """
        prompt = self._create_prompt(question, answer_type)
        generated_answer = self._generate_once(prompt, enable_thinking=False)

        return {
            "thinking_content": "",   # no thinking parsing for zero-shot mode
            "generated_answer": generated_answer,
            "final_tag_output": "",   # no second pass
            "extracted_final_answer": EnhancedAnswerExtractor.extract_final_answer(generated_answer)
        }

# -------------------------
# Dataset Processor
# -------------------------
class DatasetProcessor:
    """
    Runs a solver over a JSONL dataset and writes the outcome to disk:
    per-run results, intermediate snapshots every 10 problems, a summary
    file, and a dump of every problem whose final answer could not be
    extracted.
    """

    def __init__(self, solver: "OllamaZeroShotMathSolver", failed_folder=None):
        """
        solver: object exposing solve_problem(question, answer_type=...).
        failed_folder: directory for failed-extraction dumps; created if
            missing. Defaults to "failed_extractions".
        """
        self.solver = solver
        self.extractor = EnhancedAnswerExtractor()
        self.failed_folder = failed_folder or "failed_extractions"
        os.makedirs(self.failed_folder, exist_ok=True)

    def process_dataset(self, dataset_path, output_base_path, start_idx=0, end_idx=None,
                        folder_name=None, create_timestamped_folder=True):
        """
        Solve problems start_idx .. end_idx-1 of the dataset and save results.

        end_idx is exclusive; None or a value past the end of the dataset
        means "through the last problem". Clamping happens before folder and
        file names are derived, so they describe the range actually processed.

        Returns:
            (results, output_folder): the list of result dicts and the
            directory they were written to.
        """
        dataset = self._load_dataset(dataset_path)
        if end_idx is None or end_idx > len(dataset):
            end_idx = len(dataset)

        output_folder = self._create_output_folder(output_base_path, folder_name, start_idx, end_idx, create_timestamped_folder)
        results = []

        print(f"Processing problems {start_idx} to {end_idx-1} ({end_idx-start_idx} total)")
        print(f"Output will be saved in: {output_folder}")

        for idx in tqdm(range(start_idx, end_idx)):
            problem = dataset[idx]
            # Only the solve step is guarded: a failure while printing or
            # saving must not add a second (error) entry for the same problem.
            try:
                entry = self._process_single_problem(idx, problem)
            except Exception as e:
                print(f"Error processing problem {idx+1}: {str(e)}")
                entry = self._create_error_entry(idx, problem, str(e))
                results.append(entry)
            else:
                results.append(entry)
                self._print_progress(idx, entry)

            # Snapshot every 10 problems, whether or not the last one failed.
            processed = idx - start_idx + 1
            if processed % 10 == 0:
                self._save_intermediate_results(results, output_folder, processed)

        self._save_final_results(results, output_folder, start_idx, end_idx)
        self._create_summary_file(results, output_folder, dataset_path, start_idx, end_idx)
        return results, output_folder

    def _create_output_folder(self, base_path, folder_name, start_idx, end_idx, add_timestamp):
        """Create (if needed) and return the output directory under base_path."""
        if folder_name is None:
            folder_name = f"results_{start_idx}_to_{end_idx-1}"
        if add_timestamp:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            folder_name = f"{folder_name}_{timestamp}"
        output_folder = os.path.join(base_path, folder_name)
        os.makedirs(output_folder, exist_ok=True)
        return output_folder

    def _load_dataset(self, dataset_path):
        """Read a UTF-8 JSONL file into a list, skipping blank lines."""
        with open(dataset_path, 'r', encoding='utf-8') as f:
            return [json.loads(line) for line in f if line.strip()]

    def _process_single_problem(self, idx, problem):
        """
        Solve one problem and return its result entry.

        Determines the canonical answer type, calls the solver, extracts the
        final answer(s), and writes a dump file into self.failed_folder when
        no answer could be extracted.
        """
        language = problem.get("Language", "")
        chapter_num = problem.get("Chapter Number", "")
        example_num = problem.get("Example Number", "")
        question = problem.get("Question", "")
        exact_answer = problem.get("Exact Answer", "")
        raw_answer_type = problem.get("Answer Type", "") or ""

        # The dataset may store the exact answer as a JSON number; the
        # heuristics below work on text.
        exact_text = "" if exact_answer is None else str(exact_answer)

        # Normalize/infer canonical answer type: 'symbolic', 'numerical', 'proof'
        canonical_type = normalize_answer_type(raw_answer_type, question_text=question, exact_answer=exact_text)

        # If the exact answer contains a digit and no LaTeX construct
        # (\frac, \sqrt, \boxed), prefer numerical regardless of the label.
        if re.search(r'\d', exact_text) and not re.search(r'\\frac|\\sqrt|\\boxed', exact_text):
            canonical_type = "numerical"

        print(f"\nProcessing Problem {idx+1}: Chapter {chapter_num}, Example {example_num}")
        print(f"Raw Answer Type: '{raw_answer_type}'  --> canonical: '{canonical_type}'")

        # Generate solution (use canonical_type) -- zero-shot, single pass
        solution_result = self.solver.solve_problem(question, answer_type=canonical_type)
        generated_answer = solution_result.get('generated_answer', '')
        thinking_content = solution_result.get('thinking_content', '')
        final_tag_output = solution_result.get('final_tag_output', '')

        # Keep both forms of the extraction: a single string and a list.
        # Try to get all <final> answers first (preferred), then the
        # second-pass output if the solver supplied one.
        all_finals = EnhancedAnswerExtractor.extract_all_final_answers(generated_answer)
        if not all_finals and final_tag_output:
            all_finals = EnhancedAnswerExtractor.extract_all_final_answers(final_tag_output)

        extracted_final_answer = ""
        extracted_final_answers = []
        if all_finals:
            extracted_final_answers = all_finals
            if len(all_finals) == 1:
                extracted_final_answer = all_finals[0]
            else:
                # machine-readable concatenation: JSON array string
                extracted_final_answer = json.dumps(all_finals, ensure_ascii=False)
        else:
            # Fall back to the single-answer extractor, first on the answer
            # alone, then on everything the solver returned.
            combined = "\n".join([thinking_content or "", generated_answer or "", final_tag_output or ""])
            for candidate_text in (generated_answer, combined):
                single = EnhancedAnswerExtractor.extract_final_answer(candidate_text)
                if single:
                    extracted_final_answer = single
                    extracted_final_answers = [single]
                    break

        # If still empty, save a failed extraction example for inspection
        if not extracted_final_answer:
            fname = f"failed_{idx}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            fpath = os.path.join(self.failed_folder, fname)
            with open(fpath, 'w', encoding='utf-8') as f:
                json.dump({
                    "index": idx,
                    "question": question,
                    "generated_answer": generated_answer,
                    "thinking_content": thinking_content,
                    "final_tag_output": final_tag_output,
                    "exact_answer": exact_answer,
                    "canonical_type": canonical_type,
                    "extracted_final_answer": extracted_final_answer,
                    "extracted_final_answers": extracted_final_answers
                }, f, ensure_ascii=False, indent=2)
            print(f"Saved failed extraction example to {fpath}")

        return {
            "problem_index": idx,
            "language": language,
            "chapter_number": chapter_num,
            "example_number": example_num,
            "question": question,
            "generated_answer": generated_answer,
            "thinking_content": thinking_content,
            "final_tag_output": final_tag_output,
            "extracted_final_answer": extracted_final_answer,       # string (or JSON array string)
            "extracted_final_answers": extracted_final_answers,     # list (empty / single / many)
            "exact_answer": exact_answer,
            "raw_answer_type": raw_answer_type,
            "canonical_answer_type": canonical_type,
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _create_error_entry(self, idx, problem, error_msg):
        """Build a result entry for a problem whose processing raised; the answer is 'ERROR: <msg>'."""
        return {
            "problem_index": idx,
            "language": problem.get("Language", ""),
            "chapter_number": problem.get("Chapter Number", ""),
            "example_number": problem.get("Example Number", ""),
            "question": problem.get("Question", ""),
            "generated_answer": f"ERROR: {error_msg}",
            "thinking_content": "",
            "final_tag_output": "",
            "extracted_final_answer": "",
            "extracted_final_answers": [],
            "exact_answer": problem.get("Exact Answer", ""),
            "raw_answer_type": problem.get("Answer Type", ""),
            "canonical_answer_type": "",
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _print_progress(self, idx, result_entry):
        """Print the answer length, extracted answer(s) and expected answer for one result."""
        print(f"Generated answer length: {len(result_entry['generated_answer']) if result_entry['generated_answer'] else 0}")
        print(f"Extracted final answer: '{result_entry['extracted_final_answer']}'")
        print(f"Extracted final answers (list): {result_entry.get('extracted_final_answers', [])}")
        print(f"Expected answer: '{result_entry['exact_answer']}'")

    def _save_intermediate_results(self, results, output_folder, count):
        """Write all results so far to 'intermediate_results_<count>.json'."""
        temp_filename = f'intermediate_results_{count}.json'
        temp_output_path = os.path.join(output_folder, temp_filename)
        with open(temp_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"Saved intermediate results to {temp_output_path}")

    def _save_final_results(self, results, output_folder, start_idx, end_idx):
        """Write all results to 'final_results_<start>_to_<end-1>.json' and return that path."""
        final_filename = f'final_results_{start_idx}_to_{end_idx-1}.json'
        final_output_path = os.path.join(output_folder, final_filename)
        with open(final_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"\nProcessing complete. Results saved to {final_output_path}")
        print(f"Total problems processed: {len(results)}")
        return final_output_path

    def _create_summary_file(self, results, output_folder, dataset_path, start_idx, end_idx):
        """
        Write 'processing_summary.json' (run info plus aggregate statistics)
        into output_folder.

        The distinct-value lists are sorted by string form so the file is
        identical across runs; list(set(...)) order depends on hashing.
        """
        total = len(results)
        failed = sum(1 for r in results if r['generated_answer'].startswith('ERROR:'))
        successful_extractions = sum(
            1 for r in results if (r.get('extracted_final_answer') or '').strip()
        )

        def _distinct(key):
            # key=str keeps sorting safe when values mix ints and strings.
            return sorted({r[key] for r in results if r.get(key)}, key=str)

        summary_data = {
            "processing_info": {
                "dataset_path": dataset_path,
                "start_index": start_idx,
                "end_index": end_idx - 1,
                "total_processed": total,
                "processing_timestamp": datetime.now().isoformat(),
                "output_folder": output_folder
            },
            "statistics": {
                "successful_problems": total - failed,
                "failed_problems": failed,
                "successful_extractions": successful_extractions,
                "extraction_success_rate": f"{(successful_extractions/total*100):.1f}%" if results else "0%",
                "average_answer_length": sum(len(r['generated_answer']) for r in results) / total if results else 0,
                "chapters_processed": _distinct('chapter_number'),
                "raw_answer_types": _distinct('raw_answer_type'),
                "canonical_answer_types": _distinct('canonical_answer_type')
            }
        }
        summary_path = os.path.join(output_folder, 'processing_summary.json')
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary_data, f, ensure_ascii=False, indent=2)
        print(f"Processing summary saved to {summary_path}")
        print(f"Answer extraction success rate: {summary_data['statistics']['extraction_success_rate']}")

# -------------------------
# Main (example usage)
# -------------------------
def main():
    """
    Example entry point: run the zero-shot solver over the first 100 problems
    of the Kazakh corpus and print the folder the results were written to.
    """
    # NOTE: update the corpus path and the working directory to match your environment.
    # Ensure your dataset's "Question" fields are in Kazakh.
    corpus_file = "/kaggle/input/nctb-dataset/Kazakh_Final_Corpus.jsonl"
    work_dir = "/kaggle/working/"

    # Use the Ollama zero-shot solver (the named model must be available in Ollama).
    math_solver = OllamaZeroShotMathSolver(model_name="qwen3:8b")
    failed_dir = os.path.join(work_dir, "failed_extractions")
    runner = DatasetProcessor(math_solver, failed_folder=failed_dir)

    # Quick test run: only the first 100 problems are processed.
    _, saved_to = runner.process_dataset(
        corpus_file,
        work_dir,
        start_idx=0,
        end_idx=100,
    )
    print("Done. Results saved to:", saved_to)


if __name__ == "__main__":
    main()


# **Finnish**

In [None]:
import json
import os
import re
from datetime import datetime
from tqdm import tqdm

# Ollama client
try:
    import ollama
except Exception as e:
    raise ImportError("The 'ollama' package is required. Install it and make sure the Ollama daemon is running.") from e

# -------------------------
# Answer type normalization
# -------------------------
# Canonical set
_CANONICAL_TYPES = {"symbolic", "numerical", "proof"}

# Mapping from common noisy labels to the canonical type (English + Finnish variants).
# Insertion order matters: the substring fallback in normalize_answer_type()
# returns the first key found inside the cleaned label.
_ANSWER_TYPE_MAP_SIMPLE = {
    # Proof variants (English, then Finnish)
    "proof": "proof", "prove": "proof",
    "todistus": "proof", "todista": "proof", "todistaa": "proof", "näytä": "proof", "näytä että": "proof",

    # Numerical variants
    "numerical": "numerical", "numeric": "numerical", "number": "numerical", "calculation": "numerical",
    "numeerinen": "numerical", "numero": "numerical", "arvo": "numerical", "laskea": "numerical", "laske": "numerical",

    # Symbolic variants
    "symbolic": "symbolic", "symbol": "symbolic", "equation": "symbolic", "algebraic": "symbolic",
    "symbolinen": "symbolic", "yhtälö": "symbolic", "yhtalo": "symbolic", "yhtälöt": "symbolic",
}


def normalize_answer_type(raw_label: str, question_text: str = "", exact_answer: str = "") -> str:
    """
    Normalize a dataset label to one of: 'symbolic', 'numerical', 'proof'.

    Resolution order:
      1. exact match of the cleaned label in _ANSWER_TYPE_MAP_SIMPLE
      2. first map key that occurs as a substring of the cleaned label
      3. keyword heuristics on the question / exact answer (English + Finnish),
         checked in this order: proof, logarithm, set theory, units,
         equation solving, trigonometry/geometry, numeric wording,
         shape of the exact answer
      4. fallback: 'symbolic'

    Args:
        raw_label: dataset "Answer Type" value (may be empty or None).
        question_text: problem statement used by the heuristics.
        exact_answer: reference answer used by the heuristics. Non-string
            values (e.g. JSON numbers) are accepted and converted with str().

    Returns:
        One of 'symbolic', 'numerical', 'proof'.
    """
    def _as_text(value) -> str:
        # JSONL fields are not guaranteed to be strings (an exact answer may be
        # a bare number); coerce instead of failing on .strip()/.lower().
        if value is None:
            return ""
        return value if isinstance(value, str) else str(value)

    def _clean_label(lbl: str) -> str:
        if not lbl:
            return ""
        s = lbl.strip().lower()
        # keep unicode letters and digits and spaces
        s = re.sub(r'[^0-9\w\säöåÄÖÅ-]', ' ', s, flags=re.UNICODE)
        s = re.sub(r'\s+', ' ', s).strip()
        return s

    s = _clean_label(_as_text(raw_label))

    # direct mapping
    if s in _ANSWER_TYPE_MAP_SIMPLE:
        return _ANSWER_TYPE_MAP_SIMPLE[s]

    # partial matches (allow English or Finnish keywords appearing)
    for k, v in _ANSWER_TYPE_MAP_SIMPLE.items():
        if k in s:
            return v

    # heuristics using question or exact_answer (English & Finnish checks)
    q = _as_text(question_text).lower()
    a = _as_text(exact_answer).lower()

    # Proof indicators (English + Finnish)
    if re.search(r'\b(prove|show that|prove that|proof|todista|todistus|näytä|näytä että|todistaa)\b', q):
        return "proof"

    # logarithm related: English/Finnish
    if re.search(r'\b(log|ln|logarithm|logaritmi)\b', q) or re.search(r'\blog\b', a):
        # prefer numerical if exact answer contains digits
        if re.search(r'\d', a):
            return "numerical"
        return "symbolic"

    # set theory indicators: English / Finnish
    if re.search(r'\b(set|subset|union|intersection|joukko|osajoukko|yhdiste|leikkaus)\b', q):
        return "symbolic"

    # units (English + Finnish), looked up in both question and answer
    if re.search(r'\b(meter|metri|m|cm|kg|liter|litra|l|km|mile)\b', q + " " + a):
        return "numerical"

    # equation-solving heuristics (English + Finnish)
    if re.search(r'\b(equation|yhtälö)\b|solve|ratkaise|ratkaista|= x|x\s*=', q):
        return "symbolic"

    # trig / geometry indicators (English + Finnish).
    # 'trigonometr' is a stem, so it must be allowed to continue with more
    # letters (trigonometria, trigonometrinen, trigonometry, ...).
    if re.search(
        r'\b(trig|trigonometr\w*|sin|cos|tan|geometry|geometria|triangle|kolmio|circle|ympyrä|ympyra)\b',
        q,
    ):
        if re.search(r'\d', a):
            return "numerical"
        return "symbolic"

    # numeric indicators (English + Finnish) -> numerical
    if re.search(r'\d|find the value|compute|calculate|evaluate|arvon|laske', q):
        return "numerical"

    # If exact_answer looks numeric, prefer numerical (but check for LaTeX)
    if re.search(r'[0-9]|\\frac|\\sqrt', a):
        if re.search(r'\\frac|\\sqrt|\{|\\', a):
            return "symbolic"
        return "numerical"

    # fallback
    return "symbolic"

# -------------------------
# EnhancedAnswerExtractor (adapted to behave like SimplifiedAnswerExtractor)
# and now handles Finnish markers as well
# -------------------------
class EnhancedAnswerExtractor:
    """
    Extracts a concise final answer from model output (English + Finnish markers).

    Public entry points:
      - extract_final_answer(text) -> str
      - extract_all_final_answers(generated_solution) -> list
      - extract_final_answer_simple(text) -> str
    Internal helpers: _clean_answer, _is_valid_answer, _extract_with_patterns,
    _tagged_finals, _iter_boxed, _unwrap_boxed.
    """

    # <final>...</final> tags (non-greedy, may span lines).
    _FINAL_TAG = re.compile(r'<final>(.*?)</final>', re.DOTALL | re.IGNORECASE)

    # Opening of a boxed wrapper: optional '$', optional whitespace,
    # zero to three backslashes, then 'boxed{'.
    _BOXED_OPEN = re.compile(r'\$?\s*\\{0,3}boxed\{', re.IGNORECASE)
    # What may follow the closing brace of a boxed wrapper: whitespace and one '$'.
    _BOXED_TAIL = re.compile(r'\s*\$?')

    # Leading phrases stripped from an answer, applied in this order.
    # Word prefixes end in \b so that only whole words are removed
    # (e.g. "So" must not eat the start of "Solmuja"), and longer phrases are
    # listed before their own prefixes ("Vastaus on" before "Vastaus").
    _PREFIX_PATTERNS = tuple(
        re.compile('^' + p, re.IGNORECASE)
        for p in (
            # English
            r'Final Answer:\s*', r'Answer:\s*', r'The answer is\b\s*',
            r'Therefore\b,?\s*', r'Thus\b,?\s*', r'Hence\b,?\s*', r'So\b,?\s*', r'∴\s*',
            # Finnish
            r'Lopullinen vastaus\b[:\s]*', r'Lopullinen\b[:\s]*',
            r'Vastaus on\b\s*', r'Vastaus\b[:\s]*',
            r'Siksi\b,?\s*', r'Joten\b,?\s*', r'Näin ollen\b,?\s*', r'Johtopäätös\b[:\s]*',
        )
    )

    @staticmethod
    def _iter_boxed(text: str):
        """
        Yield (start, inner, end) for every boxed{...} wrapper in text.

        `inner` is the brace-balanced content, so nested LaTeX such as
        \\boxed{\\frac{1}{2}} is returned whole. `start`/`end` delimit the full
        wrapper including an optional surrounding '$' and adjacent whitespace.
        Scanning stops at the first wrapper whose braces never close.
        """
        cls = EnhancedAnswerExtractor
        pos = 0
        while True:
            opener = cls._BOXED_OPEN.search(text, pos)
            if opener is None:
                return
            depth = 1
            idx = opener.end()
            while idx < len(text) and depth:
                ch = text[idx]
                if ch == '{':
                    depth += 1
                elif ch == '}':
                    depth -= 1
                idx += 1
            if depth:
                # unbalanced braces: leave the remainder untouched
                return
            inner = text[opener.end():idx - 1]
            end = cls._BOXED_TAIL.match(text, idx).end()
            yield opener.start(), inner, end
            pos = end

    @staticmethod
    def _unwrap_boxed(text: str) -> str:
        """Replace every boxed{...} wrapper in text by its (balanced) content."""
        cls = EnhancedAnswerExtractor
        pieces = []
        cursor = 0
        for start, inner, end in cls._iter_boxed(text):
            pieces.append(text[cursor:start])
            pieces.append(cls._unwrap_boxed(inner))
            cursor = end
        pieces.append(text[cursor:])
        return ''.join(pieces)

    @staticmethod
    def _tagged_finals(text: str) -> list:
        """Return the cleaned, valid contents of all <final>...</final> tags."""
        cls = EnhancedAnswerExtractor
        cleaned = (cls._clean_answer(raw) for raw in cls._FINAL_TAG.findall(text))
        return [c for c in cleaned if cls._is_valid_answer(c)]

    @staticmethod
    def _clean_answer(answer: str) -> str:
        """
        Strip formatting noise from a candidate answer.

        Removes surrounding '$'/'$$', leading answer/conclusion phrases
        (English + Finnish), boxed wrappers, bold/italic markers, trailing
        punctuation and trailing filler words; rewrites simple \\frac and
        \\sqrt into readable text. Returns "" for empty input.
        """
        if not answer:
            return ""
        cls = EnhancedAnswerExtractor

        # Start with a trimmed answer and normalize whitespace
        a = re.sub(r'\s+', ' ', answer.strip())

        # remove outer $$ if present, then a single surrounding $
        a = re.sub(r'^\$\$(.*)\$\$$', r'\1', a, flags=re.DOTALL)
        a = re.sub(r'^\$(.*)\$$', r'\1', a, flags=re.DOTALL)

        # strip standalone leading/trailing $ characters and spaces
        a = a.strip('$ ')

        # Remove common prefixes (after stripping $ to catch "$Final Answer: ...$")
        for prefix in cls._PREFIX_PATTERNS:
            a = prefix.sub('', a)

        # unwrap $$\boxed{...}$$, $\boxed{...}$, \boxed{...} with balanced braces
        a = cls._unwrap_boxed(a)

        # convert common LaTeX to readable forms
        a = re.sub(r'\\frac\{([^}]*)\}\{([^}]*)\}', r'(\1)/(\2)', a)
        a = re.sub(r'\\sqrt\{([^}]*)\}', r'√(\1)', a)

        # remove bold/italic wrappers
        a = re.sub(r'\*\*([^*]+)\*\*', r'\1', a)
        a = re.sub(r'\*([^*]+)\*', r'\1', a)

        # collapse multiple spaces again (in case replacements introduced them)
        a = re.sub(r'\s+', ' ', a).strip()

        # trim trailing punctuation
        a = a.rstrip(' \t\n.,;:')

        # remove trailing words like "proved" or "the answer" (English & Finnish)
        a = re.sub(
            r'\b(proved|completed|finished|the answer|todistettu|todistus|vastaus|vastaus on)\b[.\s]*$',
            '', a, flags=re.IGNORECASE,
        ).strip()

        return a

    @staticmethod
    def _is_valid_answer(answer: str) -> bool:
        """
        Return True when `answer` plausibly is an answer: non-empty, not pure
        punctuation, contains a letter/digit/backslash, at most 1000 characters
        and does not end in a dangling conclusion word (English & Finnish).
        """
        if not answer:
            return False
        # not only punctuation
        if re.match(r'^[\W_]+$', answer):
            return False
        # contains at least some alphanumeric characters (or a LaTeX backslash)
        if not re.search(r'[0-9A-Za-zÀ-ÖØ-öø-ÿÄäÖöÅå\\]', answer):
            return False
        # length sanity
        if len(answer) > 1000:
            return False
        # Reject answers that trail off in a conclusion word; \b keeps words
        # that merely end in the same letters (e.g. "also") valid.
        blacklist = [
            r'\btherefore$', r'\bthus$', r'\bhence$', r'\bso$', r'\bwe get$', r'\bwe have$',
            r'\bsiksi$', r'\bjoten$', r'\bnäin ollen$',
        ]
        stripped = answer.strip()
        return not any(re.search(b, stripped, flags=re.IGNORECASE) for b in blacklist)

    @staticmethod
    def extract_final_answer_simple(text: str) -> str:
        """
        Primary method: Extract final answer using the last lines approach,
        with fallback to pattern-based extraction.

        Candidates are tried in this order and the first valid one is returned:
        last two lines joined, last line, last three lines joined (only if
        shorter than 500 characters), then _extract_with_patterns(text).
        """
        if not text:
            return ""
        cls = EnhancedAnswerExtractor

        # Clean the text and split into non-empty lines
        lines = [line.strip() for line in text.strip().split('\n') if line.strip()]

        # Strategy 1: last two lines combined
        if len(lines) >= 2:
            cleaned = cls._clean_answer(' '.join(lines[-2:]))
            if cls._is_valid_answer(cleaned):
                return cleaned

        # Strategy 2: last line only
        if lines:
            cleaned = cls._clean_answer(lines[-1])
            if cls._is_valid_answer(cleaned):
                return cleaned

        # Strategy 3: last three lines (sometimes answers span multiple lines)
        if len(lines) >= 3:
            cleaned = cls._clean_answer(' '.join(lines[-3:]))
            if cls._is_valid_answer(cleaned) and len(cleaned) < 500:
                return cleaned

        # Strategy 4: pattern-based extraction
        return cls._extract_with_patterns(text)

    @staticmethod
    def _extract_with_patterns(text: str) -> str:
        """
        Pattern-based extraction as fallback method.
        Recognizes both English and Finnish answer/conclusion patterns.

        Order: <final> tags, labelled answer/conclusion phrases (the last
        occurrence of the first pattern that yields a valid answer), then
        boxed expressions. Returns "" when nothing valid is found.
        """
        cls = EnhancedAnswerExtractor

        # Check for <final> tags
        tagged = cls._tagged_finals(text)
        if tagged:
            return tagged[0]

        # Try common answer patterns (English + Finnish)
        patterns = [
            r'\*\*Final Answer:\*\*\s*(.+?)(?:\n|$)',
            r'Final Answer:\s*(.+?)(?:\n|$)',
            r'Final Answer\s*[:\-]\s*(.+?)(?:\n|$)',

            # Finnish patterns ("Vastaus on" must precede the shorter "Vastaus",
            # otherwise the captured answer starts with "on ")
            r'\*\*Lopullinen vastaus:\*\*\s*(.+?)(?:\n|$)',
            r'Lopullinen vastaus[:\s]*(.+?)(?:\n|$)',
            r'Vastaus on[:\s]*(.+?)(?:\n|$)',
            r'Vastaus[:\s]*(.+?)(?:\n|$)',
            r'Johtopäätös[:\s]*(.+?)(?:\n|$)',
            r'Näin ollen[,:\s]*(.+?)(?:\.|$|\n)',

            # English conclusion markers
            r'Therefore[,:\s]*(.+?)(?:\.|$|\n)',
            r'Hence[,:\s]*(.+?)(?:\.|$|\n)',
            r'Thus[,:\s]*(.+?)(?:\.|$|\n)',
            r'∴\s*(.+?)(?:\.|$|\n)',

            # Finnish conclusion markers
            r'Siksi[,:\s]*(.+?)(?:\.|$|\n)',
            r'Joten[,:\s]*(.+?)(?:\.|$|\n)',

            r'Answer[:\s]*(.+?)(?:\n|$)',
            r'Result[:\s]*(.+?)(?:\n|$)',
            r'Solution[:\s]*(.+?)(?:\n|$)',
        ]

        for pat in patterns:
            matches = re.findall(pat, text, re.MULTILINE | re.DOTALL | re.IGNORECASE)
            if matches:
                cleaned = cls._clean_answer(matches[-1].strip())
                if cls._is_valid_answer(cleaned):
                    return cleaned

        # Try boxed math expressions (brace-balanced, so nested LaTeX survives)
        for _start, inner, _end in cls._iter_boxed(text):
            cand = cls._clean_answer(inner)
            if cls._is_valid_answer(cand):
                return cand

        return ""

    @staticmethod
    def extract_all_final_answers(generated_solution: str) -> list:
        """
        Extract multiple final answers using simplified approach.
        Returns a list (possibly empty) of cleaned answers found inside all <final>...</final> tags.
        Falls back to the single simplified extraction if no valid tag content is found.
        """
        if not generated_solution:
            return []
        cls = EnhancedAnswerExtractor

        tagged = cls._tagged_finals(generated_solution)
        if tagged:
            return tagged

        # Fallback: try to extract a single final using the simpler logic
        simple_answer = cls.extract_final_answer_simple(generated_solution)
        return [simple_answer] if simple_answer else []

    @staticmethod
    def extract_final_answer(generated_solution: str) -> str:
        """
        Backwards-compatible extractor that delegates to the simplified extraction.
        If multiple valid <final> tags exist, returns a JSON array string of the
        cleaned answers; a single one is returned as-is.
        """
        if not generated_solution:
            return ""
        cls = EnhancedAnswerExtractor

        # Prefer explicit <final> tags (can be multiple)
        tagged = cls._tagged_finals(generated_solution)
        if not tagged:
            # no usable tags: use simplified single extraction
            return cls.extract_final_answer_simple(generated_solution)
        if len(tagged) == 1:
            return tagged[0]
        return json.dumps(tagged, ensure_ascii=False)

# -------------------------
# Ollama-based Math Solver (ZERO-SHOT) — PROMPTS IN ENGLISH (problem is Finnish)
# -------------------------
class OllamaZeroShotMathSolver:
    """
    Single-pass, zero-shot math solver backed by the Ollama Python client.

    All instructions sent to the model are in English; they tell the model
    that the problem text is FINNISH and that the concise final answer must
    be given in FINNISH inside a <final>...</final> tag, without
    chain-of-thought.
    """

    def __init__(self, model_name="qwen3:8b"):
        """
        model_name: the Ollama model reference (e.g., "qwen3:8b")
        """
        self.model_name = model_name
        self.client = self._load_client()

    def _load_client(self):
        """Announce the model on stdout and return a fresh ollama.Client()."""
        print(f"Initializing Ollama client for model: {self.model_name}")
        return ollama.Client()

    def cleanup(self):
        """Drop the client attribute if it is present."""
        if not hasattr(self, 'client'):
            return
        del self.client

    def _get_format_instructions(self, answer_type):
        """
        Build the English formatting instructions for the given answer type.

        The shared preamble states that the problem is in FINNISH and that the
        concise final answer goes, in FINNISH, into a <final> tag; a
        type-specific section follows. Unknown or empty types are treated as
        'symbolic'.
        """
        kind = (answer_type or "symbolic").strip().lower()
        if kind not in _CANONICAL_TYPES:
            kind = "symbolic"

        preamble = (
            "CRITICAL FORMATTING REQUIREMENTS (ZERO-SHOT):\n"
            "- The math problem you will receive is written in FINNISH. Provide your concise FINAL ANSWER in FINNISH.\n"
            "- DO NOT provide chain-of-thought or step-by-step internal reasoning. Do NOT reveal private chain-of-thought.\n"
            "- If a very short justification is necessary, include a single-line 'Explanation:' with at most one sentence (in Finnish).\n"
            "- Always end with a machine-readable final tag <final>...</final> containing ONLY the concise final answer in FINNISH (no extra reasoning inside the tag).\n"
        )

        proof_section = (
            "FOR PROOFS (zero-shot):\n"
            "- Provide a concise conclusion or a one-sentence proof sketch (in FINNISH) labeled 'Johtopäätös:' or 'Esitys:' if needed. Do NOT provide a full step-by-step proof.\n"
            "Format example (English instructions):\n"
            "Final Answer (in Finnish):\n"
            "[Concise Finnish conclusion or one-sentence sketch]\n\n"
            "<final>[Concise Finnish conclusion]</final>\n"
        )
        numerical_section = (
            "FOR NUMERICAL RESULTS:\n"
            "- Provide the numeric result in exact form if available (fractions/radicals). Otherwise provide a decimal rounded to 4 decimal places.\n"
            "Format example (English instructions):\n"
            "Final Answer (in Finnish):\n"
            "[Numeric result]\n\n"
            "<final>[Numeric result]</final>\n"
        )
        symbolic_section = (
            "FOR SYMBOLIC RESULTS:\n"
            "- Provide the final symbolic expression (LaTeX allowed) in a concise form. The expression may be LaTeX but any wording should be in Finnish if used.\n"
            "Format example (English instructions):\n"
            "Final Answer (in Finnish):\n"
            "[Final symbolic expression]\n\n"
            "<final>[LaTeX expression or concise symbolic expression — in Finnish if words are used]</final>\n"
        )

        # Anything that is neither proof nor numerical gets the symbolic section.
        sections = {"proof": proof_section, "numerical": numerical_section}
        return preamble + sections.get(kind, symbolic_section)

    def _create_prompt(self, question, answer_type="symbolic"):
        """Assemble the full prompt: role line, the Finnish problem, format rules."""
        rules = self._get_format_instructions(answer_type)

        # English instructions that name the problem language and the answer language.
        return f"""You are an expert mathematician. The following math problem is written in FINNISH.
Provide a concise final answer in FINNISH. DO NOT produce chain-of-thought or step-by-step internal reasoning.
If you include a justification, it must be one short sentence and labeled 'Explanation:' (in Finnish).

MATH PROBLEM (in Finnish):
{question}

{rules}

Begin your concise answer (in Finnish) now:
"""

    @staticmethod
    def _response_text(resp):
        """Pull the message content out of the response shapes handled here."""
        if not isinstance(resp, dict):
            # resp might be an object with .message.content
            try:
                return resp.message.content
            except Exception:
                return str(resp)

        message = resp['message'] if 'message' in resp else None
        if isinstance(message, dict) and 'content' in message:
            return message['content']

        choices = resp['choices'] if 'choices' in resp else None
        if isinstance(choices, (list, tuple)) and choices:
            first = choices[0]
            if isinstance(first, dict) and 'message' in first and isinstance(first['message'], dict):
                return first['message'].get('content', '')
            return str(first)

        return str(resp)

    def _generate_once(self, prompt_text: str, enable_thinking: bool = False) -> str:
        """
        Send one user message through client.chat (no temperature/max_tokens)
        and return the reply text, stripped.

        If the keyword-style call raises TypeError, the model name is passed
        positionally; if that fails too, `think` is dropped as well.
        """
        chat_messages = [{"role": "user", "content": prompt_text}]

        try:
            reply = self.client.chat(model=self.model_name, messages=chat_messages, think=enable_thinking)
        except TypeError:
            # Some client versions have different signatures
            try:
                reply = self.client.chat(self.model_name, messages=chat_messages, think=enable_thinking)
            except Exception:
                reply = self.client.chat(self.model_name, chat_messages)

        text = self._response_text(reply)
        if isinstance(text, bytes):
            text = text.decode('utf-8', errors='ignore')
        return (text or "").strip()

    def solve_problem(self, question, answer_type="symbolic"):
        """
        Zero-shot single-pass solve via Ollama.

        Returns a dict with 'thinking_content' and 'final_tag_output' (both
        always empty here: no thinking parsing, no second pass),
        'generated_answer' (the raw reply) and 'extracted_final_answer'.
        """
        reply_text = self._generate_once(
            self._create_prompt(question, answer_type), enable_thinking=False
        )
        return {
            "thinking_content": "",
            "generated_answer": reply_text,
            "final_tag_output": "",
            "extracted_final_answer": EnhancedAnswerExtractor.extract_final_answer(reply_text),
        }

# -------------------------
# Dataset Processor
# -------------------------
class DatasetProcessor:
    """
    Runs a solver over a slice of a JSONL dataset and writes results to disk:
    periodic intermediate dumps, a final results file, a summary file, and one
    JSON file per problem whose final answer could not be extracted.
    """

    def __init__(self, solver: OllamaZeroShotMathSolver, failed_folder=None):
        """Store the solver and make sure the failed-extraction folder exists."""
        self.solver = solver
        self.extractor = EnhancedAnswerExtractor()
        self.failed_folder = failed_folder or "failed_extractions"
        os.makedirs(self.failed_folder, exist_ok=True)

    @staticmethod
    def _write_json(path, payload):
        """Write payload to path as indented UTF-8 JSON without ASCII escaping."""
        with open(path, 'w', encoding='utf-8') as handle:
            json.dump(payload, handle, ensure_ascii=False, indent=2)

    def process_dataset(self, dataset_path, output_base_path, start_idx=0, end_idx=None,
                        folder_name=None, create_timestamped_folder=True):
        """
        Solve problems start_idx .. end_idx-1 of the dataset (end_idx defaults
        to the dataset length) and return (results, output_folder).

        A problem that raises is recorded as an error entry instead of
        aborting the run. Intermediate results are saved after every 10th
        position in the slice that completes without error.
        """
        problems = self._load_dataset(dataset_path)
        if end_idx is None:
            end_idx = len(problems)

        output_folder = self._create_output_folder(
            output_base_path, folder_name, start_idx, end_idx, create_timestamped_folder
        )

        print(f"Processing problems {start_idx} to {end_idx-1} ({end_idx-start_idx} total)")
        print(f"Output will be saved in: {output_folder}")

        results = []
        stop = min(end_idx, len(problems))
        for idx in tqdm(range(start_idx, stop)):
            problem = problems[idx]
            try:
                entry = self._process_single_problem(idx, problem)
                results.append(entry)
                self._print_progress(idx, entry)
                completed = idx - start_idx + 1
                if completed % 10 == 0:
                    self._save_intermediate_results(results, output_folder, completed)
            except Exception as exc:
                print(f"Error processing problem {idx+1}: {str(exc)}")
                results.append(self._create_error_entry(idx, problem, str(exc)))

        self._save_final_results(results, output_folder, start_idx, end_idx)
        self._create_summary_file(results, output_folder, dataset_path, start_idx, end_idx)
        return results, output_folder

    def _create_output_folder(self, base_path, folder_name, start_idx, end_idx, add_timestamp):
        """Create (if needed) and return the output folder under base_path."""
        name = folder_name if folder_name is not None else f"results_{start_idx}_to_{end_idx-1}"
        if add_timestamp:
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            name = f"{name}_{stamp}"
        target = os.path.join(base_path, name)
        os.makedirs(target, exist_ok=True)
        return target

    def _load_dataset(self, dataset_path):
        """Read a JSONL file and return its non-blank lines parsed as JSON."""
        with open(dataset_path, 'r', encoding='utf-8') as handle:
            return [json.loads(row) for row in handle if row.strip()]

    def _process_single_problem(self, idx, problem):
        """
        Solve one problem and return its result entry (a dict).

        Steps: derive the canonical answer type, call the solver, extract the
        final answer(s), and, when nothing could be extracted, dump the
        details into the failed-extraction folder.
        """
        language = problem.get("Language", "")
        chapter_num = problem.get("Chapter Number", "")
        example_num = problem.get("Example Number", "")
        question = problem.get("Question", "")
        exact_answer = problem.get("Exact Answer", "")
        raw_answer_type = problem.get("Answer Type", "") or ""

        # Normalize/infer canonical answer type: 'symbolic', 'numerical', 'proof'
        canonical_type = normalize_answer_type(
            raw_answer_type, question_text=question, exact_answer=exact_answer
        )

        # An exact answer with digits and no \frac/\sqrt/\boxed forces 'numerical'.
        if exact_answer:
            exact_text = str(exact_answer)
            has_digit = re.search(r'\d', exact_text)
            if has_digit and not re.search(r'\\frac|\\sqrt|\\boxed', exact_text):
                canonical_type = "numerical"

        print(f"\nProcessing Problem {idx+1}: Chapter {chapter_num}, Example {example_num}")
        print(f"Raw Answer Type: '{raw_answer_type}'  --> canonical: '{canonical_type}'")

        # Generate solution (use canonical_type) -- zero-shot, single pass
        solved = self.solver.solve_problem(question, answer_type=canonical_type)
        generated_answer = solved.get('generated_answer', '')
        thinking_content = solved.get('thinking_content', '')
        final_tag_output = solved.get('final_tag_output', '')

        # Collect <final> answers from the reply, then from final_tag_output.
        finals = EnhancedAnswerExtractor.extract_all_final_answers(generated_answer)
        if not finals and final_tag_output:
            finals = EnhancedAnswerExtractor.extract_all_final_answers(final_tag_output)

        if finals:
            extracted_final_answers = finals
            if len(finals) == 1:
                extracted_final_answer = finals[0]
            else:
                # several answers: store a machine-readable JSON array string
                try:
                    extracted_final_answer = json.dumps(finals, ensure_ascii=False)
                except Exception:
                    extracted_final_answer = " ||| ".join(finals)
        else:
            # Single-answer extractor on the reply, then on all text combined.
            candidate = EnhancedAnswerExtractor.extract_final_answer(generated_answer)
            if not candidate:
                merged = "\n".join(
                    part or "" for part in (thinking_content, generated_answer, final_tag_output)
                )
                candidate = EnhancedAnswerExtractor.extract_final_answer(merged)
            if candidate:
                extracted_final_answer = candidate
                extracted_final_answers = [candidate]
            else:
                extracted_final_answer = ""
                extracted_final_answers = []

        # Nothing extracted: keep a record for later inspection.
        if not extracted_final_answer:
            stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            failed_path = os.path.join(self.failed_folder, f"failed_{idx}_{stamp}.json")
            self._write_json(failed_path, {
                "index": idx,
                "question": question,
                "generated_answer": generated_answer,
                "thinking_content": thinking_content,
                "final_tag_output": final_tag_output,
                "exact_answer": exact_answer,
                "canonical_type": canonical_type,
                "extracted_final_answer": extracted_final_answer,
                "extracted_final_answers": extracted_final_answers
            })
            print(f"Saved failed extraction example to {failed_path}")

        return {
            "problem_index": idx,
            "language": language,
            "chapter_number": chapter_num,
            "example_number": example_num,
            "question": question,
            "generated_answer": generated_answer,
            "thinking_content": thinking_content,
            "final_tag_output": final_tag_output,
            "extracted_final_answer": extracted_final_answer,       # string (or JSON array string)
            "extracted_final_answers": extracted_final_answers,     # list (empty / single / many)
            "exact_answer": exact_answer,
            "raw_answer_type": raw_answer_type,
            "canonical_answer_type": canonical_type,
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _create_error_entry(self, idx, problem, error_msg):
        """Build a result entry for a problem whose processing raised."""
        field = problem.get
        return {
            "problem_index": idx,
            "language": field("Language", ""),
            "chapter_number": field("Chapter Number", ""),
            "example_number": field("Example Number", ""),
            "question": field("Question", ""),
            "generated_answer": f"ERROR: {error_msg}",
            "thinking_content": "",
            "final_tag_output": "",
            "extracted_final_answer": "",
            "extracted_final_answers": [],
            "exact_answer": field("Exact Answer", ""),
            "raw_answer_type": field("Answer Type", ""),
            "canonical_answer_type": "",
            "evaluation_method": field("Evaluation Method", "")
        }

    def _print_progress(self, idx, result_entry):
        """Print answer length, extracted answer(s) and expected answer."""
        reply = result_entry['generated_answer']
        reply_length = len(reply) if reply else 0
        print(f"Generated answer length: {reply_length}")
        print(f"Extracted final answer: '{result_entry['extracted_final_answer']}'")
        print(f"Extracted final answers (list): {result_entry.get('extracted_final_answers', [])}")
        print(f"Expected answer: '{result_entry['exact_answer']}'")

    def _save_intermediate_results(self, results, output_folder, count):
        """Dump all results so far to 'intermediate_results_<count>.json'."""
        snapshot_path = os.path.join(output_folder, f'intermediate_results_{count}.json')
        self._write_json(snapshot_path, results)
        print(f"Saved intermediate results to {snapshot_path}")

    def _save_final_results(self, results, output_folder, start_idx, end_idx):
        """Write the final results file and return its path."""
        final_output_path = os.path.join(
            output_folder, f'final_results_{start_idx}_to_{end_idx-1}.json'
        )
        self._write_json(final_output_path, results)
        print(f"\nProcessing complete. Results saved to {final_output_path}")
        print(f"Total problems processed: {len(results)}")
        return final_output_path

    def _create_summary_file(self, results, output_folder, dataset_path, start_idx, end_idx):
        """
        Write 'processing_summary.json' (run metadata plus aggregate statistics
        over `results`) into output_folder and print its location.
        """
        total = len(results)
        extracted_count = sum(
            1 for entry in results if entry.get('extracted_final_answer', '').strip()
        )
        failed_count = sum(
            1 for entry in results if entry['generated_answer'].startswith('ERROR:')
        )

        # Ratios are only defined when at least one entry exists.
        if results:
            success_rate = f"{(extracted_count/total*100):.1f}%"
            mean_length = sum(len(entry['generated_answer']) for entry in results) / total
        else:
            success_rate = "0%"
            mean_length = 0

        summary_data = {
            "processing_info": {
                "dataset_path": dataset_path,
                "start_index": start_idx,
                "end_index": end_idx - 1,
                "total_processed": total,
                "processing_timestamp": datetime.now().isoformat(),
                "output_folder": output_folder
            },
            "statistics": {
                "successful_problems": total - failed_count,
                "failed_problems": failed_count,
                "successful_extractions": extracted_count,
                "extraction_success_rate": success_rate,
                "average_answer_length": mean_length,
                "chapters_processed": list(
                    {entry['chapter_number'] for entry in results if entry['chapter_number']}
                ),
                "raw_answer_types": list(
                    {entry['raw_answer_type'] for entry in results if entry.get('raw_answer_type')}
                ),
                "canonical_answer_types": list(
                    {entry['canonical_answer_type'] for entry in results if entry.get('canonical_answer_type')}
                )
            }
        }
        summary_path = os.path.join(output_folder, 'processing_summary.json')
        self._write_json(summary_path, summary_data)
        print(f"Processing summary saved to {summary_path}")
        print(f"Answer extraction success rate: {success_rate}")

# -------------------------
# Main (example usage)
# -------------------------
def main():
    """
    Example run: build the Ollama zero-shot solver and a DatasetProcessor,
    then process the Finnish corpus with start_idx=0 and end_idx=100.
    """
    # NOTE: adjust both paths to your environment.
    # The dataset's "Question" fields are expected to be in Finnish.
    corpus_file = "/kaggle/input/nctb-dataset/Finnish_Final_Corpus.jsonl"
    work_dir = "/kaggle/working/"
    failed_dir = os.path.join(work_dir, "failed_extractions")

    # The model named here must already be available in Ollama.
    math_solver = OllamaZeroShotMathSolver(model_name="qwen3:8b")
    pipeline = DatasetProcessor(math_solver, failed_folder=failed_dir)

    # Small slice for a quick test run.
    _results, out_folder = pipeline.process_dataset(
        corpus_file, work_dir, start_idx=0, end_idx=100
    )
    print("Done. Results saved to:", out_folder)

# Entry point: run the example pipeline only when this code executes as the
# main module (not when it is imported).
if __name__ == "__main__":
    main()


# **Lithuanian**

In [None]:
import json
import os
import re
from datetime import datetime
from tqdm import tqdm

# Ollama client
try:
    import ollama
except Exception as e:
    raise ImportError("The 'ollama' package is required. Install it and make sure the Ollama daemon is running.") from e

# -------------------------
# Answer type normalization
# -------------------------
# Canonical set
_CANONICAL_TYPES = {"symbolic", "numerical", "proof"}

# mapping common noisy labels to canonical (English + Lithuanian variants)
_ANSWER_TYPE_MAP_SIMPLE = {
    # Proof variants (English -> Lithuanian)
    "proof": "proof", "prove": "proof",
    "įrodyti": "proof", "įrodymas": "proof", "įrodinėti": "proof", "parodyti": "proof", "įrodyk": "proof",

    # Numerical variants
    "numerical": "numerical", "numeric": "numerical", "number": "numerical", "calculation": "numerical",
    "skaitinis": "numerical", "skaičius": "numerical", "apskaičiuoti": "numerical", "apskaičiuokite": "numerical",
    "rasti": "numerical", "raskite": "numerical",

    # Symbolic variants
    "symbolic": "symbolic", "symbol": "symbolic", "equation": "symbolic", "algebraic": "symbolic",
    "simbolinis": "symbolic", "lygtis": "symbolic", "lygtys": "symbolic", "algebrinis": "symbolic",
}


def normalize_answer_type(raw_label: str, question_text: str = "", exact_answer: str = "") -> str:
    """
    Normalize a dataset label to one of: 'symbolic', 'numerical', 'proof'.

    Resolution order:
      1. exact match of the cleaned label in _ANSWER_TYPE_MAP_SIMPLE;
      2. first map key that occurs as a substring of the cleaned label;
      3. keyword heuristics over the question / exact answer (English + Lithuanian);
      4. fallback 'symbolic'.

    raw_label, question_text and exact_answer may be None or non-string values
    (e.g. a JSON number as the exact answer); they are converted to text first.
    """
    def _as_text(value) -> str:
        # Dataset fields come from JSON and may be numbers; never call str methods on them raw.
        return "" if value is None else str(value)

    # Helper to clean label
    def _clean_label(lbl) -> str:
        s = _as_text(lbl).strip().lower()
        if not s:
            return ""
        # keep unicode letters (Latin extended), digits, spaces and hyphens
        s = re.sub(r'[^0-9\w\s\u0100-\u017F-]', ' ', s, flags=re.UNICODE)
        return re.sub(r'\s+', ' ', s).strip()

    s = _clean_label(raw_label)

    # direct mapping
    if s in _ANSWER_TYPE_MAP_SIMPLE:
        return _ANSWER_TYPE_MAP_SIMPLE[s]

    # partial matches (first map key contained in the label wins)
    for k, v in _ANSWER_TYPE_MAP_SIMPLE.items():
        if k in s:
            return v

    # heuristics using question or exact_answer; both are lowercased, so the
    # patterns below are written in lowercase and need no IGNORECASE flag
    q = _as_text(question_text).lower()
    a = _as_text(exact_answer).lower()
    answer_has_digit = re.search(r'\d', a) is not None

    # Proof indicators (English + Lithuanian, including the imperative "-ite" forms)
    if re.search(r'\b(prove|show that|prove that|proof)\b', q) or \
       re.search(r'\b(įrodyti|įrodymas|įrodinėti|įrodyk(?:ite)?|parodyti|parodyk(?:ite)?)\b', q):
        return "proof"

    # logarithm related: English/Lithuanian
    if re.search(r'\b(log|ln|logarithm|logaritmas)\b', q) or re.search(r'\blog\b', a):
        # prefer numerical if exact answer contains digits
        return "numerical" if answer_has_digit else "symbolic"

    # set theory indicators: English / Lithuanian ('aibė' = set)
    if re.search(r'\b(set|subset|union|intersection)\b', q) or \
       re.search(r'\b(aibė|poaibis|sąjunga|sankirta|sandūra)\b', q):
        return "symbolic"

    # units (English + Lithuanian); 'metr*' / 'litr*' cover inflected forms such as
    # 'metrų' or 'litrai'
    units_text = q + " " + a
    if re.search(r'\bmeter\b|\bm\b|\bcm\b|\bkg\b|\bliter\b|\bl\b|\bkm\b|\bmile\b', units_text) or \
       re.search(r'\bmetr\w*|\blitr\w*', units_text):
        return "numerical"

    # equation-solving heuristics (English + Lithuanian)
    if re.search(r'\bequation\b|solve for|solve|= x|x\s*=', q) or \
       re.search(r'\b(lygtis|lygtį|lygti|išspręsti|spręsti|išspręskite)\b', q):
        return "symbolic"

    # trig / geometry indicators (English + Lithuanian); 'trig\w*' matches the whole
    # word so that 'trigonometry' / 'trigonometrija' / 'trigonometrinį' are recognized
    if re.search(r'\b(trig\w*|sin|cos|tan|geometry|triangle|circle)\b', q) or \
       re.search(r'\b(geometrija|trikampis|apskritimas|ratas)\b', q):
        return "numerical" if answer_has_digit else "symbolic"

    # numeric indicators (English + Lithuanian) -> numerical.
    # 'rasti'/'raskite' must end at a word boundary so they do not fire inside
    # 'supaprastinti'/'supaprastinkite' (simplify); prefixed forms like 'suraskite' still match.
    if re.search(r'\d', q) or re.search(r'find the value|compute|calculate|evaluate', q) or \
       re.search(r'rasti\b|raskite\b|apskaičiuoti|apskaičiuokite|skaičiuoti|skaičiuokite', q):
        return "numerical"

    # If exact_answer looks numeric, prefer numerical (but LaTeX markup means symbolic)
    if re.search(r'[0-9]|\\frac|\\sqrt', a):
        if re.search(r'\\frac|\\sqrt|\{|\\', a):
            return "symbolic"
        return "numerical"

    # fallback
    return "symbolic"

# -------------------------
# EnhancedAnswerExtractor (adapted to behave like SimplifiedAnswerExtractor)
# and now handles Lithuanian markers as well
# -------------------------
class EnhancedAnswerExtractor:
    """
    Extracts a concise final answer from model output (English + Lithuanian markers).

    Public entry points:
      - extract_final_answer(text) -> str
      - extract_all_final_answers(generated_solution) -> list
      - extract_final_answer_simple(text) -> str
    Internal helpers: _clean_answer, _is_valid_answer, _extract_with_patterns,
    _iter_boxed, _unwrap_boxed, _valid_final_tags.

    Explicit <final>...</final> tags are preferred; without them the last lines of
    the text are tried, then labelled-answer patterns, then boxed expressions.
    """

    # Payload of <final>...</final> tags (non-greedy, may span lines)
    _FINAL_TAG_RE = re.compile(r'<final>(.*?)</final>', re.DOTALL | re.IGNORECASE)

    # Start of a boxed wrapper: up to three backslashes followed by 'boxed{'
    _BOXED_OPEN_RE = re.compile(r'(?:\\){0,3}boxed\{', re.IGNORECASE)

    # Leading phrases stripped from an answer (applied in order, anchored at the start).
    # Word prefixes end with \b so that e.g. 'So' is not cut off 'Some ...', and the
    # longer 'Atsakymas yra' is tried before 'Atsakymas'.
    _LEADING_PHRASES = (
        # English
        r'Final Answer:\s*', r'Answer:\s*', r'The answer is\b\s*',
        r'Therefore\b,?\s*', r'Thus\b,?\s*', r'Hence\b,?\s*', r'So\b,?\s*', r'∴\s*',

        # Lithuanian
        r'Galutinis atsakymas\b[:\s]*', r'Galutinis\b[:\s]*',
        r'Atsakymas yra\b\s*', r'Atsakymas\b[:\s]*', r'Išvada\b[:\s]*',
        r'Todėl\b,?\s*', r'Taigi\b,?\s*', r'Iš to\b,?\s*',
    )

    # An answer ending in one of these whole words is only a dangling conclusion marker
    _DANGLING_ENDINGS = (
        r'\btherefore$', r'\bthus$', r'\bhence$', r'\bso$', r'\bwe get$', r'\bwe have$',
        r'\btodėl$', r'\btaigi$', r'\bišvada$',
    )

    @staticmethod
    def _iter_boxed(text: str):
        """
        Yield (start, end, inner) for every brace-balanced boxed wrapper in text.

        start is the index where the wrapper begins, end the index just past its
        closing brace, inner the text between the braces. Braces are counted, so
        nested groups such as \\boxed{\\frac{1}{2}} are returned whole. Wrappers
        whose braces never balance are skipped.
        """
        pos = 0
        while True:
            opener = EnhancedAnswerExtractor._BOXED_OPEN_RE.search(text, pos)
            if opener is None:
                return
            depth = 1
            idx = opener.end()
            while idx < len(text) and depth:
                if text[idx] == '{':
                    depth += 1
                elif text[idx] == '}':
                    depth -= 1
                idx += 1
            if depth:
                # unbalanced wrapper: keep looking for a later, well-formed one
                pos = opener.end()
                continue
            yield opener.start(), idx, text[opener.end():idx - 1]
            pos = idx

    @staticmethod
    def _unwrap_boxed(text: str) -> str:
        """Replace each balanced boxed wrapper (and a '$' fence hugging it) by its content."""
        pieces = []
        cursor = 0
        for start, end, inner in EnhancedAnswerExtractor._iter_boxed(text):
            # drop an opening '$' fence directly before the wrapper, keep ordinary spacing
            pieces.append(re.sub(r'\$\s*\Z', '', text[cursor:start]))
            pieces.append(EnhancedAnswerExtractor._unwrap_boxed(inner))
            closing_fence = re.match(r'\s*\$', text[end:])
            cursor = end + (closing_fence.end() if closing_fence else 0)
        pieces.append(text[cursor:])
        return ''.join(pieces)

    @staticmethod
    def _clean_answer(answer: str) -> str:
        """
        Reduce a raw answer candidate to its concise form.

        Steps, in order: collapse whitespace, drop surrounding '$' fences, strip a
        leading answer/conclusion phrase, unwrap boxed wrappers, rewrite simple
        \\frac / \\sqrt, remove markdown emphasis, trim trailing punctuation and
        trailing filler words. Returns "" for empty input.
        """
        if not answer:
            return ""
        # Start with a trimmed answer and normalize whitespace
        a = re.sub(r'\s+', ' ', answer.strip())

        # remove outer $$ if present, then a surrounding single $
        a = re.sub(r'^\$\$(.*)\$\$$', r'\1', a, flags=re.DOTALL)
        a = re.sub(r'^\$(.*)\$$', r'\1', a, flags=re.DOTALL)

        # strip standalone leading/trailing $ characters and spaces
        a = a.strip('$ ')

        # Remove common prefixes (after stripping $ to catch cases like "$Final Answer: ...$")
        for prefix in EnhancedAnswerExtractor._LEADING_PHRASES:
            a = re.sub(f'^{prefix}', '', a, flags=re.IGNORECASE)

        # unwrap boxed wrappers before touching \frac so nested braces stay paired
        a = EnhancedAnswerExtractor._unwrap_boxed(a)

        # convert common LaTeX to readable forms
        a = re.sub(r'\\frac\{([^}]*)\}\{([^}]*)\}', r'(\1)/(\2)', a)
        a = re.sub(r'\\sqrt\{([^}]*)\}', r'√(\1)', a)

        # remove bold wrappers
        a = re.sub(r'\*\*([^*]+)\*\*', r'\1', a)
        # remove italic wrappers, but leave multiplication like 2*3*4 alone: the
        # asterisks must not touch a word character on the outside or a space on the inside
        a = re.sub(r'(?<![\w*])\*([^*\s](?:[^*]*[^*\s])?)\*(?![\w*])', r'\1', a)

        # collapse multiple spaces again (in case replacements introduced them)
        a = re.sub(r'\s+', ' ', a).strip()

        # trim trailing punctuation
        a = a.rstrip(' \t\n.,;:')

        # remove trailing words like "proved" or "the answer" (English & Lithuanian)
        a = re.sub(r'\b(proved|completed|finished|the answer|įrodyta|įrodymas|atsakymas|atsakyti)\b[.\s]*$', '', a, flags=re.IGNORECASE).strip()

        return a

    @staticmethod
    def _is_valid_answer(answer: str) -> bool:
        """
        Return True when a cleaned candidate looks like a usable answer: non-empty,
        not punctuation only, contains a digit / Latin letter / backslash, at most
        1000 characters, and does not end in a bare conclusion word.
        """
        if not answer:
            return False
        # not only punctuation
        if re.match(r'^[\W_]+$', answer):
            return False
        # contains at least some alphanumeric characters (or a LaTeX backslash)
        if not re.search(r'[0-9A-Za-z\u0100-\u017F\\]', answer):
            return False
        # length sanity
        if len(answer) > 1000:
            return False
        # avoid answers that end with only concluding words (English & Lithuanian);
        # whole words only, so e.g. 'viso' is not mistaken for a trailing 'so'
        stripped = answer.strip()
        for ending in EnhancedAnswerExtractor._DANGLING_ENDINGS:
            if re.search(ending, stripped, flags=re.IGNORECASE):
                return False
        return True

    @staticmethod
    def _valid_final_tags(text: str):
        """Return (tags_present, cleaned_valid_answers) for the <final> tags in text."""
        raw_matches = EnhancedAnswerExtractor._FINAL_TAG_RE.findall(text)
        cleaned = []
        for raw in raw_matches:
            candidate = EnhancedAnswerExtractor._clean_answer(raw)
            if EnhancedAnswerExtractor._is_valid_answer(candidate):
                cleaned.append(candidate)
        return bool(raw_matches), cleaned

    @staticmethod
    def extract_final_answer_simple(text: str) -> str:
        """
        Primary method: Extract final answer using the last lines approach,
        with fallback to pattern-based extraction.

        Tried in order: last two lines joined, last line, last three lines joined
        (only if the result is shorter than 500 characters), then
        _extract_with_patterns. Returns "" when nothing valid is found.
        """
        if not text:
            return ""

        clean = EnhancedAnswerExtractor._clean_answer
        valid = EnhancedAnswerExtractor._is_valid_answer

        # Clean the text and split into non-empty lines
        lines = [line.strip() for line in text.strip().split('\n') if line.strip()]

        # Strategy 1: Try last two lines combined
        if len(lines) >= 2:
            cleaned = clean(' '.join(lines[-2:]))
            if valid(cleaned):
                return cleaned

        # Strategy 2: Try last line only
        if lines:
            cleaned = clean(lines[-1])
            if valid(cleaned):
                return cleaned

        # Strategy 3: Try last 3 lines if we have them (sometimes answers span multiple lines)
        if len(lines) >= 3:
            cleaned = clean(' '.join(lines[-3:]))
            if valid(cleaned) and len(cleaned) < 500:
                return cleaned

        # Strategy 4: Fallback to pattern-based extraction
        return EnhancedAnswerExtractor._extract_with_patterns(text)

    @staticmethod
    def _extract_with_patterns(text: str) -> str:
        """
        Pattern-based extraction as fallback method.
        Recognizes both English and Lithuanian answer/conclusion patterns.

        Order: first valid <final> tag, then labelled-answer patterns (the last
        occurrence of each pattern is used), then boxed expressions, preferring
        '$$'-fenced, then '$'-fenced, then bare wrappers. Returns "" if none is valid.
        """
        # Check for <final> tags
        _, finals = EnhancedAnswerExtractor._valid_final_tags(text)
        if finals:
            return finals[0]

        # Try common answer patterns (English + Lithuanian)
        patterns = [
            r'\*\*Final Answer:\*\*\s*(.+?)(?:\n|$)',
            r'Final Answer:\s*(.+?)(?:\n|$)',
            r'Final Answer\s*[:\-]\s*(.+?)(?:\n|$)',

            # Lithuanian patterns ('Atsakymas yra' first, otherwise 'Atsakymas' would
            # capture the word 'yra' as part of the answer)
            r'\*\*Galutinis atsakymas:\*\*\s*(.+?)(?:\n|$)',
            r'Galutinis atsakymas[:\s]*(.+?)(?:\n|$)',
            r'Atsakymas yra[:\s]*(.+?)(?:\n|$)',
            r'Atsakymas[:\s]*(.+?)(?:\n|$)',
            r'Išvada[:\s]*(.+?)(?:\n|$)',

            # English conclusion markers
            r'Therefore[,:\s]*(.+?)(?:\.|$|\n)',
            r'Hence[,:\s]*(.+?)(?:\.|$|\n)',
            r'Thus[,:\s]*(.+?)(?:\.|$|\n)',
            r'∴\s*(.+?)(?:\.|$|\n)',

            # Lithuanian conclusion markers
            r'Todėl[,:\s]*(.+?)(?:\.|$|\n)',
            r'Taigi[,:\s]*(.+?)(?:\.|$|\n)',
            r'Iš to[,:\s]*(.+?)(?:\.|$|\n)',

            r'Answer[:\s]*(.+?)(?:\n|$)',
            r'Result[:\s]*(.+?)(?:\n|$)',
            r'Solution[:\s]*(.+?)(?:\n|$)',
        ]

        for pat in patterns:
            matches = re.findall(pat, text, re.MULTILINE | re.DOTALL | re.IGNORECASE)
            if matches:
                cleaned = EnhancedAnswerExtractor._clean_answer(matches[-1].strip())
                if EnhancedAnswerExtractor._is_valid_answer(cleaned):
                    return cleaned

        # Try boxed math expressions (brace-balanced, so nested LaTeX groups survive)
        boxed_spans = list(EnhancedAnswerExtractor._iter_boxed(text))
        for fence in ('$$', '$', ''):
            for start, end, inner in boxed_spans:
                if fence and not (text[:start].rstrip().endswith(fence)
                                  and text[end:].lstrip().startswith(fence)):
                    continue
                cand = EnhancedAnswerExtractor._clean_answer(inner)
                if EnhancedAnswerExtractor._is_valid_answer(cand):
                    return cand

        return ""

    @staticmethod
    def extract_all_final_answers(generated_solution: str) -> list:
        """
        Extract multiple final answers using simplified approach.
        Returns a list (possibly empty) of cleaned answers found inside all <final>...</final> tags.
        Falls back to the single simplified extraction if no valid tagged answer is found.
        """
        if not generated_solution:
            return []

        _, finals = EnhancedAnswerExtractor._valid_final_tags(generated_solution)
        if finals:
            return finals

        # Fallback: try to extract a single final using the simpler logic
        simple_answer = EnhancedAnswerExtractor.extract_final_answer_simple(generated_solution)
        return [simple_answer] if simple_answer else []

    @staticmethod
    def extract_final_answer(generated_solution: str) -> str:
        """
        Backwards-compatible extractor that delegates to the simplified extraction.
        With exactly one valid <final> answer that answer is returned; with several,
        a JSON array string of the cleaned answers is returned.
        """
        if not generated_solution:
            return ""

        # Prefer explicit <final> tags (can be multiple)
        _, finals = EnhancedAnswerExtractor._valid_final_tags(generated_solution)
        if len(finals) == 1:
            return finals[0]
        if finals:
            return json.dumps(finals, ensure_ascii=False)

        # No usable finals: use simplified single extraction
        return EnhancedAnswerExtractor.extract_final_answer_simple(generated_solution)

# -------------------------
# Ollama-based Math Solver (ZERO-SHOT) — PROMPTS IN ENGLISH (problem is Lithuanian)
# -------------------------
class OllamaZeroShotMathSolver:
    """
    Single-pass, zero-shot math solver backed by the Ollama Python client.

    The instructions sent to the model are in English; they state that the problem
    is written in LITHUANIAN and ask for a concise final answer in LITHUANIAN,
    without chain-of-thought, terminated by a <final>...</final> tag.
    """

    def __init__(self, model_name="qwen3:8b"):
        """
        model_name: the Ollama model reference (e.g., "qwen3:8b")
        """
        self.model_name = model_name
        self.client = self._load_client()

    def _load_client(self):
        """Announce the model in use and return a new ollama.Client."""
        print(f"Initializing Ollama client for model: {self.model_name}")
        return ollama.Client()

    def cleanup(self):
        """Drop the client attribute if it is still set; safe to call repeatedly."""
        if not hasattr(self, 'client'):
            return
        del self.client

    def _get_format_instructions(self, answer_type):
        """
        Build the English formatting instructions for one canonical answer type.

        Anything that is not in _CANONICAL_TYPES (including None / empty) is treated
        as 'symbolic'. The result is a shared preamble followed by a type-specific
        section that shows the expected <final>...</final> layout.
        """
        kind = (answer_type or "symbolic").strip().lower()
        if kind not in _CANONICAL_TYPES:
            kind = "symbolic"

        preamble = (
            "CRITICAL FORMATTING REQUIREMENTS (ZERO-SHOT):\n"
            "- The math problem you will receive is written in LITHUANIAN. Provide your concise FINAL ANSWER in LITHUANIAN.\n"
            "- DO NOT provide chain-of-thought or step-by-step internal reasoning. Do NOT reveal private chain-of-thought.\n"
            "- If a very short justification is necessary, include a single-line 'Explanation:' with at most one sentence (in Lithuanian).\n"
            "- Always end with a machine-readable final tag <final>...</final> containing ONLY the concise final answer in LITHUANIAN (no extra reasoning inside the tag).\n"
        )

        symbolic_section = (
            "FOR SYMBOLIC RESULTS:\n"
            "- Provide the final symbolic expression (LaTeX allowed) in a concise form. The expression may be LaTeX but any wording should be in Lithuanian if used.\n"
            "Format example (English instructions):\n"
            "Final Answer (in Lithuanian):\n"
            "[Final symbolic expression]\n\n"
            "<final>[LaTeX expression or concise symbolic expression — in Lithuanian if words are used]</final>\n"
        )
        other_sections = {
            "proof": (
                "FOR PROOFS (zero-shot):\n"
                "- Provide a concise conclusion or a one-sentence proof sketch (in LITHUANIAN) labeled 'Išvada:' or 'Įrodymas:' if needed. Do NOT provide a full step-by-step proof.\n"
                "Format example (English instructions):\n"
                "Final Answer (in Lithuanian):\n"
                "[Concise Lithuanian conclusion or one-sentence sketch]\n\n"
                "<final>[Concise Lithuanian conclusion]</final>\n"
            ),
            "numerical": (
                "FOR NUMERICAL RESULTS:\n"
                "- Provide the numeric result in exact form if available (fractions/radicals). Otherwise provide a decimal rounded to 4 decimal places.\n"
                "Format example (English instructions):\n"
                "Final Answer (in Lithuanian):\n"
                "[Numeric result]\n\n"
                "<final>[Numeric result]</final>\n"
            ),
        }
        return preamble + other_sections.get(kind, symbolic_section)

    def _create_prompt(self, question, answer_type="symbolic"):
        """Assemble the full prompt: role line, the problem text, then the format rules."""
        rules = self._get_format_instructions(answer_type)

        # English framing that names the problem language and the required answer language.
        return f"""You are an expert mathematician. The following math problem is written in LITHUANIAN.
Provide a concise final answer in LITHUANIAN. DO NOT produce chain-of-thought or step-by-step internal reasoning.
If you include a justification, it must be one short sentence and labeled 'Explanation:' (in Lithuanian).

MATH PROBLEM (in Lithuanian):
{question}

{rules}

Begin your concise answer (in Lithuanian) now:
"""

    @staticmethod
    def _reply_to_text(reply):
        """Pull the message content out of the response shapes this solver handles."""
        if not isinstance(reply, dict):
            # object-style response exposing .message.content
            try:
                return reply.message.content
            except Exception:
                return str(reply)

        if 'message' in reply and isinstance(reply['message'], dict) and 'content' in reply['message']:
            return reply['message']['content']

        if 'choices' in reply and isinstance(reply['choices'], (list, tuple)) and reply['choices']:
            first = reply['choices'][0]
            if isinstance(first, dict) and 'message' in first and isinstance(first['message'], dict):
                return first['message'].get('content', '')
            return str(first)

        return str(reply)

    def _generate_once(self, prompt_text: str, enable_thinking: bool = False) -> str:
        """
        Send one user message through client.chat (no temperature / max_tokens)
        and return the reply text, stripped.

        If the keyword-style call raises TypeError, the model name is passed
        positionally; if that fails too, the call is retried without 'think'.
        """
        chat_messages = [{"role": "user", "content": prompt_text}]

        try:
            reply = self.client.chat(model=self.model_name, messages=chat_messages, think=enable_thinking)
        except TypeError:
            # Some client versions have different signatures
            try:
                reply = self.client.chat(self.model_name, messages=chat_messages, think=enable_thinking)
            except Exception:
                reply = self.client.chat(self.model_name, chat_messages)

        text = self._reply_to_text(reply)
        if isinstance(text, bytes):
            text = text.decode('utf-8', errors='ignore')
        return (text or "").strip()

    def solve_problem(self, question, answer_type="symbolic"):
        """
        Zero-shot single-pass solve via Ollama.

        Returns a dict with the raw model output ('generated_answer'), the answer
        extracted from it ('extracted_final_answer'), and two always-empty fields
        ('thinking_content', 'final_tag_output') since there is no thinking parse
        and no second pass in this mode.
        """
        raw_output = self._generate_once(
            self._create_prompt(question, answer_type),
            enable_thinking=False,
        )
        return {
            "thinking_content": "",
            "generated_answer": raw_output,
            "final_tag_output": "",
            "extracted_final_answer": EnhancedAnswerExtractor.extract_final_answer(raw_output),
        }

# -------------------------
# Dataset Processor
# -------------------------
class DatasetProcessor:
    """
    Runs a solver over a JSONL dataset slice and writes results to disk.

    For each problem it normalizes the answer type, asks the solver for a
    solution, extracts the final answer(s), and records a result entry; problems
    that raise are recorded as 'ERROR: ...' entries. Intermediate results, final
    results, a summary file and failed-extraction samples are saved as JSON.
    """

    def __init__(self, solver: "OllamaZeroShotMathSolver", failed_folder=None):
        """
        solver: object exposing solve_problem(question, answer_type=...) -> dict.
        failed_folder: directory for failed-extraction samples
            (default "failed_extractions"); created if missing.
        """
        self.solver = solver
        self.extractor = EnhancedAnswerExtractor()
        self.failed_folder = failed_folder or "failed_extractions"
        os.makedirs(self.failed_folder, exist_ok=True)

    @staticmethod
    def _resolve_range(start_idx, end_idx, total):
        """
        Clamp the requested [start_idx, end_idx) slice to a dataset of `total` items.

        end_idx=None means "to the end". The returned end never exceeds total and
        never precedes start, so names and summaries built from it describe the
        problems that are actually processed.
        """
        start = max(0, start_idx)
        end = total if end_idx is None else min(end_idx, total)
        return start, max(start, end)

    def process_dataset(self, dataset_path, output_base_path, start_idx=0, end_idx=None,
                        folder_name=None, create_timestamped_folder=True):
        """
        Process dataset entries start_idx..end_idx-1 and save all outputs.

        Returns (results, output_folder). end_idx is exclusive; None or a value
        beyond the dataset length means "through the last entry".
        """
        dataset = self._load_dataset(dataset_path)
        start_idx, end_idx = self._resolve_range(start_idx, end_idx, len(dataset))

        output_folder = self._create_output_folder(output_base_path, folder_name, start_idx, end_idx, create_timestamped_folder)
        results = []

        print(f"Processing problems {start_idx} to {end_idx-1} ({end_idx-start_idx} total)")
        print(f"Output will be saved in: {output_folder}")

        for idx in tqdm(range(start_idx, end_idx)):
            problem = dataset[idx]
            try:
                result_entry = self._process_single_problem(idx, problem)
                results.append(result_entry)
                self._print_progress(idx, result_entry)
            except Exception as e:
                print(f"Error processing problem {idx+1}: {str(e)}")
                error_entry = self._create_error_entry(idx, problem, str(e))
                results.append(error_entry)

            # checkpoint every 10 problems, whether the 10th succeeded or failed
            processed = idx - start_idx + 1
            if processed % 10 == 0:
                self._save_intermediate_results(results, output_folder, processed)

        self._save_final_results(results, output_folder, start_idx, end_idx)
        self._create_summary_file(results, output_folder, dataset_path, start_idx, end_idx)
        return results, output_folder

    def _create_output_folder(self, base_path, folder_name, start_idx, end_idx, add_timestamp):
        """Create (if needed) and return the output folder under base_path."""
        if folder_name is None:
            folder_name = f"results_{start_idx}_to_{end_idx-1}"
        if add_timestamp:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            folder_name = f"{folder_name}_{timestamp}"
        output_folder = os.path.join(base_path, folder_name)
        os.makedirs(output_folder, exist_ok=True)
        return output_folder

    def _load_dataset(self, dataset_path):
        """Read a UTF-8 JSONL file into a list of parsed objects, skipping blank lines."""
        with open(dataset_path, 'r', encoding='utf-8') as f:
            return [json.loads(line) for line in f if line.strip()]

    def _process_single_problem(self, idx, problem):
        """
        Solve one problem and return its result entry (a dict).

        When no final answer can be extracted, a JSON sample with the inputs and
        raw outputs is written to self.failed_folder for inspection.
        """
        language = problem.get("Language", "")
        chapter_num = problem.get("Chapter Number", "")
        example_num = problem.get("Example Number", "")
        question = problem.get("Question", "")
        exact_answer = problem.get("Exact Answer", "")
        raw_answer_type = problem.get("Answer Type", "") or ""

        # Normalize/infer canonical answer type: 'symbolic', 'numerical', 'proof'
        canonical_type = normalize_answer_type(raw_answer_type, question_text=question, exact_answer=exact_answer)

        # If the exact answer contains a digit and no LaTeX \frac/\sqrt/\boxed, prefer numerical.
        # NOTE(review): this also overrides an explicit 'proof'/'symbolic' label whenever the
        # exact answer contains any digit — confirm that is intended.
        answer_text = str(exact_answer) if exact_answer else ""
        if re.search(r'\d', answer_text) and not re.search(r'\\frac|\\sqrt|\\boxed', answer_text):
            canonical_type = "numerical"

        print(f"\nProcessing Problem {idx+1}: Chapter {chapter_num}, Example {example_num}")
        print(f"Raw Answer Type: '{raw_answer_type}'  --> canonical: '{canonical_type}'")

        # Generate solution (use canonical_type) -- zero-shot, single pass
        solution_result = self.solver.solve_problem(question, answer_type=canonical_type)
        generated_answer = solution_result.get('generated_answer', '')
        thinking_content = solution_result.get('thinking_content', '')
        final_tag_output = solution_result.get('final_tag_output', '')

        # Keep both forms of the extraction: a list and a single string.
        # Preferred source is the generated answer, then the final-tag output.
        all_finals = EnhancedAnswerExtractor.extract_all_final_answers(generated_answer)
        if not all_finals and final_tag_output:
            all_finals = EnhancedAnswerExtractor.extract_all_final_answers(final_tag_output)

        # Still nothing: single-answer extractor, first on the answer, then on everything combined
        if not all_finals:
            single = EnhancedAnswerExtractor.extract_final_answer(generated_answer)
            if not single:
                combined = "\n".join([thinking_content or "", generated_answer or "", final_tag_output or ""])
                single = EnhancedAnswerExtractor.extract_final_answer(combined)
            all_finals = [single] if single else []

        extracted_final_answers = all_finals
        if not all_finals:
            extracted_final_answer = ""
        elif len(all_finals) == 1:
            extracted_final_answer = all_finals[0]
        else:
            # several answers: store a machine-readable JSON array string
            extracted_final_answer = json.dumps(all_finals, ensure_ascii=False)

        # If still empty, save a failed extraction example for inspection
        if not extracted_final_answer:
            fname = f"failed_{idx}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            fpath = os.path.join(self.failed_folder, fname)
            with open(fpath, 'w', encoding='utf-8') as f:
                json.dump({
                    "index": idx,
                    "question": question,
                    "generated_answer": generated_answer,
                    "thinking_content": thinking_content,
                    "final_tag_output": final_tag_output,
                    "exact_answer": exact_answer,
                    "canonical_type": canonical_type,
                    "extracted_final_answer": extracted_final_answer,
                    "extracted_final_answers": extracted_final_answers
                }, f, ensure_ascii=False, indent=2)
            print(f"Saved failed extraction example to {fpath}")

        return {
            "problem_index": idx,
            "language": language,
            "chapter_number": chapter_num,
            "example_number": example_num,
            "question": question,
            "generated_answer": generated_answer,
            "thinking_content": thinking_content,
            "final_tag_output": final_tag_output,
            "extracted_final_answer": extracted_final_answer,       # string (or JSON array string)
            "extracted_final_answers": extracted_final_answers,     # list (empty / single / many)
            "exact_answer": exact_answer,
            "raw_answer_type": raw_answer_type,
            "canonical_answer_type": canonical_type,
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _create_error_entry(self, idx, problem, error_msg):
        """Build the result entry recorded for a problem whose processing raised."""
        return {
            "problem_index": idx,
            "language": problem.get("Language", ""),
            "chapter_number": problem.get("Chapter Number", ""),
            "example_number": problem.get("Example Number", ""),
            "question": problem.get("Question", ""),
            "generated_answer": f"ERROR: {error_msg}",
            "thinking_content": "",
            "final_tag_output": "",
            "extracted_final_answer": "",
            "extracted_final_answers": [],
            "exact_answer": problem.get("Exact Answer", ""),
            "raw_answer_type": problem.get("Answer Type", ""),
            "canonical_answer_type": "",
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _print_progress(self, idx, result_entry):
        """Print a short per-problem report (answer length, extracted and expected answers)."""
        print(f"Generated answer length: {len(result_entry['generated_answer']) if result_entry['generated_answer'] else 0}")
        print(f"Extracted final answer: '{result_entry['extracted_final_answer']}'")
        print(f"Extracted final answers (list): {result_entry.get('extracted_final_answers', [])}")
        print(f"Expected answer: '{result_entry['exact_answer']}'")

    def _save_intermediate_results(self, results, output_folder, count):
        """Write all results gathered so far to 'intermediate_results_<count>.json'."""
        temp_filename = f'intermediate_results_{count}.json'
        temp_output_path = os.path.join(output_folder, temp_filename)
        with open(temp_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"Saved intermediate results to {temp_output_path}")

    def _save_final_results(self, results, output_folder, start_idx, end_idx):
        """Write the complete results list and return the path of the file written."""
        final_filename = f'final_results_{start_idx}_to_{end_idx-1}.json'
        final_output_path = os.path.join(output_folder, final_filename)
        with open(final_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"\nProcessing complete. Results saved to {final_output_path}")
        print(f"Total problems processed: {len(results)}")
        return final_output_path

    def _create_summary_file(self, results, output_folder, dataset_path, start_idx, end_idx):
        """
        Write 'processing_summary.json' into output_folder and print where it went.

        An entry counts as failed when its 'generated_answer' starts with 'ERROR:'.
        The lists of distinct chapters / answer types are sorted so that two runs
        over the same results produce the same file.
        """
        total = len(results)
        failed = sum(1 for r in results if r['generated_answer'].startswith('ERROR:'))
        successful_extractions = sum(
            1 for r in results if r.get('extracted_final_answer', '').strip()
        )

        def _distinct(key):
            # key=str keeps the ordering well-defined even if values mix ints and strings
            return sorted({r[key] for r in results if r.get(key)}, key=str)

        summary_data = {
            "processing_info": {
                "dataset_path": dataset_path,
                "start_index": start_idx,
                "end_index": end_idx - 1,
                "total_processed": total,
                "processing_timestamp": datetime.now().isoformat(),
                "output_folder": output_folder
            },
            "statistics": {
                "successful_problems": total - failed,
                "failed_problems": failed,
                "successful_extractions": successful_extractions,
                "extraction_success_rate": f"{(successful_extractions/total*100):.1f}%" if results else "0%",
                "average_answer_length": sum(len(r['generated_answer']) for r in results) / total if results else 0,
                "chapters_processed": _distinct('chapter_number'),
                "raw_answer_types": _distinct('raw_answer_type'),
                "canonical_answer_types": _distinct('canonical_answer_type')
            }
        }
        summary_path = os.path.join(output_folder, 'processing_summary.json')
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary_data, f, ensure_ascii=False, indent=2)
        print(f"Processing summary saved to {summary_path}")
        print(f"Answer extraction success rate: {summary_data['statistics']['extraction_success_rate']}")

# -------------------------
# Main (example usage)
# -------------------------
def main():
    """
    Example entry point: solve problems 0-99 of the Lithuanian corpus.

    The paths below are Kaggle defaults; change them for other environments.
    The "qwen3:8b" model must already be available to the local Ollama daemon.
    """
    working_dir = "/kaggle/working/"
    corpus_file = "/kaggle/input/nctb-dataset/Lithuanian_Final_Corpus.jsonl"

    # Unextractable answers are dumped here for later inspection.
    failed_dir = os.path.join(working_dir, "failed_extractions")

    processor = DatasetProcessor(
        OllamaZeroShotMathSolver(model_name="qwen3:8b"),
        failed_folder=failed_dir,
    )

    # Only a slice of the corpus is processed so the run stays short.
    _, run_folder = processor.process_dataset(
        corpus_file,
        working_dir,
        start_idx=0,
        end_idx=100,
    )
    print("Done. Results saved to:", run_folder)


if __name__ == "__main__":
    main()


# **Turkish**

In [None]:
import json
import os
import re
from datetime import datetime
from tqdm import tqdm

# Ollama client
try:
    import ollama
except Exception as e:
    raise ImportError("The 'ollama' package is required. Install it and make sure the Ollama daemon is running.") from e

# -------------------------
# Answer type normalization
# -------------------------
# Canonical set
_CANONICAL_TYPES = {"symbolic", "numerical", "proof"}

# mapping common noisy labels to canonical (English + Turkish variants)
_ANSWER_TYPE_MAP_SIMPLE = {
    # Proof variants (English, then Turkish)
    "proof": "proof", "prove": "proof",
    "kanıt": "proof", "kanıtla": "proof", "kanıtlamak": "proof", "ispat": "proof", "kanıt göster": "proof", "göster": "proof",

    # Numerical variants
    "numerical": "numerical", "numeric": "numerical", "number": "numerical", "calculation": "numerical",
    "sayısal": "numerical", "sayı": "numerical", "hesaplama": "numerical", "hesapla": "numerical", "bul": "numerical",

    # Symbolic variants
    "symbolic": "symbolic", "symbol": "symbolic", "equation": "symbolic", "algebraic": "symbolic",
    "sembolik": "symbolic", "denklem": "symbolic", "yazılım": "symbolic", "cebirsel": "symbolic",
}


def normalize_answer_type(raw_label: str, question_text: str = "", exact_answer: str = "") -> str:
    """
    Normalize a dataset label to one of: 'symbolic', 'numerical', 'proof'.

    Resolution order:
      1. the cleaned label is looked up in ``_ANSWER_TYPE_MAP_SIMPLE``;
      2. the first map key that occurs as a substring of the cleaned label wins;
      3. keyword heuristics (English + Turkish) over the question and the
         expected answer, checked in this order: proof, logarithm, set theory,
         units, equation solving, trig/geometry, numeric wording;
      4. the shape of the expected answer (digits vs. LaTeX);
      5. fallback: 'symbolic'.

    Args:
        raw_label: label as found in the dataset; may be empty or None.
        question_text: problem statement; may be empty or None.
        exact_answer: expected answer; non-string values are converted with ``str``.

    Returns:
        'symbolic', 'numerical' or 'proof'.
    """
    def _lower(value) -> str:
        if value is None:
            return ""
        # str.lower() turns 'İ' (U+0130) into 'i' + U+0307 (combining dot), which
        # never matches the plain-'i' keywords below; fold it to 'i' first.
        return str(value).replace("\u0130", "i").lower()

    # Helper to clean label
    def _clean_label(lbl) -> str:
        if not lbl:
            return ""
        s = _lower(lbl).strip()
        # keep unicode letters (Latin-1 and extended), digits and spaces (includes Turkish chars)
        s = re.sub(r'[^0-9\w\s\u00C0-\u017F-]', ' ', s, flags=re.UNICODE)
        return re.sub(r'\s+', ' ', s).strip()

    s = _clean_label(raw_label)

    # direct mapping
    if s in _ANSWER_TYPE_MAP_SIMPLE:
        return _ANSWER_TYPE_MAP_SIMPLE[s]

    # partial matches (allow English or Turkish keywords appearing)
    for k, v in _ANSWER_TYPE_MAP_SIMPLE.items():
        if k in s:
            return v

    # heuristics using question or exact_answer (both English & Turkish checks)
    q = _lower(question_text)
    a = _lower(exact_answer)

    # Proof indicators (English + Turkish)
    if re.search(r'\b(prove|show that|prove that|proof)\b', q) or re.search(r'\b(kanıtla|kanıt|kanıtlamak|göster|ispat|kanıt göster)\b', q, flags=re.IGNORECASE):
        return "proof"

    # logarithm related: English/Turkish
    if re.search(r'\b(log|ln|logarithm)\b', q) or re.search(r'\b(log|ln|logaritma)\b', q) or re.search(r'\blog\b', a):
        # prefer numerical if exact answer contains digits
        if re.search(r'\d', a):
            return "numerical"
        return "symbolic"

    # set theory indicators: English / Turkish (küme = set)
    if re.search(r'\b(set|subset|union|intersection)\b', q) or re.search(r'\b(küme|altküme|birleşim|kesişim|kesi\u015Fim)\b', q, flags=re.IGNORECASE):
        return "symbolic"

    # units (English + Turkish)
    if re.search(r'\bmeter\b|\bm\b|\bcm\b|\bkg\b|\bliter\b|\bl\b|\bkm\b|\bmile\b', q + " " + a) or \
       re.search(r'\bmetre\b|\bcm\b|\bkg\b|\blitre\b|\bl\b|\bkm\b', q + " " + a, flags=re.IGNORECASE):
        return "numerical"

    # equation-solving heuristics (English + Turkish)
    if re.search(r'\bequation\b|solve for|solve|= x|x\s*=', q) or re.search(r'\b(denklem|çöz(ün|ün|mek)|çöz|çözün)\b', q, flags=re.IGNORECASE):
        return "symbolic"

    # trig / geometry indicators (English + Turkish)
    if re.search(r'\b(trig|sin|cos|tan|geometry|triangle|circle)\b', q) or re.search(r'\b(trigonometr|sin|cos|tan|geometri|üçgen|daire|çember)\b', q, flags=re.IGNORECASE):
        if re.search(r'\d', a):
            return "numerical"
        return "symbolic"

    # numeric indicators (English + Turkish) -> numerical
    # (the Turkish pattern used to end with a stray ')' and raised re.error)
    if re.search(r'\d', q) or re.search(r'find the value|compute|calculate|evaluate', q) or \
       re.search(r'değerini bul|bulun|hesapla|hesaplayın|hesaplama|sayısını bulun|kaç', q, flags=re.IGNORECASE):
        return "numerical"

    # If exact_answer looks numeric, prefer numerical (but check for LaTeX)
    if re.search(r'[0-9]|\\frac|\\sqrt', a):
        if re.search(r'\\frac|\\sqrt|\{|\\', a):
            return "symbolic"
        return "numerical"

    # fallback
    return "symbolic"

# -------------------------
# EnhancedAnswerExtractor (adapted to behave like SimplifiedAnswerExtractor)
# and now handles Turkish markers as well
# -------------------------
class EnhancedAnswerExtractor:
    """
    Pulls the concise final answer out of a model response (English + Turkish).

    Public entry points:
      - extract_final_answer(text) -> str
      - extract_all_final_answers(generated_solution) -> list
      - extract_final_answer_simple(text) -> str
    Internal helpers: _clean_answer, _is_valid_answer, _extract_with_patterns.
    All methods are static; the class holds no instance state.
    """

    # Lead-in phrases stripped from the start of a candidate, applied in order.
    # Word prefixes end in \b so that e.g. "So" cannot eat the start of "Sonuç",
    # and longer phrases precede their own prefixes ("Cevap olarak" before "Cevap").
    _PREFIX_PATTERNS = tuple(
        re.compile('^(?:' + p + ')', re.IGNORECASE) for p in (
            # English
            r'Final Answer:\s*', r'Answer:\s*', r'The answer is\b\s*',
            r'Therefore\b,?\s*', r'Thus\b,?\s*', r'Hence\b,?\s*', r'So\b,?\s*', r'∴\s*',

            # Turkish
            r'Final Cevap\b[:\s]*', r'Sonuç\b[:\s]*', r'Son cevap\b[:\s]*',
            r'Cevap olarak\b[:\s]*', r'Cevap\b[:\s]*', r'Netice\b[:\s]*',
            r'Dolayısıyla\b,?\s*', r'Böylece\b,?\s*', r'Bu nedenle\b,?\s*', r'O halde\b,?\s*', r'Öyleyse\b,?\s*',
        )
    )

    # Opening of a boxed wrapper: optional '$', optional backslashes, then "boxed{".
    _BOXED_OPEN = re.compile(r'\$?\s*(?:\\){0,3}boxed\{', re.IGNORECASE)
    # Whitespace and one optional '$' that may trail the closing brace.
    _BOXED_TAIL = re.compile(r'\s*\$?')

    # A candidate that ends on one of these whole words is a dangling conclusion, not an answer.
    _DANGLING_END = re.compile(
        r'\b(?:therefore|thus|hence|so|we get|we have|dolayısıyla|böylece|bu nedenle)$',
        re.IGNORECASE,
    )

    _FINAL_TAG = re.compile(r'<final>(.*?)</final>', re.DOTALL | re.IGNORECASE)

    @staticmethod
    def _find_boxed(text: str, pos: int = 0):
        """
        Locate the next boxed wrapper at or after ``pos``.

        Returns (wrapper_start, content_start, closing_brace_index), or None when
        there is no wrapper or its braces never balance. Braces are counted so
        nested groups such as ``\\boxed{\\frac{1}{2}}`` are kept whole.
        """
        m = EnhancedAnswerExtractor._BOXED_OPEN.search(text, pos)
        if not m:
            return None
        depth = 0
        # m.end() - 1 is the '{' that opens the boxed argument.
        for i in range(m.end() - 1, len(text)):
            ch = text[i]
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    return m.start(), m.end(), i
        return None

    @staticmethod
    def _unwrap_boxed(text: str) -> str:
        """Replace every boxed wrapper in ``text`` with its (brace-balanced) content."""
        pieces = []
        pos = 0
        while True:
            found = EnhancedAnswerExtractor._find_boxed(text, pos)
            if found is None:
                break
            start, content_start, close = found
            pieces.append(text[pos:start])
            pieces.append(text[content_start:close])
            pos = EnhancedAnswerExtractor._BOXED_TAIL.match(text, close + 1).end()
        pieces.append(text[pos:])
        return ''.join(pieces)

    @staticmethod
    def _clean_answer(answer: str) -> str:
        """
        Reduce a raw candidate to a bare answer string.

        Steps, in order: collapse whitespace, drop surrounding ``$``/``$$``,
        strip lead-in phrases, unwrap boxed wrappers, rewrite ``\\frac`` and
        ``\\sqrt`` in a readable form, drop ``*``/``**`` emphasis, trim trailing
        punctuation and trailing filler words. Returns "" for empty input.
        """
        if not answer:
            return ""
        # Start with a trimmed answer and normalize whitespace
        a = re.sub(r'\s+', ' ', answer.strip())

        # remove outer $$ if present, then a single surrounding $
        a = re.sub(r'^\$\$(.*)\$\$$', r'\1', a, flags=re.DOTALL)
        a = re.sub(r'^\$(.*)\$$', r'\1', a, flags=re.DOTALL)

        # strip standalone leading/trailing $ characters and spaces
        a = a.strip('$ ')

        # Remove common prefixes (after stripping $ to catch "$Final Answer: ...$")
        for prefix in EnhancedAnswerExtractor._PREFIX_PATTERNS:
            a = prefix.sub('', a)

        # e.g. $$\boxed{...}$$, $\boxed{...}$, \boxed{...}
        a = EnhancedAnswerExtractor._unwrap_boxed(a)

        # convert common LaTeX to readable forms
        a = re.sub(r'\\frac\{([^}]*)\}\{([^}]*)\}', r'(\1)/(\2)', a)
        a = re.sub(r'\\sqrt\{([^}]*)\}', r'√(\1)', a)

        # remove bold/italic wrappers
        a = re.sub(r'\*\*([^*]+)\*\*', r'\1', a)
        a = re.sub(r'\*([^*]+)\*', r'\1', a)

        # collapse multiple spaces again (in case replacements introduced them)
        a = re.sub(r'\s+', ' ', a).strip()

        # trim trailing punctuation
        a = a.rstrip(' \t\n.,;:')

        # remove trailing words like "proved" or "the answer" (English & Turkish)
        a = re.sub(r'\b(proved|completed|finished|the answer|kanıtlandı|kanıt|cevap|sonuç)\b[.\s]*$', '', a, flags=re.IGNORECASE).strip()

        return a

    @staticmethod
    def _is_valid_answer(answer: str) -> bool:
        """
        Return True when ``answer`` looks like a usable answer.

        Rejected: empty strings, punctuation-only strings, strings without any
        ASCII/Latin-extended alphanumeric or backslash, strings longer than 1000
        characters, and strings ending on a dangling conclusion word.
        """
        if not answer:
            return False
        # not only punctuation
        if re.match(r'^[\W_]+$', answer):
            return False
        # contains at least some alphanumeric characters (or a LaTeX backslash)
        if not re.search(r'[0-9A-Za-z\u00C0-\u017F\\]', answer):
            return False
        # length sanity
        if len(answer) > 1000:
            return False
        # avoid answers that end with only concluding words (English & Turkish)
        if EnhancedAnswerExtractor._DANGLING_END.search(answer.strip()):
            return False
        return True

    @staticmethod
    def _collect_final_tags(text: str) -> list:
        """Return the cleaned, valid contents of every ``<final>...</final>`` tag in ``text``."""
        cleaned = []
        for raw in EnhancedAnswerExtractor._FINAL_TAG.findall(text):
            candidate = EnhancedAnswerExtractor._clean_answer(raw)
            if EnhancedAnswerExtractor._is_valid_answer(candidate):
                cleaned.append(candidate)
        return cleaned

    @staticmethod
    def extract_final_answer_simple(text: str) -> str:
        """
        Extract a final answer from the last lines of ``text``.

        Tries, in order: the last two non-empty lines joined, the last line,
        the last three lines joined (only if shorter than 500 characters), and
        finally pattern-based extraction. Returns "" when nothing valid is found.
        """
        if not text:
            return ""

        # Clean the text and split into lines
        lines = [line.strip() for line in text.strip().split('\n') if line.strip()]

        # Strategy 1: Try last two lines combined
        if len(lines) >= 2:
            cleaned = EnhancedAnswerExtractor._clean_answer(' '.join(lines[-2:]))
            if EnhancedAnswerExtractor._is_valid_answer(cleaned):
                return cleaned

        # Strategy 2: Try last line only
        if lines:
            cleaned = EnhancedAnswerExtractor._clean_answer(lines[-1])
            if EnhancedAnswerExtractor._is_valid_answer(cleaned):
                return cleaned

        # Strategy 3: Try last 3 lines if we have them (sometimes answers span multiple lines)
        if len(lines) >= 3:
            cleaned = EnhancedAnswerExtractor._clean_answer(' '.join(lines[-3:]))
            if EnhancedAnswerExtractor._is_valid_answer(cleaned) and len(cleaned) < 500:
                return cleaned

        # Strategy 4: Fallback to pattern-based extraction
        return EnhancedAnswerExtractor._extract_with_patterns(text)

    @staticmethod
    def _extract_with_patterns(text: str) -> str:
        """
        Pattern-based extraction used as the last resort.

        Order: ``<final>`` tags, then English/Turkish answer and conclusion
        markers (the last occurrence of each marker is used), then the first
        boxed expression that cleans to a valid answer. Returns "" on failure.
        """
        # Check for <final> tags
        finals = EnhancedAnswerExtractor._collect_final_tags(text)
        if finals:
            return finals[0]

        # Try common answer patterns (English + Turkish)
        patterns = [
            r'\*\*Final Answer:\*\*\s*(.+?)(?:\n|$)',
            r'Final Answer:\s*(.+?)(?:\n|$)',
            r'Final Answer\s*[:\-]\s*(.+?)(?:\n|$)',

            # Turkish patterns
            r'\*\*Sonuç:\*\*\s*(.+?)(?:\n|$)',
            r'Sonuç[:\s]*(.+?)(?:\n|$)',
            r'Son cevap[:\s]*(.+?)(?:\n|$)',
            r'Cevap[:\s]*(.+?)(?:\n|$)',
            r'Nihai cevap[:\s]*(.+?)(?:\n|$)',

            # English conclusion markers
            r'Therefore[,:\s]*(.+?)(?:\.|$|\n)',
            r'Hence[,:\s]*(.+?)(?:\.|$|\n)',
            r'Thus[,:\s]*(.+?)(?:\.|$|\n)',
            r'∴\s*(.+?)(?:\.|$|\n)',

            # Turkish conclusion markers
            r'Dolayısıyla[,:\s]*(.+?)(?:\.|$|\n)',
            r'Böylece[,:\s]*(.+?)(?:\.|$|\n)',
            r'Bu nedenle[,:\s]*(.+?)(?:\.|$|\n)',
            r'O halde[,:\s]*(.+?)(?:\.|$|\n)',
            r'Öyleyse[,:\s]*(.+?)(?:\.|$|\n)',

            r'Answer[:\s]*(.+?)(?:\n|$)',
            r'Result[:\s]*(.+?)(?:\n|$)',
            r'Solution[:\s]*(.+?)(?:\n|$)',
        ]

        for pat in patterns:
            matches = re.findall(pat, text, re.MULTILINE | re.DOTALL | re.IGNORECASE)
            if matches:
                cleaned = EnhancedAnswerExtractor._clean_answer(matches[-1].strip())
                if EnhancedAnswerExtractor._is_valid_answer(cleaned):
                    return cleaned

        # Try boxed math expressions (LaTeX boxed), keeping nested braces intact
        pos = 0
        while True:
            found = EnhancedAnswerExtractor._find_boxed(text, pos)
            if found is None:
                break
            _, content_start, close = found
            cand = EnhancedAnswerExtractor._clean_answer(text[content_start:close])
            if EnhancedAnswerExtractor._is_valid_answer(cand):
                return cand
            pos = close + 1

        return ""

    @staticmethod
    def extract_all_final_answers(generated_solution: str) -> list:
        """
        Return every cleaned answer found inside ``<final>...</final>`` tags.

        When no tag yields a valid answer, falls back to
        ``extract_final_answer_simple`` and returns its result as a one-element
        list. Returns [] for empty input or when nothing can be extracted.
        """
        if not generated_solution:
            return []

        finals = EnhancedAnswerExtractor._collect_final_tags(generated_solution)
        if finals:
            return finals

        # Fallback: try to extract a single final using the simpler logic
        simple_answer = EnhancedAnswerExtractor.extract_final_answer_simple(generated_solution)
        return [simple_answer] if simple_answer else []

    @staticmethod
    def extract_final_answer(generated_solution: str) -> str:
        """
        Return the final answer as a single string.

        One valid ``<final>`` tag: its cleaned content. Several: a JSON array
        string of the cleaned contents. None: the result of
        ``extract_final_answer_simple``. Returns "" for empty input.
        """
        if not generated_solution:
            return ""

        finals = EnhancedAnswerExtractor._collect_final_tags(generated_solution)
        if not finals:
            # No usable explicit finals: use simplified single extraction
            return EnhancedAnswerExtractor.extract_final_answer_simple(generated_solution)
        if len(finals) == 1:
            return finals[0]
        return json.dumps(finals, ensure_ascii=False)

# -------------------------
# Ollama-based Math Solver (ZERO-SHOT) — PROMPTS IN ENGLISH (problem is Turkish)
# -------------------------
class OllamaZeroShotMathSolver:
    """
    Single-pass, zero-shot math solver that talks to Ollama through its Python client.

    The instructions sent to the model are in English; they tell the model that
    the problem is written in TURKISH and ask for a concise final answer in
    TURKISH inside a ``<final>...</final>`` tag, without chain-of-thought.
    """

    def __init__(self, model_name="qwen3:8b"):
        """
        model_name: the Ollama model reference (e.g., "qwen3:8b")
        """
        self.model_name = model_name
        self.client = self._load_client()

    def _load_client(self):
        """Announce the model in use and return a new ``ollama.Client``."""
        print(f"Initializing Ollama client for model: {self.model_name}")
        return ollama.Client()

    def cleanup(self):
        """Drop the client reference if one is set; safe to call repeatedly."""
        if not hasattr(self, 'client'):
            return
        del self.client

    def _get_format_instructions(self, answer_type):
        """
        Build the formatting section of the prompt for ``answer_type``.

        ``answer_type`` is lower-cased and stripped; anything outside
        ``_CANONICAL_TYPES`` (including None/empty) is treated as "symbolic".
        The result is the shared rules followed by the type-specific rules.
        """
        kind = (answer_type or "symbolic").strip().lower()
        if kind not in _CANONICAL_TYPES:
            kind = "symbolic"

        shared_rules = (
            "CRITICAL FORMATTING REQUIREMENTS (ZERO-SHOT):\n"
            "- The math problem you will receive is written in TURKISH. Provide your concise FINAL ANSWER in TURKISH.\n"
            "- DO NOT provide chain-of-thought or step-by-step internal reasoning. Do NOT reveal private chain-of-thought.\n"
            "- If a very short justification is necessary, include a single-line 'Explanation:' with at most one sentence (in Turkish).\n"
            "- Always end with a machine-readable final tag <final>...</final> containing ONLY the concise final answer in TURKISH (no extra reasoning inside the tag).\n"
        )

        rules_by_kind = {
            "proof": (
                "FOR PROOFS (zero-shot):\n"
                "- Provide a concise conclusion or a one-sentence proof sketch (in TURKISH) labeled 'Sonuç:' or 'İspat özeti:' if needed. Do NOT provide a full step-by-step proof.\n"
                "Format example (English instructions):\n"
                "Final Answer (in Turkish):\n"
                "[Concise Turkish conclusion or one-sentence sketch]\n\n"
                "<final>[Concise Turkish conclusion]</final>\n"
            ),
            "numerical": (
                "FOR NUMERICAL RESULTS:\n"
                "- Provide the numeric result in exact form if available (fractions/radicals). Otherwise provide a decimal rounded to 4 decimal places.\n"
                "Format example (English instructions):\n"
                "Final Answer (in Turkish):\n"
                "[Numeric result]\n\n"
                "<final>[Numeric result]</final>\n"
            ),
            "symbolic": (
                "FOR SYMBOLIC RESULTS:\n"
                "- Provide the final symbolic expression (LaTeX allowed) in a concise form. The expression may be LaTeX but any wording should be in Turkish if used.\n"
                "Format example (English instructions):\n"
                "Final Answer (in Turkish):\n"
                "[Final symbolic expression]\n\n"
                "<final>[LaTeX expression or concise symbolic expression — in Turkish if words are used]</final>\n"
            ),
        }
        # Any canonical type without its own entry gets the symbolic rules.
        return shared_rules + rules_by_kind.get(kind, rules_by_kind["symbolic"])

    def _create_prompt(self, question, answer_type="symbolic"):
        """Return the full prompt: role line, the question, then the formatting rules."""
        format_instructions = self._get_format_instructions(answer_type)

        # Instructions stay in English; they name the problem language and the answer language.
        return f"""You are an expert mathematician. The following math problem is written in TURKISH.
Provide a concise final answer in TURKISH. DO NOT produce chain-of-thought or step-by-step internal reasoning.
If you include a justification, it must be one short sentence and labeled 'Explanation:' (in Turkish).

MATH PROBLEM (in Turkish):
{question}

{format_instructions}

Begin your concise answer (in Turkish) now:
"""

    @staticmethod
    def _reply_text(reply):
        """
        Pull the message content out of a chat reply.

        Handles a dict with 'message' -> 'content', a dict with a non-empty
        'choices' sequence, and objects exposing ``.message.content``; anything
        else is returned as ``str(reply)``.
        """
        if not isinstance(reply, dict):
            # reply might be an object with .message.content
            try:
                return reply.message.content
            except Exception:
                return str(reply)

        message = reply['message'] if 'message' in reply else None
        if isinstance(message, dict) and 'content' in message:
            return message['content']

        choices = reply['choices'] if 'choices' in reply else None
        if isinstance(choices, (list, tuple)) and choices:
            first = choices[0]
            if isinstance(first, dict) and 'message' in first and isinstance(first['message'], dict):
                return first['message'].get('content', '')
            return str(first)

        return str(reply)

    def _generate_once(self, prompt_text: str, enable_thinking: bool = False) -> str:
        """
        Send ``prompt_text`` as a single user message and return the stripped reply text.

        No temperature/max_tokens are passed. If the keyword-style call raises
        TypeError, a positional-model call is tried, and then a call without
        the ``think`` argument.
        """
        conversation = [{"role": "user", "content": prompt_text}]

        try:
            reply = self.client.chat(model=self.model_name, messages=conversation, think=enable_thinking)
        except TypeError:
            # Some client versions have different signatures
            try:
                reply = self.client.chat(self.model_name, messages=conversation, think=enable_thinking)
            except Exception:
                reply = self.client.chat(self.model_name, conversation)

        text = self._reply_text(reply)
        if isinstance(text, bytes):
            text = text.decode('utf-8', errors='ignore')
        return (text or "").strip()

    def solve_problem(self, question, answer_type="symbolic"):
        """
        Solve ``question`` in one zero-shot pass.

        Returns a dict with 'thinking_content' and 'final_tag_output' (both
        always empty here), 'generated_answer' (the raw reply) and
        'extracted_final_answer' (from ``EnhancedAnswerExtractor``).
        """
        raw_reply = self._generate_once(
            self._create_prompt(question, answer_type),
            enable_thinking=False,
        )
        return {
            "thinking_content": "",   # no thinking parsing in zero-shot mode
            "generated_answer": raw_reply,
            "final_tag_output": "",   # no second pass
            "extracted_final_answer": EnhancedAnswerExtractor.extract_final_answer(raw_reply),
        }

# -------------------------
# Dataset Processor
# -------------------------
class DatasetProcessor:
    """
    Runs a solver over a JSONL dataset and writes results, checkpoints and a summary.

    Each dataset line is a JSON object; the fields read are "Language",
    "Chapter Number", "Example Number", "Question", "Exact Answer",
    "Answer Type" and "Evaluation Method".
    """

    # Number of processed entries between intermediate checkpoint files.
    _CHECKPOINT_EVERY = 10

    def __init__(self, solver: "OllamaZeroShotMathSolver", failed_folder=None):
        """
        Args:
            solver: object whose ``solve_problem(question, answer_type=...)`` returns
                a dict with 'generated_answer', 'thinking_content', 'final_tag_output'.
            failed_folder: directory for failed-extraction dumps; defaults to
                "failed_extractions". It is created if missing.
        """
        self.solver = solver
        self.extractor = EnhancedAnswerExtractor()
        self.failed_folder = failed_folder or "failed_extractions"
        os.makedirs(self.failed_folder, exist_ok=True)

    def process_dataset(self, dataset_path, output_base_path, start_idx=0, end_idx=None,
                        folder_name=None, create_timestamped_folder=True):
        """
        Solve problems ``start_idx`` .. ``end_idx - 1`` and persist everything.

        ``end_idx`` defaults to, and is clamped to, the dataset length so that
        folder names, file names and the summary only mention indices that exist.
        A problem that raises is recorded as an error entry instead of aborting
        the run. A checkpoint file is written after every 10 recorded entries.

        Returns:
            (results, output_folder)
        """
        dataset = self._load_dataset(dataset_path)
        end_idx = len(dataset) if end_idx is None else min(end_idx, len(dataset))

        output_folder = self._create_output_folder(output_base_path, folder_name, start_idx, end_idx, create_timestamped_folder)
        results = []

        print(f"Processing problems {start_idx} to {end_idx-1} ({end_idx-start_idx} total)")
        print(f"Output will be saved in: {output_folder}")

        for idx in tqdm(range(start_idx, end_idx)):
            problem = dataset[idx]
            try:
                entry = self._process_single_problem(idx, problem)
                self._print_progress(idx, entry)
            except Exception as e:
                # Per-problem boundary: keep going and record what went wrong.
                print(f"Error processing problem {idx+1}: {str(e)}")
                entry = self._create_error_entry(idx, problem, str(e))
            results.append(entry)

            # Checkpoint on every 10th entry, error entries included.
            if len(results) % self._CHECKPOINT_EVERY == 0:
                try:
                    self._save_intermediate_results(results, output_folder, len(results))
                except OSError as e:
                    # A failed checkpoint must not discard the work done so far.
                    print(f"Warning: could not save intermediate results: {e}")

        self._save_final_results(results, output_folder, start_idx, end_idx)
        self._create_summary_file(results, output_folder, dataset_path, start_idx, end_idx)
        return results, output_folder

    def _create_output_folder(self, base_path, folder_name, start_idx, end_idx, add_timestamp):
        """
        Create and return the output directory under ``base_path``.

        The name defaults to ``results_<start>_to_<end-1>``; a
        ``_YYYYmmdd_HHMMSS`` suffix is appended when ``add_timestamp`` is true.
        """
        if folder_name is None:
            folder_name = f"results_{start_idx}_to_{end_idx-1}"
        if add_timestamp:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            folder_name = f"{folder_name}_{timestamp}"
        output_folder = os.path.join(base_path, folder_name)
        os.makedirs(output_folder, exist_ok=True)
        return output_folder

    def _load_dataset(self, dataset_path):
        """Read a UTF-8 JSONL file into a list of objects, skipping blank lines."""
        with open(dataset_path, 'r', encoding='utf-8') as f:
            return [json.loads(line) for line in f if line.strip()]

    @staticmethod
    def _extract_answers(generated_answer, thinking_content, final_tag_output):
        """
        Return ``(answer_string, answer_list)`` for one model response.

        Sources are tried in order: ``generated_answer``, ``final_tag_output``,
        the single-answer extractor on ``generated_answer``, and finally all
        three texts joined. Several answers are encoded in ``answer_string`` as
        a JSON array; no answer gives ``("", [])``.
        """
        finals = EnhancedAnswerExtractor.extract_all_final_answers(generated_answer)
        if not finals and final_tag_output:
            finals = EnhancedAnswerExtractor.extract_all_final_answers(final_tag_output)

        if not finals:
            single = EnhancedAnswerExtractor.extract_final_answer(generated_answer)
            if not single:
                # try whole combined text (thinking + generated + final_tag)
                combined = "\n".join([thinking_content or "", generated_answer or "", final_tag_output or ""])
                single = EnhancedAnswerExtractor.extract_final_answer(combined)
            return (single, [single]) if single else ("", [])

        if len(finals) == 1:
            return finals[0], finals
        # store a machine-readable concatenation: JSON array string
        return json.dumps(finals, ensure_ascii=False), finals

    def _process_single_problem(self, idx, problem):
        """
        Solve one dataset entry and return its result dict.

        The answer type is normalised first; an expected answer that contains a
        digit and no ``\\frac``/``\\sqrt``/``\\boxed`` forces "numerical". When no
        answer can be extracted, a JSON dump of the attempt is written to
        ``self.failed_folder``.
        """
        language = problem.get("Language", "")
        chapter_num = problem.get("Chapter Number", "")
        example_num = problem.get("Example Number", "")
        question = problem.get("Question", "")
        exact_answer = problem.get("Exact Answer", "")
        raw_answer_type = problem.get("Answer Type", "") or ""

        # The dataset may store the expected answer as a number; the heuristics need text.
        exact_text = "" if exact_answer is None else str(exact_answer)

        # Normalize/infer canonical answer type: 'symbolic', 'numerical', 'proof'
        canonical_type = normalize_answer_type(raw_answer_type, question_text=question, exact_answer=exact_text)

        # If exact_answer strongly indicates numeric, prefer numerical
        # (LaTeX expressions like \frac or \sqrt keep the inferred type).
        if re.search(r'\d', exact_text) and not re.search(r'\\frac|\\sqrt|\\boxed', exact_text):
            canonical_type = "numerical"

        print(f"\nProcessing Problem {idx+1}: Chapter {chapter_num}, Example {example_num}")
        print(f"Raw Answer Type: '{raw_answer_type}'  --> canonical: '{canonical_type}'")

        # Generate solution (use canonical_type) -- zero-shot, single pass
        solution_result = self.solver.solve_problem(question, answer_type=canonical_type)
        generated_answer = solution_result.get('generated_answer', '')
        thinking_content = solution_result.get('thinking_content', '')
        final_tag_output = solution_result.get('final_tag_output', '')

        # Keep both forms: single string & list
        extracted_final_answer, extracted_final_answers = self._extract_answers(
            generated_answer, thinking_content, final_tag_output
        )

        # If still empty, save a failed extraction example for inspection
        if not extracted_final_answer:
            fname = f"failed_{idx}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            fpath = os.path.join(self.failed_folder, fname)
            with open(fpath, 'w', encoding='utf-8') as f:
                json.dump({
                    "index": idx,
                    "question": question,
                    "generated_answer": generated_answer,
                    "thinking_content": thinking_content,
                    "final_tag_output": final_tag_output,
                    "exact_answer": exact_answer,
                    "canonical_type": canonical_type,
                    "extracted_final_answer": extracted_final_answer,
                    "extracted_final_answers": extracted_final_answers
                }, f, ensure_ascii=False, indent=2)
            print(f"Saved failed extraction example to {fpath}")

        return {
            "problem_index": idx,
            "language": language,
            "chapter_number": chapter_num,
            "example_number": example_num,
            "question": question,
            "generated_answer": generated_answer,
            "thinking_content": thinking_content,
            "final_tag_output": final_tag_output,
            "extracted_final_answer": extracted_final_answer,       # string (or JSON array string)
            "extracted_final_answers": extracted_final_answers,     # list (empty / single / many)
            "exact_answer": exact_answer,
            "raw_answer_type": raw_answer_type,
            "canonical_answer_type": canonical_type,
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _create_error_entry(self, idx, problem, error_msg):
        """Return a result dict for a problem that raised; 'generated_answer' is ``ERROR: <msg>``."""
        return {
            "problem_index": idx,
            "language": problem.get("Language", ""),
            "chapter_number": problem.get("Chapter Number", ""),
            "example_number": problem.get("Example Number", ""),
            "question": problem.get("Question", ""),
            "generated_answer": f"ERROR: {error_msg}",
            "thinking_content": "",
            "final_tag_output": "",
            "extracted_final_answer": "",
            "extracted_final_answers": [],
            "exact_answer": problem.get("Exact Answer", ""),
            "raw_answer_type": problem.get("Answer Type", ""),
            "canonical_answer_type": "",
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _print_progress(self, idx, result_entry):
        """Print the answer length, the extracted answer(s) and the expected answer."""
        print(f"Generated answer length: {len(result_entry['generated_answer']) if result_entry['generated_answer'] else 0}")
        print(f"Extracted final answer: '{result_entry['extracted_final_answer']}'")
        print(f"Extracted final answers (list): {result_entry.get('extracted_final_answers', [])}")
        print(f"Expected answer: '{result_entry['exact_answer']}'")

    def _save_intermediate_results(self, results, output_folder, count):
        """Write all results so far to ``intermediate_results_<count>.json``."""
        temp_filename = f'intermediate_results_{count}.json'
        temp_output_path = os.path.join(output_folder, temp_filename)
        with open(temp_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"Saved intermediate results to {temp_output_path}")

    def _save_final_results(self, results, output_folder, start_idx, end_idx):
        """Write ``final_results_<start>_to_<end-1>.json`` and return its path."""
        final_filename = f'final_results_{start_idx}_to_{end_idx-1}.json'
        final_output_path = os.path.join(output_folder, final_filename)
        with open(final_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"\nProcessing complete. Results saved to {final_output_path}")
        print(f"Total problems processed: {len(results)}")
        return final_output_path

    def _create_summary_file(self, results, output_folder, dataset_path, start_idx, end_idx):
        """
        Write ``processing_summary.json`` into ``output_folder`` and print its location.

        An entry counts as failed when its 'generated_answer' starts with
        'ERROR:'. The lists of distinct chapters and answer types are sorted so
        the file is identical from run to run.
        """
        def _distinct(field):
            values = {entry[field] for entry in results if entry.get(field)}
            try:
                # Numbers first (numerically), then strings (lexicographically).
                return sorted(values, key=lambda v: (isinstance(v, str), v))
            except TypeError:
                return sorted(values, key=str)

        total = len(results)
        failed = sum(1 for entry in results if entry['generated_answer'].startswith('ERROR:'))
        successful_extractions = sum(
            1 for entry in results if entry.get('extracted_final_answer', '').strip()
        )

        summary_data = {
            "processing_info": {
                "dataset_path": dataset_path,
                "start_index": start_idx,
                "end_index": end_idx - 1,
                "total_processed": total,
                "processing_timestamp": datetime.now().isoformat(),
                "output_folder": output_folder
            },
            "statistics": {
                "successful_problems": total - failed,
                "failed_problems": failed,
                "successful_extractions": successful_extractions,
                "extraction_success_rate": f"{(successful_extractions/total*100):.1f}%" if results else "0%",
                "average_answer_length": sum(len(entry['generated_answer']) for entry in results) / total if results else 0,
                "chapters_processed": _distinct('chapter_number'),
                "raw_answer_types": _distinct('raw_answer_type'),
                "canonical_answer_types": _distinct('canonical_answer_type')
            }
        }
        summary_path = os.path.join(output_folder, 'processing_summary.json')
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary_data, f, ensure_ascii=False, indent=2)
        print(f"Processing summary saved to {summary_path}")
        print(f"Answer extraction success rate: {summary_data['statistics']['extraction_success_rate']}")

# -------------------------
# Main (example usage)
# -------------------------
def main():
    """
    Example entry point: solve problems 0-99 of the Turkish corpus.

    The paths below are Kaggle defaults; change them for other environments.
    The "qwen3:8b" model must already be available to the local Ollama daemon.
    """
    working_dir = "/kaggle/working/"
    corpus_file = "/kaggle/input/nctb-dataset/Turkish_Final_Corpus.jsonl"

    # Unextractable answers are dumped here for later inspection.
    failed_dir = os.path.join(working_dir, "failed_extractions")

    processor = DatasetProcessor(
        OllamaZeroShotMathSolver(model_name="qwen3:8b"),
        failed_folder=failed_dir,
    )

    # Only a slice of the corpus is processed so the run stays short.
    _, run_folder = processor.process_dataset(
        corpus_file,
        working_dir,
        start_idx=0,
        end_idx=100,
    )
    print("Done. Results saved to:", run_folder)


if __name__ == "__main__":
    main()
