In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount recursively and echo the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    file_paths = [os.path.join(dirname, filename) for filename in filenames]
    for file_path in file_paths:
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# IPython shell escape: fetch the Ollama install script with curl and pipe it into sh.
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
import subprocess
# Start `ollama serve` in the background. Popen spawns a separate child process
# (not a thread) and returns immediately; the handle is kept in `process`.
process = subprocess.Popen(
    "ollama serve",
    shell=True,
)
# Install the `ollama` Python client package (imported by the solver code further down).
!pip install ollama

In [None]:
# IPython shell escape: pull the gpt-oss:20b model with the Ollama CLI
# (the same model name OllamaMathSolver uses by default).
!ollama pull gpt-oss:20b

In [None]:
import json
import os
import re
from datetime import datetime
from tqdm import tqdm

# Replace Hugging Face with Ollama
try:
    import ollama
except Exception as e:
    raise ImportError("The 'ollama' package is required. Install it and make sure the Ollama daemon is running.") from e

# -------------------------
# Answer type normalization
# -------------------------
_CANONICAL_TYPES = {"symbolic", "numerical", "proof"}

_ANSWER_TYPE_MAP_SIMPLE = {
    # Proof variants
    "proof": "proof", "prove": "proof",
    # Numerical variants
    "numerical": "numerical", "numeric": "numerical", "number": "numerical", "calculation": "numerical",
    # Symbolic variants
    "symbolic": "symbolic", "symbol": "symbolic", "equation": "symbolic", "algebraic": "symbolic",
}

# Keyword heuristics used when the dataset label is missing or unknown.
# Every keyword is anchored so it only matches as a whole token: the text is
# lower-cased first, so "(?<![a-z])" / "(?![a-z])" mean "not glued to another
# letter". Without the anchors "m"/"l" matched any word ending in m or l
# ("sum", "polynomial"), "sin" matched "using", "set" matched "offset" and
# "prove" matched "improve".
_PROOF_HINT_RE = re.compile(r'(?<![a-z])(?:prove[sdn]?|show that)(?![a-z])')
_SET_HINT_RE = re.compile(r'(?<![a-z])(?:(?:sub|super)?sets?|union|intersection)(?![a-z])')
_UNIT_HINT_RE = re.compile(
    r'(?<![a-z])(?:(?:kilo|centi|milli)?meters?|(?:milli)?liters?'
    r'|cm|mm|ml|kg|km|units?|miles?|m|l)(?![a-z])'
)
_TRIG_HINT_RE = re.compile(
    r'(?<![a-z])(?:trig[a-z]*|(?:arc)?(?:sin|cos|tan)h?|sine|cosine|tangent'
    r'|geometry|triangles?|(?:semi)?circles?)(?![a-z])'
)


def normalize_answer_type(raw_label: str, question_text: str = "", exact_answer: str = "") -> str:
    """Map a free-form answer-type label to 'symbolic', 'numerical' or 'proof'.

    Resolution order:
      1. The cleaned label is looked up in ``_ANSWER_TYPE_MAP_SIMPLE`` (exact
         match first, then "known key is a substring of the label").
      2. Otherwise keyword heuristics are applied to the question and the
         reference answer, in a fixed order (proof, logarithm, sets, units,
         equations, trigonometry/geometry, digits).
      3. If nothing matches, 'symbolic' is returned.

    Args:
        raw_label: Answer-type label as found in the dataset (may be empty).
        question_text: Problem statement, used only by the heuristics.
        exact_answer: Reference answer, used only by the heuristics. Non-string
            values (e.g. JSON numbers) are converted with ``str()``.

    Returns:
        One of the strings in ``_CANONICAL_TYPES``.
    """
    def _as_text(value) -> str:
        # JSON rows may carry numbers or null where text is expected.
        return "" if value is None else str(value)

    # Lower-case the label, turn punctuation into spaces, collapse whitespace.
    s = _as_text(raw_label).strip().lower()
    s = re.sub(r'[^0-9a-z\s]', ' ', s)
    s = re.sub(r'\s+', ' ', s).strip()

    if s in _ANSWER_TYPE_MAP_SIMPLE:
        return _ANSWER_TYPE_MAP_SIMPLE[s]

    # Labels such as "numeric value": the first known key contained in it wins.
    for k, v in _ANSWER_TYPE_MAP_SIMPLE.items():
        if k in s:
            return v

    q = _as_text(question_text).lower()
    a = _as_text(exact_answer).lower()

    if _PROOF_HINT_RE.search(q):
        return "proof"

    if re.search(r'\b(log|ln|logarithm)\b', q) or re.search(r'\blog\b', a):
        return "numerical" if re.search(r'\d', a) else "symbolic"

    if _SET_HINT_RE.search(q):
        return "symbolic"

    # Physical units anywhere in the question or the answer imply a number.
    if _UNIT_HINT_RE.search(q + " " + a):
        return "numerical"

    if re.search(r'equation|solve for|solve|= x|x\s*=', q) or re.search(r'= x|= \d', a):
        return "symbolic"

    if _TRIG_HINT_RE.search(q):
        return "numerical" if re.search(r'\d', a) else "symbolic"

    if re.search(r'\d', q) or re.search(r'find the value|compute|calculate|evaluate', q):
        return "numerical"

    if re.search(r'[0-9]|\\frac|\\sqrt', a):
        # Digits wrapped in LaTeX structure are treated as a symbolic expression.
        if re.search(r'\\frac|\\sqrt|\{|\\', a):
            return "symbolic"
        return "numerical"

    return "symbolic"


# -------------------------
# Simplified Answer Extractor (merged cleaning improvements)
# -------------------------
class SimplifiedAnswerExtractor:
    """Stateless helpers that pull the final answer out of a model response.

    All methods are static. Extraction prefers the machine-readable
    ``<final>...</final>`` tag, then the last lines of the text, then
    "Final Answer:"-style phrases and boxed LaTeX expressions.
    """

    # <final>...</final> (non-greedy, may span several lines).
    _FINAL_TAG_RE = re.compile(r'<final>(.*?)</final>', re.DOTALL | re.IGNORECASE)
    # "boxed{" preceded by zero to three backslashes.
    _BOXED_OPEN_RE = re.compile(r'(?:\\){0,3}boxed\{', re.IGNORECASE)
    # Same opener plus the optional leading '$' / whitespace removed while cleaning.
    _BOXED_WRAPPED_OPEN_RE = re.compile(r'\$?\s*(?:\\){0,3}boxed\{', re.IGNORECASE)
    # Optional whitespace / '$' that follows the closing brace of a boxed group.
    _BOXED_TAIL_RE = re.compile(r'\s*\$?')
    # \frac / \sqrt whose arguments contain no further braces (innermost groups).
    _FRAC_RE = re.compile(r'\\frac\{([^{}]*)\}\{([^{}]*)\}')
    _SQRT_RE = re.compile(r'\\sqrt\{([^{}]*)\}')
    # Lead-in phrases stripped from the start of a candidate, applied in this order.
    _PREFIX_PATTERNS = (
        r'Final Answer:\s*',
        r'Answer:\s*',
        r'The answer is\s*',
        r'Therefore,?\s*',
        r'Thus,?\s*',
        r'Hence,?\s*',
        r'So,?\s*',
        r'∴\s*',
    )

    @staticmethod
    def _closing_brace(text: str, start: int) -> int:
        """Return the index of the '}' closing a group whose '{' sits just before ``start``.

        Nested ``{...}`` pairs are skipped. Returns -1 when the group never closes.
        """
        depth = 1
        for i in range(start, len(text)):
            ch = text[i]
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    return i
        return -1

    @staticmethod
    def _unwrap_boxed(text: str) -> str:
        """Replace every ``\\boxed{...}`` (with optional surrounding ``$``) by its content.

        Braces are matched by depth, so ``\\boxed{\\frac{1}{2}}`` yields
        ``\\frac{1}{2}`` rather than being cut at the first '}'. Openers whose
        braces never balance are left untouched.
        """
        cls = SimplifiedAnswerExtractor
        pos = 0
        while True:
            opener = cls._BOXED_WRAPPED_OPEN_RE.search(text, pos)
            if opener is None:
                return text
            close = cls._closing_brace(text, opener.end())
            if close < 0:
                pos = opener.end()
                continue
            tail = cls._BOXED_TAIL_RE.match(text, close + 1)
            text = text[:opener.start()] + text[opener.end():close] + text[tail.end():]
            # Rescan from the splice point so a box nested in the content is unwrapped too.
            pos = opener.start()

    @staticmethod
    def _boxed_contents(text: str) -> list:
        """Return the brace-balanced contents of every ``boxed{...}`` in ``text``, in order."""
        cls = SimplifiedAnswerExtractor
        contents = []
        pos = 0
        while True:
            opener = cls._BOXED_OPEN_RE.search(text, pos)
            if opener is None:
                return contents
            close = cls._closing_brace(text, opener.end())
            if close < 0:
                pos = opener.end()
                continue
            contents.append(text[opener.end():close])
            pos = close + 1

    @staticmethod
    def _convert_latex(text: str) -> str:
        """Rewrite ``\\frac{a}{b}`` as ``(a)/(b)`` and ``\\sqrt{x}`` as ``√(x)``.

        Only brace-free (innermost) groups are rewritten per pass, and passes
        repeat until nothing changes, so nested fractions and roots are
        converted inside-out instead of being corrupted.
        """
        cls = SimplifiedAnswerExtractor
        previous = None
        while previous != text:
            previous = text
            text = cls._FRAC_RE.sub(r'(\1)/(\2)', text)
            text = cls._SQRT_RE.sub(r'√(\1)', text)
        return text

    @staticmethod
    def _tagged_answers(text: str) -> list:
        """Return the cleaned, valid contents of all ``<final>...</final>`` tags in ``text``."""
        cls = SimplifiedAnswerExtractor
        answers = []
        for raw in cls._FINAL_TAG_RE.findall(text):
            candidate = cls._clean_answer(raw)
            if cls._is_valid_answer(candidate):
                answers.append(candidate)
        return answers

    @staticmethod
    def _clean_answer(answer: str) -> str:
        """Normalize a raw answer candidate into a compact plain-text form.

        Collapses whitespace, strips ``$``/``$$`` wrappers, lead-in phrases
        ("Final Answer:", "Therefore," ...), boxed wrappers, markdown emphasis
        and trailing punctuation, and converts ``\\frac``/``\\sqrt`` into
        readable forms. Returns "" for empty input.
        """
        if not answer:
            return ""
        cls = SimplifiedAnswerExtractor

        a = re.sub(r'\s+', ' ', answer.strip())

        # Remove a $$...$$ or $...$ wrapper around the whole string, then any stray '$'/spaces.
        a = re.sub(r'^\$\$(.*)\$\$$', r'\1', a, flags=re.DOTALL)
        a = re.sub(r'^\$(.*)\$$', r'\1', a, flags=re.DOTALL)
        a = a.strip('$ ')

        # Done after the '$' stripping so "$Final Answer: ...$" is handled as well.
        for prefix in cls._PREFIX_PATTERNS:
            a = re.sub(f'^{prefix}', '', a, flags=re.IGNORECASE)

        a = cls._unwrap_boxed(a)
        a = cls._convert_latex(a)

        # Markdown bold / italic markers.
        a = re.sub(r'\*\*([^*]+)\*\*', r'\1', a)
        a = re.sub(r'\*([^*]+)\*', r'\1', a)

        a = re.sub(r'\s+', ' ', a).strip()
        a = a.rstrip(' \t\n.,;:')

        # Drop closing filler such as "proved" or "the answer".
        a = re.sub(r'\b(proved|completed|finished|the answer)\b[.\s]*$', '', a, flags=re.IGNORECASE).strip()

        return a

    @staticmethod
    def _is_valid_answer(answer: str) -> bool:
        """Return True when ``answer`` looks like a real answer.

        Rejects empty strings, strings without any letter, digit or backslash,
        strings longer than 1000 characters, and strings that end in a dangling
        connective ("therefore", "thus", "we get", ...).
        """
        if not answer:
            return False
        if re.match(r'^[\W_]+$', answer):
            return False
        if not re.search(r'[0-9A-Za-z\\]', answer):
            return False
        if len(answer) > 1000:
            return False
        blacklist = [r'therefore$', r'thus$', r'hence$', r'so$', r'we get$', r'we have$']
        stripped = answer.strip()
        return not any(re.search(b, stripped, flags=re.IGNORECASE) for b in blacklist)

    @staticmethod
    def extract_final_answer_simple(text: str) -> str:
        """Extract a single final answer from ``text``.

        Order of attempts:
          0. First valid ``<final>...</final>`` tag. The prompt requires this
             tag, and without this step the tag markup itself would be returned
             as part of the "last lines" answer.
          1. Last two non-empty lines joined.
          2. Last non-empty line.
          3. Last three non-empty lines joined (only if shorter than 500 chars).
          4. Pattern-based fallback (``_extract_with_patterns``).

        Returns "" when nothing valid is found.
        """
        if not text:
            return ""
        cls = SimplifiedAnswerExtractor

        tagged = cls._tagged_answers(text)
        if tagged:
            return tagged[0]

        lines = [line.strip() for line in text.strip().split('\n') if line.strip()]

        # Strategy 1: last two lines combined
        if len(lines) >= 2:
            cleaned = cls._clean_answer(' '.join(lines[-2:]))
            if cls._is_valid_answer(cleaned):
                return cleaned

        # Strategy 2: last line only
        if lines:
            cleaned = cls._clean_answer(lines[-1])
            if cls._is_valid_answer(cleaned):
                return cleaned

        # Strategy 3: last three lines (answers sometimes span several lines)
        if len(lines) >= 3:
            cleaned = cls._clean_answer(' '.join(lines[-3:]))
            if cls._is_valid_answer(cleaned) and len(cleaned) < 500:
                return cleaned

        # Strategy 4: phrase / boxed patterns
        return cls._extract_with_patterns(text)

    @staticmethod
    def _extract_with_patterns(text: str) -> str:
        """Pattern-based fallback extraction.

        Tries, in order: the first valid ``<final>`` tag, the last match of
        each "Final Answer:" / "Therefore" / "Answer" style phrase, and finally
        boxed expressions in order of appearance (contents taken with balanced
        braces). Returns "" when nothing valid is found.
        """
        cls = SimplifiedAnswerExtractor

        tagged = cls._tagged_answers(text)
        if tagged:
            return tagged[0]

        patterns = [
            r'\*\*Final Answer:\*\*\s*(.+?)(?:\n|$)',
            r'Final Answer:\s*(.+?)(?:\n|$)',
            r'Therefore[,:\s]*(.+?)(?:\.|$|\n)',
            r'Hence[,:\s]*(.+?)(?:\.|$|\n)',
            r'Thus[,:\s]*(.+?)(?:\.|$|\n)',
            r'Answer[:\s]*(.+?)(?:\n|$)',
            r'∴\s*(.+?)(?:\.|$|\n)',
        ]
        for pat in patterns:
            matches = re.findall(pat, text, re.MULTILINE | re.DOTALL | re.IGNORECASE)
            if matches:
                # The last occurrence is the one closest to the end of the solution.
                cleaned = cls._clean_answer(matches[-1].strip())
                if cls._is_valid_answer(cleaned):
                    return cleaned

        for content in cls._boxed_contents(text):
            candidate = cls._clean_answer(content)
            if cls._is_valid_answer(candidate):
                return candidate

        return ""

    @staticmethod
    def extract_all_final_answers(generated_solution: str) -> list:
        """Return every valid answer found inside ``<final>...</final>`` tags.

        Falls back to a one-element list from ``extract_final_answer_simple``
        when no valid tag is present, and to an empty list when that fails too
        or the input is empty.
        """
        if not generated_solution:
            return []
        cls = SimplifiedAnswerExtractor

        tagged = cls._tagged_answers(generated_solution)
        if tagged:
            return tagged

        simple_answer = cls.extract_final_answer_simple(generated_solution)
        return [simple_answer] if simple_answer else []


# -------------------------
# Adapter / compatibility layer
# So existing code that expects methods like extract_final_answer and extract_all_final_answers
# can use the simplified extractor transparently.
# -------------------------
class AnswerExtractor(SimplifiedAnswerExtractor):
    """Compatibility adapter exposing the older method names on top of SimplifiedAnswerExtractor."""

    @staticmethod
    def extract_final_answer(text: str) -> str:
        """Return the single final answer found in ``text`` (delegates to the simplified extractor)."""
        single_answer = SimplifiedAnswerExtractor.extract_final_answer_simple(text)
        return single_answer

    @staticmethod
    def extract_all_final_answers(text: str) -> list:
        """Return the list of final answers found in ``text`` (delegates to the simplified extractor)."""
        answers = SimplifiedAnswerExtractor.extract_all_final_answers(text)
        return answers


# -------------------------
# Ollama-based English Math Solver (temperature / token-limit handling is in OllamaMathSolver._call_chat)
# -------------------------
class OllamaMathSolver:
    """
    Chain-of-Thought English math solver using Ollama.

    Builds a strictly structured English prompt for a math problem, sends it to
    an Ollama chat model and extracts the final answer from the reply. If no
    answer can be extracted, an optional second pass asks the model for the
    final answer only.
    """

    def __init__(self, model_name="gpt-oss:20b", max_tokens=2048):
        """Create the solver.

        Args:
            model_name: Name of a model available to the local Ollama server.
            max_tokens: Default generation limit, forwarded to Ollama as the
                ``num_predict`` option. ``None`` disables the limit.
        """
        self.model_name = model_name
        self.client = self._init_client()
        self.max_tokens = max_tokens

    def _init_client(self):
        """Create and return the ``ollama.Client`` used for all chat calls."""
        print(f"Initializing Ollama client for model: {self.model_name}")
        return ollama.Client()

    def cleanup(self):
        """Drop the client reference, if one was created."""
        if hasattr(self, 'client'):
            del self.client

    def _get_format_instructions(self, answer_type):
        """Return the answer-format section of the prompt for ``answer_type``.

        Unknown or empty types are treated as "symbolic".
        """
        t = (answer_type or "symbolic").strip().lower()
        if t not in _CANONICAL_TYPES:
            t = "symbolic"

        base = """
CRITICAL ANSWER FORMATTING REQUIREMENTS:
You MUST end your solution with the final answer in the exact format below.
First provide a human-readable final line starting with 'Final Answer:'.
Immediately after that line, include a machine-readable final tag: <final>...</final>.
The content inside <final> should be concise and contain only the final answer (no extra reasoning).
"""

        if t == "proof":
            return base + """
FINAL ANSWER FORMAT FOR PROOFS:
After the proof, write exactly:

Final Answer:
[Concise English conclusion]

Then the machine-readable tag on its own line:

<final>[Concise English conclusion]</final>
"""
        elif t == "numerical":
            return base + """
FINAL ANSWER FORMAT FOR NUMERICAL RESULTS:
After the calculation, write exactly:

Final Answer:
[Numeric result in exact form or decimal]

Then the machine-readable tag on its own line:

<final>[Numeric result]</final>
"""
        else:  # symbolic
            return base + """
FINAL ANSWER FORMAT FOR SYMBOLIC RESULTS:
After the manipulations, write exactly:

Final Answer:
[Final symbolic expression; prefer LaTeX for clarity]

Then the machine-readable tag on its own line:

<final>[LaTeX expression or boxed LaTeX]</final>
"""

    def _create_prompt(self, question, answer_type="General"):
        """Assemble the full solving prompt for ``question``.

        The prompt consists of the problem, the required output order, the
        English-only rule, the solution approach, notation guidelines and the
        answer-format instructions for ``answer_type``.
        """
        format_instructions = self._get_format_instructions(answer_type)

        required_order_block = """
REQUIRED OUTPUT ORDER - STRICT:
1) Problem statement (brief, in English)
2) Problem understanding (brief, in English)
3) Mathematical analysis (relevant theorems/formulas, in English)
4) Step-by-step solution (NUMBERED STEPS: 1., 2., ... - each step explained clearly in English)
   - The 'Step-by-step solution' section must be present and include at least 3 numbered steps.
5) Verification (solution check, in English)
6) Final answer (human-readable, in English) starting with 'Final Answer:'
7) <final>...</final> (machine-readable tag on its own line)

IMPORTANT: Under no circumstances output 'Final Answer' or the '<final>' tag before steps 4 and 5 are completed. If you output <final> earlier, it will be considered INVALID output.
"""

        solution_approach_block = """
SOLUTION APPROACH (WRITE ALL REASONING IN ENGLISH ONLY):

1. PROBLEM UNDERSTANDING:
   - In English: Carefully read and restate the problem in English.
2. MATHEMATICAL ANALYSIS:
   - In English: Break down into subproblems and list relevant theorems/formulae.
3. STEP-BY-STEP SOLUTION:
   - In English: Provide numbered steps (1., 2., 3., ...). Show all algebraic/arithmetic work using LaTeX where helpful.
4. VERIFICATION:
   - In English: Briefly check correctness.
"""

        math_notation_block = r"""
MATHEMATICAL NOTATION GUIDELINES:
- Use $$ for displayed equations and $ for inline expressions.
- Use LaTeX commands like \\frac{num}{den}, \\sqrt{...}, \\pm, \\le, \\ge where appropriate.
"""

        language_enforcement_block = """
CRITICAL LANGUAGE REQUIREMENTS - STRICTLY MANDATORY:
You MUST write your entire response in ENGLISH only, except for mathematical notation.
- Do NOT write explanatory text in any other language.
- Use English for all descriptive text. LaTeX mathematical notation is allowed.
"""

        prompt = (
            f"You are an expert mathematician. All explanatory text MUST be in ENGLISH. Mathematical notation (LaTeX) is allowed.\n\n"
            f"MATHEMATICAL PROBLEM:\n{question}\n\n"
            f"{required_order_block}\n"
            f"{language_enforcement_block}\n\n"
            f"{solution_approach_block}\n\n"
            f"{math_notation_block}\n\n"
            f"{format_instructions}\n\n"
            "FINAL REMINDER: Follow the REQUIRED OUTPUT ORDER exactly. First produce the numbered 'Step-by-step solution' section, then Verification, and only after that print 'Final Answer:' followed by the <final> tag on its own line.\n\n"
            "Begin your solution now (Start):\n"
        )
        return prompt

    def _call_chat(self, messages, temperature=0.0, max_tokens=None):
        """Send ``messages`` to the model and return the raw client response.

        Sampling settings are passed through the ``options`` mapping, which is
        how the Ollama client accepts them: ``temperature`` as-is and the token
        limit as ``num_predict``. Passing them as top-level keyword arguments
        (as this method used to) raises ``TypeError`` in the client, which made
        every request fall back to a call without any settings.

        NOTE(review): because the limit is now actually applied, responses are
        capped at ``max_tokens`` tokens; raise ``max_tokens`` (or pass ``None``
        to the constructor) if solutions come back truncated.

        Args:
            messages: List of chat messages (dicts with "role" and "content").
            temperature: Sampling temperature.
            max_tokens: Per-call token limit; defaults to ``self.max_tokens``.
        """
        limit = self.max_tokens if max_tokens is None else max_tokens
        options = {"temperature": float(temperature)}
        if limit is not None:
            options["num_predict"] = int(limit)

        try:
            return self.client.chat(model=self.model_name, messages=messages, options=options)
        except TypeError:
            # Client without an `options` parameter: keep the previous
            # best-effort behaviour and send the request without settings.
            return self.client.chat(model=self.model_name, messages=messages)

    def _extract_content_from_resp(self, resp):
        """Return the assistant text contained in a chat response, stripped.

        Handles dict responses with a ``message`` entry, dict responses with a
        ``choices`` list, and objects exposing ``.message.content``; anything
        else is stringified. Bytes are decoded as UTF-8.
        """
        content = ""
        if isinstance(resp, dict):
            if 'message' in resp:
                m = resp['message']
                if isinstance(m, dict) and 'content' in m:
                    content = m['content']
                else:
                    content = str(m)
            elif 'choices' in resp and resp['choices']:
                choice = resp['choices'][0]
                if isinstance(choice, dict) and 'message' in choice and isinstance(choice['message'], dict):
                    content = choice['message'].get('content', '')
                else:
                    content = str(choice)
            else:
                content = str(resp)
        else:
            try:
                content = resp.message.content
            except Exception:
                content = str(resp)
        if isinstance(content, bytes):
            content = content.decode('utf-8', errors='ignore')
        return (content or "").strip()

    def _generate_once(self, prompt_text: str, deterministic: bool = False) -> str:
        """Run one chat completion for ``prompt_text`` and return the reply text.

        With ``deterministic=True`` a header is prepended that tells the model
        to output only a single ``<final>...</final>`` tag.
        """
        final_prompt = prompt_text
        if deterministic:
            deterministic_header = (
                "DETERMINISTIC MODE: Respond deterministically in ENGLISH ONLY. Output ONLY the concise final answer inside a single <final>...</final> tag. "
                "Do NOT include any other text or explanation. The content inside <final> must be the concise final answer in English.\n\n"
            )
            final_prompt = deterministic_header + prompt_text

        messages = [{"role": "user", "content": final_prompt}]
        resp = self._call_chat(messages, temperature=0.0, max_tokens=self.max_tokens)
        return self._extract_content_from_resp(resp)

    def solve_problem(self, question, answer_type="symbolic", two_pass=True):
        """Solve ``question`` and return the raw output plus the extracted answer.

        Args:
            question: Problem statement.
            answer_type: Canonical answer type used to pick format instructions.
            two_pass: When True and no answer could be extracted from the first
                reply, ask the model again for the final answer only.

        Returns:
            Dict with keys ``thinking_content`` (always ""), ``generated_answer``,
            ``final_tag_output`` (second-pass reply or "") and
            ``extracted_final_answer``.
        """
        prompt = self._create_prompt(question, answer_type)
        generated_answer = self._generate_once(prompt, deterministic=False)

        final_tag_output = ""
        extracted_final_answer = SimplifiedAnswerExtractor.extract_final_answer_simple(generated_answer)

        if two_pass and not extracted_final_answer:
            prompt2 = (
                "Below is the previously generated full solution (including reasoning).\n\n"
                "Now output ONLY the concise final answer and nothing else, enclosed in a single machine-readable tag <final>...</final>. "
                "Do not include any other text or explanation. The content inside <final> should be the concise final answer in English. "
                "YOU MUST WRITE THE CONTENT INSIDE <final> IN ENGLISH ONLY.\n\n"
                "PREVIOUS SOLUTION:\n\n"
                f"{generated_answer}\n\n"
                "OUTPUT EXAMPLE:\n"
                "<final>Answer: $\\sqrt{10}$ is irrational.</final>\n"
            )
            try:
                final_tag_output = self._generate_once(prompt2, deterministic=True)
                extracted_final_answer = SimplifiedAnswerExtractor.extract_final_answer_simple(final_tag_output)
            except Exception as e:
                # The second pass is best-effort; keep the first-pass output.
                print(f"Deterministic second pass failed: {e}")
                final_tag_output = final_tag_output or ""

        return {
            "thinking_content": "",
            "generated_answer": generated_answer,
            "final_tag_output": final_tag_output,
            "extracted_final_answer": extracted_final_answer
        }

# -------------------------
# Dataset Processor
# (uses SimplifiedAnswerExtractor via AnswerExtractor adapter)
# -------------------------
class DatasetProcessor:
    """Runs a solver over a JSONL problem set and writes results, checkpoints and a summary."""

    def __init__(self, solver: OllamaMathSolver, failed_folder=None):
        """Store the solver and make sure the folder for failed extractions exists.

        Args:
            solver: Object providing ``solve_problem(question, answer_type=..., two_pass=...)``.
            failed_folder: Directory for per-problem dumps of failed extractions;
                defaults to "failed_extractions" in the current directory.
        """
        self.solver = solver
        self.extractor = AnswerExtractor  # static adapter
        self.failed_folder = failed_folder or "failed_extractions"
        os.makedirs(self.failed_folder, exist_ok=True)

    def process_dataset(self, dataset_path, output_base_path, start_idx=0, end_idx=None,
                        folder_name=None, create_timestamped_folder=True, two_pass=True):
        """Solve problems ``start_idx`` .. ``end_idx - 1`` of the dataset.

        ``end_idx`` is clamped to the dataset length, so folder and file names,
        the progress message and the summary describe the range actually
        processed. One entry is recorded per problem: the result on success or
        an error entry when processing raises. A checkpoint is written after
        every 10 recorded entries; a failing checkpoint write is reported and
        does not affect the recorded results.

        Returns:
            Tuple ``(results, output_folder)``.
        """
        dataset = self._load_dataset(dataset_path)
        if end_idx is None or end_idx > len(dataset):
            end_idx = len(dataset)

        output_folder = self._create_output_folder(output_base_path, folder_name, start_idx, end_idx, create_timestamped_folder)
        results = []

        print(f"Processing problems {start_idx} to {end_idx-1} ({end_idx-start_idx} total)")
        print(f"Output will be saved in: {output_folder}")

        for idx in tqdm(range(start_idx, end_idx)):
            problem = dataset[idx]
            # Only the solving step is guarded, so an error entry always
            # replaces (never duplicates) the entry for this problem.
            try:
                entry = self._process_single_problem(idx, problem, two_pass=two_pass)
            except Exception as e:
                print(f"Error processing problem {idx+1}: {str(e)}")
                entry = self._create_error_entry(idx, problem, str(e))
            else:
                self._print_progress(idx, entry)
            results.append(entry)

            if len(results) % 10 == 0:
                try:
                    self._save_intermediate_results(results, output_folder, len(results))
                except (OSError, TypeError, ValueError) as e:
                    # Checkpoints are best-effort; the final save still runs.
                    print(f"Warning: could not save intermediate results: {e}")

        self._save_final_results(results, output_folder, start_idx, end_idx)
        self._create_summary_file(results, output_folder, dataset_path, start_idx, end_idx)
        return results, output_folder

    def _create_output_folder(self, base_path, folder_name, start_idx, end_idx, add_timestamp):
        """Create (if needed) and return the output folder path.

        The default name is ``results_<start>_to_<end-1>``; a
        ``_YYYYmmdd_HHMMSS`` suffix is appended when ``add_timestamp`` is true.
        """
        if folder_name is None:
            folder_name = f"results_{start_idx}_to_{end_idx-1}"
        if add_timestamp:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            folder_name = f"{folder_name}_{timestamp}"
        output_folder = os.path.join(base_path, folder_name)
        os.makedirs(output_folder, exist_ok=True)
        return output_folder

    def _load_dataset(self, dataset_path):
        """Read a JSONL file and return its records as a list (blank lines are skipped)."""
        with open(dataset_path, 'r', encoding='utf-8') as f:
            return [json.loads(line) for line in f if line.strip()]

    def _process_single_problem(self, idx, problem, two_pass=True):
        """Solve one problem record and build its result entry.

        Determines the canonical answer type, calls the solver, extracts the
        final answer(s) and, when nothing could be extracted, dumps the raw
        material to ``self.failed_folder`` for inspection.
        """
        language = problem.get("Language", "")
        chapter_num = problem.get("Chapter Number", "")
        example_num = problem.get("Example Number", "")
        question = problem.get("Question", "")
        exact_answer = problem.get("Exact Answer", "")
        raw_answer_type = problem.get("Answer Type", "") or ""

        # The reference answer may be a JSON number; heuristics work on text.
        exact_text = "" if exact_answer is None else str(exact_answer)

        # Normalize/infer canonical answer type: 'symbolic', 'numerical', 'proof'
        canonical_type = normalize_answer_type(raw_answer_type, question_text=question, exact_answer=exact_text)

        # A reference answer with digits and no LaTeX structure is treated as numerical.
        if exact_answer and re.search(r'\d', exact_text) and not re.search(r'\\frac|\\sqrt|\\boxed', exact_text):
            canonical_type = "numerical"

        print(f"\nProcessing Problem {idx+1}: Chapter {chapter_num}, Example {example_num}")
        print(f"Raw Answer Type: '{raw_answer_type}'  --> canonical: '{canonical_type}'")

        solution_result = self.solver.solve_problem(question, answer_type=canonical_type, two_pass=two_pass)
        generated_answer = solution_result.get('generated_answer', '')
        thinking_content = solution_result.get('thinking_content', '')
        final_tag_output = solution_result.get('final_tag_output', '')

        # Multi-answer extraction first: main output, then the second-pass output.
        all_finals = self.extractor.extract_all_final_answers(generated_answer)
        if not all_finals and final_tag_output:
            all_finals = self.extractor.extract_all_final_answers(final_tag_output)

        extracted_final_answer = ""
        extracted_final_answers = []
        if all_finals:
            extracted_final_answers = all_finals
            if len(all_finals) == 1:
                extracted_final_answer = all_finals[0]
            else:
                # Several finals: store them as a JSON array string.
                try:
                    extracted_final_answer = json.dumps(all_finals, ensure_ascii=False)
                except (TypeError, ValueError):
                    extracted_final_answer = " ||| ".join(all_finals)
        else:
            # Single-answer extraction on the main output, then on all text combined.
            single = self.extractor.extract_final_answer(generated_answer)
            if not single:
                combined = "\n".join([thinking_content or "", generated_answer or "", final_tag_output or ""])
                single = self.extractor.extract_final_answer(combined)
            if single:
                extracted_final_answer = single
                extracted_final_answers = [single]

        # If still empty, save a failed extraction example for inspection
        if not extracted_final_answer:
            fname = f"failed_{idx}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            fpath = os.path.join(self.failed_folder, fname)
            with open(fpath, 'w', encoding='utf-8') as f:
                json.dump({
                    "index": idx,
                    "question": question,
                    "generated_answer": generated_answer,
                    "thinking_content": thinking_content,
                    "final_tag_output": final_tag_output,
                    "exact_answer": exact_answer,
                    "canonical_type": canonical_type,
                    "extracted_final_answer": extracted_final_answer,
                    "extracted_final_answers": extracted_final_answers
                }, f, ensure_ascii=False, indent=2)
            print(f"Saved failed extraction example to {fpath}")

        return {
            "problem_index": idx,
            "language": language,
            "chapter_number": chapter_num,
            "example_number": example_num,
            "question": question,
            "generated_answer": generated_answer,
            "thinking_content": thinking_content,
            "final_tag_output": final_tag_output,
            "extracted_final_answer": extracted_final_answer,       # string (or JSON array string)
            "extracted_final_answers": extracted_final_answers,     # list (empty / single / many)
            "exact_answer": exact_answer,
            "raw_answer_type": raw_answer_type,
            "canonical_answer_type": canonical_type,
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _create_error_entry(self, idx, problem, error_msg):
        """Build a result entry for a problem whose processing raised ``error_msg``."""
        return {
            "problem_index": idx,
            "language": problem.get("Language", ""),
            "chapter_number": problem.get("Chapter Number", ""),
            "example_number": problem.get("Example Number", ""),
            "question": problem.get("Question", ""),
            "generated_answer": f"ERROR: {error_msg}",
            "thinking_content": "",
            "final_tag_output": "",
            "extracted_final_answer": "",
            "extracted_final_answers": [],
            "exact_answer": problem.get("Exact Answer", ""),
            "raw_answer_type": problem.get("Answer Type", ""),
            "canonical_answer_type": "",
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _print_progress(self, idx, result_entry):
        """Print a short report for one processed problem."""
        print(f"Generated answer length: {len(result_entry['generated_answer']) if result_entry['generated_answer'] else 0}")
        print(f"Extracted final answer: '{result_entry['extracted_final_answer']}'")
        print(f"Extracted final answers (list): {result_entry.get('extracted_final_answers', [])}")
        print(f"Expected answer: '{result_entry['exact_answer']}'")

    def _save_intermediate_results(self, results, output_folder, count):
        """Write all results gathered so far to ``intermediate_results_<count>.json``."""
        temp_filename = f'intermediate_results_{count}.json'
        temp_output_path = os.path.join(output_folder, temp_filename)
        with open(temp_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"Saved intermediate results to {temp_output_path}")

    def _save_final_results(self, results, output_folder, start_idx, end_idx):
        """Write the complete result list to ``final_results_<start>_to_<end-1>.json`` and return its path."""
        final_filename = f'final_results_{start_idx}_to_{end_idx-1}.json'
        final_output_path = os.path.join(output_folder, final_filename)
        with open(final_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"\nProcessing complete. Results saved to {final_output_path}")
        print(f"Total problems processed: {len(results)}")
        return final_output_path

    def _create_summary_file(self, results, output_folder, dataset_path, start_idx, end_idx):
        """Write ``processing_summary.json`` with run metadata and aggregate statistics."""
        total = len(results)
        failed = sum(1 for r in results if r['generated_answer'].startswith('ERROR:'))
        successful_extractions = sum(1 for r in results if r.get('extracted_final_answer', '').strip())
        summary_data = {
            "processing_info": {
                "dataset_path": dataset_path,
                "start_index": start_idx,
                "end_index": end_idx - 1,
                "total_processed": total,
                "processing_timestamp": datetime.now().isoformat(),
                "output_folder": output_folder
            },
            "statistics": {
                "successful_problems": total - failed,
                "failed_problems": failed,
                "successful_extractions": successful_extractions,
                "extraction_success_rate": f"{(successful_extractions/total*100):.1f}%" if results else "0%",
                "average_answer_length": sum(len(r['generated_answer']) for r in results) / total if results else 0,
                "chapters_processed": list(set(r['chapter_number'] for r in results if r['chapter_number'])),
                "raw_answer_types": list(set(r['raw_answer_type'] for r in results if r.get('raw_answer_type'))),
                "canonical_answer_types": list(set(r['canonical_answer_type'] for r in results if r.get('canonical_answer_type')))
            }
        }
        summary_path = os.path.join(output_folder, 'processing_summary.json')
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary_data, f, ensure_ascii=False, indent=2)
        print(f"Processing summary saved to {summary_path}")
        print(f"Answer extraction success rate: {summary_data['statistics']['extraction_success_rate']}")


# -------------------------
# Main (example usage)
# -------------------------
def main():
    """Run the Ollama solver over a fixed slice of the corpus and print the output folder."""
    # NOTE: update dataset_path and output_base_path to match your environment
    dataset_path = "/kaggle/input/Bangla_Final_Corpus.jsonl"  # JSONL corpus, read one record per line
    output_base_path = "/kaggle/working/"
    failed_dir = os.path.join(output_base_path, "failed_extractions")

    # The model name must match a model available locally via Ollama.
    solver = OllamaMathSolver(model_name="gpt-oss:20b")
    processor = DatasetProcessor(solver, failed_folder=failed_dir)

    # Slice of the dataset handled by this run (end index is exclusive).
    run_options = {"start_idx": 720, "end_idx": 1445, "two_pass": True}
    _results, out_folder = processor.process_dataset(dataset_path, output_base_path, **run_options)
    print("Done. Results saved to:", out_folder)


if __name__ == "__main__":
    main()