In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in names)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
import subprocess
# Launch the Ollama server in the background as a separate OS process (not a thread).
# An argument list without shell=True makes `process` the handle of the server itself
# instead of an intermediate shell, so process.terminate()/process.poll() act on the server,
# and a missing `ollama` binary raises FileNotFoundError here instead of failing silently.
process = subprocess.Popen(["ollama", "serve"])
# Install the Ollama Python client (the model itself is pulled in the next cell)
!pip install ollama

In [None]:
!ollama pull mathstral:7b

In [None]:
import json
import os
import re
from datetime import datetime
from tqdm import tqdm

# Ollama client (make sure Ollama daemon is running and 'ollama' Python package is installed)
try:
    import ollama
except Exception as e:
    raise ImportError("The 'ollama' package is required. Install it and make sure the Ollama daemon is running.") from e


# -------------------------
# Answer type normalization (Bangla-aware)
# -------------------------
_CANONICAL_TYPES = {"symbolic", "numerical", "proof"}

# Known dataset labels (lower-case English and Bangla) mapped to their canonical answer type.
# Insertion order matters: normalize_answer_type() scans this dict for substring matches
# and returns the first hit.
_ANSWER_TYPE_MAP_SIMPLE = {
    # Proof variants
    "proof": "proof", "prove": "proof", "প্রমাণ": "proof", "প্রমাণিত": "proof",
    # Numerical variants
    "numerical": "numerical", "numeric": "numerical", "সংখ্যা": "numerical", "গণনা": "numerical", "number": "numerical",
    # Symbolic variants
    "symbolic": "symbolic", "symbol": "symbolic", "প্রতীক": "symbolic", "সূত্র": "symbolic", "সমীকরণ": "symbolic", "equation": "symbolic",
}


def normalize_answer_type(raw_label: str, question_text: str = "", exact_answer: str = "") -> str:
    """
    Normalize a dataset label to one of: 'symbolic', 'numerical', 'proof'.

    Resolution order:
      1. Exact match of the cleaned label against ``_ANSWER_TYPE_MAP_SIMPLE``.
      2. First map key (in dict order) that occurs as a substring of the cleaned label.
      3. Keyword scoring over the question and the exact answer; the highest score wins,
         ties going to the first of 'proof', 'numerical', 'symbolic'.
      4. 'symbolic' when no heuristic fires at all.

    Args:
        raw_label: Answer-type label from the dataset; may be empty or None.
        question_text: Problem statement used by the scoring heuristics.
        exact_answer: Reference answer used by the scoring heuristics.

    Non-string arguments (for example a JSON number used as the exact answer) are
    converted with ``str()`` instead of raising ``AttributeError``; ``None`` is
    treated as an empty string.

    Returns:
        One of 'symbolic', 'numerical' or 'proof'.
    """
    def _as_text(value) -> str:
        # Dataset fields come straight from JSON, so they may be numbers or None.
        if value is None:
            return ""
        return value if isinstance(value, str) else str(value)

    def _clean_label(lbl: str) -> str:
        if not lbl:
            return ""
        s = lbl.strip().lower()
        # Keep printable characters (preserve math symbols, Bangla, Greek, punctuation)
        s = ''.join(ch for ch in s if ch.isprintable())
        s = re.sub(r'\s+', ' ', s).strip()
        return s

    s = _clean_label(_as_text(raw_label))

    # direct mapping
    if s in _ANSWER_TYPE_MAP_SIMPLE:
        return _ANSWER_TYPE_MAP_SIMPLE[s]

    # partial matches: first known label contained in the cleaned label wins
    for k, v in _ANSWER_TYPE_MAP_SIMPLE.items():
        if k in s:
            return v

    # score-based heuristics
    q = _as_text(question_text).lower()
    a = _as_text(exact_answer).lower()

    scores = {"proof": 0, "numerical": 0, "symbolic": 0}

    # proof indicators (strong)
    # (A label containing 'proof' can never reach this point: the substring scan
    # above has already returned for it, so no label bonus is needed here.)
    if re.search(r'প্রমাণ|prove|প্রমাণ কর|show that|deduce', q):
        scores["proof"] += 5

    # numerical indicators
    # NOTE(review): Python's \b appears not to treat Bangla vowel signs as word
    # characters, so Bangla keywords ending in one (e.g. গণনা) may only match when a
    # word character follows directly - verify before relying on these keywords.
    if re.search(r'\b(compute|calculate|evaluate|find the value|গণনা|অনুপাত|মান|মাপ|লিটার|মিটার|কিমি|km|m\b|cm|kg|কিলো|লিটার|বর্গ|units?)\b', q + " " + a):
        scores["numerical"] += 3
    # presence of explicit units in question or answer
    if re.search(r'\b(cm|mm|m|km|kg|g|l|ltr|liter|লিটার|মিটার|গুণফল)\b', q + " " + a):
        scores["numerical"] += 2

    # numeric-looking exact answers push towards numerical (LaTeX/algebraic forms also push symbolic)
    if re.search(r'[0-9০-৯]', a):
        scores["numerical"] += 2
        if re.search(r'\\frac|\\sqrt|[A-Za-z]+\(|\^|\_|\{|\}', a):
            # algebraic formatting suggests symbolic form
            scores["symbolic"] += 2

    # symbolic indicators
    if re.search(r'সেট|উপসেট|\bunion\b|\bintersection\b|∪|∩|set|subset', q):
        scores["symbolic"] += 3
    if re.search(r'সমীকরণ|solve for|solve|ইক্যুয়েশন|equation|solve the', q):
        scores["symbolic"] += 3

    # trigonometry, geometry, algebra -> more symbolic unless numeric answer strongly signaled
    if re.search(r'ত্রিকোণ|sin\b|cos\b|tan\b|ত্রিকোণমিতি|জ্যামিতি|triangle|circle|angle|arc', q):
        scores["symbolic"] += 2

    # logarithm: ambiguous, give symbolic slight preference unless exact numeric present
    if re.search(r'\b(log|ln|logarithm|লগ)\b', q):
        scores["symbolic"] += 1
        if re.search(r'\d', a):
            scores["numerical"] += 2

    # digits in the question are only a weak numerical signal
    if re.search(r'\d', q):
        scores["numerical"] += 1

    # Highest score wins. max() keeps the first maximum it sees, so ties resolve in
    # the insertion order of `scores`: proof, then numerical, then symbolic.
    best_type, best_score = max(scores.items(), key=lambda item: item[1])
    if best_score == 0:
        # nothing fired at all
        return "symbolic"
    return best_type


# -------------------------
# Simplified Answer Extractor (Bangla-aware)
# -------------------------
class SimplifiedAnswerExtractor:
    """
    Heuristics for pulling a short final answer out of free-form model output.

    All methods are static, so the class only acts as a namespace. English and
    Bangla answer markers are recognised, as are ``<final>...</final>`` tags and
    LaTeX ``\\boxed{...}`` wrappers.
    """

    # Machine-readable answer tag requested by the prompts.
    _FINAL_TAG = re.compile(r'<final>(.*?)</final>', re.DOTALL | re.IGNORECASE)

    # Opening of a boxed command with zero to three leading backslashes; the
    # content is located by brace matching so nested braces survive.
    _BOXED_OPEN = re.compile(r'(?:\\){0,3}boxed\{', re.IGNORECASE)

    # Regex fragment for the Bangla word "চূড়ান্ত" (final) accepting both encodings of
    # its third letter: precomposed U+09DC, or U+09A1 followed by the nukta U+09BC.
    # Spelled with escapes so the two visually identical forms cannot be confused.
    _CHURANTO = '\u099a\u09c2(?:\u09dc|\u09a1\u09bc)\u09be\u09a8\u09cd\u09a4'

    @staticmethod
    def _strip_math_dollars(s: str) -> str:
        """
        Strip one matching outer ``$...$`` or ``$$...$$`` wrapper.

        The wrapper is removed only when no other dollar sign remains inside, so
        a string such as ``$a$ + $b$`` (two separate math spans) is returned
        unchanged instead of losing two unrelated delimiters.
        """
        if not s:
            return s
        s = s.strip()
        for fence in ('$$', '$'):
            width = len(fence)
            if len(s) >= 2 * width and s.startswith(fence) and s.endswith(fence):
                inner = s[width:-width]
                if '$' not in inner:
                    return inner.strip()
        return s

    @staticmethod
    def _closing_brace(s: str, start: int) -> int:
        """Return the index of the '}' that balances a '{' opened just before ``start``, or -1."""
        depth = 1
        for i in range(start, len(s)):
            ch = s[i]
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    return i
        return -1

    @staticmethod
    def _unwrap_boxed(s: str) -> str:
        """
        Replace every boxed command in ``s`` by its brace-balanced content.

        A ``$`` and/or whitespace directly before the command, and whitespace and/or
        a ``$`` directly after its closing brace, are dropped together with the
        wrapper. A box whose braces never balance is left as it is. Nested boxes are
        unwrapped by repeating the scan until nothing changes.
        """
        cls = SimplifiedAnswerExtractor
        while True:
            pieces = []
            pos = 0
            while True:
                opener = cls._BOXED_OPEN.search(s, pos)
                if opener is None:
                    break
                close = cls._closing_brace(s, opener.end())
                if close < 0:
                    break
                # This pattern always matches (possibly empty) at the end of the prefix.
                lead = re.search(r'\$?\s*\Z', s[pos:opener.start()])
                pieces.append(s[pos:pos + lead.start()])
                pieces.append(s[opener.end():close])
                trail = re.match(r'\s*\$?', s[close + 1:])
                pos = close + 1 + trail.end()
            if pos == 0:
                # nothing was unwrapped in this pass
                return s
            s = ''.join(pieces) + s[pos:]

    @staticmethod
    def _clean_answer(answer: str) -> str:
        """
        Reduce a raw answer candidate to a compact plain-text form.

        Steps, in order: collapse whitespace, strip an outer math wrapper, unwrap
        boxed commands, rewrite simple ``\\frac``/``\\sqrt`` constructs, remove
        markdown emphasis, drop a leading answer marker (English or Bangla), trim
        trailing punctuation and trailing filler words.

        Returns "" for empty input.
        """
        if not answer:
            return ""
        cls = SimplifiedAnswerExtractor

        # collapse whitespace
        a = re.sub(r'\s+', ' ', answer.strip())

        # remove outer $$/$ if present using safe helper
        a = cls._strip_math_dollars(a)

        # unwrap boxed (brace-aware, so \boxed{\frac{1}{2}} keeps its inner braces)
        a = cls._unwrap_boxed(a)

        # replace common latex constructs with simpler forms
        a = re.sub(r'\\frac\{([^}]*)\}\{([^}]*)\}', r'(\1)/(\2)', a)
        a = re.sub(r'\\sqrt\{([^}]*)\}', r'√(\1)', a)

        # remove emphasis marks
        a = re.sub(r'\*\*([^*]+)\*\*', r'\1', a)
        a = re.sub(r'\*([^*]+)\*', r'\1', a)

        # Remove common English and Bangla prefixes (only at start).
        # The English words carry \b so that e.g. "So" does not eat the start of "Solution".
        prefixes_to_remove = [
            r'Final Answer:\s*',
            r'Answer:\s*',
            r'The answer is\b\s*',
            r'Therefore\b,?\s*',
            r'Thus\b,?\s*',
            r'Hence\b,?\s*',
            r'So\b,?\s*',
            r'∴\s*',
            # Bangla equivalents
            r'উত্তর[:\s]*',
            cls._CHURANTO + r' উত্তর[:\s]*',
            r'চূর্তান্ত[:\s]*',  # protective mis-typed variant
            cls._CHURANTO + r'[:\s]*',
            r'অতএব[:,\s]*',
            r'সুতরাং[:,\s]*',
        ]
        for prefix in prefixes_to_remove:
            a = re.sub(f'^{prefix}', '', a, flags=re.IGNORECASE)

        # trim trailing punctuation/words
        a = a.rstrip(' \t\n.,;:')

        # remove trailing words like 'proved', 'completed', etc.
        a = re.sub(r'\b(proved|completed|finished|the answer|উত্তর দেওয়া|সমাপ্ত)\b[.\s]*$', '', a, flags=re.IGNORECASE).strip()

        return a

    @staticmethod
    def _is_valid_answer(answer: str) -> bool:
        """
        Return True when ``answer`` looks like a usable final answer.

        Rejected: empty strings, strings made only of punctuation/underscores,
        strings without any accepted character, strings longer than 1000
        characters, and strings that end in a dangling connective word
        ("therefore", "so", "অতএব", ...).
        """
        if not answer:
            return False
        if re.match(r'^[\W_]+$', answer):
            return False
        # allow latin, digits, backslash and Bangla unicode range and other printable symbols
        if not re.search(r'[0-9A-Za-z\\\u0980-\u09FF\u0370-\u03FF\+\-\*=/\^\_\\\{\}\(\)\[\]\|%√ππΔ≤≥<>]', answer):
            return False
        if len(answer) > 1000:
            return False
        # \b keeps ordinary words that merely end in these letters from being rejected.
        blacklist = [
            r'\btherefore$', r'\bthus$', r'\bhence$', r'\bso$', r'\bwe get$', r'\bwe have$',
            r'অতএব$', r'সুতরাং$', SimplifiedAnswerExtractor._CHURANTO + r'$',
        ]
        stripped = answer.strip()
        return not any(re.search(b, stripped, flags=re.IGNORECASE) for b in blacklist)

    @staticmethod
    def _final_tag_answers(text: str) -> list:
        """Return the cleaned, valid contents of every ``<final>...</final>`` tag, in order."""
        cls = SimplifiedAnswerExtractor
        answers = []
        for raw in cls._FINAL_TAG.findall(text):
            candidate = cls._clean_answer(raw)
            if cls._is_valid_answer(candidate):
                answers.append(candidate)
        return answers

    @staticmethod
    def extract_final_answer_simple(text: str) -> str:
        """
        Extract a single final answer from ``text``.

        Order of attempts:
          0. First valid ``<final>...</final>`` tag. Checked first because the
             prompts require the tag; otherwise the joined last lines would be
             returned with the tag markup still inside.
          1. Last two lines joined.
          2. Last line only.
          3. Last three lines joined (accepted only below 500 characters).
          4. Pattern-based extraction.

        Returns "" when nothing usable is found.
        """
        if not text:
            return ""
        cls = SimplifiedAnswerExtractor

        tagged = cls._final_tag_answers(text)
        if tagged:
            return tagged[0]

        # Clean the text and split into non-empty lines
        lines = [line.strip() for line in text.strip().split('\n') if line.strip()]

        # Strategy 1: last two lines combined
        if len(lines) >= 2:
            cleaned = cls._clean_answer(' '.join(lines[-2:]))
            if cls._is_valid_answer(cleaned):
                return cleaned

        # Strategy 2: last line only
        if lines:
            cleaned = cls._clean_answer(lines[-1])
            if cls._is_valid_answer(cleaned):
                return cleaned

        # Strategy 3: last three lines (sometimes answers span multiple lines)
        if len(lines) >= 3:
            cleaned = cls._clean_answer(' '.join(lines[-3:]))
            if cls._is_valid_answer(cleaned) and len(cleaned) < 500:
                return cleaned

        # Strategy 4: fallback to pattern-based extraction
        return cls._extract_with_patterns(text)

    @staticmethod
    def _extract_with_patterns(text: str) -> str:
        """
        Pattern-based extraction used as the last fallback.

        Tries, in order: ``<final>`` tags, English/Bangla answer markers (the last
        occurrence of each marker), then boxed expressions in document order.
        Returns "" when no candidate passes validation.
        """
        cls = SimplifiedAnswerExtractor

        # Check for <final> tags (explicit requirement enforced in prompts)
        tagged = cls._final_tag_answers(text)
        if tagged:
            return tagged[0]

        # Try common answer patterns (English + Bangla)
        patterns = [
            r'\*\*Final Answer:\*\*\s*(.+?)(?:\n|$)',
            r'Final Answer:\s*(.+?)(?:\n|$)',
            r'Final[:\s]*(.+?)(?:\.|$|\n)',
            r'Therefore[,:\s]*(.+?)(?:\.|$|\n)',
            r'Hence[,:\s]*(.+?)(?:\.|$|\n)',
            r'Thus[,:\s]*(.+?)(?:\.|$|\n)',
            r'Answer[:\s]*(.+?)(?:\n|$)',
            r'∴\s*(.+?)(?:\.|$|\n)',
            # Bangla patterns
            r'উত্তর[:\s]*(.+?)(?:\.|$|\n)',
            cls._CHURANTO + r' উত্তর[:\s]*(.+?)(?:\.|$|\n)',
            r'অতএব[:,\s]*(.+?)(?:\.|$|\n)',
        ]

        for pat in patterns:
            matches = re.findall(pat, text, re.MULTILINE | re.DOTALL | re.IGNORECASE)
            if matches:
                cleaned = cls._clean_answer(matches[-1].strip())
                if cls._is_valid_answer(cleaned):
                    return cleaned

        # Try boxed math expressions; brace matching keeps nested groups intact.
        for opener in cls._BOXED_OPEN.finditer(text):
            close = cls._closing_brace(text, opener.end())
            if close < 0:
                continue
            cand = cls._clean_answer(text[opener.end():close])
            if cls._is_valid_answer(cand):
                return cand

        return ""

    @staticmethod
    def extract_all_final_answers(generated_solution: str) -> list:
        """
        Return every cleaned answer found inside ``<final>...</final>`` tags.

        When no tag yields a valid answer, falls back to
        ``extract_final_answer_simple`` and returns its result as a one-element
        list. Returns an empty list when nothing can be extracted.
        """
        if not generated_solution:
            return []
        cls = SimplifiedAnswerExtractor

        tagged = cls._final_tag_answers(generated_solution)
        if tagged:
            return tagged

        # Fallback: try to extract a single final using the simpler logic
        simple_answer = cls.extract_final_answer_simple(generated_solution)
        return [simple_answer] if simple_answer else []


# -------------------------
# Ollama-based Bangla CoT Solver (no thinking support)
# -------------------------
class OllamaBanglaCoTMathSolver:
    """
    Chain-of-Thought Bangla math solver backed by an Ollama chat model.

    ``solve_problem`` builds a Bangla-only prompt, asks the model for a full
    solution and extracts the final answer; when nothing can be extracted it may
    run a second, answer-only pass.

    Attributes:
        model_name: Name of the Ollama model to query.
        client: ``ollama.Client`` instance used for all requests.
        max_tokens: Default cap on generated tokens per request.
    """

    def __init__(self, model_name="mathstral:7b", max_tokens=2048):
        self.model_name = model_name
        self.client = self._init_client()
        self.max_tokens = max_tokens

    def _init_client(self):
        """Create the Ollama client (announces the model name on stdout)."""
        print(f"Initializing Ollama client for model: {self.model_name}")
        return ollama.Client()

    def cleanup(self):
        """Drop the client reference, if one was created."""
        if hasattr(self, 'client'):
            del self.client

    def _get_format_instructions(self, answer_type):
        """
        Return the answer-formatting section of the prompt for ``answer_type``.

        Unknown or empty types are treated as 'symbolic'.
        """
        t = (answer_type or "symbolic").strip().lower()
        if t not in _CANONICAL_TYPES:
            t = "symbolic"

        base = """
CRITICAL ANSWER FORMATTING REQUIREMENTS:
You MUST end your solution with the final answer in the exact format below.
First provide a human-readable final line starting with 'চূড়ান্ত উত্তর:'.
Immediately after that line, include a machine-readable final tag: <final>...</final>.
The content inside <final> should be concise and contain only the final answer (no extra reasoning).
"""

        if t == "proof":
            return base + """
FINAL ANSWER FORMAT FOR PROOFS:
After the proof, write exactly:

চূড়ান্ত উত্তর:
[Concise **BANGLA** conclusion]

Then the machine-readable tag on its own line:

<final>[Concise **BANGLA** conclusion]</final>
"""
        elif t == "numerical":
            return base + """
FINAL ANSWER FORMAT FOR NUMERICAL RESULTS:
After the calculation, write exactly:

চূড়ান্ত উত্তর:
[Numeric result in exact form or decimal]

Then the machine-readable tag on its own line:

<final>[Numeric result]</final>
"""
        else:  # symbolic
            return base + """
FINAL ANSWER FORMAT FOR SYMBOLIC RESULTS:
After the manipulations, write exactly:

চূড়ান্ত উত্তর:
[Final symbolic expression; prefer LaTeX for clarity]

Then the machine-readable tag on its own line:

<final>[LaTeX expression or boxed LaTeX]</final>
"""

    def _create_prompt(self, question, answer_type="General"):
        """
        Assemble the full solving prompt for ``question``.

        The prompt consists of the problem, the required output order, the
        language rules, the solution approach, notation guidelines and the
        type-specific answer format from ``_get_format_instructions``.
        """
        format_instructions = self._get_format_instructions(answer_type)

        required_order_block = """
REQUIRED OUTPUT ORDER - STRICT:
1) Problem statement (brief, in Bangla)
2) Problem understanding (brief, in Bangla)
3) Mathematical analysis (relevant theorems/formulas, in Bangla)
4) Step-by-step solution (NUMBERED STEPS: 1., 2., ... - each step explained clearly in Bangla)
   - The 'Step-by-step solution' section must be present and include at least 3 numbered steps.
5) Verification (solution check, in Bangla)
6) Final answer (human-readable, in Bangla) starting with 'চূড়ান্ত উত্তর:'
7) <final>...</final> (machine-readable tag on its own line)

IMPORTANT: Under no circumstances output 'চূড়ান্ত উত্তর' or the '<final>' tag before steps 4 and 5 are completed. If you output <final> earlier, it will be considered INVALID output.
"""

        solution_approach_block = """
SOLUTION APPROACH (WRITE ALL REASONING IN BANGLA ONLY):

1. PROBLEM UNDERSTANDING (সমস্যা বুঝা):
   - In Bangla: Carefully read and restate the problem in Bangla.
2. MATHEMATICAL ANALYSIS (গাণিতিক বিশ্লেষণ):
   - In Bangla: Break down into subproblems and list relevant theorems/formulae.
3. STEP-BY-STEP SOLUTION (ধাপে ধাপে সমাধান):
   - In Bangla: Provide numbered steps (1., 2., 3., ...). Show all algebraic/arithmetic work using LaTeX where helpful.
4. VERIFICATION (যাচাইকরণ):
   - In Bangla: Briefly check correctness.
"""

        # Raw string: each LaTeX command needs exactly one backslash here. Doubling
        # them would show the model literal double backslashes (\\frac).
        math_notation_block = r"""
MATHEMATICAL NOTATION GUIDELINES:
- Use $$ for displayed equations and $ for inline expressions.
- Use LaTeX commands like \frac{num}{den}, \sqrt{...}, \pm, \le, \ge where appropriate.
"""

        language_enforcement_block = """
CRITICAL LANGUAGE REQUIREMENTS - STRICTLY MANDATORY:
You MUST write your entire response in BANGLA (বাংলা) language only, except for mathematical notation.
- Do NOT write explanatory text in English.
- Use Bangla for all descriptive text. LaTeX mathematical notation is allowed.
"""

        prompt = (
            f"You are an expert mathematician. All explanatory text MUST be in BANGLA/BENGALI. Mathematical notation (LaTeX) is allowed.\n\n"
            f"MATHEMATICAL PROBLEM:\n{question}\n\n"
            f"{required_order_block}\n"
            f"{language_enforcement_block}\n\n"
            f"{solution_approach_block}\n\n"
            f"{math_notation_block}\n\n"
            f"{format_instructions}\n\n"
            "FINAL REMINDER: Follow the REQUIRED OUTPUT ORDER exactly. First produce the numbered 'ধাপে ধাপে সমাধান' section, then যাচাইকরণ, and only after that print 'চূড়ান্ত উত্তর:' followed by the <final> tag on its own line.\n\n"
            "Begin your solution now (শুরু করুন):\n"
        )
        return prompt

    def _call_chat(self, messages, temperature=0.0, max_tokens=None):
        """
        Send ``messages`` to the model and return the raw client response.

        Sampling settings are passed through the ``options`` mapping
        (``temperature`` and ``num_predict``). Passing them as direct keyword
        arguments is rejected by the client with ``TypeError``, which previously
        meant every request silently fell back to a call without any settings.

        Args:
            messages: Chat messages as a list of ``{"role", "content"}`` dicts.
            temperature: Sampling temperature.
            max_tokens: Token cap for this call; ``None`` uses ``self.max_tokens``.

        If the client does not accept ``options`` at all, a warning is printed and
        the request is retried without settings (best effort). Other exceptions
        propagate to the caller.
        """
        limit = self.max_tokens if max_tokens is None else max_tokens
        options = {"temperature": float(temperature)}
        if limit is not None:
            options["num_predict"] = int(limit)

        try:
            return self.client.chat(model=self.model_name, messages=messages, options=options)
        except TypeError as e:
            print(f"Warning: chat client rejected generation options ({e}); retrying without them.")
            return self.client.chat(model=self.model_name, messages=messages)

    def _extract_content_from_resp(self, resp):
        """
        Return the assistant text contained in a chat response, stripped.

        Handles dict responses with a ``message`` or ``choices`` entry and objects
        exposing ``.message.content``; anything else is converted with ``str()``.
        """
        content = ""
        if isinstance(resp, dict):
            if 'message' in resp:
                m = resp['message']
                if isinstance(m, dict) and 'content' in m:
                    content = m['content']
                else:
                    content = str(m)
            elif 'choices' in resp and resp['choices']:
                choice = resp['choices'][0]
                if isinstance(choice, dict) and 'message' in choice and isinstance(choice['message'], dict):
                    content = choice['message'].get('content', '')
                else:
                    content = str(choice)
            else:
                content = str(resp)
        else:
            try:
                content = resp.message.content
            except Exception:
                content = str(resp)
        if isinstance(content, bytes):
            content = content.decode('utf-8', errors='ignore')
        return (content or "").strip()

    def _generate_once(self, prompt_text: str, deterministic: bool = False) -> str:
        """
        Run one single-turn request and return the assistant text.

        With ``deterministic=True`` a header is prepended that asks for nothing
        but a ``<final>...</final>`` tag. The temperature is 0.0 in both modes.
        """
        final_prompt = prompt_text
        if deterministic:
            deterministic_header = (
                "DETERMINISTIC MODE: Respond deterministically in BANGLA LANGUAGE ONLY. Output ONLY the concise final answer inside a single <final>...</final> tag. "
                "Do NOT include any other text or explanation. The content inside <final> must be the concise final answer in Bangla.\n\n"
            )
            final_prompt = deterministic_header + prompt_text

        messages = [{"role": "user", "content": final_prompt}]
        resp = self._call_chat(messages, temperature=0.0, max_tokens=self.max_tokens)
        return self._extract_content_from_resp(resp)

    def solve_problem(self, question, answer_type="symbolic", two_pass=True):
        """
        Solve ``question`` and return the generation together with the extracted answer.

        Args:
            question: Problem statement.
            answer_type: Canonical answer type used to pick the format instructions.
            two_pass: When True and no answer could be extracted from the first
                generation, ask the model once more for the final answer only.

        Returns:
            Dict with the keys ``thinking_content`` (always ""), ``generated_answer``,
            ``final_tag_output`` (second-pass output, "" if not run) and
            ``extracted_final_answer``.
        """
        prompt = self._create_prompt(question, answer_type)
        generated_answer = self._generate_once(prompt, deterministic=False)

        final_tag_output = ""
        extracted_final_answer = SimplifiedAnswerExtractor.extract_final_answer_simple(generated_answer)

        if two_pass and not extracted_final_answer:
            prompt2 = (
                "Below is the previously generated full solution (including reasoning).\n\n"
                "Now output ONLY the concise final answer and nothing else, enclosed in a single machine-readable tag <final>...</final>. "
                "Do not include any other text or explanation. The content inside <final> should be the concise final answer in Bangla. "
                "YOU MUST WRITE THE CONTENT INSIDE <final> IN BANGLA LANGUAGE ONLY.\n\n"
                "PREVIOUS SOLUTION:\n\n"
                f"{generated_answer}\n\n"
                "OUTPUT EXAMPLE:\n"
                "<final>উত্তর: $\\sqrt{10}$ একটি অমূলদ সংখ্যা।</final>\n"
            )
            try:
                final_tag_output = self._generate_once(prompt2, deterministic=True)
                extracted_final_answer = SimplifiedAnswerExtractor.extract_final_answer_simple(final_tag_output)
            except Exception as e:
                # Best effort: a failed second pass must not discard the first generation.
                print(f"Deterministic second pass failed: {e}")
                final_tag_output = final_tag_output or ""

        return {
            "thinking_content": "",
            "generated_answer": generated_answer,
            "final_tag_output": final_tag_output,
            "extracted_final_answer": extracted_final_answer
        }


# -------------------------
# Dataset Processor (uses the CoT solver and SimplifiedAnswerExtractor)
# -------------------------
class DatasetProcessor:
    """
    Run a solver over a slice of a JSONL dataset and persist the results.

    For every problem the solver output is stored together with the extracted
    final answer(s). Problems without an extractable answer are additionally
    dumped as individual JSON files into ``failed_folder``.
    """

    # The annotation is quoted so the class does not depend on definition order.
    def __init__(self, solver: "OllamaBanglaCoTMathSolver", failed_folder=None):
        self.solver = solver
        self.extractor = SimplifiedAnswerExtractor()
        self.failed_folder = failed_folder or "failed_extractions"
        os.makedirs(self.failed_folder, exist_ok=True)

    def process_dataset(self, dataset_path, output_base_path, start_idx=0, end_idx=None,
                        folder_name=None, create_timestamped_folder=True, two_pass=True):
        """
        Process problems ``start_idx`` .. ``end_idx - 1`` of the dataset.

        Args:
            dataset_path: Path of the JSONL dataset.
            output_base_path: Directory below which the output folder is created.
            start_idx: Index of the first problem to process.
            end_idx: Index one past the last problem; ``None`` or a value beyond the
                dataset means "up to the end". The value is clamped to the dataset
                size so folder names, file names and the summary describe the range
                that was really processed.
            folder_name: Optional output folder name (default derived from the range).
            create_timestamped_folder: Append a timestamp to the folder name.
            two_pass: Forwarded to the solver.

        Returns:
            Tuple ``(results, output_folder)``. A problem that raises is recorded
            as an error entry instead of aborting the run.
        """
        dataset = self._load_dataset(dataset_path)
        if end_idx is None or end_idx > len(dataset):
            end_idx = len(dataset)

        output_folder = self._create_output_folder(output_base_path, folder_name, start_idx, end_idx, create_timestamped_folder)
        results = []

        print(f"Processing problems {start_idx} to {end_idx-1} ({max(0, end_idx - start_idx)} total)")
        print(f"Output will be saved in: {output_folder}")

        for idx in tqdm(range(start_idx, end_idx)):
            problem = dataset[idx]
            try:
                entry = self._process_single_problem(idx, problem, two_pass=two_pass)
            except Exception as e:
                print(f"Error processing problem {idx+1}: {str(e)}")
                entry = self._create_error_entry(idx, problem, str(e))
            else:
                self._print_progress(idx, entry)
            results.append(entry)

            # Checkpoint every 10 entries, error entries included. Kept outside the
            # try above so a failed write cannot add a second entry for this problem.
            if len(results) % 10 == 0:
                try:
                    self._save_intermediate_results(results, output_folder, len(results))
                except Exception as e:
                    print(f"Warning: could not save intermediate results: {e}")

        self._save_final_results(results, output_folder, start_idx, end_idx)
        self._create_summary_file(results, output_folder, dataset_path, start_idx, end_idx)
        return results, output_folder

    def _create_output_folder(self, base_path, folder_name, start_idx, end_idx, add_timestamp):
        """Create (if needed) and return the output folder below ``base_path``."""
        if folder_name is None:
            folder_name = f"results_{start_idx}_to_{end_idx-1}"
        if add_timestamp:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            folder_name = f"{folder_name}_{timestamp}"
        output_folder = os.path.join(base_path, folder_name)
        os.makedirs(output_folder, exist_ok=True)
        return output_folder

    def _load_dataset(self, dataset_path):
        """Read a UTF-8 JSONL file and return its records as a list; blank lines are skipped."""
        with open(dataset_path, 'r', encoding='utf-8') as f:
            return [json.loads(line) for line in f if line.strip()]

    def _process_single_problem(self, idx, problem, two_pass=True):
        """
        Solve one dataset record and return its result entry.

        The answer type is normalized first, the solver is run, and the final
        answer is extracted (all ``<final>`` tags of the generation, then of the
        second-pass output, then line/pattern fallbacks). When nothing can be
        extracted, a diagnostic JSON file is written to ``self.failed_folder``.
        """
        language = problem.get("Language", "")
        chapter_num = problem.get("Chapter Number", "")
        example_num = problem.get("Example Number", "")
        question = problem.get("Question", "")
        exact_answer = problem.get("Exact Answer", "")
        raw_answer_type = problem.get("Answer Type", "") or ""

        # JSON values may be numbers or null; the classifier works on text.
        answer_text = "" if exact_answer is None else str(exact_answer)
        canonical_type = normalize_answer_type(
            str(raw_answer_type), question_text=str(question or ""), exact_answer=answer_text
        )

        # A reference answer with digits and no LaTeX structure is treated as numerical.
        if re.search(r'[0-9০-৯]', answer_text) and not re.search(r'\\frac|\\sqrt|\\boxed', answer_text):
            canonical_type = "numerical"

        print(f"\nProcessing Problem {idx+1}: Chapter {chapter_num}, Example {example_num}")
        print(f"Raw Answer Type: '{raw_answer_type}'  --> canonical: '{canonical_type}'")

        solution_result = self.solver.solve_problem(question, answer_type=canonical_type, two_pass=two_pass)
        generated_answer = solution_result.get('generated_answer', '')
        thinking_content = solution_result.get('thinking_content', '')
        final_tag_output = solution_result.get('final_tag_output', '')

        # Extraction logic: prefer multiple <final>s, then single, then fallback
        all_finals = SimplifiedAnswerExtractor.extract_all_final_answers(generated_answer)
        extracted_final_answer = ""
        extracted_final_answers = []

        if not all_finals and final_tag_output:
            all_finals = SimplifiedAnswerExtractor.extract_all_final_answers(final_tag_output)

        if not all_finals:
            single = SimplifiedAnswerExtractor.extract_final_answer_simple(generated_answer)
            if not single:
                combined = "\n".join([thinking_content or "", generated_answer or "", final_tag_output or ""])
                single = SimplifiedAnswerExtractor.extract_final_answer_simple(combined)
            if single:
                extracted_final_answer = single
                extracted_final_answers = [single]
        else:
            extracted_final_answers = all_finals
            if len(all_finals) == 1:
                extracted_final_answer = all_finals[0]
            else:
                # Several tagged answers are stored as a JSON list in the single-answer field.
                try:
                    extracted_final_answer = json.dumps(all_finals, ensure_ascii=False)
                except Exception:
                    extracted_final_answer = " ||| ".join(all_finals)

        if not extracted_final_answer:
            fname = f"failed_{idx}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            fpath = os.path.join(self.failed_folder, fname)
            with open(fpath, 'w', encoding='utf-8') as f:
                json.dump({
                    "index": idx,
                    "question": question,
                    "generated_answer": generated_answer,
                    "thinking_content": thinking_content,
                    "final_tag_output": final_tag_output,
                    "exact_answer": exact_answer,
                    "canonical_type": canonical_type,
                    "extracted_final_answer": extracted_final_answer,
                    "extracted_final_answers": extracted_final_answers
                }, f, ensure_ascii=False, indent=2)
            print(f"Saved failed extraction example to {fpath}")

        return {
            "problem_index": idx,
            "language": language,
            "chapter_number": chapter_num,
            "example_number": example_num,
            "question": question,
            "generated_answer": generated_answer,
            "thinking_content": thinking_content,
            "final_tag_output": final_tag_output,
            "extracted_final_answer": extracted_final_answer,
            "extracted_final_answers": extracted_final_answers,
            "exact_answer": exact_answer,
            "raw_answer_type": raw_answer_type,
            "canonical_answer_type": canonical_type,
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _create_error_entry(self, idx, problem, error_msg):
        """Build the result entry for a problem whose processing raised ``error_msg``."""
        return {
            "problem_index": idx,
            "language": problem.get("Language", ""),
            "chapter_number": problem.get("Chapter Number", ""),
            "example_number": problem.get("Example Number", ""),
            "question": problem.get("Question", ""),
            # The 'ERROR:' prefix is what the summary uses to count failed problems.
            "generated_answer": f"ERROR: {error_msg}",
            "thinking_content": "",
            "final_tag_output": "",
            "extracted_final_answer": "",
            "extracted_final_answers": [],
            "exact_answer": problem.get("Exact Answer", ""),
            "raw_answer_type": problem.get("Answer Type", ""),
            "canonical_answer_type": "",
            "evaluation_method": problem.get("Evaluation Method", "")
        }

    def _print_progress(self, idx, result_entry):
        """Print a short report for one processed problem."""
        print(f"Generated answer length: {len(result_entry['generated_answer']) if result_entry['generated_answer'] else 0}")
        print(f"Extracted final answer: '{result_entry['extracted_final_answer']}'")
        print(f"Extracted final answers (list): {result_entry.get('extracted_final_answers', [])}")
        print(f"Expected answer: '{result_entry['exact_answer']}'")

    def _save_intermediate_results(self, results, output_folder, count):
        """Write all results collected so far to ``intermediate_results_<count>.json``."""
        temp_filename = f'intermediate_results_{count}.json'
        temp_output_path = os.path.join(output_folder, temp_filename)
        with open(temp_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"Saved intermediate results to {temp_output_path}")

    def _save_final_results(self, results, output_folder, start_idx, end_idx):
        """Write the complete result list and return the path of the written file."""
        final_filename = f'final_results_{start_idx}_to_{end_idx-1}.json'
        final_output_path = os.path.join(output_folder, final_filename)
        with open(final_output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"\nProcessing complete. Results saved to {final_output_path}")
        print(f"Total problems processed: {len(results)}")
        return final_output_path

    def _create_summary_file(self, results, output_folder, dataset_path, start_idx, end_idx):
        """
        Write ``processing_summary.json`` with run metadata and aggregate statistics.

        The distinct-value lists (chapters, answer types) are sorted by their string
        form so the file is reproducible between runs.
        """
        total = len(results)
        successful_extractions = len([r for r in results if r.get('extracted_final_answer', '').strip()])
        failed_problems = len([r for r in results if r['generated_answer'].startswith('ERROR:')])

        def _distinct(values):
            # key=str keeps mixed value types (e.g. int and str chapter ids) sortable
            return sorted(set(values), key=str)

        summary_data = {
            "processing_info": {
                "dataset_path": dataset_path,
                "start_index": start_idx,
                "end_index": end_idx - 1,
                "total_processed": total,
                "processing_timestamp": datetime.now().isoformat(),
                "output_folder": output_folder
            },
            "statistics": {
                "successful_problems": total - failed_problems,
                "failed_problems": failed_problems,
                "successful_extractions": successful_extractions,
                "extraction_success_rate": f"{(successful_extractions/total*100):.1f}%" if results else "0%",
                "average_answer_length": sum(len(r['generated_answer']) for r in results) / total if results else 0,
                "chapters_processed": _distinct(r['chapter_number'] for r in results if r['chapter_number']),
                "raw_answer_types": _distinct(r['raw_answer_type'] for r in results if r.get('raw_answer_type')),
                "canonical_answer_types": _distinct(r['canonical_answer_type'] for r in results if r.get('canonical_answer_type'))
            }
        }
        summary_path = os.path.join(output_folder, 'processing_summary.json')
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary_data, f, ensure_ascii=False, indent=2)
        print(f"Processing summary saved to {summary_path}")
        print(f"Answer extraction success rate: {summary_data['statistics']['extraction_success_rate']}")


# -------------------------
# Main (example usage)
# -------------------------
def main():
    """Solve a fixed slice of the JSONL dataset with mathstral:7b and print the output folder."""
    # NOTE: both paths are environment-specific; point them at your own dataset and output directory.
    dataset_path = "/kaggle/input/English_Final_Corpus.jsonl"
    output_base_path = "/kaggle/working/"
    failed_dir = os.path.join(output_base_path, "failed_extractions")

    processor = DatasetProcessor(
        OllamaBanglaCoTMathSolver(model_name="mathstral:7b"),
        failed_folder=failed_dir,
    )

    # Only problems 1210..1444 are processed; widen the range to cover more of the dataset.
    _results, out_folder = processor.process_dataset(
        dataset_path,
        output_base_path,
        start_idx=1210,
        end_idx=1445,
        two_pass=True,
    )
    print("Done. Results saved to:", out_folder)


# Run main() only when this module is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()