# AIMO Progress Prize 3 - H100 Inference Strategy

This notebook implements a **Tool-Integrated Reasoning (TIR)** strategy.

### Strategy Overview
1.  **Model**: DeepSeek-R1-Distill-Qwen-32B (Reasoning Model)
2.  **Inference**: vLLM for high-throughput generation.
3.  **Methodology**: 
    *   **Generate**: Create $N=16$ candidate solutions (Python code blocks).
    *   **Execute**: Run the Python code to verify results.
    *   **Vote**: Select the most common valid integer answer.

In [None]:
import os
import sys
import subprocess
import re
import math
from typing import List, Optional, Any
from collections import Counter

import pandas as pd
import polars as pl
import kaggle_evaluation.aimo_3_inference_server

# Attempt to import vLLM. When the package is missing the notebook still
# loads, but VLLM_AVAILABLE is False and TIRSolver skips model loading.
try:
    from vllm import LLM, SamplingParams
    VLLM_AVAILABLE = True
except ImportError:
    # Only a missing package is tolerated; any other import-time failure
    # propagates.
    print("vLLM not installed. Running in dummy mode for testing.")
    VLLM_AVAILABLE = False

In [None]:
# --- CONFIGURATION ---
# Candidate model locations: the dataset attached on Kaggle, and the model
# identifier used for local testing when that directory is absent.
kKAGGLE_MODEL_PATH = "/kaggle/input/deepseek-r1-distill-qwen-32b/transformers/default/1"
LOCAL_MODEL_PATH = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"

# Prefer the Kaggle path whenever it exists on disk.
MODEL_PATH = (
    kKAGGLE_MODEL_PATH if os.path.exists(kKAGGLE_MODEL_PATH) else LOCAL_MODEL_PATH
)

# Inference Parameters
N_SAMPLES = 16  # Number of solutions sampled per problem
MAX_TOKENS = 2048  # Generation length cap per sample
TEMPERATURE = 0.6  # Sampling temperature

In [None]:
# --- PYTHON WRAPPER (SANDBOX) ---
import multiprocessing
import io
import contextlib

def _exec_wrapper(code, queue):
    """Execute ``code`` with stdout captured and report the outcome on ``queue``.

    Used as the target of the child process started by
    ``execute_python_code``.

    Args:
        code: Python source text to run.
        queue: Object exposing ``put``. It receives ``(True, captured_stdout)``
            when the code finishes normally, or ``(False, error_message)``
            when the code raises an ``Exception``.

    Note:
        This is NOT a security boundary: the full builtins are exposed to the
        executed code. Isolation comes only from running in a separate
        process. Exceptions that do not derive from ``Exception`` (for
        example ``SystemExit``) are not caught, so nothing is put on the
        queue in that case.
    """
    buffer = io.StringIO()
    try:
        # Redirect stdout to capture print() outputs
        with contextlib.redirect_stdout(buffer):
            exec_globals = {
                "math": math,
                "__builtins__": __builtins__,
                # Without an explicit __name__, the lookup falls through to
                # the builtins module ('builtins'), so the very common
                # `if __name__ == "__main__":` guard would silently skip the
                # script's entry point and produce no output.
                "__name__": "__main__",
            }
            exec(code, exec_globals)
        queue.put((True, buffer.getvalue()))
    except Exception as e:
        queue.put((False, str(e)))

def execute_python_code(code: str, timeout: int = 5) -> Optional[str]:
    """Run ``code`` in a separate process and return its stripped stdout.

    The result is read from the queue *before* the child is joined. A child
    that has put data on a ``multiprocessing.Queue`` cannot exit until that
    data has been flushed to the pipe, so joining first would misreport
    large outputs as timeouts.

    Args:
        code: Python source text to execute.
        timeout: Maximum number of seconds to wait for a result. ``None``
            waits indefinitely.

    Returns:
        The captured stdout with surrounding whitespace removed when the
        code ran without raising; ``None`` on error, timeout, or when the
        child exited without reporting a result.
    """
    import queue as queue_module
    import time

    result_queue = multiprocessing.Queue()
    worker = multiprocessing.Process(target=_exec_wrapper, args=(code, result_queue))
    worker.start()

    deadline = None if timeout is None else time.monotonic() + timeout
    payload = None
    try:
        while payload is None:
            # Poll in short slices so a child that died without reporting
            # is noticed quickly instead of after the full timeout.
            wait = 0.05
            if deadline is not None:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break  # Timeout
                wait = min(wait, remaining)
            try:
                payload = result_queue.get(timeout=wait)
            except queue_module.Empty:
                if not worker.is_alive():
                    # The child is gone; give an in-flight message one last
                    # chance to arrive before giving up.
                    try:
                        payload = result_queue.get(timeout=0.1)
                    except queue_module.Empty:
                        pass
                    break
    finally:
        if payload is not None:
            # The result is already in hand; the child should exit promptly.
            worker.join(1)
        if worker.is_alive():
            worker.terminate()
            worker.join()
        result_queue.close()

    if payload is None:
        return None
    success, result = payload
    return result.strip() if success else None

In [None]:
# --- RESULT PARSING ---
def extract_python_code(text: str) -> Optional[str]:
    """Return the body of the first ```python fenced block found in ``text``.

    When no fenced block exists but the text contains ``print(``, the whole
    text is returned unchanged; otherwise the result is ``None``.
    """
    fenced = re.search(r'```python\s*(.*?)\s*```', text, re.DOTALL)
    if fenced is not None:
        return fenced.group(1)
    # No fenced block: treat the raw text as code only if it prints something.
    return text if "print(" in text else None

def extract_final_answer(text: str) -> Optional[int]:
    """Parse the final integer answer out of a model response.

    Resolution order:
      1. The LAST ``\\boxed{<digits>}`` in the text. Earlier boxed values may
         be intermediate results; the prompt asks for the final answer at
         the end. Whitespace inside the braces is tolerated.
      2. Otherwise, the last standalone run of digits anywhere in the text.

    Args:
        text: Raw model output.

    Returns:
        The parsed integer, or ``None`` when the text contains no digits.
    """
    # 1. Prefer the last \boxed{123}
    boxed_values = re.findall(r'\\boxed\{\s*(\d+)\s*\}', text)
    if boxed_values:
        return int(boxed_values[-1])

    # 2. Fall back to the last explicit number in the text
    numbers = re.findall(r'\b\d+\b', text)
    if numbers:
        return int(numbers[-1])

    return None

In [None]:
# --- INFERENCE ENGINE ---
class TIRSolver:
    """Tool-Integrated Reasoning solver: sample, execute code, then vote."""

    def __init__(self, model_path: str):
        """Load the vLLM engine from ``model_path`` when vLLM is available.

        Without vLLM, ``self.llm`` is ``None`` and ``solve`` always returns 0.
        """
        if not VLLM_AVAILABLE:
            self.llm = None
            return

        print(f"Loading vLLM model from {model_path}...")
        engine_options = dict(
            model=model_path,
            tensor_parallel_size=1,  # 1 GPU
            dtype="auto",
            trust_remote_code=True,
            enforce_eager=True,  # Often needed for Kaggle
        )
        self.llm = LLM(**engine_options)
        self.sampling_params = SamplingParams(
            n=N_SAMPLES,
            temperature=TEMPERATURE,
            top_p=0.95,
            max_tokens=MAX_TOKENS,
        )

    def generate_prompt(self, problem: str) -> str:
        """Build the Tool-Integrated Reasoning prompt for ``problem``."""
        # Written line by line so the trailing spaces on the first two
        # lines stay visible.
        return (
            "User: Solve the following math problem. \n"
            "To help you, you can write and execute Python code. \n"
            "Surround your Python code with ```python and ```.\n"
            "Even if you write code, you MUST end your response with the final answer inside \\boxed{...}.\n"
            "The answer must be a non-negative integer (0-99999).\n"
            "\n"
            f"Problem: {problem}\n"
            "\n"
            "Assistant:"
        )

    def _answer_from_completion(self, text: str) -> Optional[int]:
        """Derive one candidate answer from a single sampled completion.

        Executed code output wins when it is a pure digit string; otherwise
        the answer is parsed from the completion text itself.
        """
        snippet = extract_python_code(text)
        if snippet:
            printed = execute_python_code(snippet)
            if printed and printed.isdigit():
                return int(printed)
        return extract_final_answer(text)

    def solve(self, problem_text: str) -> int:
        """Return the most common in-range answer across sampled completions.

        Returns 0 in dummy mode or when no completion yields an answer within
        0..99999.
        """
        if not self.llm:
            return 0  # Dummy mode

        # Sample the candidate solutions for the single prompt.
        generations = self.llm.generate(
            [self.generate_prompt(problem_text)],
            self.sampling_params,
            use_tqdm=False,
        )

        # Tally each in-range candidate as it is produced.
        votes = Counter()
        for completion in generations[0].outputs:
            guess = self._answer_from_completion(completion.text)
            if guess is not None and 0 <= guess <= 99999:
                votes[guess] += 1

        if not votes:
            return 0  # Default fallback

        # Simple majority vote.
        return votes.most_common(1)[0][0]

In [None]:
# Build the solver a single time at module level so every predict() call
# reuses it.
# Note: In the competition, this runs during container startup
solver = TIRSolver(MODEL_PATH)

def predict(id_: pl.Series, problem: pl.Series) -> pl.DataFrame | pd.DataFrame:
    """Kaggle competition entry point: answer one problem.

    Reads element 0 of ``id_`` and ``problem``, solves the problem with the
    module-level ``solver``, and returns a frame with ``id`` and ``answer``.
    """
    row_id, statement = id_.item(0), problem.item(0)
    answer = solver.solve(statement)
    return pl.DataFrame({'id': row_id, 'answer': answer})

In [None]:
# Start the Inference Server, wrapping the predict() entry point
inference_server = kaggle_evaluation.aimo_3_inference_server.AIMO3InferenceServer(predict)

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Local test gateway: runs when the rerun variable is unset or empty
    local_inputs = ('/kaggle/input/ai-mathematical-olympiad-progress-prize-3/test.csv',)
    inference_server.run_local_gateway(local_inputs)
else:
    # Competition rerun: serve requests
    inference_server.serve()