# AIMO Prize 3 - OOP Baseline Submission (v5)

This notebook demonstrates how to use the Object-Oriented Baseline with **Few-Shot Prompting**, **Majority Voting**, and **Parquet Submission**.

## Setup Instructions
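1. **Add Utility Script**: Upload `src/kaggle_baseline.py` as a Dataset (e.g., named `aimo-pp3-source`). The same module is also inlined in a code cell below, and the notebook does not import it from the Dataset.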
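2. **Add Model**: Search for and add the model `Qwen/Qwen2.5-Math-7B-Instruct`. The notebook looks for it at `/kaggle/input/qwen2-5-math-7b-instruct` (see `CompetitionConfig.model_path`).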
3. **Attach Competition Data**: Ensure the AIMO 3 competition data is attached.
4. **Run**: Execute the cells below.

In [None]:
import os
import glob
import sys

# Print the working directory, the mounted inputs and the path-like environment
# variables so a missing dataset or model can be spotted from the cell output.
print('--- DIAGNOSTIC INFO ---')
print(f'Current working directory: {os.getcwd()}')
print('Contents of /kaggle/input/:')
try:
    for item in sorted(os.listdir('/kaggle/input')):
        full_path = os.path.join('/kaggle/input', item)

        # Plain files are listed by name only.
        if not os.path.isdir(full_path):
            print(f'  {item}')
            continue

        print(f'  {item}/')

        # Only directories whose name contains one of these keywords are expanded.
        lowered = item.lower()
        if not any(x in lowered for x in ['competition', 'prize', 'module', 'qwen']):
            continue

        try:
            for sub_item in sorted(os.listdir(full_path)):
                print(f'    - {sub_item}')
        except Exception as e:
            print(f'    (Error listing {item}: {e})')
except Exception as e:
    # Reached, for example, when /kaggle/input does not exist.
    print(f'Error listing /kaggle/input/: {e}')

print('\nEnvironment variables:')
for key, value in sorted(os.environ.items()):
    # Skip everything that is neither Kaggle-specific nor path/home related.
    if not (key.startswith('KAGGLE') or 'PATH' in key or 'HOME' in key):
        continue
    print(f'{key}={value}')

print('--- END DIAGNOSTIC INFO ---')


In [None]:
# INLINED KAGGLE BASELINE MODULE
import os
import sys
import re
import io
import contextlib
import signal
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any, List
from collections import Counter

# ==========================================
# 1. Configuration & Environment Handling
# ==========================================
class CompetitionConfig:
    """Settings for the solver, selected by whether ``/kaggle/input`` exists.

    Attributes set by ``__init__``:
        is_kaggle: True when the ``/kaggle/input`` directory is present.
        base_dir: Root directory for input data.
        model_path: Model location (a mounted path on Kaggle, a model name otherwise).
        timeout_seconds, n_repetitions, temperature, max_tokens,
        gpu_memory_utilization: Sampling and runtime hyperparameters.
    """

    def __init__(self):
        self.is_kaggle = os.path.exists("/kaggle/input")

        # Kaggle: example path of the Qwen model attached to the notebook.
        # Elsewhere: a lightweight model intended for local testing.
        self.base_dir, self.model_path = (
            ("/kaggle/input", "/kaggle/input/qwen2-5-math-7b-instruct")
            if self.is_kaggle
            else ("./data", "Qwen/Qwen2.5-Math-1.5B-Instruct")
        )

        # Hyperparameters (chosen with reference to top-tier kernels).
        self.timeout_seconds = 10
        # Number of independent solution attempts per problem.
        self.n_repetitions = 16
        # A fairly high temperature, so the attempts follow different solution paths.
        self.temperature = 0.7
        self.max_tokens = 2048
        self.gpu_memory_utilization = 0.95

# ==========================================
# 2. Code Execution Environment (Stateful)
# ==========================================
class CodeExecutor:
    """Run Python snippets with captured stdout, a best-effort timeout and optional state.

    The timeout is implemented with ``SIGALRM``. It is armed only when that signal
    exists and the handler can be installed; ``signal.signal`` raises ``ValueError``
    outside the main thread, in which case the snippet runs without a time limit.
    Because the alarm is process-wide, this class is NOT safe for concurrent use.

    NOTE(review): snippets are run with ``exec`` and full interpreter privileges;
    nothing here sandboxes them.
    """

    # Convenience imports offered to every snippet. Each one is attempted
    # separately so a library that is not installed does not block the others.
    _PRELUDE = ("import math", "import sympy", "import numpy as np")

    def __init__(self, timeout: int = 5):
        """Create an executor.

        Args:
            timeout: Seconds a snippet may run before it is interrupted.
        """
        self.timeout = timeout
        # Namespace the snippets run in; kept between calls when state is not reset.
        self.globals_dict = {}

    def execute(self, code: str, reset_state: bool = True) -> str:
        """Execute ``code`` and return what it printed.

        Args:
            code: Python source to run.
            reset_state: When True (default) the namespace is cleared first; when
                False, names defined by earlier calls stay visible.

        Returns:
            The stripped stdout of the snippet, or a string starting with
            ``"Error: "`` when the snippet raised or timed out. A snippet that
            ends through ``SystemExit`` is treated as finished normally.
        """
        if reset_state:
            self.globals_dict = {}

        output_buffer = io.StringIO()

        def timeout_handler(signum, frame):
            raise TimeoutError("Execution timed out")

        timer_armed = False
        previous_handler = None
        if hasattr(signal, 'SIGALRM'):
            try:
                previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
                signal.alarm(self.timeout)
                timer_armed = True
            except ValueError:
                # Not in the main thread: run without a timeout.
                pass

        try:
            with contextlib.redirect_stdout(output_buffer):
                for statement in self._PRELUDE:
                    try:
                        exec(statement, self.globals_dict)
                    except ImportError:
                        # Optional library missing; snippets that need it will
                        # fail with a NameError of their own.
                        continue
                exec(code, self.globals_dict)
        except TimeoutError:
            return "Error: Execution timed out."
        except SystemExit:
            # exit()/sys.exit() inside the snippet must not terminate the caller.
            pass
        except Exception as e:
            return f"Error: {type(e).__name__}: {str(e)}"
        finally:
            if timer_armed:
                signal.alarm(0)
                # Put back whatever handler was installed before this call.
                # A handler not installed from Python is reported as None,
                # which signal.signal() does not accept.
                signal.signal(
                    signal.SIGALRM,
                    signal.SIG_DFL if previous_handler is None else previous_handler,
                )

        return output_buffer.getvalue().strip()

# ==========================================
# 3. Model Interface (vLLM Batched)
# ==========================================
class LLMInterface(ABC):
    """Abstract base class for the text-generation backends used by the solver."""

    @abstractmethod
    def generate_batch(self, prompts: List[str]) -> List[str]:
        """Generate text for a batch of prompts; concrete backends must override this."""
        ...

class VLLMEngine(LLMInterface):
    """Batch text generation through vLLM, with a random-answer mock as fallback.

    If importing or constructing the engine raises ``ImportError``, the instance
    switches to mock mode (``is_mock`` is True) and ``generate_batch`` returns
    random boxed integers instead of model output.
    """

    def __init__(self, config: CompetitionConfig):
        """Load the model at ``config.model_path`` and prepare sampling parameters."""
        try:
            from vllm import LLM, SamplingParams
            print(f"Loading vLLM model from {config.model_path}...")

            engine_options = dict(
                model=config.model_path,
                trust_remote_code=True,
                tensor_parallel_size=1,
                # Configured so that VRAM is used as fully as possible.
                gpu_memory_utilization=config.gpu_memory_utilization,
                # Context length tuned for this workload.
                max_model_len=4096,
                # For compatibility with the Kaggle environment.
                enforce_eager=True,
            )
            self.model = LLM(**engine_options)

            sampling_options = dict(
                temperature=config.temperature,
                max_tokens=config.max_tokens,
                top_p=0.9,
                stop=["```\n", "User:", "<|im_end|>"],
            )
            self.sampling_params = SamplingParams(**sampling_options)

            self.is_mock = False
            print("vLLM loaded successfully.")
        except ImportError:
            print("⚠️ vLLM not installed. Falling back to MockLLM.")
            self.is_mock = True

    def generate_batch(self, prompts: List[str]) -> List[str]:
        """Return one generated text per prompt, in the order of ``prompts``."""
        if not self.is_mock:
            results = self.model.generate(prompts, self.sampling_params, use_tqdm=False)
            # Only the first completion of each request is used.
            return [result.outputs[0].text for result in results]

        # Mock mode: one random boxed integer per prompt.
        import random
        mock_replies = []
        for _ in prompts:
            mock_replies.append(f"The answer is \\boxed{{{random.randint(1, 100)}}}")
        return mock_replies

# ==========================================
# 4. Main Solver Logic
# ==========================================
class AIMSolver:
    """Answer a problem by sampling several LLM responses and majority-voting.

    The solver formats the problem with a Qwen chat template, asks the LLM for
    ``config.n_repetitions`` completions of the same prompt, extracts an integer
    answer from each completion and returns the most frequent one.
    """

    # Extracted answers are reduced modulo this value before voting.
    _ANSWER_MODULUS = 100000
    # An integer (digits and thousands separators) inside \boxed{...}.
    _BOXED_RE = re.compile(r'\\boxed\{([0-9,]+)\}')
    # Fallback for responses that state the answer in words instead of boxing it.
    _FINAL_ANSWER_RE = re.compile(r'final answer is\s*([0-9,]+)', re.IGNORECASE)

    def __init__(self, config: "CompetitionConfig", llm: "LLMInterface"):
        """Store the configuration and the generation backend.

        Args:
            config: Provides ``n_repetitions``, the number of samples per problem.
            llm: Backend whose ``generate_batch`` produces the responses.
        """
        self.config = config
        self.llm = llm

    def format_prompt(self, problem: str) -> str:
        """Wrap ``problem`` in the Qwen Math Instruct chat template."""
        system = "You are an expert mathematician. Solve the problem step-by-step. If you write Python code, enclose it in ```python\n...\n```. Always put your final answer inside \\boxed{}."
        return f"<|im_start|>system\n{system}<|im_end|>\n<|im_start|>user\n{problem}<|im_end|>\n<|im_start|>assistant\n"

    def extract_answer(self, text: str) -> int:
        """Pull the integer answer out of a model response.

        Boxed answers take priority over the "final answer is N" phrasing. Within
        each form the LAST occurrence is preferred, because intermediate results
        can be boxed before the final one is stated.

        Args:
            text: Raw model response.

        Returns:
            The answer modulo 100000, or -1 when no integer answer is found.
        """
        for pattern in (self._BOXED_RE, self._FINAL_ANSWER_RE):
            for raw in reversed(pattern.findall(text)):
                try:
                    return int(raw.replace(',', '')) % self._ANSWER_MODULUS
                except ValueError:
                    # e.g. a capture made only of commas; try an earlier occurrence.
                    continue
        return -1

    def solve(self, problem_text: str) -> int:
        """Solve one problem with batched sampling and majority voting.

        1. Build ``n_repetitions`` copies of the prompt in one go.
        2. Collect all responses from a single batched generation call.
        3. Pick the answer that occurs most often.

        Args:
            problem_text: The problem statement.

        Returns:
            The most frequent extracted answer, or 0 when no response yields one.
        """
        # Identical prompts: the variety comes from sampling, not from the prompt.
        prompts = [self.format_prompt(problem_text)] * self.config.n_repetitions

        responses = self.llm.generate_batch(prompts)

        extracted = (self.extract_answer(response) for response in responses)
        valid_answers = [answer for answer in extracted if answer >= 0]

        if not valid_answers:
            print("No valid answers found. Returning 0.")
            return 0

        # Majority vote; on a tie Counter keeps the answer that appeared first.
        counts = Counter(valid_answers)
        most_common, _ = counts.most_common(1)[0]
        print(f"Votes: {dict(counts)} -> Selected: {most_common}")
        return most_common



In [None]:
# 2. Configure Environment & Model
config = CompetitionConfig()
config.n_repetitions = 16


class MockLLM(LLMInterface):
    """Offline stand-in for the real engine: returns a random boxed integer per prompt.

    Used when the notebook is not running on Kaggle or the model path is missing,
    so the rest of the pipeline can be exercised without loading a model.
    """

    def generate_batch(self, prompts: List[str]) -> List[str]:
        """Return one ``The answer is \\boxed{N}`` string for every prompt."""
        import random
        return [f"The answer is \\boxed{{{random.randint(1, 100)}}}" for _ in prompts]


print(f"Environment: {'Kaggle' if config.is_kaggle else 'Local'}")

# The real engine is only loaded when the model directory is actually mounted.
if config.is_kaggle and os.path.exists(config.model_path):
    print(f"Loading real VLLM Engine from {config.model_path}...")
    llm = VLLMEngine(config)
else:
    print("Using MockLLM for local testing or if model path not found.")
    llm = MockLLM()

solver = AIMSolver(config, llm)

## Submission Loop (Parquet)

In [None]:
import sys
import os
import glob
import pandas as pd

# --------------------------------------------------------------------------------
# 3. AIMO 3 API Setup (Inference Server Pattern)
# --------------------------------------------------------------------------------
# Stays None unless one of the import attempts below succeeds.
aimo_server_mod = None

# Look for the inference-server module under the Kaggle inputs first and fall
# back to the local ./data tree when nothing is found there.
api_files = (
    glob.glob('/kaggle/input/**/aimo_3_inference_server.py', recursive=True)
    or glob.glob('data/**/aimo_3_inference_server.py', recursive=True)
)

if api_files:
    api_path = os.path.dirname(api_files[0])

    # If the module sits inside a `kaggle_evaluation` directory, the parent of
    # that directory is put on sys.path; otherwise the module's own directory is.
    import_root = api_path
    if os.path.basename(api_path) == 'kaggle_evaluation':
        parent_dir = os.path.dirname(api_path)
        import_root = parent_dir
    if import_root not in sys.path:
        sys.path.append(import_root)

    try:
        import aimo_3_inference_server as aimo_server_mod
    except ImportError:
        # Second attempt: import it as a submodule of the kaggle_evaluation package.
        try:
            from kaggle_evaluation import aimo_3_inference_server as aimo_server_mod
        except ImportError as e:
            print(f'❌ Failed to import API: {e}')
        else:
            print('✅ Imported aimo_3_inference_server from package.')
    else:
        print('✅ Imported aimo_3_inference_server.')

# Use the first test.csv found under the inputs, or the default competition path.
found_test_csv = glob.glob('/kaggle/input/**/test.csv', recursive=True)
if found_test_csv:
    actual_test_csv = found_test_csv[0]
else:
    actual_test_csv = '/kaggle/input/ai-mathematical-olympiad-progress-prize-3/test.csv'

def predict(*args, **kwargs):
    """Inference callback: solve the problem in the first argument.

    The first positional argument may be an object with a ``columns`` attribute
    that contains ``'problem'`` (its first row is used) or a plain string. For
    any other input a placeholder problem is solved instead. Keyword arguments
    are accepted and ignored.

    Returns:
        A one-row ``pandas.DataFrame`` with an ``answer`` column. Any exception
        raised while reading the input or solving is printed and answered
        with 0, so this function does not raise.
    """
    try:
        input_data = args[0] if args else None
        problem_text = 'What is 0+0?'
        if hasattr(input_data, 'columns') and 'problem' in input_data.columns:
            try:
                problem_text = str(input_data['problem'][0])
            except Exception:
                # Label 0 may be absent from the index; fall back to the first
                # row by position. `Exception` rather than a bare `except`, so
                # KeyboardInterrupt and SystemExit are not swallowed here.
                problem_text = str(input_data.iloc[0]['problem'])
        elif isinstance(input_data, str):
            problem_text = input_data

        answer = solver.solve(problem_text)
        return pd.DataFrame({'answer': [answer]})
    except Exception as e:
        print(f'Predict Error: {e}')
        return pd.DataFrame({'answer': [0]})

if not aimo_server_mod:
    # The inference-server module could not be imported: write a placeholder submission.
    pd.DataFrame({'id': ['test'], 'answer': [0]}).to_parquet('submission.parquet', index=False)
else:
    # Best-effort monkey-patch of the gateway: each data batch is wrapped in a
    # one-element tuple, and the id/answer column names are set. A failure here
    # is reported but does not stop the run.
    try:
        import aimo_3_gateway
        gateway_cls = aimo_3_gateway.AIMO3Gateway
        old_gen = gateway_cls.generate_data_batches

        def patched_gen(self):
            for data_batch, row_ids in old_gen(self):
                yield (data_batch,), row_ids

        gateway_cls.generate_data_batches = patched_gen
        gateway_cls.target_column_name = 'answer'
        gateway_cls.row_id_column_name = 'id'
        print('✅ Applied Gateway monkey-patches.')
    except Exception as e:
        print(f'⚠️ Failed to patch Gateway: {e}')

    server = aimo_server_mod.AIMO3InferenceServer(predict)
    if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
        # Interactive run: drive the server with the local gateway and the test CSV.
        try:
            server.run_local_gateway(data_paths=(actual_test_csv,))
            print('✅ Local Gateway finished.')
        except Exception as e:
            print(f'❌ Local Gateway failed: {e}')
            # Still leave a placeholder submission file behind.
            pd.DataFrame({'id': ['test'], 'answer': [0]}).to_parquet('submission.parquet', index=False)
    else:
        # Competition rerun: hand control to the inference server.
        server.serve()
