References

- https://www.kaggle.com/code/richolson/ai-math-olympiad-qwen2-5-72b for showing how to submit
- https://www.kaggle.com/code/abdullahmeda/load-72b-awq-model-using-vllm-on-l4-x4
- https://www.kaggle.com/code/huikang/qwen2-5-math-1-5b-instruct

In [1]:
import os
import gc
import time
import warnings

import pandas as pd
import polars as pl

import torch
import kaggle_evaluation.aimo_2_inference_server

pd.set_option('display.max_colwidth', None)

In [2]:
from vllm import LLM, SamplingParams

warnings.simplefilter('ignore')

os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

def clean_memory(deep=False):
    gc.collect()
    if deep:
        ctypes.CDLL("libc.so.6").malloc_trim(0)
    torch.cuda.empty_cache()


llm_model_pth = '/kaggle/input/qwen2.5/transformers/72b-instruct-awq/1'

llm = LLM(
    llm_model_pth,
    dtype="half",                # The data type for the model weights and activations
    max_num_seqs=16,             # Maximum number of sequences per iteration. Default is 256
    max_model_len=4096,          # Model context length
    trust_remote_code=True,      # Trust remote code (e.g., from HuggingFace) when downloading the model and tokenizer
    tensor_parallel_size=4,      # The number of GPUs to use for distributed execution with tensor parallelism
    gpu_memory_utilization=0.97, # The ratio (between 0 and 1) of GPU memory to reserve for the model
)

2024-10-29 09:39:26,419	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


INFO 10-29 09:39:31 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
INFO 10-29 09:39:31 config.py:904] Defaulting to use mp for distributed inference
INFO 10-29 09:39:31 llm_engine.py:223] Initializing an LLM engine (v0.6.1.post2) with config: model='/kaggle/input/qwen2.5/transformers/72b-instruct-awq/1', speculative_config=None, tokenizer='/kaggle/input/qwen2.5/transformers/72b-instruct-awq/1', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=4, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_confi

Loading safetensors checkpoint shards:   0% Completed | 0/11 [00:00<?, ?it/s]


[1;36m(VllmWorkerProcess pid=245)[0;0m INFO 10-29 09:44:20 model_runner.py:1008] Loading model weights took 9.7897 GB
INFO 10-29 09:44:20 model_runner.py:1008] Loading model weights took 9.7897 GB
[1;36m(VllmWorkerProcess pid=243)[0;0m INFO 10-29 09:44:20 model_runner.py:1008] Loading model weights took 9.7897 GB
[1;36m(VllmWorkerProcess pid=244)[0;0m INFO 10-29 09:44:20 model_runner.py:1008] Loading model weights took 9.7897 GB
INFO 10-29 09:44:27 distributed_gpu_executor.py:57] # GPU blocks: 9006, # CPU blocks: 3276
INFO 10-29 09:44:30 model_runner.py:1311] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 10-29 09:44:30 model_runner.py:1315] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `m

In [3]:
tokenizer = llm.get_tokenizer()

In [4]:
import re


def extract_python_code(text):
    pattern = r'```python\s*(.*?)\s*```'
    matches = re.findall(pattern, text, re.DOTALL)
    return "\n\n".join(matches)


def process_python_code(query):
    query = "import math\nimport numpy as np\nimport sympy as sp\n" + query
    current_rows = query.strip().split("\n")
    new_rows = []
    for row in current_rows:
        new_rows.append(row)
        if not row.startswith(" ") and "=" in row:
                variable_to_print = row.split("=")[0].strip()
                new_rows.append(f'print(f"{{{variable_to_print}=}}")')
    return "\n".join(new_rows)


def extract_boxed_text(text):
    pattern = r'oxed{(.*?)}'
    matches = re.findall(pattern, text)
    if not matches:
        return ""
    return matches[0]


from collections import Counter
import random
def select_answer(answers):
    counter = Counter()
    for answer in answers:
        try:
            if int(answer) == float(answer):
                counter[int(answer)] += 1 + random.random() / 1_000
        except:
            pass
    if not counter:
        return 210
    _, answer = sorted([(v,k) for k,v in counter.items()], reverse=True)[0]
    return answer%1000

In [5]:
import os
import tempfile
import subprocess

class PythonREPL:
    def __init__(self, timeout=5):
        self.timeout = timeout

    def __call__(self, query):
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_file_path = os.path.join(temp_dir, "tmp.py")
            with open(temp_file_path, "w", encoding="utf-8") as f:
                f.write(query)
            
            try:
                result = subprocess.run(
                    ["python3", temp_file_path],
                    capture_output=True,
                    check=False,
                    text=True,
                    timeout=self.timeout,
                )
            except subprocess.TimeoutExpired:
                return False, f"Execution timed out after {self.timeout} seconds."

            if result.returncode == 0:
                output = result.stdout.strip()
                return True, output
            else:
                error_msg = result.stderr.strip()
                # Process the error message to remove the temporary file path
                # This makes the error message cleaner and more user-friendly
                error_lines = error_msg.split("\n")
                cleaned_errors = []
                for line in error_lines:
                    if temp_file_path in line:
                        # Remove the path from the error line
                        line = line.replace(temp_file_path, "<temporary_file>")
                    cleaned_errors.append(line)
                cleaned_error_msg = "\n".join(cleaned_errors)
                return False, cleaned_error_msg

In [6]:
sampling_params = SamplingParams(
    temperature=1.0,              # randomness of the sampling
    min_p=0.01,
    skip_special_tokens=True,     # Whether to skip special tokens in the output.
    max_tokens=1800,
    stop=["```\n"],
    include_stop_str_in_output=True,
)

def batch_message_generate(list_of_messages) -> list[list[dict]]:

    list_of_texts = [
        tokenizer.apply_chat_template(
            conversation=messages,
            tokenize=False,
            add_generation_prompt=True
        )
        for messages in list_of_messages
    ]
    
    request_output = llm.generate(
        prompts=list_of_texts,
        sampling_params=sampling_params,
    )
    
    for messages, single_request_output in zip(list_of_messages, request_output):
        # print()
        # print(single_request_output.outputs[0].text)
        # print()
        messages.append({'role': 'assistant', 'content': single_request_output.outputs[0].text})

    return list_of_messages

In [7]:
def batch_message_filter(list_of_messages) -> tuple[list[list[dict]], list[str]]:
    extracted_answers = []
    list_of_messages_to_keep = []
    for messages in list_of_messages:
        answer = extract_boxed_text(messages[-1]['content'])
        if answer:
            extracted_answers.append(answer)
        else:
            list_of_messages_to_keep.append(messages)
    return list_of_messages_to_keep, extracted_answers

In [8]:
def batch_message_execute(list_of_messages) -> list[list[dict]]:
    for messages in list_of_messages:
        python_code = extract_python_code(messages[-1]['content'])
        python_code = process_python_code(python_code)
        # print('\n\n' + python_code + '\n\n')
        try:
            print('c', end='')
            is_successful, output = PythonREPL()(python_code)
            if is_successful:
                print('o', end='')
            else:
                print('e', end='')
        except Exception as e:
            print('f', end='')
            output = str(e)
        # print(output)
        messages.append({'role': 'user', 'content': output})
    print()
    return list_of_messages

In [9]:
import os

import pandas as pd
import polars as pl

import kaggle_evaluation.aimo_2_inference_server

In [10]:
def create_starter_messages(question, index):
    cycle_size = 4
    if False:
        pass
    elif index % cycle_size == 3:
        return [
            {"role": "user", "content": "Translate this problem into Python code.\n\n" + question + "\n\nStart by importing numpy and sympy. If you get a confident answer after running the sympy code, put your final answer within \boxed{}"}
        ]
    elif index % cycle_size == 2:
        return [
            {"role": "user", "content": "Translate the following problem into sympy.\n\n" + question + "\n\nStart by importing sympy. If you get a confident answer after running the sympy code, put your final answer within \boxed{}"}
        ]
    elif index % cycle_size == 1:
        # https://github.com/QwenLM/Qwen2.5-Math?tab=readme-ov-file#-hugging-face-transformers
        return [
            {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
            {"role": "user", "content": question}
        ]
    else:
        # https://github.com/QwenLM/Qwen2.5-Math?tab=readme-ov-file#-hugging-face-transformers
        return [
            {"role": "system", "content": "Please integrate natural language reasoning with Python programs to solve the problem above, and put your final answer within \\boxed{}."},
            {"role": "user", "content": question}
        ]

In [11]:
def predict_for_question(question: str) -> int:
    import os
    if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
        if question != "Triangle $ABC$ has side length $AB = 120$ and circumradius $R = 100$. Let $D$ be the foot of the perpendicular from $C$ to the line $AB$. What is the greatest possible length of segment $CD$?":
            return 210
    
    question += "\nIf the final answer is a number larger than 1 million, take modulo 1000."
    print(question)

    list_of_messages = [create_starter_messages(question, index) for index in range(16)]

    all_extracted_answers = []
    for _ in range(4):
        list_of_messages = batch_message_generate(list_of_messages)
        list_of_messages, extracted_answers = batch_message_filter(list_of_messages)
        all_extracted_answers.extend(extracted_answers)
        if not list_of_messages:
            break
        list_of_messages = batch_message_execute(list_of_messages)

    print(all_extracted_answers)
    answer = select_answer(all_extracted_answers)
    print(answer)

    print("\n\n")
    return answer

In [12]:
# Replace this function with your inference code.
# The function should return a single integer between 0 and 999, inclusive.
# Each prediction (except the very first) must be returned within 30 minutes of the question being provided.
def predict(id_: pl.DataFrame, question: pl.DataFrame) -> pl.DataFrame | pd.DataFrame:
    id_ = id_.item(0)
    print("------")
    print(id_)
    
    question = question.item(0)
    answer = predict_for_question(question)
    print(question)
    print("------\n\n\n")
    return pl.DataFrame({'id': id_, 'answer': answer})

In [13]:
# predict_for_question("Triangle $ABC$ has side length $AB = 120$ and circumradius $R = 100$. Let $D$ be the foot of the perpendicular from $C$ to the line $AB$. What is the greatest possible length of segment $CD$?")

In [14]:
pd.read_csv(
    '/kaggle/input/ai-mathematical-olympiad-progress-prize-2/reference.csv'
).drop('answer', axis=1).to_csv('reference.csv', index=False)

In [15]:
inference_server = kaggle_evaluation.aimo_2_inference_server.AIMO2InferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(
        (
#             '/kaggle/input/ai-mathematical-olympiad-progress-prize-2/test.csv',
            'reference.csv',
        )
    )

------
a1d40b
The Fibonacci numbers are defined as follows: $F_0 = 0$, $F_1 = 1$, and $F_{n+1} = F_n + F_{n-1}$ for $n \geq 1$. There are $N$ positive integers $n$ strictly less than $10^{101}$ such that $n^2 + (n+1)^2$ is a multiple of 5 but $F_{n-1}^2 + F_n^2$ is not. How many prime factors does $N$ have, counted with multiplicity?
------



------
1acac0
Triangle $ABC$ has side length $AB = 120$ and circumradius $R = 100$. Let $D$ be the foot of the perpendicular from $C$ to the line $AB$. What is the greatest possible length of segment $CD$?
If the final answer is a number larger than 1 million, take modulo 1000.


Processed prompts: 100%|██████████| 16/16 [01:33<00:00,  5.85s/it, est. speed input: 21.91 toks/s, output: 164.46 toks/s]


cecoce


Processed prompts: 100%|██████████| 3/3 [01:04<00:00, 21.54s/it, est. speed input: 55.55 toks/s, output: 30.71 toks/s]


ce


Processed prompts: 100%|██████████| 1/1 [00:14<00:00, 14.99s/it, est. speed input: 96.65 toks/s, output: 17.68 toks/s]


ce


Processed prompts: 100%|██████████| 1/1 [00:10<00:00, 10.62s/it, est. speed input: 165.79 toks/s, output: 15.64 toks/s]


co
['190', '180', '100', '120', '{ {final_answer', '36', '36', '{ {max_CD', 'CD', '90', '180', '36', '60', '60', '152']
36



Triangle $ABC$ has side length $AB = 120$ and circumradius $R = 100$. Let $D$ be the foot of the perpendicular from $C$ to the line $AB$. What is the greatest possible length of segment $CD$?
------



------
057f8a
Three airline companies operate flights from Dodola island. Each company has a different schedule of departures. The first company departs every 100 days, the second every 120 days and the third every 150 days. What is the greatest positive integer $d$ for which it is true that there will be $d$ consecutive days without a flight from Dodola island, regardless of the departure times of the various airlines?
------



------
1fce4b
Find the three-digit number $n$ such that writing any other three-digit number $10^{2024}$ times in a row and $10^{2024}+2$ times in a row results in two numbers divisible by $n$.
------



------
480182
Let $ABC$ be a trian