In [8]:
import asyncio
import logging
import os
import uuid
from pathlib import Path
from pprint import pprint
from typing import Any, Optional

# import click
import pandas as pd
import yaml

from chainscope.api_utils.deepseek_utils import (
    DeepSeekBatchProcessor,
    DeepSeekRateLimiter,
)
from chainscope.api_utils.open_router_utils import ORBatchProcessor, ORRateLimiter
from chainscope.typing import (
    CotResponses,
    DefaultSamplingParams,
    MathDatasetParams,
    MathQsDataset,
    MathQuestion,
    MathResponse,
)




In [9]:
def load_putnam_results_as_df(yaml_path: Path) -> pd.DataFrame:
    """Load Putnam results from YAML into a pandas DataFrame.

    Args:
        yaml_path: Location of the YAML file to read. The value is handed
            straight to ``open()``, so any path it accepts will work.

    Returns:
        A ``pd.DataFrame`` constructed directly from the parsed YAML document.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 rather than with the platform's locale
    # default, so the file is read identically on every machine.
    with open(yaml_path, encoding="utf-8") as f:
        # safe_load only builds plain Python objects (no arbitrary constructors).
        data = yaml.safe_load(f)
    return pd.DataFrame(data)


def create_putnam_dataset(df: pd.DataFrame) -> MathQsDataset:
    """Create a MathQsDataset from a Putnam DataFrame."""
    # Sort problems by year and type
    df = df.sort_values(
        by="problem_name",
        key=lambda x: pd.Series(
            [
                # Extract year and problem type (e.g. 'a1', 'b2')
                (int(name.split("_")[1]), name.split("_")[2])
                for name in x
            ]
        ).map(
            lambda t: (
                {
                    "a1": 0,
                    "b1": 1,
                    "a2": 2,
                    "b2": 3,
                    "a3": 4,
                    "b3": 5,
                    "a4": 6,
                    "b4": 7,
                    "a5": 8,
                    "b5": 9,
                    "a6": 10,
                    "b6": 11,
                }[t[1]],
                -t[0],
            )
        ),
    )

    return MathQsDataset(
        questions=[
            MathQuestion(
                name=row["problem_name"],
                problem=row["informal_statement"],
                solution=row["informal_solution"],
            )
            for _, row in df.iterrows()
        ],
        params=MathDatasetParams(
            description="Putnam Competition Problems",
            id="filtered_putnambench",
            pre_id=None,
        ),
    )


In [10]:
# The file name is relative, so it is resolved against the current working
# directory. Wrap it in Path to match the loader's declared parameter type.
data_df = load_putnam_results_as_df(
    Path("minimal_fork_of_putnambench_with_clear_answers.yaml")
)

In [11]:
# Show the loaded frame as the cell's output to eyeball its columns and rows.
data_df

Unnamed: 0,problem_name,informal_statement,informal_solution
0,putnam_1962_a2,Find every real-valued function $f$ whose doma...,Show that \[ f(x) = \frac{a}{(1 - cx)^2} \begi...
1,putnam_1962_a5,Evaluate in closed form \[ \sum_{k=1}^n {n \ch...,Show that the expression equals $n(n+1)2^{n-2}$.
2,putnam_1963_a3,"Find an integral formula (i.e., a function $z$...",Show that the solution is $$y(x) = \int_{1}^{x...
3,putnam_1963_b1,For what integer $a$ does $x^2-x+a$ divide $x^...,Show that $a=2$.
4,putnam_1963_b3,Find every twice-differentiable real-valued fu...,Show that the solution is the sets of function...
...,...,...,...
210,putnam_2023_b1,"Consider an $m$-by-$n$ grid of unit squares, i...",Show that the number of such configurations is...
211,putnam_2023_b3,"A sequence $y_1, y_2, \ldots, y_k$ of real num...",Show that the expected value is \frac{2n + 2}{3}.
212,putnam_2023_b4,For a nonnegative integer $n$ and a strictly i...,Show that the minimum value of $T$ is $29$.
213,putnam_2023_b5,Determine which positive integers $n$ have the...,Show that the desired property holds if and on...


In [12]:
# Convert the raw frame into a MathQsDataset (create_putnam_dataset also sorts the problems).
putnam_ques = create_putnam_dataset(data_df)

In [16]:
# Pretty-print the dataset to check the question order and field contents.
# `pprint` is imported with the other imports in the notebook's first cell.
pprint(putnam_ques)

MathQsDataset(questions=[MathQuestion(name='putnam_2023_a1',
                                      problem='For a positive integer $n$, let '
                                              '$f_n(x) = \\cos(x) \\cos(2x) '
                                              '\\cos(3x) \\cdots \\cos(nx)$. '
                                              'Find the smallest $n$ such that '
                                              "$|f_n''(0)| > 2023$.",
                                      solution='Show that the solution is $n = '
                                               '18$.'),
                         MathQuestion(name='putnam_2022_a1',
                                      problem='Determine all ordered pairs of '
                                              'real numbers $(a,b)$ such that '
                                              'the line $y = ax+b$ intersects '
                                              'the curve $y = \\ln(1+x^2)$ in '
                                