In [2]:
import os

import pandas as pd
from datasets import load_dataset
import cohere
from dotenv import load_dotenv



  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Pull variables from a local .env file into the process environment, then
# read the Cohere API key from there so it never appears in the notebook.
load_dotenv()
key = os.environ.get("COHERE_API_KEY")
co = cohere.Client(api_key=key)

# Smoke test: send a trivial chat message and print the raw response object
# to confirm that the key and the network connection both work.
print(co.chat(message="Hello, world!"))



In [4]:
# Download the 859594-record (859494 train, 100 test) NuminaMath-CoT dataset from Hugging Face.
# This dataset contains math problems with solutions formulated in a chain-of-thought (CoT) fashion.
# Sources range from Chinese high-school math to IMO problems.
# The "cn_k12" Chinese high-school subset contains the 276591 records we'll use.
# NOTE(review): this cell downloads unconditionally, while the next cell loads the
# same dataset only when its local CSV cache is missing — this download may be
# redundant; confirm nothing else relies on `dataset` before removing it.
dataset_name = "AI-MO/NuminaMath-CoT"
dataset = load_dataset(dataset_name)


In [5]:
# Load the cn_k12 subset of NuminaMath-CoT into `df`, using a local CSV as a cache
# so that re-running the notebook does not re-download the full dataset.
file_path = "datasets/cn_k12_math_problems.csv"
if os.path.exists(file_path):
    print("Loading cn_k12 from local file...")
    # NOTE(review): values that were not plain scalars when saved (presumably the
    # "messages" column) come back from CSV as strings — confirm before parsing them.
    df = pd.read_csv(file_path)
else:
    print("Downloading cn_k12 from Hugging Face, processing, and saving locally...")
    # Download the 859594-record (859494 train, 100 test) NuminaMath-CoT dataset from HF.
    # This dataset contains math problems with solutions formulated in a CoT fashion.
    # Sources range from Chinese high-school math to IMO problems.
    # The "cn_k12" Chinese high-school subset contains the 276591 records we'll use.
    dataset_name = "AI-MO/NuminaMath-CoT"
    dataset = load_dataset(dataset_name)

    # Combine the train/test splits, with a new column indicating the original split.
    train_df = pd.DataFrame(dataset["train"])
    test_df = pd.DataFrame(dataset["test"])
    train_df["set"] = "train"
    test_df["set"] = "test"
    df = pd.concat([train_df, test_df])

    # Keep only the cn_k12 rows. Resetting the index drops the duplicated/gappy
    # labels left over from concat + filtering, so a fresh download yields the same
    # 0..n-1 index that reading the cached CSV does.
    df = df[df["source"] == "cn_k12"].reset_index(drop=True)

    # Now that we've downloaded and formatted the dataset, save it locally.
    # Create the cache directory first: to_csv raises if the parent folder is missing.
    os.makedirs(os.path.dirname(file_path) or ".", exist_ok=True)
    df.to_csv(file_path, index=False)
print("Loaded cn_k12 dataset")

Loading cn_k12 from local file...
Loaded cn_k12 dataset


The DataFrame contains the following columns:
- **source**: The source of the problem
- **problem**: The problem statement
- **solution**: The solution to the problem
- **messages**: problem/solution columns reformatted in a JSON chat-assistant format
- **set**: The original split of the problem (train or test)


In [6]:
# Remove the column-width limit so long problem/solution text is shown in
# full instead of being cut off with "..." when a DataFrame is displayed.
pd.options.display.max_colwidth = None

In [9]:

# Keep only the problem statement and its worked solution, then preview
# the first record as this cell's displayed output.
questions = df.loc[:, ["problem", "solution"]]
questions.head(1)

Unnamed: 0,problem,solution
0,"Given the functions $f(x) = \log_a(1+x)$ and $g(x) = \log_a(1-x)$, where $a>0$ and $a \neq 1$.\n1. Find the domain of the function $f(x) - g(x)$.\n2. Determine the parity of the function $f(x) - g(x)$.\n3. Find the range of $x$ for which $f(x) - g(x) > 0$.","1. Since $f(x) = \log_a(1+x)$ and $g(x) = \log_a(1-x)$, where $a>0$ and $a \neq 1$, we have $f(x) - g(x) = \log_a(1+x) - \log_a(1-x)$, where $a>0$ and $a \neq 1$. To ensure the function $f(x) - g(x)$ is meaningful, we need\n$$\n\begin{cases}\n1+x > 0 \\\n1-x > 0\n\end{cases}\n$$\nSolving this, we get $-1 < x < 1$, which means the domain of the function $f(x) - g(x)$ is $(-1, 1)$.\n\n2. Since the domain of $f(x) - g(x)$ is $(-1, 1)$, which is symmetric about the origin, let $F(x) = f(x) - g(x)$. Then $F(-x) = f(-x) - g(-x) = \log_a(1-x) - \log_a(1+x) = -[\log_a(1+x) - \log_a(1-x)] = -F(x)$. Therefore, $f(x) - g(x)$ is an odd function.\n\n3. From $f(x) - g(x) > 0$, we get $f(x) > g(x)$, which means $\log_a(1+x) > \log_a(1-x)$. If $a > 1$, then\n$$\n\begin{cases}\n-1 < x < 1 \\\n1+x > 1-x\n\end{cases}\n$$\nwhich simplifies to\n$$\n\begin{cases}\n-1 < x < 1 \\\nx > 0\n\end{cases}\n$$\nSolving this, we get $0 < x < 1$. If $0 < a < 1$, then\n$$\n\begin{cases}\n-1 < x < 1 \\\n1+x < 1-x\n\end{cases}\n$$\nwhich simplifies to\n$$\n\begin{cases}\n-1 < x < 1 \\\nx < 0\n\end{cases}\n$$\nSolving this, we get $-1 < x < 0$. In summary, if $a > 1$, the solution set for the inequality is $(0, 1)$, and if $0 < a < 1$, the solution set for the inequality is $(-1, 0)$.\n\nTherefore, the final answers are:\n1. The domain of $f(x) - g(x)$ is $\boxed{(-1, 1)}$.\n2. The function $f(x) - g(x)$ is an $\boxed{\text{odd function}}$.\n3. The range of $x$ for which $f(x) - g(x) > 0$ is $\boxed{(0, 1)}$ if $a > 1$, and $\boxed{(-1, 0)}$ if $0 < a < 1$."
