In [3]:
import pandas as pd

In [4]:
# Read both CSV splits (train first, then test) from the working directory.
# NOTE(review): the rendered frame shows an "Unnamed: 0" column, presumably an
# index that was written into the CSVs -- confirm before relying on it.
df1, df2 = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

# Stack the two splits into a single frame; ignore_index=True discards the
# per-file row labels and renumbers the rows 0..N-1.
df = pd.concat([df1, df2], ignore_index=True)
df

Unnamed: 0,problem,level,type,solution,answer
0,The United States Postal Service charges an ex...,Level 3,Prealgebra,We calculate the desired ratio for each envelo...,3
1,How many integers between 1000 and 2000 have a...,Level 4,Prealgebra,"A number with 15, 20 and 25 as factors must be...",3
2,"Given that $n$ is an integer and $0 < 4n <30$,...",Level 2,Prealgebra,"Dividing by $4$, we have $0<n<7\frac{1}{2}$. T...",28
3,How many integers between $100$ and $150$ have...,Level 4,Prealgebra,We will break up the problem into cases based ...,18
4,Regular pentagon $ABCDE$ and regular hexagon $...,Level 4,Prealgebra,We know that the sum of the degree measures of...,132
...,...,...,...,...,...
12493,Compute the smallest positive integer $x$ grea...,Level 4,Intermediate Algebra,Let $q$ and $r$ be the remainder when $x$ is d...,1700
12494,"For positive real numbers $a,$ $b,$ $c,$ and $...",Level 5,Intermediate Algebra,"Let $S$ denote the given sum. First, we apply...",9
12495,"Let $a,$ $b,$ and $c$ be positive real numbers...",Level 5,Intermediate Algebra,"By AM-GM,\n\[(a - b) + b + \frac{c^3}{(a - b)b...",12
12496,The complex numbers $a$ and $b$ satisfy\n\[a \...,Level 3,Intermediate Algebra,"Taking the conjugate of both sides, we get\n\[...",-1 - 5i


In [5]:
import random

# Draw one row label at random. df was concatenated with ignore_index=True,
# so its labels are 0..len(df)-1 and the drawn value is a valid label.
idx = random.randrange(len(df))

print(idx)

# Print the sampled problem followed by its worked solution, each under its
# own banner line.
sections = (
    ("Question: ----------------------------------------------------", "problem"),
    ("Solution: ----------------------------------------------------", "solution"),
)
for header, column in sections:
    print(header)
    print(df.loc[idx, column])

4753
Question: ----------------------------------------------------
What is the sum of all integer solutions to $|n| < |n-3| < 9$?
Solution: ----------------------------------------------------
First let's solve $|n-3|<9$.  The absolute value of a quantity is less than 9 if and only if the quantity is between $-9$ and 9, so solve \[
\begin{array}{r@{\;\;<\;\;}c@{\;\;<\;\;}lc}
-9 & n-3 & 9 &\quad \implies \\
-9+3 & n & 9+3 &\quad \implies \\
-6 & n & 12.
\end{array}
\] Now consider $|n|<|n-3|$.  The distance from $n$ to 0 is $|n|$, and the distance from $n$ to 3 is $|n-3|$.  Therefore, this inequality is satisfied by the numbers that are closer to 0 than to 3.  These are the numbers less than $1.5$.  So the integer solutions of $|n|<|n-3|<9$ are $-5$, $-4$, $-3$, $-2$, $-1$, 0, and 1, and their sum is $-5-4-3-2=\boxed{-14}$.


In [113]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

# One Document per dataset row: the problem statement is the text that gets
# embedded, and the matching solution travels along as metadata so it can be
# read back from a search hit.
documents = [
    Document(page_content=problem, metadata={'solution': solution})
    for problem, solution in zip(df['problem'], df['solution'])
]

In [127]:
from pathlib import Path

# Embedding model used for both the stored problems and incoming queries.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Create or load Chroma DB.
#
# Chroma.from_documents adds `documents` to whatever collection already lives
# in persist_directory. Re-running this cell against an existing ./math_db
# therefore stores every problem again, and a search can then return the same
# problem several times. To keep the cell idempotent, build the index only
# when no persisted store is present; otherwise just reopen it.
#
# NOTE: a store that already contains duplicated entries is not cleaned up
# here -- delete the ./math_db directory and re-run this cell to rebuild it.
persist_path = Path("./math_db")
store_exists = persist_path.is_dir() and any(persist_path.iterdir())

if store_exists:
    # Reopen the persisted collection; nothing is re-embedded or re-inserted.
    vectordb = Chroma(
        persist_directory="./math_db",
        embedding_function=embedding_model,
        collection_metadata={"hnsw:space": "cosine"},
    )
else:
    # First run: embed all documents and write the collection to disk.
    vectordb = Chroma.from_documents(
        documents=documents,
        embedding=embedding_model,
        persist_directory="./math_db",
        collection_metadata={"hnsw:space": "cosine"},
    )
    vectordb.persist()

In [131]:
query = "What's the integral of x^2?"

# Fetch the two closest stored problems together with their relevance scores.
results = vectordb.similarity_search_with_relevance_scores(query, k=2)

# Each hit is a (Document, score) pair; print the matched problem, its stored
# solution and the score, followed by a blank line.
for match, relevance in results:
    print(
        f"Question: {match.page_content}",
        f"Answer: {match.metadata['solution']}",
        f"Score: {relevance}\n",
        sep="\n",
    )

Question: What is $\left(\frac{6}{7}\right)^2 \cdot \left(\frac{1}{2}\right)^2$?
Answer: Since $\left(\frac{a}{b}\right)^j \cdot \left(\frac{c}{d}\right)^j = \left(\frac{a \cdot c}{b \cdot d}\right)^{j}$, we know  $\left(\frac{6}{7}\right)^2 \cdot \left(\frac{1}{2}\right)^2 = \left(\frac{6 \cdot 1}{7 \cdot 2}\right)^2$.  Simplifying, we have $\left(\frac{3}{7}\right)^2 = \frac{3^2}{7^2}$, because $\left(\frac{a}{b}\right)^n = \frac{a^n}{b^n}$.  We know $3^2 = 9$ and $7^2 = 49$, so our answer is $\boxed{\frac{9}{49}}$.
Score: 0.20752115252566472

Question: What is $\left(\frac{6}{7}\right)^2 \cdot \left(\frac{1}{2}\right)^2$?
Answer: Since $\left(\frac{a}{b}\right)^j \cdot \left(\frac{c}{d}\right)^j = \left(\frac{a \cdot c}{b \cdot d}\right)^{j}$, we know  $\left(\frac{6}{7}\right)^2 \cdot \left(\frac{1}{2}\right)^2 = \left(\frac{6 \cdot 1}{7 \cdot 2}\right)^2$.  Simplifying, we have $\left(\frac{3}{7}\right)^2 = \frac{3^2}{7^2}$, because $\left(\frac{a}{b}\right)^n = \frac{a^n}{b^n}$. 

In [129]:
# Plain similarity search for the same query (Documents only, no scores).
# No k is passed, so the library's default number of hits is returned.
ans = vectordb.similarity_search(query)
ans

[Document(metadata={'solution': 'Since $\\left(\\frac{a}{b}\\right)^j \\cdot \\left(\\frac{c}{d}\\right)^j = \\left(\\frac{a \\cdot c}{b \\cdot d}\\right)^{j}$, we know  $\\left(\\frac{6}{7}\\right)^2 \\cdot \\left(\\frac{1}{2}\\right)^2 = \\left(\\frac{6 \\cdot 1}{7 \\cdot 2}\\right)^2$.  Simplifying, we have $\\left(\\frac{3}{7}\\right)^2 = \\frac{3^2}{7^2}$, because $\\left(\\frac{a}{b}\\right)^n = \\frac{a^n}{b^n}$.  We know $3^2 = 9$ and $7^2 = 49$, so our answer is $\\boxed{\\frac{9}{49}}$.'}, page_content='What is $\\left(\\frac{6}{7}\\right)^2 \\cdot \\left(\\frac{1}{2}\\right)^2$?'),
 Document(metadata={'solution': 'Since $\\left(\\frac{a}{b}\\right)^j \\cdot \\left(\\frac{c}{d}\\right)^j = \\left(\\frac{a \\cdot c}{b \\cdot d}\\right)^{j}$, we know  $\\left(\\frac{6}{7}\\right)^2 \\cdot \\left(\\frac{1}{2}\\right)^2 = \\left(\\frac{6 \\cdot 1}{7 \\cdot 2}\\right)^2$.  Simplifying, we have $\\left(\\frac{3}{7}\\right)^2 = \\frac{3^2}{7^2}$, because $\\left(\\frac{a}{b}\\right)

In [125]:
# Reopen the vector store that was previously persisted to ./math_db.
# The same embedding model name is used as when the store was built, so that
# query vectors are comparable with the stored ones.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Attach to the on-disk collection instead of rebuilding it from documents.
vectordb2 = Chroma(
    embedding_function=embedding_model,
    persist_directory="./math_db",  # directory the store was persisted to
    collection_metadata={"hnsw:space": "cosine"},
)