In [21]:
import base64
import anthropic
import httpx
import json

# Sample multiple-choice question that comes with a diagram; it is the input
# passed to process_image_question in this cell.
question_data = {
    "question_number": 9,
    "question_text": (
        "The diagram shows the front view and top view of a solid made with some cubes.\n\n"
        "Which of the following could be the solid?\n\n"
    ),
    # Options are numbered from 1 in the order they appear on the paper.
    "options": [
        {"option_number": number, "option_text": text}
        for number, text in enumerate(
            ["A and B only", "A and C only", "B and C only", "C only"],
            start=1,
        )
    ],
    "image_path": "https://cdn.mathpix.com/snip/images/Let3_kVSE0FQ0U_oHOPZKLQupMjUETH_pCmdejkihho.original.fullsize.png",
}


def _image_media_type(image_path):
    """Return the MIME type implied by the extension found in *image_path*."""
    if ".png" in image_path:
        return "image/png"
    if ".jpg" in image_path or ".jpeg" in image_path:
        return "image/jpeg"
    raise ValueError("Invalid image path")


def process_image_question(question_data):
    """
    Ask Claude to solve a multiple-choice question that comes with an image.

    The image at question_data["image_path"] is downloaded, base64-encoded and
    sent together with the JSON-serialised question. The model is forced to
    call the "math_problem_solver" tool so the answer comes back structured.

    Args:
        question_data (dict): The question record. It must contain an
            "image_path" URL with a .png, .jpg or .jpeg extension; the whole
            record is serialised with json.dumps and sent as the prompt text.

    Returns:
        The ``input`` of the first content block of the response, i.e. the
        arguments of the forced tool call (the schema requires the
        "chain_of_thought" and "correct_answer" keys).

    Raises:
        ValueError: If the image path has no supported extension.
        httpx.HTTPStatusError: If the image download returns an error status.
    """
    image_url = question_data["image_path"]
    media_type = _image_media_type(image_url)

    # Download before talking to the model and fail fast on a bad status:
    # otherwise the body of a 404/500 error page would be base64-encoded and
    # submitted as if it were the image.
    image_response = httpx.get(image_url)
    image_response.raise_for_status()
    image_b64 = base64.b64encode(image_response.content).decode("utf-8")

    message = anthropic.Anthropic().messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=8192,
        temperature=0,
        system="You are a helpful assistant that can solve math problems. Answer the question with the correct answers. Before answering, explain your reasoning step-by-step in the chain_of_thought key. Use the correct_answer key to return the option number of the correct answer.",
        tools=[
            {
                "name": "math_problem_solver",
                "description": "Solve a math problem using well-structured JSON.",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "chain_of_thought": {
                            "type": "string",
                            "description": "The chain of thought for solving the math problem.",
                        },
                        "correct_answer": {
                            "type": "string",
                            "description": "The correct answer to the math problem. (e.g. [1], [2], [3], [4])",
                        },
                    },
                    "required": [
                        "chain_of_thought",
                        "correct_answer",
                    ],
                },
            }
        ],
        # Forcing this tool makes the reply a tool call instead of free text.
        tool_choice={"type": "tool", "name": "math_problem_solver"},
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": media_type,
                            "data": image_b64,
                        },
                    },
                    {
                        "type": "text",
                        "text": json.dumps(question_data),
                    },
                ],
            }
        ],
    )

    return message.content[0].input


# Solve the sample image question and print whatever process_image_question
# returns (the arguments of the forced tool call).
print(process_image_question(question_data))

{'chain_of_thought': "Let's analyze this step-by-step:\n\n1. First, let's look at the given front view and top view:\n   - The front view shows a 2x2 square of cubes.\n   - The top view shows a 2x2 square with one cube missing in the bottom right corner.\n\n2. Now, let's compare this with the given solids:\n\n   Solid A:\n   - Front view matches the given front view (2x2 square).\n   - Top view shows a 2x2 square with one cube missing in the bottom right corner, which matches the given top view.\n\n   Solid B:\n   - Front view shows a 2x2 square with one cube missing in the top right corner, which doesn't match the given front view.\n   - Top view doesn't match the given top view.\n\n   Solid C:\n   - Front view matches the given front view (2x2 square).\n   - Top view shows a 2x2 square with one cube missing in the bottom right corner, which matches the given top view.\n\n3. Conclusion:\n   - Solid A matches both the front and top views.\n   - Solid B doesn't match either view.\n   - 

In [22]:
import anthropic
import httpx
import json

# Sample text-only multiple-choice question; it is the input passed to
# process_text_question in this cell.
question_data = {
    "question_number": 8,
    "question_text": "Which of the following is the same as 2 kg 30 g ?",
    # Options are numbered from 1 in the order they appear on the paper.
    "options": [
        {"option_number": number, "option_text": text}
        for number, text in enumerate(
            ["230 g", "2030 g", "2300 g", "20030 g"],
            start=1,
        )
    ],
    "latex": None,
}


# Anthropic-optimised way: force a tool call so the answer comes back as structured JSON
def process_text_question(question_data):
    """
    Ask Claude to solve a text-only multiple-choice question.

    The question record is serialised with json.dumps and sent as the only
    user message. The model is forced to call the "math_problem_solver" tool,
    whose schema requires "chain_of_thought" and "correct_answer".

    Args:
        question_data (dict): JSON-serialisable question record.

    Returns:
        The ``input`` of the first content block of the response, i.e. the
        arguments of the forced tool call.
    """
    client = anthropic.Anthropic()

    system_prompt = "You are a helpful assistant that can solve math problems. Answer the question with the correct answers. Before answering, explain your reasoning step-by-step in the chain_of_thought key. Use the correct_answer key to return the option number of the correct answer."

    # Schema of the structured answer the model has to fill in.
    answer_schema = {
        "type": "object",
        "properties": {
            "chain_of_thought": {
                "type": "string",
                "description": "The chain of thought for solving the math problem.",
            },
            "correct_answer": {
                "type": "string",
                "description": "The correct answer to the math problem. (e.g. [1], [2], [3], [4])",
            },
        },
        "required": ["chain_of_thought", "correct_answer"],
    }
    solver_tool = {
        "name": "math_problem_solver",
        "description": "Solve a math problem using well-structured JSON.",
        "input_schema": answer_schema,
    }

    user_turn = {
        "role": "user",
        "content": [{"type": "text", "text": json.dumps(question_data)}],
    }

    response = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=8192,
        temperature=0,
        system=system_prompt,
        tools=[solver_tool],
        # Forcing this tool makes the reply a tool call instead of free text.
        tool_choice={"type": "tool", "name": "math_problem_solver"},
        messages=[user_turn],
    )

    first_block = response.content[0]
    return first_block.input


# Solve the sample text question and print the returned tool-call arguments.
output = process_text_question(question_data)
print(output)
# NOTE: the chosen option is stored under "correct_answer" (singular),
# e.g. output["correct_answer"].

{'chain_of_thought': "To solve this problem, let's convert the given weight to grams:\n\n1. We know that 1 kg = 1000 g\n2. We have 2 kg, so: 2 kg = 2 * 1000 g = 2000 g\n3. We also have an additional 30 g\n4. So, the total weight in grams is: 2000 g + 30 g = 2030 g\n\nNow, let's compare this result with the given options:\n1. 230 g - This is much less than our result\n2. 2030 g - This matches our calculation exactly\n3. 2300 g - This is close but larger than our result\n4. 20030 g - This is much larger than our result\n\nTherefore, the correct answer is option 2: 2030 g", 'correct_answer': '[2]'}


In [28]:
import json


def process_questions(file_path: str) -> None:
    """
    Print the number and text of every question stored in a JSONL file.

    Each remaining line is parsed as one JSON object that must provide the
    "question_number" and "question_text" keys. Blank lines and lines that
    start with "qna-proc" (a file-path header line) are skipped.

    I/O and parse errors are reported on stdout instead of being raised:
    a missing file prints a "File not found" message, and the first line that
    is not valid JSON prints an "Invalid JSON data" message and stops the loop.

    Args:
        file_path (str): The path to the JSONL file.

    Raises:
        KeyError: If a record lacks "question_number" or "question_text".
    """
    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding.
        with open(file_path, "r", encoding="utf-8") as file:
            for line in file:
                record = line.strip()

                # Skip empty lines or lines that start with the file path
                if not record or record.startswith("qna-proc"):
                    continue

                # Parse each line as a JSON object
                question_data = json.loads(record)

                question_number = question_data["question_number"]
                question_text = question_data["question_text"]

                print(f"Question {question_number}: {question_text}")

                # TODO: dispatch to the solvers once they are wired in, e.g.
                #   if question_data.get("image_path") is None:
                #       res = process_text_question(question_data)
                #   else:
                #       res = process_image_question(question_data)
                #   print("AI_Answer", res["correct_answer"])
                #   print("chain_of_thought", res["chain_of_thought"])
                # (.get is used because text-only records may have no
                # "image_path" key at all.)
                print()

    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON data in the file. {str(e)}")


if __name__ == "__main__":
    import os

    # The default is the original author's local checkout. Set the
    # QUESTIONS_JSONL environment variable (e.g. `%env QUESTIONS_JSONL=...`)
    # to point at your own questions.jsonl instead of editing this cell.
    file_path = os.environ.get(
        "QUESTIONS_JSONL",
        "/Users/ob1/projects/sgllm/qna-proc/2021/p1-bookB/questions.jsonl",
    )
    process_questions(file_path)

Question 16: Find the value of $565+39$

Question 17: Find the value of $10.12-8.99$

Question 18: Find the value of $7 \div 8$. Express your answer as a decimal.

Question 19: Squares $P, Q$ and $R$ are each made up of 8 identical triangles.
Name the squares with a line of symmetry.

Question 20: Aishah paid $\$ 15$ for 20 stickers. What was the cost of each sticker in cents?

Question 21: (a) Find the value of $\frac{2}{3}+\frac{1}{5}$
(b) Express 1.14 as a mixed number in the simplest form.

Question 22: (a) Name the smallest angle.
(b) Name the two angles that are greater than $90^{\circ}$.

Question 23: Find the area of the shaded triangle.

Question 24: The table shows the temperatures of cities A, B, C and D.
| City | Lowest Temperature $\left({ }^{\circ} \mathrm{C}\right)$ | Highest Temperature $\left({ }^{\circ} \mathrm{C}\right)$ |
| :---: | :---: | :---: |
| A | 10 | 17 |
| B | 16 | 31 |
| C | 19 | 33 |
| D | 5 | 26 |
(a) When Faizal was at some of these cities, the temperat