In [9]:
import base64
import anthropic
import httpx
import json

# ... existing code ...

# New JSON object
question_data = {"question_number": 9, "question_text": "The diagram shows the front view and top view of a solid made with some cubes.\n\nWhich of the following could be the solid?\n\n", "options": [{"option_number": 1, "option_text": "A and B only"}, {"option_number": 2, "option_text": "A and C only"}, {"option_number": 3, "option_text": "B and C only"}, {"option_number": 4, "option_text": "C only"}], "image_path": "https://cdn.mathpix.com/snip/images/Let3_kVSE0FQ0U_oHOPZKLQupMjUETH_pCmdejkihho.original.fullsize.png"}

def process_image_question(question_data):
    message = anthropic.Anthropic().messages.create(
        model="claude-3-5-sonnet-20240620",
    max_tokens=8192,
    temperature=0,
    tools=[
        {
            "name": "solve_math_problem",
            "description": "Solve a math problem using well-structured JSON.",
            "input_schema": {
                "type": "object",
                "properties": {
                    "chain_of_thought": {
                        "type": "string",
                        "description": "Chain of thought for solving the math problem.",
                    },
                    "correct_answers": {
                        "type": "array",
                        "description": "Correct answers to the math problem.",
                    },
                    "question_data": {
                        "type": "object",
                        "description": "Additional question data.",
                    }
                },
                "required": ["correct_answers", "chain_of_thought", "question_data"],
            },
        }
    ],
    tool_choice={"type": "tool", "name": "solve_math_problem"},
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": base64.b64encode(httpx.get(question_data['image_path']).content).decode("utf-8"),
                    },
                },
                {
                    "type": "text",
                    "text": f"Solve the following math problem and return the option number of the correct answer e.g [1] for option 1: {json.dumps(question_data)}",
                },
            ],
        }
    ],
)

    return message.content[0].input

print(process_image_question(question_data)["correct_answers"])
# print()
# print(message.content[0].input)

[3]


In [10]:
# primport base64
import anthropic
import httpx
import json

# ... existing code ...

# New JSON object
question_data = {"question_number": 8, "question_text": "Which of the following is the same as 2 kg 30 g ?", "options": [{"option_number": 1, "option_text": "230 g"}, {"option_number": 2, "option_text": "2030 g"}, {"option_number": 3, "option_text": "2300 g"}, {"option_number": 4, "option_text": "20030 g"}], "latex": None}

def process_text_question(question_data):
    message = anthropic.Anthropic().messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=8192,
        temperature=0,
        tools=[
            {
                "name": "solve_math_problem",
            "description": "Solve a math problem using well-structured JSON.",
            "input_schema": {
                "type": "object",
                "properties": {
                    "chain_of_thought": {
                        "type": "string",
                        "description": "Chain of thought for solving the math problem.",
                    },
                    "correct_answers": {
                        "type": "array",
                        "description": "Correct answers to the math problem.",
                    },
                    "question_data": {
                        "type": "object",
                        "description": "Additional question data.",
                    }
                },
                "required": ["correct_answers", "chain_of_thought", "question_data"],
            },
        }
    ],
    tool_choice={"type": "tool", "name": "solve_math_problem"},
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"Solve the following math problem and return the option number of the correct answer: {json.dumps(question_data)}",
                    },
                ],
            }
        ],
    )

    return message.content[0].input

print(process_text_question(question_data)["correct_answers"])
# print(message.content[0].input['correct_answers'])
# print()
# print(message.content[0].input)

[2]


In [11]:
import json

def process_questions(file_path: str) -> None:
    """
    Opens a JSONL file containing questions and loops through each row.

    Args:
        file_path (str): The path to the JSONL file.

    Raises:
        FileNotFoundError: If the specified file is not found.
        json.JSONDecodeError: If there's an issue parsing the JSON data.
    """
    try:
        with open(file_path, "r") as file:
            for line in file:
                # Skip empty lines or lines that start with the file path
                if not line.strip() or line.strip().startswith("qna-proc"):
                    continue
                
                # Parse each line as a JSON object
                question_data = json.loads(line.strip())
                
                # Process the question data here
                question_number = question_data["question_number"]
                question_text = question_data["question_text"]
                
                # Example: Print the question number and text
                print(f"Question {question_number}: {question_text}")
                
                if question_data["image_path"] is None:
                    res = process_text_question(question_data)
                else:
                    res = process_image_question(question_data)

                print("correct_answers", res["correct_answers"])
                print("chain_of_thought", res["chain_of_thought"])
                print()
    
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON data in the file. {str(e)}")

# Example usage
if __name__ == "__main__":
    file_path = "qna-proc/2021/questions.jsonl"
    process_questions(file_path)

Question 1: $160000+5000+400+3=$
correct_answers [2]
chain_of_thought Let's solve this problem step by step:

1. We need to add $160000 + 5000 + 400 + 3

2. Let's start with the largest numbers:
   160000 + 5000 = 165000

3. Now add 400:
   165000 + 400 = 165400

4. Finally, add 3:
   165400 + 3 = 165403

5. Our final answer is 165403

6. Looking at the given options, we can see that 165403 matches option number 2.

Question 2: In 7.654, which digit is in the tenths place?
correct_answers [2]
chain_of_thought To solve this problem, let's break down the number 7.654 and understand the place values:

1. In a decimal number, the digits to the right of the decimal point represent fractional parts.
2. The first digit after the decimal point is in the tenths place.
3. The second digit after the decimal point is in the hundredths place.
4. The third digit after the decimal point is in the thousandths place.

In 7.654:
- 7 is in the ones place
- 6 is in the tenths place
- 5 is in the hundredth

BadRequestError: Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'messages.0.content.0.image.source.base64.data: The image was specified using the image/png media type, but does not appear to be a valid png image'}}