In [18]:
import base64
import anthropic
import httpx
import json

# ... existing code ...

# Sample image-based question (number 9): four answer options plus the URL of
# the accompanying diagram. Key order matches the original record.
question_data = {
    "question_number": 9,
    "question_text": (
        "The diagram shows the front view and top view of a solid made with some cubes.\n\n"
        "Which of the following could be the solid?\n\n"
    ),
    "options": [
        {"option_number": number, "option_text": text}
        for number, text in enumerate(
            ["A and B only", "A and C only", "B and C only", "C only"],
            start=1,
        )
    ],
    "image_path": "https://cdn.mathpix.com/snip/images/Let3_kVSE0FQ0U_oHOPZKLQupMjUETH_pCmdejkihho.original.fullsize.png",
}

def process_image_question(question_data):
    """Solve a multiple-choice math question that comes with an image.

    Downloads the image at ``question_data["image_path"]``, base64-encodes it,
    and sends it to Claude together with the JSON-serialised question. The
    request forces the ``solve_math_problem`` tool so the answer comes back as
    structured tool input rather than free text.

    Args:
        question_data (dict): Question record. Must contain ``"image_path"``,
            a URL whose path ends in ``.png``, ``.jpg`` or ``.jpeg``
            (case-insensitive; any ``?query`` or ``#fragment`` is ignored when
            detecting the type). The whole dict is serialised into the prompt.

    Returns:
        The ``input`` of the first content block of the response. The tool
        schema marks ``chain_of_thought``, ``correct_answers`` and
        ``question_data`` as required keys.

    Raises:
        ValueError: If the image path does not end in a PNG/JPEG extension.
        httpx.HTTPStatusError: If the image download returns a 4xx/5xx status.
    """
    image_path = question_data["image_path"]

    # Detect the media type from the *end* of the path. A plain substring test
    # would misclassify URLs that merely contain ".png"/".jpg" somewhere else
    # (a directory name or query parameter) and would reject ".PNG".
    bare_path = image_path.split("#", 1)[0].split("?", 1)[0].lower()
    if bare_path.endswith(".png"):
        image_type = "png"
    elif bare_path.endswith((".jpg", ".jpeg")):
        image_type = "jpeg"
    else:
        raise ValueError("Invalid image path")

    # Fail loudly on a bad download instead of base64-encoding an error page
    # and sending it to the model as if it were the image.
    image_response = httpx.get(image_path)
    image_response.raise_for_status()
    image_b64 = base64.b64encode(image_response.content).decode("utf-8")

    message = anthropic.Anthropic().messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=8192,
        temperature=0,
        tools=[
            {
                "name": "solve_math_problem",
                "description": "Solve a math problem using well-structured JSON.",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "chain_of_thought": {
                            "type": "string",
                            "description": "Chain of thought for solving the math problem.",
                        },
                        "correct_answers": {
                            "type": "array",
                            "description": "Correct answers to the math problem.",
                        },
                        "question_data": {
                            "type": "object",
                            "description": "Additional question data.",
                        },
                    },
                    "required": ["correct_answers", "chain_of_thought", "question_data"],
                },
            }
        ],
        # Force the tool call so the reply is structured tool input.
        tool_choice={"type": "tool", "name": "solve_math_problem"},
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": f"image/{image_type}",
                            "data": image_b64,
                        },
                    },
                    {
                        "type": "text",
                        "text": f"Solve the following math problem and return the option number of the correct answer e.g [1] for option 1: {json.dumps(question_data)}",
                    },
                ],
            }
        ],
    )

    # NOTE(review): assumes the forced tool call is the first content block.
    return message.content[0].input

# print(process_image_question(question_data)["correct_answers"])
# print()
# print(message.content[0].input)

In [10]:
# import base64  (not needed in this cell: the text-only solver sends no image)
import anthropic
import httpx
import json

# ... existing code ...

# Sample text-only question (number 8): a unit-conversion item with four
# options and no image; "latex" is explicitly empty.
question_data = {
    "question_number": 8,
    "question_text": "Which of the following is the same as 2 kg 30 g ?",
    "options": [
        {"option_number": number, "option_text": text}
        for number, text in enumerate(
            ["230 g", "2030 g", "2300 g", "20030 g"],
            start=1,
        )
    ],
    "latex": None,
}

def process_text_question(question_data):
    """Solve a text-only multiple-choice math question with Claude.

    The question dict is serialised to JSON and embedded in a single user
    message. The request forces the ``solve_math_problem`` tool so the answer
    comes back as structured tool input.

    Args:
        question_data (dict): Question record; serialised verbatim into the
            prompt with ``json.dumps``.

    Returns:
        The ``input`` of the first content block of the response. The tool
        schema marks ``chain_of_thought``, ``correct_answers`` and
        ``question_data`` as required keys.
    """
    client = anthropic.Anthropic()

    # Schema of the structured answer the model is asked to fill in.
    answer_schema = {
        "type": "object",
        "properties": {
            "chain_of_thought": {
                "type": "string",
                "description": "Chain of thought for solving the math problem.",
            },
            "correct_answers": {
                "type": "array",
                "description": "Correct answers to the math problem.",
            },
            "question_data": {
                "type": "object",
                "description": "Additional question data.",
            },
        },
        "required": ["correct_answers", "chain_of_thought", "question_data"],
    }
    solver_tool = {
        "name": "solve_math_problem",
        "description": "Solve a math problem using well-structured JSON.",
        "input_schema": answer_schema,
    }

    prompt = (
        "Solve the following math problem and return the option number of the correct answer: "
        + json.dumps(question_data)
    )
    user_turn = {
        "role": "user",
        "content": [{"type": "text", "text": prompt}],
    }

    response = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=8192,
        temperature=0,
        tools=[solver_tool],
        tool_choice={"type": "tool", "name": "solve_math_problem"},
        messages=[user_turn],
    )

    first_block = response.content[0]
    return first_block.input

# Run the text-only solver on the sample question defined in this cell and show
# only the option number(s) it selected.
print(process_text_question(question_data)["correct_answers"])
# NOTE(review): leftover debug lines. `message` is a local variable inside
# process_text_question, so these would raise NameError if uncommented here
# unless a cell-level `message` exists elsewhere in the notebook.
# print(message.content[0].input['correct_answers'])
# print()
# print(message.content[0].input)

[2]


In [19]:
import json

def process_questions(file_path: str, skip_lines: int = 4) -> None:
    """
    Reads a JSONL file of questions, solves each one, and prints the results.

    The first ``skip_lines`` lines are discarded. Every remaining line that is
    neither blank nor starts with ``qna-proc`` is parsed as one JSON question
    record. Records without an ``image_path`` key, or with it set to ``None``,
    go to ``process_text_question``; all others go to
    ``process_image_question``.

    Args:
        file_path (str): The path to the JSONL file.
        skip_lines (int): Number of leading lines to discard before parsing.
            Defaults to 4 (presumably a non-JSON preamble in the data file;
            confirm against the actual export). A shorter file is treated as
            having no questions.

    Returns:
        None. The question, the selected answers and the chain of thought are
        printed for each record.

    Note:
        A missing file or a malformed JSON line is reported with ``print``
        rather than raised. A malformed line stops processing of the remaining
        lines. Errors raised by the solver functions are not caught here.
    """
    try:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(file_path, "r", encoding="utf-8") as file:
            # Discard the preamble. next() with a default avoids an uncaught
            # StopIteration when the file has fewer lines than skip_lines.
            for _ in range(skip_lines):
                if next(file, None) is None:
                    break

            for line in file:
                record = line.strip()

                # Skip empty lines or lines that start with the file path
                if not record or record.startswith("qna-proc"):
                    continue

                # Parse each line as a JSON object
                question_data = json.loads(record)

                question_number = question_data["question_number"]
                question_text = question_data["question_text"]
                print(f"Question {question_number}: {question_text}")

                # Text-only records may omit "image_path" entirely, so use
                # .get() instead of indexing to avoid a KeyError.
                if question_data.get("image_path") is None:
                    res = process_text_question(question_data)
                else:
                    res = process_image_question(question_data)

                print("correct_answers", res["correct_answers"])
                print("chain_of_thought", res["chain_of_thought"])
                print()

    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON data in the file. {str(e)}")

# Example usage: run the batch over the 2021 question file when this cell or
# script is executed directly. The path is relative, so it is resolved against
# the current working directory.
if __name__ == "__main__":
    file_path = "qna-proc/2021/questions.jsonl"
    process_questions(file_path)

Question 5: Which two lines are perpendicular to each other?
correct_answers [2]
chain_of_thought To determine which two lines are perpendicular to each other, we need to look for lines that intersect at a 90-degree angle. Let's analyze the given figure:

1. Line AB is the top line of the triangle.
2. Line DE is a horizontal line that intersects the triangle.
3. Line FG is a diagonal line that intersects the triangle.
4. BC is one of the sides of the triangle.

Perpendicular lines form a right angle (90 degrees) when they intersect. Looking at the figure:

- AB and DE appear to intersect at a right angle. The line DE is horizontal, and AB is part of the triangle that looks to be forming a right angle with DE.
- None of the other line pairs form an obvious right angle.

Therefore, the lines that are perpendicular to each other are AB and DE.

Question 6: Arrange these fractions from the largest to the smallest.
correct_answers [1]
chain_of_thought To solve this problem, we need to compa