In [None]:
%pip install datasets, aixplain

In [None]:
import os
# Placeholder: put the team API key between the quotes before running the cells below.
os.environ.update({"TEAM_API_KEY": ""})

In [None]:
from aixplain.factories import AgentFactory
import json

# Python interpreter tool handed to the agent below.
code = AgentFactory.create_python_interpreter_tool()

# Agent that receives the interpreter tool; the instruction text is split into
# adjacent literals purely for readability (they concatenate to one string).
agent = AgentFactory.create(
    name="Python Code Executor",
    description="Python code executor",
    instructions=(
        "This is a Python Code Generator and Executor. "
        "Use it to generate and run Python commands. "
        "Input should describe a problem that can be solved programmatically. "
        "The output will be a detailed explanation of what the executed code achieved. "
        "If you receive code as input, you should either run it or explain why it cannot be executed. "
        "Additionally, ensure the tool prints the outcome! "
        "PRINT THE OUTPUT do not just show it, use the python print() function."
    ),
    llm_id="654a42a36eb5634a236f5eb1",
    tools=[code],
)


In [None]:
# Load the samples: one JSON object per line (JSONL).
# The encoding is pinned to UTF-8 so the result does not depend on the platform
# default, and blank lines (e.g. a trailing newline at end of file) are skipped
# instead of crashing json.loads.
with open("data/code_kaggle_20240712.jsonl", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

In [None]:
import re
def evaluate_response(predicted_json_str, ground_truth_answers):
    """Score a JSON list of predicted answers against the ground-truth answers.

    The prediction is expected to be a JSON array of objects carrying an
    "answer" key, either bare, wrapped in a Markdown code fence, or embedded as
    a JSON string under the "action_input" key of a wrapping object.

    Args:
        predicted_json_str: Raw text containing the predictions.
        ground_truth_answers: Sequence of expected answer strings, compared
            position by position with the predictions.

    Returns:
        A tuple (accuracy, correct). accuracy is correct divided by
        len(ground_truth_answers), so missing predictions count as wrong;
        it is 0.0 when there are no ground-truth answers. If the prediction
        cannot be parsed, a message is printed and (0.0, 0) is returned.
        Malformed entries (non-dict items, non-string answers) count as wrong
        instead of raising.
    """
    try:
        # Strip markdown-style triple backticks if present
        match = re.search(r"```(?:json)?\s*(\[\s*{.*?}\s*])\s*```", predicted_json_str, re.DOTALL)
        cleaned = match.group(1) if match else predicted_json_str.strip()

        parsed = json.loads(cleaned)

        # Handle the special case if embedded in an action dict
        if isinstance(parsed, dict) and isinstance(parsed.get("action_input"), str):
            parsed = json.loads(parsed["action_input"])

        # Anything that is not a list cannot be paired with the answers.
        predictions = parsed if isinstance(parsed, list) else []
    except Exception as e:
        print("❌ Failed to parse prediction:", e)
        return 0.0, 0

    correct = 0
    for pred, gt in zip(predictions, ground_truth_answers):
        # Guard against entries that are not objects or whose answer is not text;
        # they are simply scored as incorrect.
        answer = pred.get("answer", "") if isinstance(pred, dict) else None
        if isinstance(answer, str) and answer.strip().upper() == gt.strip().upper():
            correct += 1

    total = len(ground_truth_answers)
    return (correct / total if total > 0 else 0.0), correct


In [None]:
import re

# Walk the samples; the break at index 1 means only the first sample is processed.
for idx, sample in enumerate(data):
    if idx == 1:
        break
    # Only samples whose "image_expected" flag is exactly False are evaluated.
    if sample["image_expected"] is not False:
        continue

    content = sample["messages"][0]["content"]

    # Take the text after the last "Data Files:" marker, up to the first triple
    # newline, and replace every input path with the {{file1}} placeholder.
    files_section = content.split("\nData Files:")[-1]
    option = files_section.split("\n\n\n")[0].strip()
    option = "Data Files:\n" + re.sub(r'(\.\./)?input/[\w\-/\.]+', '{{file1}}', option)

    # NOTE: the leading whitespace inside this literal is part of the prompt text.
    suffix = f"""Answer the questions based on the data file, notebook and user request using the available tools.
            Question:
            {sample['questions']}

            Answer Format:
            ```json
            [
                {{
                    "answer": "LETTER OF THE ANSWER",
                    "explanation": "EXPLANATION"
                }}
            ]
            ```
            """
    inp = "\n\n".join((option, suffix))

    # The first input path found in the message is mapped to its hosted URL.
    match = re.search(r'(\.\./)?input/([\w\-/\.]+)', content)
    if match is None:
        content_dict = {}
        print(f"⚠️ No file found in sample {idx}, skipping file upload.")
    else:
        relative_path = match.group(2)
        print(relative_path)
        file_url = f"https://aixplain-platform-assets.s3.us-east-1.amazonaws.com/samples/code_kaggle_source/input/{relative_path}"
        content_dict = {"file1": file_url}

    response = agent.run(query=inp, max_iterations=10, content=content_dict)
    acc, correct = evaluate_response(response.data.output, sample["gt_answers"])
