In [1]:
# %pip install -q langfuse wikipedia openai google-search-results pandas openai-whisper ffmpeg-python

In [None]:
# Step 1: Load Configuration and Dependencies

import json

# Load sensitive config from config.json
with open('config.json', 'r') as f:
    config = json.load(f)

# Notebook-wide run settings, read by the cells below.
# Submission identity and scoring API endpoint:
username = "Shreyashgupta5"
code_link = "https://huggingface.co/spaces/Shreyashgupta5/ai_agents_course"
api_base_url = "https://agents-course-unit4-scoring.hf.space"
# Model selection and sampling (alternative model kept for quick switching):
# model_name = "o4-mini-2025-04-16"
model_name = "gpt-4.1-2025-04-14"
temperature = 0.2
# Feature flags
verbose = True
use_langfuse = True

# Print to verify (settings only; nothing from config.json is echoed)
print("Config loaded. Sensitive keys available for use.")
print("Notebook variables set:")
print("\n".join(
    f"  {label}: {value}"
    for label, value in (
        ("model_name", model_name),
        ("temperature", temperature),
        ("verbose", verbose),
        ("use_langfuse", use_langfuse),
        ("username", username),
        ("code_link", code_link),
        ("api_base_url", api_base_url),
    )
))

In [3]:
# Step 2: Configure Langfuse Decorator-Based Client

from langfuse.decorators import langfuse_context

# Configure the shared decorator context with the credentials and host stored in
# config.json (keys: "langfuse_secret", "langfuse_public_key", "host").
# A missing key raises KeyError here, before any traced function runs.
langfuse_context.configure(
    secret_key=config["langfuse_secret"],
    public_key=config["langfuse_public_key"],
    host=config["host"]
)

In [None]:
# Step 3: Load Questions and Download Required Files

import json
import os
import requests

# Set how many questions you want to process
NUM_QUESTIONS_TO_RUN = 20  # <--- Change this number as needed

# Load all questions from all-json/all_questions.json.
# The encoding is explicit because open() otherwise falls back to the platform's
# locale encoding, which can mis-decode non-ASCII question text.
with open('all-json/all_questions.json', 'r', encoding='utf-8') as f:
    all_questions = json.load(f)

# Only process up to NUM_QUESTIONS_TO_RUN questions
questions = all_questions[:NUM_QUESTIONS_TO_RUN]

# Helper: Download file if it's not already present (supports images, Excel, CSV, etc.)
def download_file_if_needed(q, api_base_url, timeout=60):
    """
    Download a question's attachment into ``files-for-agent/`` unless it already exists.

    Args:
        q: Question record. ``file_name`` is optional; ``task_id`` is read only
            when a file name is present.
        api_base_url: Base URL of the scoring API. The file is requested from
            ``{api_base_url}/files/{task_id}``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None. Success and failure are reported with ``print``; a failed download
        never raises, so the remaining questions can still be fetched.
    """
    file_name = q.get("file_name")
    if not file_name:
        return  # question has no attachment

    file_path = os.path.join("files-for-agent", file_name)
    if os.path.exists(file_path):
        return  # already downloaded on a previous run

    # Create the target directory on first use; without it open() below fails
    # with FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    file_url = f"{api_base_url}/files/{q['task_id']}"
    try:
        r = requests.get(file_url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to download file for task {q['task_id']}: {exc}")
        return

    if r.status_code == 200:
        with open(file_path, "wb") as f:
            f.write(r.content)
        print(f"Downloaded file for task {q['task_id']}: {file_path}")
    else:
        print(f"Failed to download file for task {q['task_id']}: {r.status_code}")

# Download files for relevant questions (images, Excel, CSV, etc.)
# Questions without a "file_name" are skipped inside the helper.
for q in questions:
    download_file_if_needed(q, api_base_url)

# Print out each question's task_id and question text for verification
for q in questions:
    print(f"Task ID: {q['task_id']}")
    print(f"Question: {q['question']}")
    # Show the local path the tools will read the attachment from
    if q.get("file_name"):
        print(f"File: files-for-agent/{q['file_name']}")
    print("-" * 40)

In [5]:
# Step 4: Define Tools

import os
import requests
import wikipedia
import pandas as pd
from langfuse.decorators import observe
import base64
import mimetypes

# Wikipedia Search Tool
@observe()
def wikipedia_search(query, sentences=2):
    """
    Look up a short Wikipedia summary for ``query``.

    Args:
        query: Search text handed to ``wikipedia.summary``.
        sentences: Number of summary sentences to request.

    Returns:
        The summary text, or a human-readable message when the lookup is
        ambiguous, finds no page, or fails for any other reason. Never raises.
    """
    try:
        return wikipedia.summary(
            query,
            sentences=sentences,
            redirect=True,
            auto_suggest=True,
        )
    except wikipedia.DisambiguationError as ambiguous:
        # Only the first five candidate titles are reported
        candidates = ambiguous.options[:5]
        return f"Disambiguation error. Options: {candidates}"
    except wikipedia.PageError:
        return "No Wikipedia page found for the query."
    except Exception as exc:
        return f"Error: {str(exc)}"

# SerpAPI Web Search Tool
@observe()
def serpapi_search(query, timeout=30):
    """
    Run a Google search through SerpAPI and return the single most useful text.

    The API key is taken from ``config["SERPAPI_API_KEY"]`` or, failing that,
    from the ``SERPAPI_API_KEY`` environment variable.

    Args:
        query: Search query string.
        timeout: Seconds to wait for SerpAPI before giving up.

    Returns:
        The answer-box answer if present, otherwise the first organic result's
        snippet, otherwise a message describing why nothing was found. Network
        and parsing failures are returned as a "SerpAPI error: ..." string
        rather than raised, matching the other tools in this notebook.
    """
    api_key = config.get("SERPAPI_API_KEY") or os.environ.get("SERPAPI_API_KEY")
    if not api_key:
        return "No SerpAPI key provided."
    params = {
        "q": query,
        "api_key": api_key,
        "engine": "google",
        "num": 3
    }
    try:
        response = requests.get("https://serpapi.com/search", params=params, timeout=timeout)
    except requests.RequestException as exc:
        return f"SerpAPI error: {exc}"
    if response.status_code != 200:
        return f"SerpAPI error: {response.status_code} {response.text}"
    try:
        data = response.json()
    except ValueError as exc:
        # A 200 response with a non-JSON body (e.g. an HTML error page)
        return f"SerpAPI error: invalid JSON response ({exc})"

    # Guard against "answer_box": null or other non-dict payloads
    answer_box = data.get("answer_box")
    if isinstance(answer_box, dict) and "answer" in answer_box:
        return answer_box["answer"]
    organic_results = data.get("organic_results") or []
    if len(organic_results) > 0:
        return organic_results[0].get("snippet", "No snippet found.")
    return "No relevant results found."

@observe()
def parse_excel_csv(file_name):
    """
    Load a spreadsheet from ``files-for-agent`` and describe its contents.

    Files ending in ``.csv`` (case-insensitive) are read with ``pd.read_csv``;
    everything else is handed to ``pd.read_excel``.

    Returns:
        A dict with the column names (``schema``), the column dtypes as strings
        (``dtypes``), the first three rows as records (``sample``) and the output
        of ``describe(include='all')`` (``stats``). If anything goes wrong while
        reading or summarising, an error string is returned instead.
    """
    table_path = os.path.join("files-for-agent", file_name)
    try:
        reader = pd.read_csv if table_path.lower().endswith('.csv') else pd.read_excel
        table = reader(table_path)
        return {
            "schema": table.columns.tolist(),
            "dtypes": table.dtypes.astype(str).to_dict(),
            "sample": table.head(3).to_dict(orient="records"),
            "stats": table.describe(include='all').to_dict(),
        }
    except Exception as exc:
        return f"Error parsing file {table_path}: {str(exc)}"

@observe()
def analyze_image_with_vision(file_name, question, model_name, temperature):
    """
    Uses a vision-capable model to analyze an image and answer a question about it.

    The image is read from ``files-for-agent/<file_name>``, base64-encoded into a
    data URL and sent together with the question in one chat-completion request.

    Args:
        file_name: Image file name inside ``files-for-agent``.
        question: Question text sent alongside the image.
        model_name: Chat model to call.
        temperature: Sampling temperature for the call.

    Returns:
        The model's reply text. If the file cannot be read or the API call
        fails, an "Error analyzing image ..." string is returned instead of
        raising, so one bad attachment does not abort the whole run (the other
        tools in this notebook follow the same convention).
    """
    import openai
    file_path = os.path.join("files-for-agent", file_name)
    try:
        client = openai.OpenAI(api_key=config.get("openai_api_key"))
        # Detect MIME type from the file extension
        mime_type, _ = mimetypes.guess_type(file_path)
        if mime_type is None:
            mime_type = "image/png"  # fallback
        with open(file_path, "rb") as img_file:
            image_bytes = img_file.read()
        base64_image = base64.b64encode(image_bytes).decode("utf-8")
        data_url = f"data:{mime_type};base64,{base64_image}"
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "user", "content": [
                    {"type": "text", "text": question},
                    {"type": "image_url", "image_url": {"url": data_url}}
                ]}
            ],
            temperature=temperature
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error analyzing image {file_path}: {str(e)}"

# Whisper Audio Transcription Tool
@observe()
def transcribe_audio_with_whisper(file_name, model_size="base"):
    """
    Turn an audio attachment (e.g. an .mp3) into text with OpenAI Whisper.

    Args:
        file_name: Audio file name inside ``files-for-agent``.
        model_size: Whisper model name passed to ``whisper.load_model``.

    Returns:
        The transcribed text, or an error string if loading the model or
        transcribing fails. Note that the model is loaded on every call.
    """
    import whisper
    audio_path = os.path.join("files-for-agent", file_name)
    try:
        transcription = whisper.load_model(model_size).transcribe(audio_path)
        return transcription["text"]
    except Exception as exc:
        return f"Error transcribing audio {audio_path}: {str(exc)}"

In [6]:
# Step 5: Agent Planning Step (OpenAI v1.x+)

import openai
from langfuse.decorators import observe  # (if not already imported)

# Shared OpenAI client for the planning and synthesis steps.
# The API key comes from config.json (loaded in Step 1); a missing "openai_api_key" entry raises KeyError here.
client = openai.OpenAI(api_key=config["openai_api_key"])

@observe(as_type="generation")
def get_agent_plan(question, model_name, temperature, file_name=None):
    """
    Ask the model for a step-by-step plan, including which tools to use and why.

    The prompt lists every tool the notebook implements (Wikipedia, SerpAPI,
    Excel/CSV parser, image analysis, Whisper transcription) and, when
    ``file_name`` is given, tells the model which file is attached.

    Returns:
        The plan text from the first completion choice.
    """
    question_intro = (
        "You are an AI agent. Here is a question you need to answer:\n"
        f"Question: {question}\n\n"
    )
    tool_catalog = (
        "You have access to the following tools:\n"
        "- Wikipedia Search: For factual and encyclopedic information.\n"
        "- SerpAPI Web Search: For general web search (Google, Bing, etc.).\n"
        "- Excel/CSV File Parser: For reading Excel or CSV files attached to the question.\n"
        "- Image Analysis (Vision): For analyzing images attached to the question. This tool sends the image and the question to a vision-capable model and returns the model's answer.\n\n"
        "- Audio Transcription(Whisper): For transcribing audio files (eg: .mp3) to text that are attached to the question. This tool sends the audio file to a transcription model and returns the transcribed text from OpenAI Whisper.\n\n"
    )
    planning_instructions = (
        "If a file is attached and it is an image (e.g., .png, .jpg, .jpeg), use the Image Analysis tool to analyze the image and answer the question.\n"
        "Create a step-by-step plan to answer this question. For each step, specify which tool you would use and why. "
        "Be explicit about your reasoning for tool selection."
    )
    prompt_parts = [question_intro, tool_catalog, planning_instructions]
    if file_name:
        prompt_parts.append(f"\n\nA file is attached: {file_name}")

    completion = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": "".join(prompt_parts)}],
        temperature=temperature
    )
    return completion.choices[0].message.content

In [7]:
# Step 6: Tool Execution Step

from langfuse.decorators import observe
import re

def execute_tools(plan, question, file_name=None, temperature=temperature):
    """
    Run the tools relevant to a question and collect their outputs.

    File tools are chosen from the attachment's extension and always run when a
    matching file is attached: audio -> Whisper transcription, image -> vision
    model, spreadsheet -> Excel/CSV parser. Search tools are chosen by looking
    for their names in the plan text (case-insensitive). When the Wikipedia
    result looks unusable and no web search has run, SerpAPI is used as a
    fallback.

    Args:
        plan: Plan text from the planning step. ``None`` (a completion without
            text content) is treated as an empty plan.
        question: Question text; also used verbatim as the search query.
        file_name: Optional attachment name inside ``files-for-agent``.
        temperature: Sampling temperature forwarded to the vision model.

    Returns:
        dict mapping tool keys (``audio_transcription``, ``image_analysis``,
        ``excel_csv``, ``wikipedia``, ``serpapi``) to that tool's output.

    Note:
        The vision call reads the module-level ``model_name``.
    """
    tool_outputs = {}
    plan_lower = (plan or "").lower()
    file_name_lower = (file_name or "").lower()

    def tool_in_plan(*tool_names):
        # True if any of the given names is mentioned in the plan
        return any(tool_name in plan_lower for tool_name in tool_names)

    # Audio file transcription: always transcribe audio if present
    if file_name_lower.endswith(('.mp3', '.wav', '.m4a', '.flac', '.ogg')):
        tool_outputs['audio_transcription'] = transcribe_audio_with_whisper(file_name)

    # Image file analysis: always run when an image is attached. The original
    # code checked the plan first, but both branches made the same call.
    if file_name_lower.endswith(('.png', '.jpg', '.jpeg')):
        tool_outputs['image_analysis'] = analyze_image_with_vision(
            file_name, question, model_name, temperature
        )

    # Excel/CSV file parsing
    if file_name_lower.endswith(('.xlsx', '.xls', '.csv')):
        tool_outputs['excel_csv'] = parse_excel_csv(file_name)

    # Wikipedia
    wiki_result = None
    if tool_in_plan("wikipedia"):
        wiki_result = wikipedia_search(question)
        tool_outputs['wikipedia'] = wiki_result

    # SerpAPI Web Search
    if tool_in_plan("serpapi web", "web search", "serpapi search", "google search"):
        tool_outputs['serpapi'] = serpapi_search(question)

    # Fallback: If Wikipedia was run and is insufficient, and SerpAPI web search wasn't already run, run it
    fallback_needed = (
        wiki_result is not None and (
            "no wikipedia page found" in wiki_result.lower() or
            "disambiguation error" in wiki_result.lower() or
            "error:" in wiki_result.lower() or
            len(wiki_result) < 50
        ) and 'serpapi' not in tool_outputs
    )
    if fallback_needed:
        tool_outputs['serpapi'] = serpapi_search(question)
        print("Fallback: Used SerpAPI web search due to insufficient Wikipedia result.")

    print("Tools used for this question:", list(tool_outputs.keys()))
    return tool_outputs

In [8]:
# Step 7: Synthesis Step (OpenAI v1.x+)

from langfuse.decorators import observe  # (if not already imported)

@observe(as_type="generation")
def synthesize_final_answer(task_id, question, tool_outputs, gaia_doc, model_name, temperature):
    """
    Ask the model to turn the tool outputs into a final GAIA-formatted answer.

    The prompt embeds the GAIA formatting documentation, the question and the
    string form of ``tool_outputs``, then walks the model through requirement
    analysis, reasoning and self-critique before it emits the JSON answer.

    Note:
        ``task_id`` is accepted for the caller's convenience but is not placed in
        the prompt.

    Returns:
        The raw model reply (work shown as ``#`` comment lines plus the JSON object).
    """
    context_section = "".join([
        "You are an AI agent participating in the GAIA benchmark.\n",
        "Here is the official GAIA documentation for answer formatting:\n\n",
        f"{gaia_doc}\n\n",
        f"Here is the original question:\n{question}\n\n",
        f"Here are the outputs from the tools you used:\n{tool_outputs}\n\n",
    ])
    instruction_section = (
        "Your task is to generate the most accurate and complete answer possible, strictly following the question's requirements and the GAIA format.\n\n"
        "Follow these steps:\n"
        "1. Carefully re-read and break down the question. List all requirements, constraints, and any special instructions (e.g., only list ingredients, exclude measurements, alphabetize, etc.).\n"
        "2. Summarize the relevant information from the tool outputs that directly addresses the question.\n"
        "3. Step-by-step, reason through how to answer the question, making sure to address every requirement and constraint you listed in step 1.\n"
        "4. Critique your draft answer: Does it fully and precisely answer the question? Does it include anything extra or miss anything required? If so, revise it.\n"
        "5. Only after this, output the final answer in the required GAIA JSON format. Only output the JSON object, nothing else.\n"
        "Show your work for steps 1-4 as comments (using lines starting with #), then output the JSON answer."
    )
    completion = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": context_section + instruction_section}],
        temperature=temperature
    )
    return completion.choices[0].message.content

In [None]:
# Step 8: Main Agent Loop with Langfuse Traceability

from langfuse.decorators import observe, langfuse_context

@observe()
def process_all_questions(questions, model_name, temperature, gaia_doc):
    """
    Run plan -> tools -> synthesis for every question and collect the raw answers.

    Args:
        questions: Question records; each needs ``task_id`` and ``question`` and
            may carry ``file_name``.
        model_name: Chat model used for planning and synthesis.
        temperature: Sampling temperature for all model calls.
        gaia_doc: GAIA formatting documentation passed to the synthesis prompt.

    Returns:
        A list with exactly one entry per question, in order. Each entry is the
        raw synthesis output. If a question fails, its entry is a plain
        ``"ERROR: ..."`` string (no JSON), so the list stays index-aligned with
        ``questions`` and the answers already produced are not lost; the saving
        step rejects such entries as invalid JSON.
    """
    final_answers = []
    for q in questions:
        task_id = q['task_id']
        print(f"Processing Task ID: {task_id}")
        try:
            plan = get_agent_plan(q['question'], model_name, temperature, file_name=q.get("file_name"))
            tool_outputs = execute_tools(plan, q['question'], file_name=q.get("file_name"), temperature=temperature)
            # Use the correct task_id from the question
            final_answer_json = synthesize_final_answer(
                task_id=task_id,
                question=q['question'],
                tool_outputs=tool_outputs,
                gaia_doc=gaia_doc,
                model_name=model_name,
                temperature=temperature
            )
        except Exception as exc:
            # One failing question (API hiccup, bad attachment, ...) must not
            # discard the answers collected so far.
            print(f"Failed to process Task ID {task_id}: {exc}")
            # Deliberately free of exception text so it can never parse as JSON
            final_answer_json = f"ERROR: no answer produced for task {task_id}"
        final_answers.append(final_answer_json)
        # Print trace URL for traceability
        print("Langfuse Trace URL:", langfuse_context.get_current_trace_url())
    return final_answers

# Load GAIA documentation from file; its text is embedded in the synthesis prompt.
# Read explicitly as UTF-8 so the result does not depend on the platform's locale encoding.
with open("documentation/GIAI-documentation.md", "r", encoding="utf-8") as f:
    gaia_doc = f.read()

# Process all questions:
final_answers = process_all_questions(questions, model_name, temperature, gaia_doc)

In [None]:
# Step 9: Save Results (with cleaning and validation)

import json
import re

def extract_json_from_response(answer_str):
    """
    Extracts the first valid JSON object found in the string.

    The model is asked to show its work as ``#`` comment lines before the JSON
    answer, and those lines may themselves contain braces. Instead of grabbing
    everything between the first ``{`` and the last ``}``, each ``{`` is tried
    in turn as the start of a JSON document and the first one that parses wins.

    Args:
        answer_str: Raw model output, optionally wrapped in a Markdown code fence.

    Returns:
        The text of the JSON object (a substring of the cleaned input).

    Raises:
        ValueError: If no parseable JSON object is present.
    """
    # Remove code block markers if present
    answer_str = answer_str.strip()
    answer_str = re.sub(r"^```[a-zA-Z]*", "", answer_str)
    answer_str = re.sub(r"```$", "", answer_str).strip()

    decoder = json.JSONDecoder()
    start = answer_str.find("{")
    while start != -1:
        try:
            # raw_decode parses one JSON document starting at `start` and
            # reports where it ends, ignoring whatever follows it.
            _, end = decoder.raw_decode(answer_str, start)
        except ValueError:
            start = answer_str.find("{", start + 1)
            continue
        return answer_str[start:end]
    raise ValueError(f"No JSON object found in answer:\n{answer_str}")

def clean_and_validate_answer(answer_str, required_fields=("task_id", "submitted_answer"), correct_task_id=None):
    """
    Parse a raw model answer into a dict and check that it has the required fields.

    Args:
        answer_str: Raw model output containing a JSON object.
        required_fields: Keys that must be present in the parsed object.
        correct_task_id: When given, overwrites ``task_id`` in the parsed object
            before the required-field check runs.

    Returns:
        The parsed (and possibly task-id-corrected) answer dict.

    Raises:
        ValueError: If no JSON can be extracted/parsed or a required field is missing.
    """
    try:
        answer_obj = json.loads(extract_json_from_response(answer_str))
    except Exception as e:
        raise ValueError(f"Invalid JSON: {e}\nRaw output: {answer_str}")

    if correct_task_id is not None:
        answer_obj["task_id"] = correct_task_id

    for field in required_fields:
        if field in answer_obj:
            continue
        raise ValueError(f"Missing required field '{field}' in answer: {answer_obj}")
    return answer_obj

def save_final_answers(final_answers, questions, filename="all-json/final_answers.jsonl"):
    """
    Clean the raw answers, pin each to its question's task id and write them as JSON Lines.

    Args:
        final_answers: Raw answers, index-aligned with ``questions``. Strings are
            parsed and validated; any other value is treated as an already-parsed
            answer mapping.
        questions: Question records supplying the authoritative ``task_id``.
        filename: Output path; one JSON object is written per line.

    Returns:
        The list of cleaned answer dicts that were written. String answers that
        fail validation are reported with ``print`` and left out.
    """
    cleaned_answers = []
    for i, answer in enumerate(final_answers):
        correct_task_id = questions[i]["task_id"]
        if isinstance(answer, str):
            try:
                answer_obj = clean_and_validate_answer(answer, correct_task_id=correct_task_id)
            except Exception as e:
                print(f"Error in answer {i}: {e}")
                continue
        else:
            # Work on a copy so the caller's object is not modified
            answer_obj = dict(answer)
            answer_obj["task_id"] = correct_task_id
        cleaned_answers.append(answer_obj)
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 explicitly; the locale default may not be able to encode them.
    with open(filename, "w", encoding="utf-8") as f:
        for answer_obj in cleaned_answers:
            f.write(json.dumps(answer_obj, ensure_ascii=False) + "\n")
    print(f"Saved {len(cleaned_answers)} answers to {filename}")
    return cleaned_answers

# Clean, validate and save the answers produced by the agent loop.
# This runs for real (it is not just an example): it writes the JSONL file and keeps the cleaned list for submission.
final_answers_cleaned = save_final_answers(final_answers, questions)

In [None]:
# Step 10: Validate Answers (Check with GAIA API)

import requests

def validate_answers(final_answers, username, code_link, api_base_url, agent_code=None, timeout=120):
    """
    Submits answers to the GAIA evaluation endpoint for validation.
    Prints the score and which answers were correct.

    Args:
        final_answers: Cleaned answer dicts to submit.
        username: Username sent with the submission.
        code_link: Link to the agent's code; also the fallback for ``agent_code``.
        api_base_url: Base URL of the scoring API; the request goes to ``/submit``.
        agent_code: Agent source to attach. When omitted, ``agent.ipynb`` is read
            from the working directory, falling back to ``code_link`` if that
            file cannot be read.
        timeout: Seconds to wait for the scoring API before giving up.

    Returns:
        The parsed JSON response on success, or ``None`` if the request fails,
        returns a non-200 status, or the body is not valid JSON.
    """
    url = f"{api_base_url}/submit"
    if agent_code is None:
        # Try to read your notebook as code, or use code_link as fallback
        try:
            with open("agent.ipynb", "r", encoding="utf-8") as f:
                agent_code = f.read()
        except (OSError, UnicodeError):
            agent_code = code_link  # fallback
    payload = {
        "username": username,
        "code_link": code_link,
        "agent_code": agent_code,
        "answers": final_answers  # <-- Use the cleaned answers here!
    }
    try:
        response = requests.post(url, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        print("Submission failed:", exc)
        return None
    if response.status_code != 200:
        print("Submission failed:", response.status_code, response.text)
        return None
    try:
        result = response.json()
    except ValueError as exc:
        print("Submission failed:", f"invalid JSON response ({exc})")
        return None

    print("Submission successful!")
    print(f"Score: {result.get('score', 'N/A')}%")
    if "results" in result:
        print("\nDetailed Results:")
        for r in result["results"]:
            status = "✅" if r.get("correct") else "❌"
            # .get() keeps the report going when an entry lacks a field
            print(f"{status} Task ID: {r.get('task_id', 'N/A')} | Your Answer: {r.get('submitted_answer', 'N/A')} | Correct: {r.get('correct_answer', 'N/A')}")
    else:
        print("No detailed results returned.")
    return result

# Submit the cleaned answers to the scoring API.
# This performs a real submission; validation_result is None when the submission fails.
validation_result = validate_answers(final_answers_cleaned, username, code_link, api_base_url)

In [None]:
# Step 11: (Optional) Save Validation Results

def save_validation_results(validation_result, filename="all-json/validation_results.json"):
    """
    Write the scoring API's response to ``filename`` as indented JSON.

    Nothing is written (and nothing is printed) when ``validation_result`` is
    ``None``, i.e. when the submission failed.
    """
    if validation_result is not None:
        # ensure_ascii=False emits raw non-ASCII characters, so the file is
        # opened as UTF-8 explicitly instead of relying on the locale encoding.
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(validation_result, f, indent=2, ensure_ascii=False)
        print(f"Validation results saved to {filename}")

# Persist the scoring response from the submission step (no file is written if the submission failed)
save_validation_results(validation_result)

In [13]:
# Step 12: Flush Langfuse Traces

# --- Langfuse flush at the end of the notebook ---
# NOTE(review): presumably this sends any still-buffered trace events before the kernel stops; confirm against the Langfuse SDK docs.
from langfuse.decorators import langfuse_context  # (if not already imported)
langfuse_context.flush()