In [2]:
from datasets import load_dataset

# Location of the local GAIA dataset loading script.
loader_path = "./GAIA.py"

# Configuration names passed to the loader, one per GAIA level.
levels = ["2023_level1", "2023_level2", "2023_level3"]

# Maps each level name to the first record of its "validation" split.
examples = {}

for level in levels:
    dataset = load_dataset(loader_path, name=level, split="validation")
    first_record = dataset[0]
    examples[level] = first_record

# Render each sampled record as a header line followed by one
# "field: value" line per key, emitted with a single print per level.
for level, example in examples.items():
    header = f"\n--- Example from {level} ---"
    field_lines = [f"{k}: {v}" for k, v in example.items()]
    print("\n".join([header, *field_lines]))



--- Example from 2023_level1 ---
task_id: e1fc63a2-da7a-432f-be78-7c4a95598703
Question: If Eliud Kipchoge could maintain his record-making marathon pace indefinitely, how many thousand hours would it take him to run the distance between the Earth and the Moon its closest approach? Please use the minimum perigee value on the Wikipedia page for the Moon when carrying out your calculation. Round your result to the nearest 1000 hours and do not use any comma separators if necessary.
Level: 1
Final answer: 17
file_name: 
file_path: 
Annotator Metadata: {'Steps': '1. Googled Eliud Kipchoge marathon pace to find 4min 37sec/mile\n2. Converted into fractions of hours.\n3. Found moon periapsis in miles (225,623 miles).\n4. Multiplied the two to find the number of hours and rounded to the nearest 100 hours.', 'Number of steps': '4', 'How long did this take?': '20 Minutes', 'Tools': '1. A web browser.\n2. A search engine.\n3. A calculator.', 'Number of tools': '3'}

--- Example from 2023_level2 

In [5]:
# We already have the loader_path and levels defined
# Let's update to fetch 10 examples from each level
num_examples = 10

# Maps each level name to a list of up to `num_examples` validation records.
examples_list = {}

for level in levels:
    dataset = load_dataset(loader_path, name=level, split="validation")

    # Get up to num_examples or as many as available
    max_items = min(num_examples, len(dataset))
    # Index row by row so that every entry is a single record that supports
    # .items() in the print loop below.
    examples_list[level] = [dataset[i] for i in range(max_items)]

# Print the examples.
# The loop variable is deliberately NOT called `examples`: that name is the
# {level: record} dict built by the earlier single-example cell, and rebinding
# it to a list here would silently change its type for any later cell.
for level, level_examples in examples_list.items():
    print(f"\n=== {level}: {len(level_examples)} Examples ===")
    for i, example in enumerate(level_examples, start=1):
        print(f"\n--- Example {i} ---")
        for k, v in example.items():
            print(f"{k}: {v}")



=== 2023_level1: 10 Examples ===

--- Example 1 ---
task_id: e1fc63a2-da7a-432f-be78-7c4a95598703
Question: If Eliud Kipchoge could maintain his record-making marathon pace indefinitely, how many thousand hours would it take him to run the distance between the Earth and the Moon its closest approach? Please use the minimum perigee value on the Wikipedia page for the Moon when carrying out your calculation. Round your result to the nearest 1000 hours and do not use any comma separators if necessary.
Level: 1
Final answer: 17
file_name: 
file_path: 
Annotator Metadata: {'Steps': '1. Googled Eliud Kipchoge marathon pace to find 4min 37sec/mile\n2. Converted into fractions of hours.\n3. Found moon periapsis in miles (225,623 miles).\n4. Multiplied the two to find the number of hours and rounded to the nearest 100 hours.', 'Number of steps': '4', 'How long did this take?': '20 Minutes', 'Tools': '1. A web browser.\n2. A search engine.\n3. A calculator.', 'Number of tools': '3'}

--- Exampl

In [2]:
from datasets import load_dataset

loader_path = "./GAIA.py"
levels = ["2023_level1", "2023_level2", "2023_level3"]
splits = ["train", "validation", "test"]

# Field values that are reported as "missing".
EMPTY_VALUES = (None, "", [], {})


def summarize_missing(rows):
    """Collect the field names used by ``rows`` and count missing values per field.

    A field is missing in a record when the key is absent or its value equals
    one of ``EMPTY_VALUES``. Missing counts are derived as
    ``total rows - rows where the field is filled`` so that a key which first
    appears in a later record is still counted as missing for every earlier
    record that lacked it.

    Args:
        rows: Iterable of dict-like records.

    Returns:
        A ``(all_keys, missing_counts)`` tuple: the set of every key seen, and
        a dict mapping each key with at least one missing value to its count
        (keys inserted in sorted order).
    """
    all_keys = set()
    filled_counts = {}
    num_rows = 0

    for item in rows:
        num_rows += 1
        all_keys.update(item.keys())
        for key, value in item.items():
            if value not in EMPTY_VALUES:
                filled_counts[key] = filled_counts.get(key, 0) + 1

    missing_counts = {}
    for key in sorted(all_keys):
        missing = num_rows - filled_counts.get(key, 0)
        if missing:
            missing_counts[key] = missing
    return all_keys, missing_counts


for level in levels:
    print(f"\n### {level} ###")
    for split in splits:
        # Guard only the load: a split that cannot be loaded is reported and
        # skipped, while a bug in the analysis below surfaces as a real
        # traceback instead of being mislabelled as a loading error.
        try:
            dataset = load_dataset(loader_path, name=level, split=split)
        except Exception as e:
            print(f"Error loading {level} {split}: {e}")
            continue

        print(f"\nSplit: {split}")
        print(f"Total Questions: {len(dataset)}")

        # Collect keys and check for missing values
        all_keys, missing_counts = summarize_missing(dataset)

        print("Unique Keys:", sorted(all_keys))
        if missing_counts:
            print("Missing values found in:")
            for key, count in missing_counts.items():
                print(f"  - {key}: {count}")
        else:
            print("No missing values.")



### 2023_level1 ###
Error loading 2023_level1 train: Unknown split "train". Should be one of ['test', 'validation'].

Split: validation
Total Questions: 53
Unique Keys: ['Annotator Metadata', 'Final answer', 'Level', 'Question', 'file_name', 'file_path', 'task_id']
Missing values found in:
  - file_name: 42
  - file_path: 42

Split: test
Total Questions: 93
Unique Keys: ['Annotator Metadata', 'Final answer', 'Level', 'Question', 'file_name', 'file_path', 'task_id']
Missing values found in:
  - file_name: 68
  - file_path: 68

### 2023_level2 ###
Error loading 2023_level2 train: Unknown split "train". Should be one of ['test', 'validation'].

Split: validation
Total Questions: 86
Unique Keys: ['Annotator Metadata', 'Final answer', 'Level', 'Question', 'file_name', 'file_path', 'task_id']
Missing values found in:
  - file_name: 66
  - file_path: 66

Split: test
Total Questions: 159
Unique Keys: ['Annotator Metadata', 'Final answer', 'Level', 'Question', 'file_name', 'file_path', 'task_i

In [3]:
from datasets import load_dataset

loader_path = "./GAIA.py"
levels = ["2023_level1", "2023_level2", "2023_level3"]
splits = ["train", "validation", "test"]

# Running tally of records over every (level, split) pair that loads.
total = 0

# Visit the pairs in the same order as a level-outer / split-inner nested loop.
level_split_pairs = [(lv, sp) for lv in levels for sp in splits]

for level, split in level_split_pairs:
    try:
        dataset = load_dataset(loader_path, name=level, split=split)
        count = len(dataset)
        print(f"{level} | {split} -> {count}")
        total = total + count
    except Exception as e:
        # A pair that fails is reported and contributes nothing to the tally.
        print(f"Error loading {level} {split}: {e}")

print(f"\n🔥 Total number of questions across all levels and splits: {total}")


Error loading 2023_level1 train: Unknown split "train". Should be one of ['test', 'validation'].
2023_level1 | validation -> 53
2023_level1 | test -> 93
Error loading 2023_level2 train: Unknown split "train". Should be one of ['test', 'validation'].
2023_level2 | validation -> 86
2023_level2 | test -> 159
Error loading 2023_level3 train: Unknown split "train". Should be one of ['test', 'validation'].
2023_level3 | validation -> 26
2023_level3 | test -> 49

🔥 Total number of questions across all levels and splits: 466


In [None]:
import requests

def search_wikipedia(query, timeout=10):
    """Return the summary extract of the top English-Wikipedia hit for ``query``.

    Runs a search through the MediaWiki action API, takes the first hit, and
    fetches that page's summary from the REST ``page/summary`` endpoint.

    Args:
        query: Free-text search string.
        timeout: Seconds allowed for each HTTP request before ``requests``
            raises; without it a stalled connection would block forever.

    Returns:
        The ``extract`` field of the top hit's summary;
        ``"No results found."`` when the query is blank or the search yields
        no hits; ``"No summary available."`` when the summary request returns
        an error status or its JSON has no ``extract`` field.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status from the search request.
    """
    from urllib.parse import quote

    # A blank query cannot match anything; skip the network round-trip.
    if not query or not query.strip():
        return "No results found."

    # NOTE(review): Wikimedia asks API clients to send a descriptive
    # User-Agent header; consider passing one via `headers=` once a contact
    # string has been decided.

    # Step 1: Search for relevant pages
    search_url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "list": "search",
        "srsearch": query,
        "srlimit": 1,  # only the first hit is used
        "format": "json",
    }
    response = requests.get(search_url, params=params, timeout=timeout)
    response.raise_for_status()

    search_results = response.json().get("query", {}).get("search", [])
    if not search_results:
        return "No results found."

    # Step 2: Get the title of the first result
    top_title = search_results[0]["title"]

    # Step 3: Fetch the summary of the top result.
    # Percent-encode the title (safe="" also encodes "/") so that titles
    # containing "/", "?", "#" or "%" stay a single path segment.
    encoded_title = quote(top_title.replace(" ", "_"), safe="")
    summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}"
    summary_response = requests.get(summary_url, timeout=timeout)
    if not summary_response.ok:
        # Keep the best-effort contract: a failed summary lookup is not fatal.
        return "No summary available."
    return summary_response.json().get("extract", "No summary available.")

def agent(user_input: str):
    """Answer ``user_input`` by passing it straight to ``search_wikipedia``.

    Args:
        user_input: The text to look up.

    Returns:
        Whatever ``search_wikipedia`` returns for ``user_input``.
    """
    return search_wikipedia(user_input)

# Example usage: prompt repeatedly and print the agent's answer.
# The loop ends on a blank line, on "quit"/"exit", or when input is closed or
# the kernel is interrupted, so the cell can finish instead of blocking forever.
while True:
    try:
        user_input = input("Ask: ")
    except (EOFError, KeyboardInterrupt):
        break
    if user_input.strip().lower() in {"", "quit", "exit"}:
        break
    print(agent(user_input))


Nandan may refer to:
No results found.
No results found.
Wikipedia is a free online encyclopedia, written and maintained by a community of volunteers, known as Wikipedians, through open collaboration and the wiki software MediaWiki. Founded by Jimmy Wales and Larry Sanger on January 15, 2001, Wikipedia has been hosted since 2003 by the Wikimedia Foundation, an American nonprofit organization funded mainly by donations from readers. Wikipedia is the largest and most-read reference work in history.
