In [1]:
from datasets import load_dataset

# Load the Researchy Questions dataset by its repository id; the captured
# output below shows a train split and a test split being generated.
ds = load_dataset("corbyrosset/researchy_questions")

  from .autonotebook import tqdm as notebook_tqdm
Generating train split: 100%|██████████| 90000/90000 [00:00<00:00, 164335.15 examples/s]
Generating test split: 100%|██████████| 6448/6448 [00:00<00:00, 154333.12 examples/s]


In [2]:
# Bare expression: renders the dataset summary (splits, feature names, row counts).
ds

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'intrinsic_scores', 'DocStream', 'gpt4_decomposition', 'decompositional_score', 'nonfactoid_score'],
        num_rows: 90000
    })
    test: Dataset({
        features: ['id', 'question', 'intrinsic_scores', 'DocStream', 'gpt4_decomposition', 'decompositional_score', 'nonfactoid_score'],
        num_rows: 6448
    })
})

In [2]:
import random
from datasets import load_dataset
from ReAct.react_agent_llama import ReAct_agent, REACT_SYSTEM_PROMPT
from Multi_Agent.mindsearch_llama import MindSearch, PLANNER_SYSTEM_PROMPT
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment before the
# agents are built (presumably API credentials for the LLM backend — confirm).
load_dotenv()

# Tunables for this comparison run, gathered in one place.
LLM_NAME = "llama 3.3 70B"
MAX_TURNS = 9
NUM_QUESTIONS = 10
SAMPLE_SEED = 35356  # fixed so a fresh kernel draws the same questions every run


def _run_and_print(agent, question):
    """Run ``agent`` on ``question`` and print its answer.

    Any exception raised by ``agent.run`` is reported on stdout instead of
    propagating, so one failing question does not abort the whole comparison.

    Args:
        agent: Object exposing ``run(question)``.
        question: The question text handed to the agent.
    """
    try:
        print(agent.run(question))
    except Exception as e:
        error_message = str(e)
        print(f"  發生錯誤: {error_message}")


ds = load_dataset("corbyrosset/researchy_questions")

# A dedicated Random instance makes the draw reproducible without disturbing
# the global random state used elsewhere in the notebook.
questions = random.Random(SAMPLE_SEED).sample(ds["train"]["question"], NUM_QUESTIONS)

for question in questions:
    # A new agent is constructed for every question (assumption: agents keep
    # per-run state that should not carry over — confirm).
    ms = MindSearch(llm=LLM_NAME,
                    system_prompt=PLANNER_SYSTEM_PROMPT, max_turns=MAX_TURNS)
    _run_and_print(ms, question)

    react = ReAct_agent(llm=LLM_NAME,
                        system_prompt=REACT_SYSTEM_PROMPT, max_turns=MAX_TURNS)
    _run_and_print(react, question)

People read books for a multitude of reasons, reflecting the diverse and complex nature of human experience. At its core, reading books serves as a powerful means of entertainment, allowing individuals to escape the confines of their reality and delve into new worlds, experiences, and perspectives. This escapism not only provides relaxation and stress relief but also inspires personal growth, educates, and helps develop empathy and emotional intelligence. The educational benefits of books are profound, contributing to improved cognitive development, enhanced vocabulary and language skills, and better communication abilities. They foster critical thinking, imagination, and creativity, while serving as a rich source of knowledge on various subjects and playing a pivotal role in developing problem-solving abilities and understanding different perspectives. Moreover, reading books is a catalyst for personal growth and development, expanding one's knowledge and understanding, enhancing cogn

In [7]:
import random

# Draw 500 distinct questions at random from the test split and list them,
# one question per output line.
questions = random.sample(ds["test"]["question"], 500)
print(*questions, sep="\n")

is criminology good for law school
how does perseverance help overcome obstacles
why was the civil war important
why are used car prices going up
are older cars more expensive to insure
was the holocaust a genocide
how did chinese isolation affect trade for china and for the world?
how does using sociological imagination help sociologists in their research?
why do we need to save our environment
what was life like on the oregon trail
what other technology so dramatically changed society faster than the internet?
is boho cultural appropriation
how computers changed all future digital communications and how the process began
how does child abuse affect the world
is project management a good recuritment niche
how did people prepare for typhoon haiyan
how the physical environment affect learning
why are hospitals overwhelmed
how does cost-benefit analysis help make economic decisions?
what is the use of technology and information systems to improvement market share
what other areas could p

In [2]:
from datasets import load_dataset
import random
import json

# Configuration: dataset id, split, question column, sample size and seed.
dataset_name = "corbyrosset/researchy_questions"
split_name = "test"  # questions are drawn from the test split
question_field = "question"
num_samples = 500
seed = 35356  # fixed random seed so the draw is repeatable

# Load the dataset.
# Failures below raise instead of calling exit(): exit() is an interactive-shell
# helper and is not a reliable way to stop a notebook cell, so the following
# lines could still run against a missing `ds`.
try:
    ds = load_dataset(dataset_name)
except Exception as e:
    raise RuntimeError(f"載入資料集 '{dataset_name}' 失敗: {e}") from e

# Make sure the requested split exists.
if split_name not in ds:
    raise ValueError(
        f"資料集中找不到 '{split_name}' 這個 split。可用的 splits 為: {list(ds.keys())}"
    )

# Full list of questions in the chosen split.
all_questions = ds[split_name][question_field]

# Seed the global generator.
random.seed(seed)

# Draw the requested number of questions; refuse if the split is too small.
if len(all_questions) < num_samples:
    raise ValueError(
        f"資料集 '{split_name}' 中的題目數量 ({len(all_questions)}) 少於需要抽取的數量 ({num_samples})。"
    )

# Sample indices (without replacement), then look the questions up by index.
sampled_indices = random.sample(range(len(all_questions)), num_samples)
sampled_questions = [all_questions[i] for i in sampled_indices]

# Write the sampled questions to a JSON file.
# A failed write is reported and the cell carries on to the demo subset.
output_file = 'test_dataset_500.json'
try:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(sampled_questions, f, ensure_ascii=False, indent=4)
    print(f"已從資料集 '{dataset_name}' 的 '{split_name}' split 中隨機抽取 {num_samples} 道題目並儲存至 {output_file}")
except Exception as e:
    print(f"儲存抽取的題目到 '{output_file}' 失敗: {e}")

# Draw 10 demo questions.
num_demo_samples = 10
random.seed(seed)  # re-seed with the same value so the demo draw restarts from the same generator state as the full draw
if len(all_questions) < num_demo_samples:
    print(f"資料集 '{split_name}' 中的題目數量 ({len(all_questions)}) 少於需要抽取的示範題目數量 ({num_demo_samples})。")
else:
    demo_indices = random.sample(range(len(all_questions)), num_demo_samples)
    demo_questions = [all_questions[i] for i in demo_indices]
    demo_output_file = 'test_dataset_10.json'
    try:
        with open(demo_output_file, 'w', encoding='utf-8') as f:
            json.dump(demo_questions, f, ensure_ascii=False, indent=4)
        print(f"已從資料集 '{dataset_name}' 的 '{split_name}' split 中隨機抽取 {num_demo_samples} 道示範題目並儲存至 {demo_output_file}")
    except Exception as e:
        print(f"儲存示範題目到 '{demo_output_file}' 失敗: {e}")

已從資料集 'corbyrosset/researchy_questions' 的 'test' split 中隨機抽取 500 道題目並儲存至 test_dataset_500.json
已從資料集 'corbyrosset/researchy_questions' 的 'test' split 中隨機抽取 10 道示範題目並儲存至 test_dataset_10.json
