In [1]:
from datasets import load_from_disk

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the dataset from the directory it was previously saved to
ds = load_from_disk("./biggen_bench_dataset")

# Print a summary of the loaded dataset
print(ds)


DatasetDict({
    test: Dataset({
        features: ['id', 'capability', 'task', 'instance_idx', 'system_prompt', 'input', 'reference_answer', 'score_rubric'],
        num_rows: 765
    })
})


In [3]:
# Access the test split
test_data = ds["test"]
print(test_data[0])  # Show the first sample

{'id': 'planning_travel_plan_0', 'capability': 'planning', 'task': 'travel_plan', 'instance_idx': 0, 'system_prompt': 'You are a travel agent that can design travel plans.', 'input': 'Design a travel plan for a tourist traveling to the given destination. The tourist has a list of requirements and you should design your plan such that it satisfies all of these requirements.\n\nDestination: Paris\n\nRequirements:\n- Total Duration: 2 days and 1 night\n- Transportation: Walk\n- Must Have: Eiffel Tower, Louvre Museum, Escargot\n- Optional: Croissant, Onion Soup, Notre Dame Cathedral', 'reference_answer': 'Day 1 - Morning:\n- Visit the Louvre Museum (3 hours)\n- Walk to Café de Flore (15 minutes)\nDay 1 - Lunch:\n- Café de Flore - Enjoy croissants and French cuisine (1 hour)\nDay 1 - Afternoon:\n- Walk to Notre Dame Cathedral (20 minutes)\n- Explore Notre Dame (1.5 hours)\n- Walk to Eiffel Tower (1 hour)\nDay 1 - Evening:\n- Visit Eiffel Tower (2 hours)\n- Walk to Le Petit Cler (15 minutes)

In [4]:
# Convert the test split to a pandas DataFrame
test_df = ds["test"].to_pandas()

In [5]:
# Preview the first rows of the DataFrame
print(test_df.head())

                       id capability         task  instance_idx  \
0  planning_travel_plan_0   planning  travel_plan             0   
1  planning_travel_plan_1   planning  travel_plan             1   
2  planning_travel_plan_2   planning  travel_plan             2   
3  planning_travel_plan_3   planning  travel_plan             3   
4  planning_travel_plan_4   planning  travel_plan             4   

                                       system_prompt  \
0  You are a travel agent that can design travel ...   
1  You are a travel agent that can design travel ...   
2  You are a travel agent that can design travel ...   
3  You are a travel agent that can design travel ...   
4  You are a travel agent that can design travel ...   

                                               input  \
0  Design a travel plan for a tourist traveling t...   
1  Design a travel plan for a tourist traveling t...   
2  Design a travel plan for a tourist traveling t...   
3  Craft a detailed travel itinerary

In [7]:
# List the column names available in the test split.
field_names = ds["test"].column_names
print("사용 가능한 필드: " + str(field_names))

사용 가능한 필드: ['id', 'capability', 'task', 'instance_idx', 'system_prompt', 'input', 'reference_answer', 'score_rubric']


In [8]:
# Distinct values found in the "capability" column
unique_capabilities = test_df['capability'].unique()
print(unique_capabilities)

['planning' 'theory_of_mind' 'instruction_following' 'multilingual'
 'reasoning' 'tool_usage' 'grounding' 'refinement' 'safety']


In [None]:
# Export the test split to CSV.
import os

# File name fixed from "biggen_bench_datase.csv" (missing "t") so it matches
# the naming of the JSON export written next to it.
csv_path = "./biggen_bench_dataset/processed/biggen_bench_dataset.csv"

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output directory exists before writing.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
ds["test"].to_pandas().to_csv(csv_path, encoding="utf-8", index=False)


In [9]:
import json
import os

# Export the test split to JSON as a list of per-row records.
# The records are built BEFORE the file is opened: opening with "w" truncates
# the target immediately, so a failure during the conversion would otherwise
# leave an empty file behind. json_data is also reused by the filtering cells.
json_data = ds["test"].to_pandas().to_dict(orient="records")

json_path = "./biggen_bench_dataset/processed/biggen_bench_dataset.json"
# open() does not create missing parent directories.
os.makedirs(os.path.dirname(json_path), exist_ok=True)
with open(json_path, "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    json.dump(json_data, f, ensure_ascii=False, indent=2)

In [None]:
json_data[:5]  # Show the first 5 samples

In [14]:
# "capability"가 "refinement"인 항목만 필터링
refinement_data = [item for item in json_data if item["capability"] == "refinement"]

# 필터링된 데이터를 새로운 JSON 파일로 저장
with open("./biggen_bench_dataset/processed/biggen_bench_refinement.json", "w", encoding="utf-8") as f:
    json.dump(refinement_data, f, ensure_ascii=False, indent=2)

In [None]:
# "capability"가 "safety"인 항목만 필터링
safety_data = [item for item in json_data if item["capability"] == "safety"]

# 필터링된 데이터를 새로운 JSON 파일로 저장
with open("./biggen_bench_dataset/processed/biggen_bench_safety.json", "w", encoding="utf-8") as f:
    json.dump(safety_data, f, ensure_ascii=False, indent=2)

In [10]:
# "capability"가 "theory_of_mind"인 항목만 필터링
ToM_data = [item for item in json_data if item["capability"] == "theory_of_mind"]

# 필터링된 데이터를 새로운 JSON 파일로 저장
with open("./biggen_bench_dataset/processed/biggen_bench_ToM.json", "w", encoding="utf-8") as f:
    json.dump(ToM_data, f, ensure_ascii=False, indent=2)

In [11]:
# "capability"가 "instruction_following"인 항목만 필터링
instruction_data = [item for item in json_data if item["capability"] == "instruction_following"]

# # 필터링된 데이터를 새로운 JSON 파일로 저장
# with open("./biggen_bench_dataset/processed/biggen_bench_instruction.json", "w", encoding="utf-8") as f:
#     json.dump(instruction_data, f, ensure_ascii=False, indent=2)

In [12]:
# "instance_idx"가 0인 instruction_following 항목만 필터링
instruction_idx0_data = [item for item in instruction_data if item["instance_idx"] == 0]

# 필터링된 데이터를 새로운 JSON 파일로 저장
with open("./biggen_bench_dataset/processed/biggen_bench_instruction_idx0.json", "w", encoding="utf-8") as f:
    json.dump(instruction_idx0_data, f, ensure_ascii=False, indent=2)


In [None]:
# Extract "system_prompt" values from the list of dictionaries
system_prompts = [item["system_prompt"] for item in ToM_data if "system_prompt" in item]
# Get unique values
unique_system_ToM = list(set(system_prompts))
num_unique_system_ToM = len(unique_system_ToM)
print(unique_system_ToM)
print(f"Number of unique values in unique_system_ToM: {num_unique_system_ToM}")

# 