# 分析task_id 397的内容

In [32]:
from datasets import load_dataset
import duckdb
from datasets import load_dataset
import re
import os
import json
from datetime import datetime
import pandas as pd

con = duckdb.connect(':memory:')
dataset = load_dataset("WebShepherd/WebPRMCollection_preference_pair")
df = dataset['test'].to_pandas()
con.register('webprm', df)

<duckdb.duckdb.DuckDBPyConnection at 0x72b5d01000f0>

任务397的44个步骤都有对应的action记录
可以安全地分析每个步骤的决策过程
适合进行go_back操作的合法性分析
数据结构支持深入的任务流程研究

In [33]:
task_exists = con.execute("""
    SELECT COUNT(*) as count 
    FROM webprm 
    WHERE task_id = '397'
    """).fetchone()[0]

print(task_exists)


action_history = con.execute("""
    SELECT action_history
    FROM webprm
    WHERE task_id = '397'
    """).fetchone()[0]

print(action_history)


assert task_exists == len(action_history)

44
["click('391')", "fill('413', 'Fortnite')", "press('413', 'Enter')", "click('412')", "click('491')", "click('500')", 'scroll(0, 700)', 'scroll(0, 300)', "select_option('840', 'PC')", "select_option('853', 'Lowest Score')", "click('860')", 'scroll(0, -300)', "click('1489')", "click('418')", 'scroll(0, 500)', "send_msg_to_user('The first user, JtCole, has an average rating of 1.0/10 on GameSpot, with only one review submitted. The review, posted on January 16, 2020, is for Fortnite (PC) and gives the game a 1-star rating. In the review, JtCole harshly criticizes the game, calling it the “crappiest game on the planet.” They complain about the FPS performance, claim the game is “fake and set up,” and suggest that intentional mechanics frustrate players. The review also mentions software issues requiring redownloading. 1 out of 1 user found this review helpful.')", 'go_back()', 'go_back()', "click('558')", "click('425')", 'scroll(0, 500)', 'scroll(0, 500)', 'send_msg_to_user(\'Second use

分析chosen决策

In [34]:
chosen_sample = con.execute("""
    SELECT chosen, typeof(chosen) as chosen_type
    FROM webprm 
    """).fetchone()

chosen = chosen_sample[0]

print("Thought: ", chosen['thought'])
print("Action: ", chosen['action'])

Thought:  The user is currently viewing the Chicago Bulls tickets page, which lists upcoming games. To find the game scheduled for the next weekend, the user needs to filter the events by date. The 'All Dates' dropdown is visible and seems to offer filtering options, including 'This Weekend.' Selecting this option will refine the list to show only the relevant games for the next weekend, helping the user identify the opponent team easily. The dropdown is clearly marked and is the logical next step in achieving the user's goal.
Action:  click('4383')


In [35]:

# 核心分析：任务397的Thoughts和Action关系分析
print(f"\n3. 任务397的Thoughts和Action关系分析:")
    
    
# 基础信息
task_info = con.execute("""
    SELECT COUNT(DISTINCT step_id) as step_count, 
           ANY_VALUE(intent) as intent, 
           ANY_VALUE(website_name) as website_name,
           ANY_VALUE(current_url) as current_url
    FROM webprm 
    WHERE task_id = '397'
    GROUP BY task_id
    LIMIT 5
""").fetchall()
    
if task_info:
    step_count, intent, website, current_url = task_info[0]
    print(f"   任务397: {website} - {step_count} 个步骤")
    print(f"   目标: {intent}")
    print(f"   当前URL: {current_url}")
    
    # 获取每个step的详细信息
    step_details = con.execute("""
        SELECT 
            step_id,
            intent,
            website_name,
            current_url,
            thought_history,
            action_history,
            chosen,
            rejected
        FROM webprm 
        WHERE task_id = '397'
        ORDER BY CAST(step_id AS INTEGER)
    """).fetchall()


3. 任务397的Thoughts和Action关系分析:
   任务397: gamespot - 44 个步骤
   目标: Summarize the review history of the top 5 Fortnite PC players with the lowest scores, including the number and types of reviews they have left. Use Gamespot website.
   当前URL: https://www.gamespot.com/


In [36]:
# 提取公共数据
task_397_actions = con.execute("""
    SELECT action_history
    FROM webprm
    WHERE task_id = '397'
    """).fetchone()[0]

task_397_thoughts = con.execute("""
    SELECT thought_history
    FROM webprm
    WHERE task_id = '397'
    """).fetchone()[0]

print(f"任务397包含 {len(task_397_actions)} 个actions和 {len(task_397_thoughts)} 个thoughts")

for step_idx in range(len(task_397_actions)):
    step_num = step_idx + 1
    action = task_397_actions[step_idx]
    thought = task_397_thoughts[step_idx]
    
    print(f"\n步骤 {step_num}:")
    print(f"  Action: {action}")
    print(f"  Thought: {thought}")

任务397包含 44 个actions和 44 个thoughts

步骤 1:
  Action: click('391')
  Thought: The current webpage displays various game-related articles and navigation links, but does not directly address the goal of reviewing Fortnite PC players. To progress towards the user's goal, interaction with the site search functionality is necessary to locate relevant Fortnite reviews and player information. The search bar at the top of the webpage is a logical target for this action, as it allows for inputting specific queries related to Fortnite PC players and their reviews. Activating the search bar will enable the user to input search terms and retrieve the required information efficiently.

步骤 2:
  Action: fill('413', 'Fortnite')
  Thought: The previous action successfully opened the search bar, and the current webpage shows the search input field ready for interaction. To achieve the user's goal of summarizing reviews for the top 5 Fortnite PC players with the lowest scores, the next step involves enterin

检查chosen和rejected的关系

In [37]:
# 查看任务397的前10条记录
first_10_records = con.execute("""
    SELECT step_id, website_name, intent, 
           chosen, rejected, current_url, text_observation
    FROM webprm 
    WHERE task_id = '397'
    ORDER BY CAST(step_id AS INTEGER)
    LIMIT 10
""").fetchall()

print("📋 任务397前10条记录分析:")
print("=" * 80)

for i, record in enumerate(first_10_records):
    step_id, website, intent, chosen, rejected, current_url, text_obs = record
    
    print(f"\n🔍 步骤 {step_id}:")
    print(f"  网站: {website}")
    print(f"  意图: {intent[:80]}...")
    print(f"  URL: {current_url}")
    
    # 分析chosen
    print(f"\n  ✅ Chosen决策:")
    if chosen:
        chosen_type = type(chosen).__name__
        print(f"    类型: {chosen_type}")
        if isinstance(chosen, dict):
            for key, value in chosen.items():
                if isinstance(value, str) and len(value) > 100:
                    print(f"    {key}: {value[:100]}...")
                else:
                    print(f"    {key}: {value}")
        else:
            chosen_str = str(chosen)
            if len(chosen_str) > 100:
                print(f"    内容: {chosen_str[:100]}...")
            else:
                print(f"    内容: {chosen_str}")
    else:
        print("    无chosen数据")
    
    # 分析rejected
    print(f"\n  ❌ Rejected决策:")
    if rejected and len(rejected) > 0:
        print(f"    数量: {len(rejected)} 个")
        for j, reject in enumerate(rejected):
            print(f"    Rejected {j+1}:")
            if isinstance(reject, dict):
                for key, value in reject.items():
                    if isinstance(value, str) and len(value) > 80:
                        print(f"      {key}: {value[:80]}...")
                    else:
                        print(f"      {key}: {value}")
            else:
                reject_str = str(reject)
                if len(reject_str) > 80:
                    print(f"      内容: {reject_str[:80]}...")
                else:
                    print(f"      内容: {reject_str}")
    else:
        print("    无rejected数据")
    
    print("-" * 80)

📋 任务397前10条记录分析:

🔍 步骤 0:
  网站: gamespot
  意图: Summarize the review history of the top 5 Fortnite PC players with the lowest sc...
  URL: https://www.gamespot.com/

  ✅ Chosen决策:
    类型: dict
    thought: The current webpage displays various game-related articles and navigation links, but does not direct...
    action: click('391')

  ❌ Rejected决策:
    数量: 2 个
    Rejected 1:
      thought: The current webpage is filled with various articles and links related to gaming ...
      action: click('121')
    Rejected 2:
      thought: The current webpage is the main page of GameSpot and does not provide direct acc...
      action: click('186')
--------------------------------------------------------------------------------

🔍 步骤 1:
  网站: gamespot
  意图: Summarize the review history of the top 5 Fortnite PC players with the lowest sc...
  URL: https://www.gamespot.com/

  ✅ Chosen决策:
    类型: dict
    thought: The previous action successfully opened the search bar, and the current webpage show

In [38]:
# 打印任务397的checklist内容（只打印一次，因为每步都一样）
checklist_content = con.execute("""
    SELECT checklist, checklist_target_list
    FROM webprm 
    WHERE task_id = '397'
    LIMIT 1
""").fetchone()

if checklist_content:
    checklist, checklist_target = checklist_content
    
    print("📋 任务397的Checklist内容:")
    print("=" * 80)
    print(f"📝 Checklist:")
    print(f"{checklist}")
    print(f"\n🎯 Checklist Target List:")
    print(f"{checklist_target}")
    print("=" * 80)
    
    print(f"\n📊 Checklist Target List 随步骤变化:")
    print("-" * 60)
    
    # 查看checklist_target_list的变化
    target_changes = con.execute("""
        SELECT step_id, checklist_target_list
        FROM webprm 
        WHERE task_id = '397'
        ORDER BY CAST(step_id AS INTEGER)
    """).fetchall()
    
    for step_id, target_list in target_changes:
        print(f"步骤 {step_id}: {target_list}")

📋 任务397的Checklist内容:
📝 Checklist:
Checklist 1: Search for Fortnite Reviews
- Goal: Access the Fortnite page on GameSpot to locate relevant reviews.

Checklist 2: Filter and Sort Reviews
- Goal: Filter reviews by the PC platform and sort them by the lowest score.

Checklist 3: Access and Analyze User Profiles
- Goal: Access the profiles of the top 5 players with the lowest scores to analyze their review history.

Checklist 4: Summarize Review Histories
- Goal: Compile and summarize the review history of each of the top 5 players, focusing on their opinions on Fortnite.

🎯 Checklist Target List:
['In Progress', 'No', 'No', 'No']

📊 Checklist Target List 随步骤变化:
------------------------------------------------------------
步骤 0: ['In Progress', 'No', 'No', 'No']
步骤 1: ['In Progress', 'No', 'No', 'No']
步骤 2: ['In Progress', 'No', 'No', 'No']
步骤 3: ['Yes', 'No', 'No', 'No']
步骤 4: ['Yes', 'In Progress', 'No', 'No']
步骤 5: ['Yes', 'In Progress', 'No', 'No']
步骤 6: ['Yes', 'In Progress', 'No', 'No

In [None]:
con.close()