In [116]:
import json
import re
from typing import Dict, List, Optional, Any

def parse_sense_title(sense_title: str) -> Dict[str, Optional[str]]:
    """
    Split a sense title into the text before the first parenthesis and the
    text inside that parenthesis.

    Args:
        sense_title: For example "well (HEALTHY)" or "well".

    Returns:
        dict: {"main_title": "well", "parenthetical": "HEALTHY"}.
        "parenthetical" is None when the title has no non-empty
        parenthesised part. A title that starts with "(" is returned
        whole (stripped) as "main_title".
    """
    found = re.match(r'^([^(]+)(?:\s*\(([^)]+)\))?', sense_title)

    # The pattern needs at least one character before any "(", so titles
    # that are empty or start with "(" do not match at all.
    if found is None:
        return {"main_title": sense_title.strip(), "parenthetical": None}

    head, inner = found.groups()
    parenthetical = inner.strip() if inner else None
    return {"main_title": head.strip(), "parenthetical": parenthetical}

def convert_word_data_basic(word_item: Dict[str, Any]) -> Dict[str, Any]:
    """
    Flatten one word-list entry by locating its sense among its definitions.

    Only definition groups whose ``pos_header["position"]`` equals the
    entry's ``part_of_speech`` are searched. Inside such a group a sense is
    a candidate when the part of its ``sense_title`` before the parenthesis
    equals the entry's word (case-insensitively) and its ``level`` equals
    the entry's level.

    Args:
        word_item: Raw entry. The keys read are ``word``, ``guideword``,
            ``level``, ``part_of_speech``, ``topic`` and ``definitions``.
            Missing keys and JSON ``null`` values are tolerated.

    Returns:
        dict: Always holds ``word``, ``guideword``, ``level``,
        ``part_of_speech``, ``topic``, ``definition``, ``candidate_match``
        and ``match_type``. ``match_type`` is one of:

        - ``"exact"``: a candidate's parenthesised guideword equals the
          entry's guideword (case-insensitively); its fields are copied
          into the result, ``guideword_match`` is 1 and
          ``candidate_match`` is emptied.
        - ``"partial"``: exactly one candidate and no exact match; its
          fields are copied into the result and ``guideword_match`` is 0.
        - ``"none"``: several candidates and no exact match; they are only
          listed in ``candidate_match``.
        - ``"error"``: no candidate at all.
    """
    def _clean(value: Any) -> str:
        # JSON null arrives as None; treat anything that is not text as "".
        return value.strip() if isinstance(value, str) else ""

    def _lower(value: Any) -> str:
        return value.lower() if isinstance(value, str) else ""

    def _joined(examples: Any) -> str:
        return " | ".join(examples or [])

    # Top-level fields are carried over unchanged (guideword is stripped).
    word_title = word_item.get("word")
    word_pos = word_item.get("part_of_speech")
    word_level = word_item.get("level")
    word_guideword = _clean(word_item.get("guideword"))
    result = {
        "word": word_title,
        "guideword": word_guideword,
        "level": word_level,
        "part_of_speech": word_pos,
        "topic": word_item.get("topic"),
        "definition": None
    }

    # A missing word can never equal a sense title.
    wanted_title = word_title.lower() if isinstance(word_title, str) else None

    candidate_match = []
    for def_group in word_item.get("definitions") or []:
        pos_header = def_group.get("pos_header") or {}

        # Skip groups that belong to a different part of speech.
        if word_pos != pos_header.get("position"):
            continue

        headword = _clean(pos_header.get("headword"))
        headword_pos = _clean(pos_header.get("position")) if pos_header else None
        headword_phonetic = _clean(pos_header.get("written")) if pos_header else None

        # NOTE(review): second position check kept from the original code.
        # It compares against the *stripped* header value, so a position
        # padded with whitespace passes the filter above but still never
        # matches. It does not depend on the sense, hence it runs once per
        # group instead of once per sense.
        if _lower(word_pos) != _lower(headword_pos):
            continue

        for pos_item in def_group.get("pos_items") or []:
            parsed_title = parse_sense_title(pos_item.get("sense_title") or "")

            if parsed_title["main_title"].lower() != wanted_title:
                continue
            if pos_item.get("level") != word_level:
                continue

            sense = {
                "headword": headword,
                "headword_pos": headword_pos,
                "headword_phonetic": headword_phonetic,
                "definition": pos_item.get("definition"),
                "dictionary_examples": _joined(pos_item.get("dictionary_examples")),
                "learner_examples": _joined(pos_item.get("learner_examples"))
            }

            parenthetical = parsed_title["parenthetical"]
            if (parenthetical and word_guideword
                    and parenthetical.upper() == word_guideword.upper()):
                # Guideword agrees as well: this is the sense, stop searching.
                result["guideword_match"] = 1
                result.update(sense)
                result["match_type"] = "exact"
                result["candidate_match"] = []
                return result

            result["guideword_match"] = 0
            candidate_match.append(sense)

    result["candidate_match"] = candidate_match
    if not candidate_match:
        result["match_type"] = "error"
    elif len(candidate_match) == 1:
        # A single candidate is unambiguous enough to promote.
        result.update(candidate_match[0])
        result["match_type"] = "partial"
    else:
        # Several candidates and nothing to choose between them.
        result["match_type"] = "none"

    return result

def convert_word_data_phrase(word_item: Dict[str, Any]) -> Dict[str, Any]:
    """
    Flatten one phrase entry by locating its sense among its definitions.

    Unlike the part-of-speech-aware converter, every definition group is
    searched regardless of its ``pos_header["position"]``. A sense is a
    candidate when the part of its ``sense_title`` before the parenthesis
    equals the entry's (stripped) word case-insensitively and its ``level``
    equals the entry's level.

    Args:
        word_item: Raw entry. The keys read are ``word``, ``guideword``,
            ``level``, ``part_of_speech``, ``topic`` and ``definitions``.
            Missing keys and JSON ``null`` values are tolerated.

    Returns:
        dict: Always holds ``word``, ``guideword``, ``level``,
        ``part_of_speech``, ``topic``, ``definition``, ``candidate_match``
        and ``match_type``. ``match_type`` is one of:

        - ``"exact"``: a candidate's parenthesised guideword equals the
          entry's guideword (case-insensitively); its fields are copied
          into the result, ``guideword_match`` is 1 and
          ``candidate_match`` is emptied.
        - ``"partial"``: exactly one candidate and no exact match; its
          fields are copied into the result.
        - ``"none"``: several candidates and no exact match; they are only
          listed in ``candidate_match``.
        - ``"error"``: no candidate at all.
    """
    def _clean(value: Any) -> str:
        # JSON null arrives as None; treat anything that is not text as "".
        return value.strip() if isinstance(value, str) else ""

    def _joined(examples: Any) -> str:
        return " | ".join(examples or [])

    # Top-level fields are carried over (word and guideword are stripped).
    raw_word = word_item.get("word")
    word_title = raw_word.strip() if isinstance(raw_word, str) else raw_word
    word_pos = word_item.get("part_of_speech")
    word_level = word_item.get("level")
    word_guideword = _clean(word_item.get("guideword"))
    result = {
        "word": word_title,
        "guideword": word_guideword,
        "level": word_level,
        "part_of_speech": word_pos,
        "topic": word_item.get("topic"),
        "definition": None
    }

    # A missing word can never equal a sense title.
    wanted_title = word_title.lower() if isinstance(word_title, str) else None

    candidate_match = []
    for def_group in word_item.get("definitions") or []:
        pos_header = def_group.get("pos_header") or {}

        # The header position is recorded but deliberately not used as a
        # filter here.
        headword = _clean(pos_header.get("headword"))
        headword_pos = pos_header.get("position", "") if pos_header else None
        headword_phonetic = _clean(pos_header.get("written")) if pos_header else None

        for pos_item in def_group.get("pos_items") or []:
            parsed_title = parse_sense_title(pos_item.get("sense_title") or "")

            if parsed_title["main_title"].lower() != wanted_title:
                continue
            if pos_item.get("level") != word_level:
                continue

            sense = {
                "headword": headword,
                "headword_pos": headword_pos,
                "headword_phonetic": headword_phonetic,
                "definition": pos_item.get("definition"),
                "dictionary_examples": _joined(pos_item.get("dictionary_examples")),
                "learner_examples": _joined(pos_item.get("learner_examples"))
            }

            parenthetical = parsed_title["parenthetical"]
            if (parenthetical and word_guideword
                    and parenthetical.upper() == word_guideword.upper()):
                # Guideword agrees as well: this is the sense, stop searching.
                result["guideword_match"] = 1
                result.update(sense)
                result["match_type"] = "exact"
                result["candidate_match"] = []
                return result

            candidate_match.append(sense)

    result["candidate_match"] = candidate_match
    if not candidate_match:
        result["match_type"] = "error"
    elif len(candidate_match) == 1:
        # A single candidate is unambiguous enough to promote.
        result.update(candidate_match[0])
        result["match_type"] = "partial"
    else:
        # Several candidates and nothing to choose between them.
        result["match_type"] = "none"

    return result

def convert_phrase_list(phrase_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Convert every phrase entry in a list.

    Args:
        phrase_list: Raw phrase entries.

    Returns:
        list: One converted entry per input entry, in the same order.
    """
    converted = []
    for phrase_item in phrase_list:
        converted.append(convert_word_data_phrase(phrase_item))
    return converted

def convert_word_list(word_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Convert every word entry in a list.

    Args:
        word_list: Raw word entries.

    Returns:
        list: One converted entry per input entry, in the same order.
    """
    converted = []
    for word_item in word_list:
        converted.append(convert_word_data_basic(word_item))
    return converted

def load_and_convert_json(input_file: str, output_file: str = None) -> List[Dict[str, Any]]:
    """
    Read entries from a JSON file, convert them and optionally save the result.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path to write the converted entries to (optional).

    Returns:
        list: The converted entries, or an empty list when anything fails;
        the failure is reported on stdout rather than raised.
    """
    try:
        with open(input_file, 'r', encoding='utf-8') as source:
            payload = json.load(source)

        # A file holding one entry is treated as a one-element list.
        entries = [payload] if isinstance(payload, dict) else payload
        converted_data = convert_word_list(entries)

        if not output_file:
            return converted_data

        with open(output_file, 'w', encoding='utf-8') as target:
            json.dump(converted_data, target, ensure_ascii=False, indent=2)
        print(f"转换完成，结果已保存到 {output_file}")
        return converted_data

    except FileNotFoundError:
        print(f"文件 {input_file} 不存在")
    except json.JSONDecodeError as e:
        print(f"JSON格式错误: {e}")
    except Exception as e:
        print(f"处理过程中发生错误: {e}")
    return []

# 测试用例
def test_conversion():
    """Convert a hand-built "well" entry and print it before and after."""
    audio_url = "http://www.englishprofile.org/evp/audio/UKWELDE004.mp3"

    # Group with a different part of speech than the entry.
    adverb_group = {
        "pos_header": {
            "headword": "well",
            "position": "adverb",
            "audio": audio_url,
            "written": "/wel/"
        },
        "pos_items": [
            {
                "sense_title": "well (IN A GOOD WAY)",
                "level": "A1",
                "definition": "in a successful or satisfactory way",
                "dictionary_examples": [
                    "I thought the team played well.",
                    "He's doing very well at school."
                ]
            }
        ]
    }

    # Group whose part of speech, level and guideword all agree with the entry.
    adjective_group = {
        "pos_header": {
            "headword": "well",
            "position": "adjective",
            "audio": audio_url,
            "written": "/wel/"
        },
        "pos_items": [
            {
                "sense_title": "well (HEALTHY)",
                "level": "A1",
                "definition": "healthy; not ill",
                "dictionary_examples": [
                    "You look well! - Thanks, I feel well.",
                    "He hasn't been too well lately."
                ]
            }
        ]
    }

    test_data = {
        "word": "well",
        "guideword": "HEALTHY",
        "level": "A1",
        "part_of_speech": "adjective",
        "topic": "body and health",
        "details": "/british-english/words/detail/6606",
        "definitions": [adverb_group, adjective_group]
    }

    print("原始数据:")
    print(json.dumps(test_data, indent=2, ensure_ascii=False))

    converted = convert_word_data_basic(test_data)
    print("\n转换后数据:")
    print(json.dumps(converted, indent=2, ensure_ascii=False))

# if __name__ == "__main__":
#     # 运行测试
#     test_conversion()
    
    # 使用示例：
    # 1. 转换单个JSON文件
    # converted_data = load_and_convert_json("input.json", "output.json")
    
    # 2. 只转换不保存文件
    # converted_data = load_and_convert_json("input.json")
    
    # 3. 直接使用转换函数
    # with open("your_data.json", "r", encoding="utf-8") as f:
    #     data = json.load(f)
    # converted = convert_word_list(data)

In [127]:
import os

# Load one JSON export from the pos_data directory and print a short summary.
# NOTE(review): the file name suggests entries whose part_of_speech is empty
# (the captured sample entry has "part_of_speech": "") — confirm.

# Path of the file to inspect
file_path = os.path.join('pos_data', 'empty.json')

# Only the load itself is guarded, so a problem while summarising the data is
# not misreported as a file-reading error.
try:
  with open(file_path, 'r', encoding='utf-8') as f:
    loaded_data = json.load(f)
except FileNotFoundError:
  print(f"Error: File '{file_path}' not found. Please check the path.")
except json.JSONDecodeError as e:
  print(f"Error: Invalid JSON format in '{file_path}'. Details: {e}")
except Exception as e:
  print(f"Error while reading the file: {e}")
else:
  # Basic information about what was loaded
  if isinstance(loaded_data, list):
    print(f"Loaded {len(loaded_data)}  entries")
    # An empty list has no first entry to preview.
    if loaded_data:
      print(f"Sample entry: {json.dumps(loaded_data[0], indent=2, ensure_ascii=False)[:500]}...")
  elif isinstance(loaded_data, dict):
    print(f"Loaded  dictionary with {len(loaded_data.keys())} keys")
    print(f"Keys: {list(loaded_data.keys())}")
  else:
    print(f"Loaded data of type: {type(loaded_data)}")

Loaded 111  entries
Sample entry: {
  "word": "cattle",
  "guideword": "",
  "level": "B1",
  "part_of_speech": "",
  "topic": "animals",
  "details": "/british-english/words/detail/817",
  "definitions": [
    {
      "pos_header": {
        "headword": "cattle",
        "position": "noun",
        "audio": "http://www.englishprofile.org/evp/audio/UKCATSU005.mp3",
        "written": "/ˈkæt.l ̩/"
      },
      "pos_items": [
        {
          "sense_title": "cattle",
          "level": "B1",
          "definition": "male and ...


In [128]:
# Convert every loaded entry with the part-of-speech-aware converter.
# NOTE(review): convert_word_list applies convert_word_data_basic, which skips
# every definition group whose header position differs from the entry's
# part_of_speech. Entries with an empty part_of_speech can therefore only end
# up as "error"; convert_phrase_list may be the intended converter — confirm.
parsed_data = convert_word_list(loaded_data)


In [129]:
# Group the converted entries by their match_type and report the group sizes.
exact_match_data, partial_match_data, none_match_data, error_data = [], [], [], []
match_counts = {1: 0, 0: 0, -1: 0}  # not updated anywhere in this cell

# Label -> list that collects the entries carrying that label
groups_by_label = (
  ("exact", exact_match_data),
  ("partial", partial_match_data),
  ("none", none_match_data),
  ("error", error_data),
)

for item in parsed_data:
  match_type = item.get('match_type')
  # Entries with any other (or no) match_type are left out of every group.
  for label, group in groups_by_label:
    if match_type == label:
      group.append(item)
      break

total_count = len(parsed_data)

print(f"\nNumber of entries with match_type 'exact': {len(exact_match_data)}")
print(f"Number of entries with match_type 'partial': {len(partial_match_data)}")
print(f"Number of entries with match_type 'none': {len(none_match_data)}")
print(f"Number of entries with match_type 'error': {len(error_data)}")
print(f"Total number of entries: {total_count}")


Number of entries with match_type 'exact': 0
Number of entries with match_type 'partial': 0
Number of entries with match_type 'none': 0
Number of entries with match_type 'error': 111
Total number of entries: 111


In [124]:

# Dump every partially matched entry as pretty-printed JSON (non-ASCII kept as-is).
print(json.dumps(partial_match_data, ensure_ascii=False, indent=2))

[
  {
    "word": "have",
    "guideword": "",
    "level": "A2",
    "part_of_speech": "auxiliary verb",
    "topic": "",
    "definition": "used with the past participle of another verb to form the present and past perfect tenses",
    "guideword_match": 0,
    "candidate_match": [
      {
        "headword": "have",
        "headword_pos": "auxiliary verb",
        "headword_phonetic": "/hæv/",
        "definition": "used with the past participle of another verb to form the present and past perfect tenses",
        "dictionary_examples": " Have you seen Roz?  |  I've passed my test.  |  He hasn't visited London yet.  |  I'd met his wife before.  |  It would have been better to tell the truth.  |  He's been working in France for two years now. ",
        "learner_examples": "I haven't seen you for ages. (Key English Test; A2; Russian)"
      }
    ],
    "headword": "have",
    "headword_pos": "auxiliary verb",
    "headword_phonetic": "/hæv/",
    "dictionary_examples": " Have you s

In [125]:
# Save the parsed_data to a JSON file
# NOTE(review): the file name is hard-coded; make sure it names the data set
# that parsed_data was actually built from before running this cell.
output_file = 'auxiliary_verb_parsed.json'

# Pretty-printed UTF-8 output with non-ASCII characters written unescaped
with open(output_file, 'w', encoding='utf-8') as f:
  json.dump(parsed_data, f, ensure_ascii=False, indent=2)

print(f"Parsed data saved to {output_file}")

Parsed data saved to auxiliary_verb_parsed.json


In [32]:
# Dump partial matches 15 through 39 as pretty-printed JSON.
print(json.dumps(partial_match_data[15:40], ensure_ascii=False, indent=2))

[
  {
    "word": "amused",
    "guideword": "FUNNY",
    "level": "B2",
    "part_of_speech": "adjective",
    "topic": "people: personality",
    "definition": "showing that you think something is funny",
    "guideword_match": 0,
    "candidate_match": [
      {
        "headword": "amused",
        "headword_pos": "adjective",
        "headword_phonetic": "/əˈmjuːzd/",
        "definition": "showing that you think something is funny",
        "dictionary_examples": "an amused smile | She was very amused by/at your comments.",
        "learner_examples": "When the taxi arrived, he was so amused about the news tha[t] an old woman took it. (First Certificate in English; B2; Portuguese)"
      }
    ],
    "headword": "amused",
    "headword_pos": "adjective",
    "headword_phonetic": "/əˈmjuːzd/",
    "dictionary_examples": "an amused smile | She was very amused by/at your comments.",
    "learner_examples": "When the taxi arrived, he was so amused about the news tha[t] an old woman t

In [16]:
# Dump the first 15 partial matches as pretty-printed JSON.
print(json.dumps(partial_match_data[:15], ensure_ascii=False, indent=2))

[
  {
    "word": "boiling",
    "guideword": "",
    "level": "B2",
    "part_of_speech": "adjective",
    "topic": "natural world",
    "definition": "very hot",
    "guideword_match": 0,
    "headword": "boiling",
    "headword_pos": "adjective",
    "headword_phonetic": "/ˈbɔɪ.lɪŋ/",
    "dictionary_examples": "It's boiling in this room! | It's boiling hot outside.",
    "learner_examples": "I wouldn't like a job like that, especially in the summer, when it's boiling hot. (First Certificate in English; B2; Greek)",
    "match_type": "partial"
  },
  {
    "word": "adjacent",
    "guideword": "",
    "level": "C2",
    "part_of_speech": "adjective",
    "topic": "describing things",
    "definition": "If two things are adjacent, they are next to each other.",
    "guideword_match": 0,
    "headword": "adjacent",
    "headword_pos": "adjective",
    "headword_phonetic": "/əˈdʒeɪ.sənt/",
    "dictionary_examples": " The fire started in an adjacent building. |  They live in a house adj

In [14]:
# Dump the first 20 entries of good_error_data. Without ensure_ascii=False the
# non-ASCII characters (e.g. phonetics) are printed as \uXXXX escapes.
# NOTE(review): good_error_data is not defined in any cell shown here — it
# presumably comes from an earlier notebook session; confirm before re-running.
print(json.dumps(good_error_data[:20], indent=2))

[
  {
    "word": "boiling",
    "guideword": "",
    "level": "B2",
    "part_of_speech": "adjective",
    "topic": "natural world",
    "definition": "very hot",
    "guideword_match": 0,
    "headword": "boiling",
    "headword_pos": "adjective",
    "headword_phonetic": "/\u02c8b\u0254\u026a.l\u026a\u014b/",
    "dictionary_examples": "It's boiling in this room! | It's boiling hot outside.",
    "learner_examples": "I wouldn't like a job like that, especially in the summer, when it's boiling hot. (First Certificate in English; B2; Greek)"
  },
  {
    "word": "adjacent",
    "guideword": "",
    "level": "C2",
    "part_of_speech": "adjective",
    "topic": "describing things",
    "definition": "If two things are adjacent, they are next to each other.",
    "guideword_match": 0,
    "headword": "adjacent",
    "headword_pos": "adjective",
    "headword_phonetic": "/\u0259\u02c8d\u0292e\u026a.s\u0259nt/",
    "dictionary_examples": " The fire started in an adjacent building. |  The

In [2]:
import json
# Load the full EVP British English list and keep only its phrasal verbs
evp_file_path = 'evp_british_english_list.json'

try:
  with open(evp_file_path, 'r', encoding='utf-8') as f:
    evp_data = json.load(f)

  # Keep the entries whose part_of_speech is exactly "phrasal verb"
  pv_list = list(filter(lambda entry: entry.get('part_of_speech') == 'phrasal verb', evp_data))

  print(
    f"Total entries in evp_british_english_list: {len(evp_data)}",
    f"Phrasal verb entries found: {len(pv_list)}",
    sep="\n",
  )

  # Preview the first hit, if there is one
  if pv_list:
    print("\nSample phrasal verb entry:")
    print(json.dumps(pv_list[0], indent=2, ensure_ascii=False))

except FileNotFoundError:
  print(f"Error: File '{evp_file_path}' not found.")
except json.JSONDecodeError as e:
  print(f"Error: Invalid JSON format. Details: {e}")
except Exception as e:
  print(f"Error while processing the file: {e}")

Total entries in evp_british_english_list: 15696
Phrasal verb entries found: 728

Sample phrasal verb entry:
{
  "word": "back up sth or back sth up",
  "guideword": "",
  "level": "C2",
  "part_of_speech": "phrasal verb",
  "topic": "",
  "details": "/british-english/words/detail/414"
}


In [1]:
import json
# Reload the previously saved phrasal-verb entries
with open("evp_british_english_pv_list.json", 'r', encoding='utf-8') as f:
  pv_list = json.loads(f.read())

pv_count = len(pv_list)
print(f"Loaded {pv_count} phrasal verbs from evp_british_english_pv_list.json")

Loaded 728 phrasal verbs from evp_british_english_pv_list.json


In [5]:
import requests
from bs4 import BeautifulSoup
from random import randint
from collections import defaultdict
import re
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# url = "https://englishprofile.org/?menu=evp-online&refid=ID_00006872"

# Use the Service class to point Selenium at the ChromeDriver binary that
# webdriver_manager installs.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)
# driver.get(url)
# Wait for the page to load, then select the 5th dropdown
# dropdowns = WebDriverWait(driver, 10).until(
#     EC.presence_of_all_elements_located((By.CSS_SELECTOR, "select.bubble-element.Dropdown.dropdown-chevron"))
# )

# pos_select = Select(dropdowns[1])
# pos_select.select_by_visible_text("phrasal verb")

# The 5th dropdown (index 4)
# select = Select(dropdowns[4])
# select.select_by_visible_text("All")

# print("第5个下拉框已设置为All")
# Fixed pause after the browser has been started.
time.sleep(5)


In [19]:
# Finer-grained parser that relies on inline styles and element structure
def parse_dictionary_entry_advanced(html_content):
    """
    Extract one dictionary entry from an HTML fragment.

    Args:
        html_content: HTML markup to parse.

    Returns:
        dict: Any of the keys ``phrase``, ``level``, ``level_type``,
        ``definition``, ``dictionary_example`` and ``learner_example`` that
        could be found; an empty dict when the main container is missing.

    NOTE(review): the ``class_`` arguments below are multi-word strings, which
    BeautifulSoup compares against the complete ``class`` attribute value.
    Elements that carry additional classes will not match — confirm against
    the real markup.
    """
    def style_has(*fragments):
        # Predicate for find(): the inline style contains every fragment.
        return lambda style: bool(style) and all(part in style for part in fragments)

    soup = BeautifulSoup(html_content, 'html.parser')
    entry = {}

    # Main container of the entry
    main_container = soup.find('div', class_='bubble-element group-item')
    if not main_container:
        return entry

    # Header row: phrase, level badge and definition
    header_row = main_container.find('div', class_='bubble-element Group baTaJaYaR')
    if header_row:
        # The phrase is the bold element
        phrase_elem = header_row.find('div', style=style_has('font-weight: bold'))
        if phrase_elem:
            entry['phrase'] = phrase_elem.text.strip()

        # The level badge is the div with a background colour
        level_elem = header_row.find('div', style=style_has('background: none rgb'))
        if level_elem:
            entry['level'] = level_elem.text.strip()
            # Derive the level type from the badge colour; other colours
            # leave 'level_type' unset.
            style = level_elem.get('style', '')
            if 'rgb(0, 128, 64)' in style:  # green
                entry['level_type'] = 'intermediate'
            elif 'rgb(160, 48, 160)' in style:  # purple
                entry['level_type'] = 'advanced'

        # Definition text
        definition_elem = header_row.find(
            'div', style=style_has('margin: 0px 0px 0px 10px', 'font-weight: 400'))
        if definition_elem:
            entry['definition'] = definition_elem.text.strip()

    # Examples: each label div is followed by a sibling div holding the text
    examples_container = main_container.find('div', class_='bubble-element Group baTaJaYv')
    if examples_container:
        labelled_examples = (
            ('Dictionary examples:', 'dictionary_example'),
            ('Learner example:', 'learner_example'),
        )
        for label, key in labelled_examples:
            # `string=` is the current name of the deprecated `text=` argument.
            label_elem = examples_container.find('div', string=label)
            if label_elem:
                example_elem = label_elem.find_next_sibling('div')
                if example_elem:
                    entry[key] = example_elem.text.strip()

    return entry

In [31]:
# Visit the detail page of the first two phrasal verbs, switch the page to its
# full view and print every sense found on it.
for item in pv_list[:2]:
    # NOTE(review): driver.get needs an absolute URL; the sample entry printed
    # from evp_british_english_list.json stores a site-relative path in
    # "details", so pv_list presumably holds absolute URLs here — confirm.
    url = item["details"]
    driver.get(url)
    # Give the page time to render, then look for the "Show full view" button.
    time.sleep(5)
    try:
        show_full_button = driver.find_element(By.CSS_SELECTOR, "button.clickable-element.bubble-element.Button.baTaJaZf")
        if show_full_button.text.strip() == "Show full view":
            print("Found 'Show full view' button, clicking...")
            show_full_button.click()
            time.sleep(3)
            # Re-read the button text to confirm the view really switched.
            if show_full_button.text.strip() == "Show summary view":
                print("Successfully switched to full view")
            else:
                print(f"Button text after click: '{show_full_button.text.strip()}'")
        else:
            print(f"Button text is not 'Show full view', current text: '{show_full_button.text.strip()}'")
    except Exception as e:
        # Best effort: the page is still parsed in whatever view it is in.
        print(f"Error finding or clicking button: {e}")

    # Parse the rendered page
    soup = BeautifulSoup(driver.page_source, "html.parser")
    # The div with class "bubble-element RepeatingGroup baTaJaXd bubble-rg"
    # holds all headword groups; only one such div is expected.
    rg_div = soup.find("div", class_="bubble-element RepeatingGroup baTaJaXd bubble-rg")
    if rg_div:
        print(f"Found RepeatingGroup div for {item['word']}")
        # Direct child divs whose class starts with
        # "bubble-element group-item bubble-r-container flex column"
        group_items = rg_div.find_all("div", class_=lambda x: x and x.startswith("bubble-element group-item bubble-r-container flex column"), recursive=False)
        print(f"Found {len(group_items)} group items")
        for group_item in group_items:
            sense_pos_text = ""
            sense_headword_text = ""
            sense_phonetic_text = ""
            # Header (headword, part of speech, phonetics) and sense list
            group_item_header = group_item.find("div", class_="bubble-element Group baTaJaXj bubble-r-container flex row")
            group_item_content = group_item.find("div", class_="bubble-element RepeatingGroup baTaJaYaH bubble-rg")
            if group_item_header:
                sense_headword = group_item_header.find("h4", class_="bubble-element Text baTaJaXp bubble-r-vertical-center")
                sense_pos = group_item_header.find("h6", class_="bubble-element Text baTaJaXv bubble-r-vertical-center")
                sense_phonetic = group_item_header.find("div", class_="bubble-element Text baTaJaYaB bubble-r-vertical-center")
                if sense_headword:
                    sense_headword_text = sense_headword.get_text(strip=True)
                    # Only the printed value drops the last two characters;
                    # sense_headword_text keeps the full text.
                    print(f"Found headword: {sense_headword_text[:-2]}")
                else:
                    print("Sense headword not found")
                if sense_pos:
                    sense_pos_text = sense_pos.get_text(strip=True)
                    print(f"Found position: {sense_pos_text}")
                else:
                    print("Sense position not found")
                if sense_phonetic:
                    sense_phonetic_text = sense_phonetic.get_text(strip=True)
                    print(f"Found headword phonetic: {sense_phonetic_text}")
                else:
                    print("Sense phonetic not found")
                print("Found group item header")
            else:
                print("Group item header not found in group item")
            if group_item_content:
                # One direct child div per sense
                content_group_items = group_item_content.find_all("div", class_=lambda x: x and x.startswith("bubble-element group-item bubble-r-container flex column entry-"), recursive=False)
                print(f"Found {len(content_group_items)} content group items")
                print("Found group item content")
                for content_group_item in content_group_items:
                    parsed_item = {}
                    level_def = content_group_item.find('div', class_='bubble-element Group baTaJaYaR bubble-r-container flex row')
                    examples = content_group_item.find('div', class_='bubble-element Group baTaJaYv bubble-r-container flex column')
                    if level_def:
                        word_ = level_def.find('div', class_='bubble-element Group baTaKaAr bubble-r-container flex row')
                        if word_:
                            word = word_.find('div', class_='bubble-element Text baTaKaAl bubble-r-vertical-center')
                            if word:
                                parsed_item['word'] = word.get_text(strip=True)
                            guideword = word_.find('div', class_='bubble-element Text baTaKaBo bubble-r-vertical-center')
                            if guideword:
                                parsed_item['guideword'] = guideword.get_text(strip=True)
                            else:
                                parsed_item['guideword'] = None
                            usage = word_.find('div', class_='bubble-element Text baTaKaBaW bubble-r-vertical-center')
                            if usage:
                                parsed_item['usage'] = usage.get_text(strip=True)
                            else:
                                parsed_item['usage'] = None
                        level = level_def.find('div', class_='bubble-element Text baTaJaYp bubble-r-vertical-center')
                        if level:
                            parsed_item['level'] = level.get_text(strip=True)
                        definition = level_def.find('div', class_='bubble-element Text baTaJaYaT bubble-r-vertical-center')
                        if definition:
                            parsed_item['definition'] = definition.get_text(strip=True)
                    if examples:
                        dict_examples = examples.find('div', class_='bubble-element Text baTaJaZaT bubble-r-vertical-center')
                        if dict_examples:
                            # parsed_item['dictionary_examples'] = dict_examples.get_text(strip=True)
                            parsed_item['dictionary_examples'] = dict_examples.get_text(strip=True).split('\n')
                        # find_all replaces the deprecated findAll alias.
                        learner_examples = examples.find_all('div', class_='bubble-element Text baTaJaZaZ bubble-r-vertical-center')
                        if learner_examples:
                            parsed_item['learner_examples'] = [elem.get_text(strip=True) for elem in learner_examples]
                    # The parsed sense is only printed here, not stored.
                    print(json.dumps(parsed_item, ensure_ascii=False))
            else:
                print("Group item content not found")

    else:
        print(f"RepeatingGroup div not found for {item['word']}")
    #   definition = soup.select_one("div.definition")
    #   if definition:
    #       item["definition"] = definition.get_text(strip=True)
    #   else:
    #       item["definition"] = "未找到定义"
    # Pause between pages.
    time.sleep(3)

Found 'Show full view' button, clicking...
Successfully switched to full view
Found RepeatingGroup div for back up sth or back sth up
Found 6 group items
Found headword: back
Found position: adverb
Found headword phonetic: /bæk/
Found group item header
Found 6 content group items
Found group item content
{"word": "back", "guideword": "(RETURNING)", "usage": null, "level": "A1", "definition": "where someone or something was before", "dictionary_examples": ["When do you go back to college?", "I put the tin back in the cupboard."], "learner_examples": ["She [went] back home at once."]}
{"word": "back", "guideword": "(REPLY)", "usage": null, "level": "A2", "definition": "as a reply or reaction to something", "dictionary_examples": ["I'm busy at the moment - can I call you back?", "I waved to her and she waved back."], "learner_examples": ["Write back to me soon."]}
{"word": "back", "guideword": "(BEHIND)", "usage": null, "level": "B1", "definition": "in a direction behind you", "dictionary

  learner_examples = examples.findAll('div', class_='bubble-element Text baTaJaZaZ bubble-r-vertical-center')


Found 'Show full view' button, clicking...
Successfully switched to full view
Found RepeatingGroup div for catch up with sb
Found 3 group items
Found headword: catch
Found position: phrasal verb
Found headword phonetic: /kætʃ/
Found group item header
Found 9 content group items
Found group item content
{"word": "catch on", "guideword": "(BECOME POPULAR)", "usage": null, "level": "C1", "definition": "to become popular", "dictionary_examples": ["I wonder if the game will catch on with young people?"], "learner_examples": ["The commercial succes[s] of \"Super agent\" has resulted in introducing \"Super agent II\", the game which is sup[p]osed to present further adventures of the agent. Unfortunately, I don't think that it will catch on."]}
{"word": "catch up (sb) or catch (sb) up", "guideword": "(REACH SOMEONE)", "usage": null, "level": "B2", "definition": "to reach someone in front of you by going faster than them", "dictionary_examples": ["I ran after her and managed to catch up with he