In [9]:
import pandas as pd

# Read the chengyu table from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='chengyu_data.csv')

# Print the leading rows (head() defaults to 5) as a quick look at the data
print(df.head(n=5))

  Chengyu         Pinyin                                        Explanation
0    阿鼻地狱     ā bí dì yù  Meaning阿鼻：梵语的译音，意译为“无间”，即痛苦无有间断之意。常用来比喻黑暗的社会和严...
1    阿狗阿猫    ā gǒu ā māo  Meaning旧时人们常用的小名。引申为任何轻贱的，不值得重视的人或著作。Context鲁迅...
2    阿家阿翁    ā gū ā wēng  Meaning阿：名词的前缀。家：通“姑”，丈夫的母亲。翁：丈夫的父亲。指公公婆婆。Cont...
3    阿姑阿翁    ā gū ā wēng  Meaning阿：名词的前缀。姑：丈夫的母亲。翁：丈夫的父亲。指公公婆婆。Context见：...
4    阿娇金屋  ā jiāo jīn wū  Meaning阿娇：指汉武帝刘彻的姑母（长公主刘娇）的女儿。原指汉武帝刘彻要用金屋接纳阿娇为...


In [10]:
import re

def parse_explanation(explanation):
    """Split a concatenated explanation string into its labelled sections.

    The input is expected to look like
    ``"Meaning...Context...Example...Synonyms...Antonyms...Grammar..."``,
    where each English label is immediately followed by that section's text.
    Any section may be absent.

    A section's text runs from just after its label up to the next *later*
    label (in the order listed above) or to the end of the string. Boundaries
    are matched as whole label words, so Latin letters inside the section
    text (e.g. "AA制") do not cut the section short. The Grammar section,
    having no later label, runs to the end of the string, newlines included.

    Parameters
    ----------
    explanation : str
        Raw explanation text. Non-string values (e.g. NaN produced for an
        empty CSV cell) are treated as having no sections.

    Returns
    -------
    dict
        Keys 'Meaning', 'Context', 'Example', 'Synonyms', 'Antonyms' and
        'Grammar', each mapped to the stripped section text, or '' when the
        section is missing.
    """
    labels = ('Meaning', 'Context', 'Example', 'Synonyms', 'Antonyms', 'Grammar')
    components = {label: '' for label in labels}

    # re.search raises TypeError on non-strings; return the empty template instead.
    if not isinstance(explanation, str):
        return components

    for position, label in enumerate(labels):
        # A section ends where any later label begins, or at end of input.
        stop_alternatives = [re.escape(later) for later in labels[position + 1:]]
        stop_alternatives.append(r'\Z')
        pattern = rf"{re.escape(label)}(.*?)(?={'|'.join(stop_alternatives)})"

        # DOTALL lets a section span line breaks; the lazy quantifier stops
        # at the first boundary the lookahead accepts.
        match = re.search(pattern, explanation, flags=re.DOTALL)
        if match:
            components[label] = match.group(1).strip()

    return components

In [11]:
# Run the parser over each Explanation string; every row yields a dict of sections
parsed_components = df['Explanation'].apply(parse_explanation)

# Expand the per-row dicts into a frame with one column per section key
parsed_df = pd.DataFrame(parsed_components.tolist())

# Place the section columns beside the original ones, then remove the raw
# Explanation text they replace.
# NOTE: this rebinds df, so running the cell a second time without reloading
# the data fails on df['Explanation'] (the column is gone by then).
df = pd.concat([df, parsed_df], axis='columns').drop(columns=['Explanation'])

# Preview the restructured table
print(df.head())

  Chengyu         Pinyin                                            Meaning  \
0    阿鼻地狱     ā bí dì yù  阿鼻：梵语的译音，意译为“无间”，即痛苦无有间断之意。常用来比喻黑暗的社会和严酷的牢狱。又比...   
1    阿狗阿猫    ā gǒu ā māo                     旧时人们常用的小名。引申为任何轻贱的，不值得重视的人或著作。   
2    阿家阿翁    ā gū ā wēng                阿：名词的前缀。家：通“姑”，丈夫的母亲。翁：丈夫的父亲。指公公婆婆。   
3    阿姑阿翁    ā gū ā wēng                     阿：名词的前缀。姑：丈夫的母亲。翁：丈夫的父亲。指公公婆婆。   
4    阿娇金屋  ā jiāo jīn wū  阿娇：指汉武帝刘彻的姑母（长公主刘娇）的女儿。原指汉武帝刘彻要用金屋接纳阿娇为妇。这里泛指美...   

                                             Context  \
0                            语出《法华经·法师功德品》：“下至阿鼻地狱。”   
1     鲁迅《我们要批评家》：“然而新的批评家不开口，类似批评家之流便趁势一笔抹杀：‘阿狗阿猫’。”   
2  唐·赵璘《因话录》卷一：“郭暖尝与升平公主琴瑟不调。尚父拘暖，自诣朝童结罪。上召而慰之曰：‘...   
3                                          见：“阿家阿翁”。   
4  语出旧题汉·班固《汉武故事》：汉武帝幼时曾对姑母长公主说：“若得阿娇(姑母之女)作妇，当以金...   

                                 Example Synonyms Antonyms           Grammar  
0  但也有少数意志薄弱的……逐步上当，终至堕入～。 ◎《上饶集中营·炼狱杂记》                                      
1                             

In [12]:
# Write the parsed table to a new CSV file. index=False leaves the row index
# out of the file; the utf-8-sig codec writes a UTF-8 byte-order mark first.
df.to_csv(
    path_or_buf='chinese_chengyu_cleaned.csv',
    encoding='utf-8-sig',
    index=False,
)

# Confirmation message for whoever runs the notebook
print("Data has been successfully cleaned and saved to 'chinese_chengyu_cleaned.csv'")

Data has been successfully cleaned and saved to 'chinese_chengyu_cleaned.csv'
