In [1]:
import os
import json
from collections import defaultdict
from glob import glob

# ✅ Lookup tables: grammatical labels -> Traditional Chinese display text
# Gender/number abbreviations as looked up by convert_gender_number().
gender_map = {
    "m sg": "陽性單數",  # masculine singular
    "f sg": "陰性單數",  # feminine singular
    "n sg": "中性單數",  # neuter singular
    "m pl": "陽性複數",  # masculine plural
    "f pl": "陰性複數",  # feminine plural
    "n pl": "中性複數",  # neuter plural
    "pl": "複數"  # plural without a gender marker
}

# German case names as looked up by convert_case().
# NOTE(review): there is no "Nominativ" entry, so convert_case() returns that
# label untranslated -- confirm this is intended.
case_map = {
    "Akkusativ": "第四格",  # accusative ("fourth case")
    "Dativ": "第三格",  # dative ("third case")
    "Genitiv": "第二格"  # genitive ("second case")
}

# ✅ Label conversion helpers
def convert_gender_number(gn):
    """Translate a gender/number abbreviation into its Chinese label.

    Args:
        gn: Abbreviation such as ``"m sg"``; it is looked up in ``gender_map``.

    Returns:
        The mapped Chinese label, or ``gn`` itself when ``gender_map`` has no
        entry for it.
    """
    if gn in gender_map:
        return gender_map[gn]
    # Unknown labels are passed through unchanged rather than rejected.
    return gn

def convert_case(case):
    """Translate a German case name into its Chinese label.

    Args:
        case: Case name such as ``"Dativ"``; it is looked up in ``case_map``.

    Returns:
        The mapped Chinese label, or ``case`` itself when ``case_map`` has no
        entry for it.
    """
    try:
        return case_map[case]
    except KeyError:
        # Unknown labels are passed through unchanged rather than rejected.
        return case

# ✅ Render the grammatical components of an example sentence
def write_components(components, md_lines):
    """Append Markdown bullets describing the components of a sentence.

    Emits, in order: the subject (plus optional gender/number and nominative
    base form), the verb form, and one bullet per object (with its case, plus
    optional gender/number and nominative base form). A blank line is always
    appended last.

    Missing, ``None`` or empty optional values are skipped instead of being
    printed, so JSON ``null`` fields do not crash the rendering or leak a
    literal "None" into the output.

    Args:
        components: Mapping that may contain ``"subject"``, ``"verb_form"``
            and ``"objects"``. ``None`` is treated as an empty mapping.
        md_lines: List of output lines; it is extended in place.

    Returns:
        None.

    Raises:
        KeyError: If a subject lacks ``"de"``/``"zh"``, or an object lacks
            ``"de"``/``"zh"``/``"case"``.
    """
    components = components or {}

    subject = components.get("subject")
    if subject:
        md_lines.append(f"   - 主語：{subject['de']}（{subject['zh']}）")
        gender_number = subject.get("gender_number")
        if gender_number:
            md_lines.append(f"       * 性質：{convert_gender_number(gender_number)}")
        nominative = subject.get("original_nominative_form")
        # The base form is only shown when it differs from the surface form.
        if nominative and nominative != subject["de"]:
            md_lines.append(f"       * 原形：{nominative}")

    verb_form = components.get("verb_form")
    if verb_form:
        md_lines.append(f"   - 動詞形態：{verb_form}")

    # `or ()` also covers an explicit null "objects" value.
    for obj in components.get("objects") or ():
        md_lines.append(f"   - 受詞：{obj['de']}（{obj['zh']}, {convert_case(obj['case'])}）")
        gender_number = obj.get("gender_number")
        if gender_number:
            md_lines.append(f"       * 性質：{convert_gender_number(gender_number)}")
        nominative = obj.get("original_nominative_form")
        # Unlike the subject, an object's base form is shown whenever present.
        if nominative:
            md_lines.append(f"       * 原形：{nominative}")

    # The trailing blank line separates this item from whatever follows.
    md_lines.append("")

# ✅ Collect the verb phrases of every JSON file, grouped by lemma
def merge_json_files(input_folder):
    """Load all ``*.json`` files in a folder and group verb phrases by lemma.

    Files are read in sorted path order. ``glob()`` itself returns matches in
    arbitrary order, which would make the order of the grouped entries (and
    therefore which entry ``merge_entries`` uses as its base) vary between
    machines and runs.

    Args:
        input_folder: Directory that is searched (non-recursively) for
            ``*.json`` files.

    Returns:
        A ``defaultdict(list)`` mapping each lemma to the list of verb-phrase
        dicts that carry it, in file order. Files without a
        ``"verb_phrases"`` key, or with a null value for it, contribute
        nothing.

    Raises:
        json.JSONDecodeError: If a file does not contain valid JSON.
        KeyError: If a verb phrase has no ``"lemma"`` key.
    """
    lemma_dict = defaultdict(list)
    for filepath in sorted(glob(os.path.join(input_folder, "*.json"))):
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)
        # `or []` also covers an explicit null "verb_phrases" value.
        for vp in data.get("verb_phrases") or []:
            lemma_dict[vp["lemma"]].append(vp)
    return lemma_dict

def merge_entries(lemma_dict):
    """Collapse the entries of each lemma into a single merged entry.

    For every lemma the first entry supplies all fields other than the two
    merged lists (so e.g. the meanings and grammar of later entries are not
    carried over). ``"examples"`` is the concatenation of all entries'
    examples with duplicates (same ``"de"`` text) removed, keeping the first
    occurrence. ``"prepositional_phrases"`` is the plain concatenation of all
    entries' phrases, without de-duplication.

    The input entries are not modified: each result is a shallow copy of the
    first entry with the two list fields replaced.

    Args:
        lemma_dict: Mapping of lemma to a list of verb-phrase dicts. Lemmas
            with an empty list are skipped. Null or missing ``"examples"`` /
            ``"prepositional_phrases"`` values are treated as empty lists.

    Returns:
        A list with one merged dict per non-empty lemma, in the iteration
        order of ``lemma_dict``.

    Raises:
        KeyError: If an example has no ``"de"`` key.
    """
    merged_verbs = []
    for entries in lemma_dict.values():
        if not entries:
            # Guards against keys created by accident on a defaultdict.
            continue

        unique_examples = []
        all_prep_phrases = []
        seen_de = set()
        for entry in entries:
            for ex in entry.get("examples") or []:
                if ex["de"] not in seen_de:
                    seen_de.add(ex["de"])
                    unique_examples.append(ex)
            all_prep_phrases.extend(entry.get("prepositional_phrases") or [])

        # Copy so the caller's first entry is left untouched.
        merged = dict(entries[0])
        merged["examples"] = unique_examples
        merged["prepositional_phrases"] = all_prep_phrases
        merged_verbs.append(merged)
    return merged_verbs

# ✅ Render the merged verbs as a Markdown document
def jsons_to_detailed_markdown(input_folder, output_file):
    """Merge the verb JSON files of a folder and write them as Markdown.

    The document starts with a fixed level-1 title, followed by one level-2
    section per lemma (sorted with the default string ordering). Each section
    lists the Chinese meanings and grammar properties, then the general
    example sentences, then the governed prepositions with their examples,
    and ends with a ``***`` rule.

    Optional fields that are missing, null or empty are skipped rather than
    printed. A prepositional phrase without a case is rendered as the bare
    preposition instead of ``"<prep> + "``.

    Args:
        input_folder: Directory containing the ``*.json`` input files; passed
            to ``merge_json_files``.
        output_file: Path of the Markdown file to write (UTF-8). Its parent
            directory is created if it does not exist yet.

    Returns:
        None.

    Raises:
        KeyError: If a merged verb lacks ``"lemma"``, an example lacks
            ``"de"``/``"zh"``, or a prepositional phrase lacks
            ``"preposition"``.
    """
    lemma_dict = merge_json_files(input_folder)
    merged_verbs = merge_entries(lemma_dict)

    md_lines = ["# **判決中的動詞合併整理（含介詞支配）** {#verbs-in-judgments}\n\n"]

    # (grammar flag, label shown under "特性") in output order.
    trait_labels = (
        ("is_reflexive", "反身動詞"),
        ("is_modal", "情態動詞"),
        ("is_separable_verb", "分離動詞"),
    )

    for vp in sorted(merged_verbs, key=lambda x: x["lemma"]):
        lemma = vp["lemma"]
        meanings = vp.get("zh_meanings") or {}
        grammar = vp.get("grammar") or {}

        md_lines.append(f"## {lemma}\n")
        md_lines.append("- **中文含義**：")
        original = meanings.get("original")
        if isinstance(original, str):
            # A bare string would otherwise be joined character by character.
            original = [original]
        if original:
            md_lines.append(f"  - 原意：{', '.join(original)}")
        contextual = meanings.get("contextual")
        if contextual:
            md_lines.append(f"  - 語境意義：{contextual}")
        if grammar.get("case_requirement"):
            md_lines.append(f"- **受詞格位需求**：{convert_case(grammar['case_requirement'])}")
        for flag, label in trait_labels:
            if grammar.get(flag):
                md_lines.append(f"- **特性**：{label}")
        if grammar.get("gov_preposition"):
            md_lines.append(f"- **主要支配介詞**：`{grammar['gov_preposition']}`")
        md_lines.append("")

        examples = vp.get("examples") or []
        if examples:
            md_lines.append("### 一般例句\n")
            for idx, ex in enumerate(examples, 1):
                md_lines.append(f"{idx}. **德文例句**：`{ex['de']}`")
                md_lines.append(f"   - 中文翻譯：{ex['zh']}")
                # `or {}` keeps a null "components" value from reaching the helper.
                write_components(ex.get("components") or {}, md_lines)

        preps = vp.get("prepositional_phrases") or []
        if preps:
            md_lines.append("### 介詞支配與搭配\n")
            for prep in preps:
                structure = prep["preposition"]
                prep_case = convert_case(prep.get("case") or "")
                if prep_case:
                    # Only append the case part when one is actually known.
                    structure = f"{structure} + {prep_case}"
                md_lines.append(f"- **支配介詞結構**：`{structure}`")
                example = prep.get("example")
                if example:
                    md_lines.append(f"  - 例句：`{example['de']}`")
                    md_lines.append(f"    - 翻譯：{example['zh']}")
                    write_components(example.get("components") or {}, md_lines)

        md_lines.append("\n***\n")

    # open() does not create missing directories; dirname is "" for a bare
    # file name, in which case there is nothing to create.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(md_lines))

# ✅ Example run
# NOTE(review): both paths below are absolute, machine-specific paths; adjust
# them before running this on another machine.
if __name__ == "__main__":
    input_folder = "/Users/iw/Documents/NTU/1132/1132_German_Legal/_anki"               # folder that holds all the JSON files
    output_file = "/Users/iw/Documents/NTU/1132/1132_German_Legal/動詞合併整理/merged_verbs.md"        # final Markdown output file
    jsons_to_detailed_markdown(input_folder, output_file)
    print(f"✅ 合併完成，已輸出至 {output_file}")

✅ 合併完成，已輸出至 /Users/iw/Documents/NTU/1132/1132_German_Legal/動詞合併整理/merged_verbs.md
