In [4]:
#!/usr/bin/env python3
"""
专门用于查找和标记缺失字段的脚本
输出格式更易读，适合生成报告
"""

import os
import json
import json5
from pathlib import Path
from collections import defaultdict, Counter
import csv


class MissingFieldDetector:
    """Scan recipe JSON files and report recipes that lack expected fields.

    Every ``*.json`` file found recursively under ``folder_path`` whose
    top-level object holds a ``"recipes"`` list is inspected. Findings are
    accumulated in ``self.results``, printed to stdout, and written to a CSV
    file.
    """

    # Display limits used by the console report.
    MAX_ID_ROWS = 10
    MAX_LABEL_ROWS_PER_FILE = 5
    MAX_FILES_LISTED = 10

    def __init__(self,
                 folder_path: str = "recipes",
                 report_path: str = "missing_fields_report.csv"):
        """Create a detector.

        Args:
            folder_path: Directory searched recursively for ``*.json`` files.
            report_path: Where the CSV report is written. Defaults to
                ``missing_fields_report.csv`` in the current directory.
        """
        self.folder_path = Path(folder_path)
        self.report_path = report_path

        # Fields reported with "critical" severity when absent.
        self.critical_fields = ["id", "label", "actionId", "description"]
        # Fields reported with "warning" severity when absent.
        self.important_fields = [
            "startdescription", "requirements", "effects", "aspects",
            "craftable"
        ]

        # Accumulated findings; filled by process_file().
        self.results = {
            "files_processed": 0,
            "recipes_processed": 0,
            # field name -> list of {"file", "recipe_id", "actionId"} entries
            "missing_fields": defaultdict(list),
            "recipes_without_label": [],
            "recipes_without_id": [],
            # file -> field name -> number of recipes missing that field
            "file_summary": defaultdict(lambda: defaultdict(int))
        }

    def run_detection(self) -> None:
        """Process every JSON file under the target folder, then report."""
        print("开始检测缺失字段...")
        print(f"目标文件夹: {self.folder_path}")
        print("=" * 80)

        if not self.folder_path.exists():
            print("错误: 文件夹不存在!")
            return

        # Sorted so the report order does not depend on the filesystem.
        json_files = sorted(self.folder_path.glob("**/*.json"))
        print(f"找到 {len(json_files)} 个JSON文件")

        for json_file in json_files:
            self.process_file(json_file)

        self.generate_report()

    def _display_name(self, file_path) -> str:
        """Return the path relative to the scan root, or the bare file name.

        Relative paths keep same-named files in different subfolders apart.
        """
        path = Path(file_path)
        try:
            return path.relative_to(self.folder_path).as_posix()
        except ValueError:
            # The file lives outside the scan root.
            return path.name

    def process_file(self, file_path: Path) -> None:
        """Record the missing fields of every recipe in one file.

        Files whose top level is not an object with a ``"recipes"`` list are
        skipped silently. Any error while reading or parsing is printed and
        the file is skipped.

        Args:
            file_path: Path of the JSON file to inspect.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Prefer the lenient JSON5 parser; fall back to strict JSON on
            # any parser failure. Not a bare ``except`` so that Ctrl-C and
            # SystemExit still propagate.
            try:
                data = json5.loads(content)
            except Exception:
                data = json.loads(content)

            if not isinstance(data, dict):
                return
            recipes = data.get("recipes")
            if not isinstance(recipes, list):
                return

            self.results["files_processed"] += 1
            file_name = self._display_name(file_path)
            checked_fields = self.critical_fields + self.important_fields

            for recipe in recipes:
                if not isinstance(recipe, dict):
                    continue

                self.results["recipes_processed"] += 1
                # Placeholders are for display only; presence is tested
                # with ``in`` so a real value equal to a placeholder is not
                # mistaken for a missing field.
                recipe_id = recipe.get("id", "无ID")
                action_id = recipe.get("actionId", "未知")

                if "id" not in recipe:
                    self.results["recipes_without_id"].append({
                        "file": file_name,
                        "actionId": action_id
                    })

                for field in checked_fields:
                    if field not in recipe:
                        self.results["missing_fields"][field].append({
                            "file": file_name,
                            "recipe_id": recipe_id,
                            "actionId": action_id
                        })
                        self.results["file_summary"][file_name][field] += 1

                # Recipes without a label also get their own list so the
                # report can highlight them.
                if "label" not in recipe:
                    self.results["recipes_without_label"].append({
                        "file": file_name,
                        "recipe_id": recipe_id,
                        "actionId": action_id
                    })

        except Exception as e:
            print(f"处理文件 {file_path} 时出错: {e}")

    def generate_report(self) -> None:
        """Print the console report and write the CSV report."""
        print("\n" + "=" * 80)
        print("缺失字段检测报告")
        print("=" * 80)

        print("\n处理统计:")
        print(f"  处理文件数: {self.results['files_processed']}")
        print(f"  处理配方数: {self.results['recipes_processed']}")

        self._print_recipes_without_id()
        self._print_recipes_without_label()
        self._print_field_totals()
        self._print_file_summary()
        self.generate_csv_report()

    def _print_recipes_without_id(self) -> None:
        """Print the first recipes that have no ``id`` field."""
        without_id = self.results["recipes_without_id"]
        if not without_id:
            return

        print(f"\n⚠ 发现 {len(without_id)} 个没有ID的配方:")
        for recipe in without_id[:self.MAX_ID_ROWS]:
            print(f"  文件: {recipe['file']}, ActionId: {recipe['actionId']}")
        if len(without_id) > self.MAX_ID_ROWS:
            print(f"  ... 还有 {len(without_id) - self.MAX_ID_ROWS} 个")

    def _print_recipes_without_label(self) -> None:
        """Print recipes that have no ``label`` field, grouped by file."""
        without_label = self.results["recipes_without_label"]
        if not without_label:
            return

        print(f"\n❌ 严重问题: 发现 {len(without_label)} 个没有label的配方!")
        print("这些配方在游戏中可能无法正确显示:")

        by_file = defaultdict(list)
        for recipe in without_label:
            by_file[recipe["file"]].append(recipe)

        limit = self.MAX_LABEL_ROWS_PER_FILE
        for file_name, recipes in by_file.items():
            print(f"\n  文件: {file_name} ({len(recipes)} 个配方):")
            for number, recipe in enumerate(recipes[:limit], start=1):
                print(f"    {number}. ID: {recipe['recipe_id']}, "
                      f"ActionId: {recipe['actionId']}")
            if len(recipes) > limit:
                print(f"    ... 还有 {len(recipes) - limit} 个")

    def _print_field_totals(self) -> None:
        """Print, per field, how many recipes lack it and the percentage."""
        missing = self.results["missing_fields"]
        if not missing:
            return

        print("\n缺失字段总体统计:")
        print("-" * 80)

        total = self.results["recipes_processed"]
        for field in self.critical_fields + self.important_fields:
            if field not in missing:
                continue
            missing_count = len(missing[field])
            # Guard against a zero total if the method is called directly.
            percentage = (missing_count / total) * 100 if total else 0.0
            severity = "❌ 严重" if field in self.critical_fields else "⚠ 警告"
            print(f"  {severity} {field:20s}: {missing_count:4d} 缺失 "
                  f"({percentage:5.1f}%)")

    def _print_file_summary(self) -> None:
        """Print the files with the most missing fields, worst first."""
        summary = self.results["file_summary"]
        if not summary:
            return

        print("\n各文件缺失情况统计:")
        print("-" * 80)

        file_stats = []
        for file_name, fields in summary.items():
            total_missing = sum(fields.values())
            # Number of distinct critical fields missing in this file.
            critical_missing = sum(1 for f in self.critical_fields
                                   if fields.get(f, 0) > 0)
            file_stats.append(
                (file_name, total_missing, critical_missing, fields))

        # Most distinct critical fields first, then most missing overall.
        file_stats.sort(key=lambda x: (x[2], x[1]), reverse=True)

        for file_name, total, critical, fields in file_stats[:self.
                                                             MAX_FILES_LISTED]:
            print(f"\n  {file_name}:")
            print(f"    总缺失: {total}, 严重缺失: {critical}")
            for field in self.critical_fields + self.important_fields:
                if fields.get(field, 0) > 0:
                    severity = "❌" if field in self.critical_fields else "⚠"
                    print(f"    {severity} {field}: {fields[field]} 个")

    def generate_csv_report(self) -> None:
        """Write one CSV row per (recipe, missing field) to ``report_path``."""
        csv_file = self.report_path

        with open(csv_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)

            # Header: file, recipe id, action id, missing field, severity.
            writer.writerow(["文件", "配方ID", "ActionId", "缺失字段", "严重程度"])

            for field in self.critical_fields + self.important_fields:
                if field in self.results["missing_fields"]:
                    severity = "严重" if field in self.critical_fields else "警告"
                    for recipe in self.results["missing_fields"][field]:
                        writer.writerow([
                            recipe["file"], recipe["recipe_id"],
                            recipe["actionId"], field, severity
                        ])

        print(f"\n详细报告已保存到: {csv_file}")


def main():
    """Run the detector on origin_resources/StreamingAssets/content/core/recipes."""
    # Build the path from its components so the separator matches the OS.
    segments = ('origin_resources', 'StreamingAssets', 'content', 'core',
                'recipes')
    recipes_dir = os.path.join(*segments)
    MissingFieldDetector(recipes_dir).run_detection()


if __name__ == "__main__":
    # Friendly hint when the third-party json5 package is missing.
    # NOTE: json5 is also imported unconditionally at the top of this file,
    # so a missing package fails there first; this check only takes effect
    # if that import is removed or guarded.
    try:
        import json5
    except ImportError:
        print("请先安装json5: pip install json5")
        # ``exit()`` is a site-module helper meant for the interactive shell
        # and may be absent (e.g. ``python -S``); raise SystemExit directly.
        raise SystemExit(1)

    main()

开始检测缺失字段...
目标文件夹: origin_resources\StreamingAssets\content\core\recipes
找到 90 个JSON文件

缺失字段检测报告

处理统计:
  处理文件数: 90
  处理配方数: 2736

❌ 严重问题: 发现 107 个没有label的配方!
这些配方在游戏中可能无法正确显示:

  文件: acquisition.json (2 个配方):
    1. ID: auctionbidmatched.grailapostlepillar3, ActionId: 未知
    2. ID: auctionfailureclear, ActionId: 未知

  文件: culting.json (8 个配方):
    1. ID: edge_exalt_obtained, ActionId: 未知
    2. ID: forge_exalt_obtained, ActionId: 未知
    3. ID: grail_exalt_obtained, ActionId: 未知
    4. ID: heart_exalt_obtained, ActionId: 未知
    5. ID: knock_exalt_obtained, ActionId: 未知
    ... 还有 3 个

  文件: DLC_EXILE_exile_recipes.json (10 个配方):
    1. ID: _cities, ActionId: 未知
    2. ID: _domiciles, ActionId: 未知
    3. ID: opx.caper.end, ActionId: 未知
    4. ID: travel.depart.purge, ActionId: 未知
    5. ID: turn.check.traces, ActionId: 未知
    ... 还有 5 个

  文件: DLC_EXILE_exile_rkx_foe.json (1 个配方):
    1. ID: spit.foe.attack.failure.associate.killed.undo.wound, ActionId: 未知

  文件: dream_general.json (1 个