In [5]:
import json
def analyze_json(filepath):
    """Print summary counts of the checks and results stored in a JSON file.

    The file is loaded with ``json.load`` and iterated item by item. For each
    item the ``checks_once`` / ``checks_range`` / ``checks_periodic`` and
    ``results_once`` / ``results_range`` / ``results_periodic`` sections are
    read with ``item.get``; a section that is missing or stored as JSON
    ``null`` is counted as empty.

    Args:
        filepath: Path of the UTF-8 encoded JSON file to analyze.

    Returns:
        None. Three totals are written to standard output: the number of
        key-value pairs across all checks sections, the number across all
        results sections, and how many result values equal ``'yes'``.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        data = json.load(file)

    total_checks = 0
    total_results = 0
    total_yes = 0

    for item in data:
        for suffix in ('once', 'range', 'periodic'):
            # `or {}` also covers an explicit JSON null, which the default
            # argument of dict.get() alone would let through as None.
            checks = item.get('checks_' + suffix) or {}
            results = item.get('results_' + suffix) or {}

            total_checks += len(checks)
            total_results += len(results)
            total_yes += sum(1 for value in results.values() if value == 'yes')

    print("Total checks key-value pairs:", total_checks)
    print("Total results key-value pairs:", total_results)
    print("Total 'yes' occurrences in results:", total_yes)

# Run the analysis on a single results file; replace this path with the actual JSON file to analyze.
analyze_json('/home/yuhao/THREADING-THE-NEEDLE/Evalution/results/LongWriter-llama3.1-8b_maxlen16000.json')


Total checks key-value pairs: 6205
Total results key-value pairs: 2700
Total 'yes' occurrences in results: 610


In [3]:
import json
import os
import csv

def analyze_json(filepath, min_blocks=40, min_word_count=2000):
    """Compute check/result statistics for one JSON results file.

    The file is loaded with ``json.load`` and iterated item by item. For each
    item the ``checks_*`` and ``results_*`` sections (suffixes ``once``,
    ``range`` and ``periodic``) are read with ``item.get``; a section that is
    missing or stored as JSON ``null`` is counted as empty.

    An item contributes to the average word count only when it has more than
    ``min_blocks`` entries in ``output_blocks`` and a ``word_count`` greater
    than ``min_word_count``. Items where either field is missing or null are
    skipped for the average.

    Args:
        filepath: Path of the UTF-8 encoded JSON file to analyze.
        min_blocks: Exclusive lower bound on ``len(item['output_blocks'])``
            for an item to count towards the average word count.
        min_word_count: Exclusive lower bound on ``item['word_count']`` for
            an item to count towards the average word count.

    Returns:
        A list of seven values, in this order: total checks, total results,
        total ``'yes'`` results, results/checks ratio, yes/results ratio,
        yes/checks ratio, average word count of the qualifying items.
        A ratio or the average is ``None`` when its denominator is zero.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        data = json.load(file)

    total_checks = 0
    total_results = 0
    total_yes = 0
    total_word_count = 0
    word_count_entries = 0

    for item in data:
        for suffix in ('once', 'range', 'periodic'):
            # `or {}` also covers an explicit JSON null, which the default
            # argument of dict.get() alone would let through as None.
            checks = item.get('checks_' + suffix) or {}
            results = item.get('results_' + suffix) or {}

            total_checks += len(checks)
            total_results += len(results)
            total_yes += sum(1 for value in results.values() if value == 'yes')

        blocks = item.get('output_blocks')
        word_count = item.get('word_count')
        # Explicit None checks keep a null field from reaching len() or the
        # numeric comparison, both of which would raise TypeError.
        if (
            blocks is not None
            and word_count is not None
            and len(blocks) > min_blocks
            and word_count > min_word_count
        ):
            total_word_count += word_count
            word_count_entries += 1

    # Ratios are left as None rather than dividing by zero.
    ratio_results_checks = total_results / total_checks if total_checks > 0 else None
    ratio_yes_results = total_yes / total_results if total_results > 0 else None
    ratio_yes_checks = total_yes / total_checks if total_checks > 0 else None

    average_word_count = total_word_count / word_count_entries if word_count_entries > 0 else None

    return [
        total_checks,
        total_results,
        total_yes,
        ratio_results_checks,
        ratio_yes_results,
        ratio_yes_checks,
        average_word_count,
    ]

def process_directory(directory_path, output_path='static.csv'):
    """Analyze every ``.json`` file in a directory and write a CSV summary.

    Each regular file in ``directory_path`` whose name ends with ``.json`` is
    passed to ``analyze_json``; the file name followed by the returned values
    becomes one CSV row. Files are processed in sorted name order so the row
    order is reproducible (``os.listdir`` returns entries in arbitrary order).
    Subdirectories are not descended into.

    Args:
        directory_path: Directory containing the JSON result files.
        output_path: Path of the CSV file to write. Defaults to
            ``'static.csv'``, resolved against the current working directory.

    Returns:
        None. The summary is written to ``output_path``, overwriting any
        existing file.
    """
    results = []
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith(".json"):
            continue
        filepath = os.path.join(directory_path, filename)
        # A directory that happens to be named "*.json" cannot be opened as a file.
        if not os.path.isfile(filepath):
            continue
        results.append([filename] + analyze_json(filepath))

    with open(output_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Filename', 'Total Checks', 'Total Results', 'Total Yes', 'Ratio Results/Checks', 'Ratio Yes/Results', 'Ratio Yes/Checks', 'Average Word Count'])
        writer.writerows(results)

# Summarize every JSON file in the results directory; replace this path with the actual directory to process.
process_directory('/home/yuhao/THREADING-THE-NEEDLE/Evalution/results')
