In [1]:
import csv
import json

def _build_set(group):
    """Turn one blank-line-delimited group of CSV rows into a corpus set.

    Rows are interpreted by their position inside the group:
    0 = id (first cell), 1 = unicode column values, 2 = characters,
    3 = pronunciations, 4 = chinese, 5 = meaning (first cell).
    Rows after the sixth are ignored.

    Args:
        group: Non-empty list of non-blank CSV rows (lists of strings).

    Returns:
        A dict with "id", optionally "metadata", and "text" (in that order).
    """
    record = {"id": group[0][0]}
    entries = []
    # Text entries are only produced once the meaning row is present, i.e.
    # when the set is complete; a truncated set yields an empty "text" list.
    if len(group) > 5:
        record["metadata"] = {"meaning": group[5][0]}
        entries = [
            {
                "unicode": u,
                "character": c,
                "pronunciation": p,
                "chinese": ch,
            }
            # zip() stops at the shortest of the four rows.
            for u, c, p, ch in zip(*group[1:5])
        ]
    # Drop entries whose four fields are all empty (trailing padding cells).
    record["text"] = [entry for entry in entries if any(entry.values())]
    return record


def csv_to_json(input_file, output_file):
    """Convert a blank-line-separated CSV corpus into a JSON file.

    The CSV is read as UTF-8. Sets of rows are separated by blank rows (rows
    with no cells or only empty cells). Each set is converted with
    ``_build_set`` and the result is written to ``output_file`` as
    ``{"corpus_data": [...]}`` (UTF-8, non-ASCII kept as-is, 4-space indent).

    A row's role is determined by its position within its own set rather than
    by its absolute line number, so leading blank lines or repeated blank
    separators do not shift the fields of later sets.

    Note: a data row that is entirely empty is indistinguishable from a
    separator and therefore still ends the current set.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the JSON file to write (overwritten if present).
    """
    corpus_data = []  # finished sets, in file order
    group = []  # non-blank rows of the set currently being collected

    with open(input_file, newline='', encoding='utf-8') as infile:
        for row in csv.reader(infile):
            if any(row):
                group.append(row)
            elif group:  # blank row closes the set in progress
                corpus_data.append(_build_set(group))
                group = []

    if group:  # final set when the file does not end with a blank row
        corpus_data.append(_build_set(group))

    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump({"corpus_data": corpus_data}, outfile, ensure_ascii=False, indent=4)

# Usage example: convert the CSV corpus to JSON. These statements run at top
# level, so they execute whenever this cell/module is run; the relative input
# path is resolved against the current working directory.
input_file = '論語全解.csv'
output_file = '論語全解2.json'
csv_to_json(input_file, output_file)

# Report where the converted JSON was written.
print(f"Transformed JSON saved to {output_file}")

Transformed JSON saved to 論語全解2.json
