In [11]:
import urllib.request
import zipfile
import io
import re
import os
import sys

# --- 設定と実装の詳細 ---
UNIHAN_URL = "https://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip"
OUTPUT_FILE = "cjk_full_busyu_ja.html" # 日本語のみ

# 214の康熙部首一覧
KANGXI_RADICALS = [
    "一", "丨", "丶", "丿", "乙", "亅", "二", "亠", "人", "儿", "入", "八", "冂", "冖", "冫", "几", "凵", "刀", "力", "勹", "匕", "匚", "匸", "十", "卜", "卩", "厂", "厶", "又",
    "口", "囗", "土", "士", "夂", "夊", "夕", "大", "女", "子", "宀", "寸", "小", "尢", "尸", "屮", "山", "巛", "工", "己", "巾", "干", "幺", "广", "廴", "廾", "弋", "弓", "彐", "彡", "彳",
    "心", "戈", "戶", "手", "支", "攴", "文", "斗", "斤", "方", "无", "日", "曰", "月", "木", "欠", "止", "歹", "殳", "毋", "比", "毛", "氏", "气", "水", "火", "爪", "父", "爻", "爿", "片", "牙", "牛", "犬",
    "玄", "玉", "瓜", "瓦", "甘", "生", "用", "田", "疋", "疒", "癶", "白", "皮", "皿", "目", "矛", "矢", "石", "示", "禸", "禾", "穴", "立", "竹", "米", "糸", "缶", "网", "羊", "羽", "老", "而", "耒", "耳", "聿", "肉", "臣", "自", "至", "臼", "舌", "舛", "舟", "艮", "色", "艸", "虍", "虫", "血", "行", "衣", "襾",
    "見", "角", "言", "谷", "豆", "豕", "豸", "貝", "赤", "走", "足", "身", "車", "辛", "辰", "辵", "邑", "酉", "釆", "里",
    "金", "長", "門", "阜", "隶", "隹", "雨", "青", "非",
    "面", "革", "韋", "韭", "音", "頁", "風", "飛", "食", "首", "香",
    "馬", "骨", "高", "髟", "鬥", "鬯", "鬲", "鬼",
    "魚", "鳥", "鹵", "鹿", "麥", "麻",
    "黃", "黍", "黑", "黹",
    "黽", "鼎", "鼓", "鼠",
    "鼻", "齊",
    "齒",
    "龍", "龜",
    "龠"
]

def download_and_extract():
    print(f"1. Downloading {UNIHAN_URL}...")
    try:
        req = urllib.request.Request(
            UNIHAN_URL,
            data=None,
            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
        )
        response = urllib.request.urlopen(req)
        zip_data = response.read()
        print(f"   Download complete ({len(zip_data)/1024/1024:.2f} MB).")
        return zip_data
    except Exception as e:
        print(f"   CRITICAL ERROR: Cannot download Unihan. {e}")
        return None

def parse_unihan(zip_bytes):
    print("2. Scanning ALL files in ZIP...")
    cjk_map = []

    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as z:
        # テキストを含む可能性のある全ファイルのリストアップ
        file_list = [n for n in z.namelist() if not n.endswith('/') and not n.startswith('__MACOSX') and not '/.' in n]

        print(f"   {len(file_list)} files found in archive.")

        for filename in file_list:
            if "ReadMe" in filename or "History" in filename:
                continue

            print(f"   -> Inspecting: {filename}")

            with z.open(filename) as f:
                krs_in_this_file = 0
                debug_lines = []

                for line in f:
                    try:
                        line_str = line.decode('utf-8').strip()
                    except:
                        continue

                    if not line_str or line_str.startswith('#'):
                        continue

                    if len(debug_lines) < 3:
                        debug_lines.append(line_str)

                    # kRSUnicode プロパティの検索
                    if 'kRSUnicode' in line_str:
                        krs_in_this_file += 1

                        parts = line_str.split()
                        try:
                            idx = parts.index('kRSUnicode')

                            code_str = parts[0].replace('U+', '')
                            code_point = int(code_str, 16)
                            char = chr(code_point)

                            if len(parts) > idx + 1:
                                rs_data = parts[idx + 1]
                                match = re.match(r"(\d+)'?\.(-?\d+)", rs_data)
                                if match:
                                    radical = int(match.group(1))
                                    strokes = int(match.group(2))

                                    cjk_map.append({
                                        'rad': radical,
                                        'str': strokes,
                                        'cp': code_point,
                                        'char': char
                                    })
                        except:
                            continue

                if krs_in_this_file > 0:
                    print(f"      SUCCESS! {krs_in_this_file} entries found in {filename}.")

    print(f"   TOTAL: {len(cjk_map)} characters extracted.")
    return cjk_map

def generate_html(data):
    if not data:
        print("   ERROR: No data to generate.")
        return

    print("3. Sorting data...")
    data.sort(key=lambda x: (x['rad'], x['str'], x['cp']))

    print(f"4. Generating {OUTPUT_FILE}...")

    # HTMLを日本語のみに設定
    html = """<!DOCTYPE html>
<html lang="ja">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>全訳CJK統合漢字辞典 (部首順)</title> <!-- Japanese only -->
    <style>
        @import url('https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@400;700&family=Noto+Serif+JP:wght@400;700&display=swap');

        :root {
            --bg: #0f172a;
            --text: #e2e8f0;
            --accent: #3b82f6;
            --border: #1e293b;
            --rad-bg: #1e293b;
            --sidebar-bg: #020617;
        }

        body {
            font-family: "HanaMinA", "HanaMinB", "SimSun-ExtB", "Noto Serif JP", "Noto Sans CJK JP", serif;
            background: var(--bg);
            color: var(--text);
            margin: 0;
            padding: 0;
        }

        /* Sidebar */
        .sidebar {
            position: fixed; top: 0; left: 0; width: 220px; height: 100vh;
            overflow-y: auto; background: var(--sidebar-bg);
            border-right: 1px solid var(--border);
            padding: 10px; font-size: 13px; font-family: "Noto Sans JP", sans-serif;
        }
        .sidebar::-webkit-scrollbar { width: 6px; background: var(--sidebar-bg); }
        .sidebar::-webkit-scrollbar-thumb { background: #334155; border-radius: 3px; }

        .sidebar-header {
            padding: 10px 5px; color: #fff; font-weight: bold; font-size: 14px;
            border-bottom: 1px solid #334155; margin-bottom: 10px;
        }

        .sidebar a {
            display: block; color: #94a3b8; text-decoration: none;
            padding: 4px 8px; border-radius: 4px; margin-bottom: 1px;
            transition: background 0.2s;
        }
        .sidebar a:hover { background: var(--accent); color: #fff; }

        /* Main Content */
        .main { margin-left: 220px; padding: 40px; }

        h1 {
            font-weight: 300; color: #fff;
            border-bottom: 1px solid var(--border); padding-bottom: 20px;
            font-family: "Noto Sans JP", sans-serif;
        }

        .desc {
            color: #94a3b8; margin-bottom: 40px; font-family: "Noto Sans JP", sans-serif; font-size: 0.9em;
        }

        /* Radical Section */
        .radical-section { margin-bottom: 60px; scroll-margin-top: 20px; }

        .rad-header {
            background: var(--rad-bg); padding: 15px 25px; border-radius: 8px;
            font-size: 2.2em; margin-bottom: 20px;
            display: flex; align-items: center; gap: 20px;
            border-left: 5px solid var(--accent);
        }

        .rad-info {
            display: flex; flex-direction: column;
        }
        .rad-info-main { font-size: 0.4em; color: #fff; font-weight: bold; }
        .rad-info-sub { font-size: 0.3em; color: #94a3b8; font-family: "Noto Sans JP", sans-serif; }

        /* Strokes */
        .stroke-group { margin-bottom: 25px; }
        .stroke-label {
            color: #64748b; font-size: 0.9em; margin-bottom: 10px;
            font-weight: bold; border-bottom: 1px solid #1e293b; padding-bottom: 4px;
            font-family: "Noto Sans JP", sans-serif;
        }

        /* Grid */
        .grid {
            display: grid; grid-template-columns: repeat(auto-fill, minmax(52px, 1fr)); gap: 8px;
        }
        .char-box {
            aspect-ratio: 1; display: flex; align-items: center; justify-content: center;
            font-size: 32px; background: #1e293b; border-radius: 6px;
            transition: transform 0.1s, background 0.1s; cursor: pointer; color: #cbd5e1;
        }
        .char-box:hover {
            background: var(--accent); color: white; transform: scale(1.15);
            z-index: 10; box-shadow: 0 4px 12px rgba(0,0,0,0.5);
        }
    </style>
</head>
<body>
    <div class="sidebar">
        <div class="sidebar-header">部首索引</div> <!-- 日本語のみ -->
    """

    # サイドバー
    for i, rad_char in enumerate(KANGXI_RADICALS):
        rad_num = i + 1
        html += f'<a href="#rad-{rad_num}">R{rad_num} {rad_char}</a>'

    html += """
    </div>
    <div class="main">
        <h1>
            CJK統合漢字 (基本 + 拡張A-J) <!-- 日本語のみ -->
        </h1>
        <p class="desc">
            Unicode公式データベース生成。<br> <!-- 日本語のみ -->
            ソート順: 康熙部首 → 画数 → コードポイント。 <!-- 日本語のみ -->
        </p>
    """

    current_rad = -1
    current_stroke = -999

    for item in data:
        rad = item['rad']
        strokes = item['str']
        char = item['char']
        hex_code = hex(item['cp']).upper().replace('0X', '')

        # 新規部首
        if rad != current_rad:
            if current_rad != -1:
                html += '</div></div></div>'
            current_rad = rad
            current_stroke = -999
            rad_char = KANGXI_RADICALS[rad-1] if 0 < rad <= 214 else f"R{rad}"

            html += f"""
            <div id="rad-{rad}" class="radical-section">
                <div class="rad-header">
                    <span>{rad_char}</span>
                    <div class="rad-info">
                        <span class="rad-info-main">部首 {rad}</span> <!-- 日本語のみ -->
                    </div>
                </div>
            """

        # 新規画数
        if strokes != current_stroke:
            if current_stroke != -999:
                html += '</div></div>'
            current_stroke = strokes

            # バイリンガル表記のラベル
            if strokes == 0:
                label = "部首のみ (0)" # 日本語のみ
            else:
                label = f"+{strokes} 画" # 日本語のみ

            html += f"""
            <div class="stroke-group">
                <div class="stroke-label">{label}</div>
                <div class="grid">
            """

        html += f'<div class="char-box" title="U+{hex_code}">{char}</div>'

    html += "</div></div></div></div></body></html>"

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write(html)

    print(f"Finished! Open '{OUTPUT_FILE}' to see the result.")

if __name__ == "__main__":
    zip_data = download_and_extract()
    if zip_data:
        data = parse_unihan(zip_data)
        if data:
            generate_html(data)

1. Downloading https://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip...
   Download complete (8.12 MB).
2. Scanning ALL files in ZIP...
   8 files found in archive.
   -> Inspecting: Unihan_DictionaryIndices.txt
   -> Inspecting: Unihan_DictionaryLikeData.txt
   -> Inspecting: Unihan_IRGSources.txt
      SUCCESS! 102998 entries found in Unihan_IRGSources.txt.
   -> Inspecting: Unihan_NumericValues.txt
   -> Inspecting: Unihan_OtherMappings.txt
   -> Inspecting: Unihan_RadicalStrokeCounts.txt
   -> Inspecting: Unihan_Readings.txt
   -> Inspecting: Unihan_Variants.txt
   TOTAL: 102944 characters extracted.
3. Sorting data...
4. Generating cjk_full_busyu_ja.html...
Finished! Open 'cjk_full_busyu_ja.html' to see the result.


# 課題 (Task)
CJK文字データ処理をモジュール化してください。具体的には、`data_processor.py` モジュールを作成して "https://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip" からデータをダウンロードおよび解析し、続いてCSVエクスポート用の `generate_csv.py` スクリプト、FlaskまたはFastAPIを使用したバックエンドAPI、およびデータを表示するためのReactフロントエンドアプリケーションを実装してください。


## データ抽出のリファクタリング

### サブタスク:
現在のスクリプトから、データのダウンロードおよび解析ロジック (`download_and_extract`, `parse_unihan`) を、独立したPythonモジュール (e.g., `data_processor.py`) に抽出してください。これにより、APIやCSV生成コンポーネントで再利用できるようになります。このモジュールは、処理済みのCJK文字データを返します。


**Reasoning**:
The first step is to create the `data_processor.py` file and move the specified variables, functions, and imports into it, including the new `get_cjk_data` function that orchestrates the data processing.



In [7]:
%%writefile data_processor.py
import urllib.request
import zipfile
import io
import re
import os

# --- 設定と実装の詳細 ---
UNIHAN_URL = "https://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip"

# 214の康熙部首一覧
KANGXI_RADICALS = [
    "一", "丨", "丶", "丿", "乙", "亅", "二", "亠", "人", "儿", "入", "八", "冂", "冖", "冫", "几", "凵", "刀", "力", "勹", "匕", "匚", "匸", "十", "卜", "卩", "厂", "厶", "又",
    "口", "囗", "土", "士", "夂", "夊", "夕", "大", "女", "子", "宀", "寸", "小", "尢", "尸", "屮", "山", "巛", "工", "己", "巾", "干", "幺", "广", "廴", "廾", "弋", "弓", "彐", "彡", "彳",
    "心", "戈", "戶", "手", "支", "攴", "文", "斗", "斤", "方", "无", "日", "曰", "月", "木", "欠", "止", "歹", "殳", "毋", "比", "毛", "氏", "气", "水", "火", "爪", "父", "爻", "爿", "片", "牙", "牛", "犬",
    "玄", "玉", "瓜", "瓦", "甘", "生", "用", "田", "疋", "疒", "癶", "白", "皮", "皿", "目", "矛", "矢", "石", "示", "禸", "禾", "穴", "立", "竹", "米", "糸", "缶", "网", "羊", "羽", "老", "而", "耒", "耳", "聿", "肉", "臣", "自", "至", "臼", "舌", "舛", "舟", "艮", "色", "艸", "虍", "虫", "血", "行", "衣", "襾",
    "見", "角", "言", "谷", "豆", "豕", "豸", "貝", "赤", "走", "足", "身", "車", "辛", "辰", "辵", "邑", "酉", "釆", "里",
    "金", "長", "門", "阜", "隶", "隹", "雨", "青", "非",
    "面", "革", "韋", "韭", "音", "頁", "風", "飛", "食", "首", "香",
    "馬", "骨", "高", "髟", "鬥", "鬯", "鬲", "鬼",
    "魚", "鳥", "鹵", "鹿", "麥", "麻",
    "黃", "黍", "黑", "黹",
    "黽", "鼎", "鼓", "鼠",
    "鼻", "齊",
    "齒",
    "龍", "龜",
    "龠"
]

def download_and_extract():
    print(f"1. Downloading {UNIHAN_URL}...")
    try:
        req = urllib.request.Request(
            UNIHAN_URL,
            data=None,
            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
        )
        response = urllib.request.urlopen(req)
        zip_data = response.read()
        print(f"   Download complete ({len(zip_data)/1024/1024:.2f} MB).")
        return zip_data
    except Exception as e:
        print(f"   CRITICAL ERROR: Cannot download Unihan. {e}")
        return None

def parse_unihan(zip_bytes):
    print("2. Scanning ALL files in ZIP...")
    cjk_map = []

    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as z:
        # テキストを含む可能性のある全ファイルのリストアップ
        file_list = [n for n in z.namelist() if not n.endswith('/') and not n.startswith('__MACOSX') and not '/.' in n]

        print(f"   {len(file_list)} files found in archive.")

        for filename in file_list:
            if "ReadMe" in filename or "History" in filename:
                continue

            print(f"   -> Inspecting: {filename}")

            with z.open(filename) as f:
                krs_in_this_file = 0
                debug_lines = []

                for line in f:
                    try:
                        line_str = line.decode('utf-8').strip()
                    except:
                        continue

                    if not line_str or line_str.startswith('#'):
                        continue

                    if len(debug_lines) < 3:
                        debug_lines.append(line_str)

                    # kRSUnicode プロパティの検索
                    if 'kRSUnicode' in line_str:
                        krs_in_this_file += 1

                        parts = line_str.split()
                        try:
                            idx = parts.index('kRSUnicode')

                            code_str = parts[0].replace('U+', '')
                            code_point = int(code_str, 16)
                            char = chr(code_point)

                            if len(parts) > idx + 1:
                                rs_data = parts[idx + 1]
                                match = re.match(r"(\d+)'?\.(-?\d+)", rs_data)
                                if match:
                                    radical = int(match.group(1))
                                    strokes = int(match.group(2))

                                    cjk_map.append({
                                        'rad': radical,
                                        'str': strokes,
                                        'cp': code_point,
                                        'char': char
                                    })
                        except:
                            continue

                if krs_in_this_file > 0:
                    print(f"      SUCCESS! {krs_in_this_file} entries found in {filename}.")

    print(f"   TOTAL: {len(cjk_map)} characters extracted.")
    return cjk_map

def get_cjk_data():
    zip_data = download_and_extract()
    if zip_data:
        return parse_unihan(zip_data)
    return None


Overwriting data_processor.py


**推論 (Reasoning)**:
`data_processor.py` ファイルの作成が完了したため、このモジュールの機能を実演します。具体的には、モジュールをインポートし、 `get_cjk_data` 関数を呼び出すことで、処理済みのCJK文字データを取得します。



In [8]:
import data_processor

print("--- Testing data_processor.py ---")
cjk_data = data_processor.get_cjk_data()

if cjk_data:
    print(f"Successfully retrieved {len(cjk_data)} CJK characters from data_processor.py.")
    print("First 5 entries:")
    for i in range(min(5, len(cjk_data))):
        print(cjk_data[i])
else:
    print("Failed to retrieve CJK data.")

# KANGXI_RADICALS の移動と検証
print(f"KANGXI_RADICALS list length: {len(data_processor.KANGXI_RADICALS)}")

--- Testing data_processor.py ---
1. Downloading https://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip...
   Download complete (8.12 MB).
2. Scanning ALL files in ZIP...
   8 files found in archive.
   -> Inspecting: Unihan_DictionaryIndices.txt
   -> Inspecting: Unihan_DictionaryLikeData.txt
   -> Inspecting: Unihan_IRGSources.txt
      SUCCESS! 102998 entries found in Unihan_IRGSources.txt.
   -> Inspecting: Unihan_NumericValues.txt
   -> Inspecting: Unihan_OtherMappings.txt
   -> Inspecting: Unihan_RadicalStrokeCounts.txt
   -> Inspecting: Unihan_Readings.txt
   -> Inspecting: Unihan_Variants.txt
   TOTAL: 102944 characters extracted.
Successfully retrieved 102944 CJK characters from data_processor.py.
First 5 entries:
{'rad': 1, 'str': 4, 'cp': 13312, 'char': '㐀'}
{'rad': 1, 'str': 5, 'cp': 13313, 'char': '㐁'}
{'rad': 1, 'str': 5, 'cp': 13314, 'char': '㐂'}
{'rad': 2, 'str': 2, 'cp': 13315, 'char': '㐃'}
{'rad': 2, 'str': 2, 'cp': 13316, 'char': '㐄'}
KANGXI_RADICALS list length: 2

## ご提示いただいた内容の日本語訳です。

### サブタスク:
リファクタリングされた`data_processor.py` を利用してCJK文字データを取得し、それをCSVファイルにエクスポートする新しいPythonスクリプト `generate_csv.py` を作成してください。このCSVには、部首番号（radical number）、画数（stroke count）、コードポイント（code point）、および文字自体（character）などの列を含めるものとします。


**推論 (Reasoning)**:
このサブタスクでは、`data_processor` をインポートし、CJK文字データをCSVファイルにエクスポートする新しいPythonスクリプト `generate_csv.py` を作成する必要があります。以下のコードブロックでは、必要なロジックを実装し、指定されたファイルを生成します。



In [9]:
%%writefile generate_csv.py
import csv
import data_processor

# --- 設定と実装の詳細 ---
OUTPUT_CSV_FILE = "cjk_characters.csv"

def generate_csv():
    print("1. Retrieving CJK data...")
    cjk_data = data_processor.get_cjk_data()

    if not cjk_data:
        print("   ERROR: Failed to retrieve CJK data. CSV generation aborted.")
        return

    print(f"   Successfully retrieved {len(cjk_data)} CJK characters.")
    print(f"2. Writing data to {OUTPUT_CSV_FILE}...")

    try:
        with open(OUTPUT_CSV_FILE, 'w', newline='', encoding='utf-8') as csvfile:
            # 辞書のキーに基づいたフィールド名の定義
            fieldnames = ['rad', 'str', 'cp', 'char']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader() # ヘッダー行の書き込み
            for item in cjk_data:
                writer.writerow(item) # 各文字を1行ずつ書き込み

        print(f"   Successfully generated '{OUTPUT_CSV_FILE}' with {len(cjk_data)} entries.")
    except Exception as e:
        print(f"   CRITICAL ERROR: Failed to write CSV file. {e}")

if __name__ == "__main__":
    generate_csv()

Overwriting generate_csv.py


**推論 (Reasoning)**:
前のステップで `generate_csv.py` スクリプトが作成されました。次に、このサブタスクの要件に従い、スクリプトを実行して`cjk_characters.csv` ファイルを生成する必要があります。



In [5]:
!python generate_csv.py


1. Retrieving CJK data...
1. Downloading https://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip...
   Download complete (8.12 MB).
2. Scanning ALL files in ZIP...
   8 files found in archive.
   -> Inspecting: Unihan_DictionaryIndices.txt
   -> Inspecting: Unihan_DictionaryLikeData.txt
   -> Inspecting: Unihan_IRGSources.txt
      SUCCESS! 102998 entries found in Unihan_IRGSources.txt.
   -> Inspecting: Unihan_NumericValues.txt
   -> Inspecting: Unihan_OtherMappings.txt
   -> Inspecting: Unihan_RadicalStrokeCounts.txt
   -> Inspecting: Unihan_Readings.txt
   -> Inspecting: Unihan_Variants.txt
   TOTAL: 102944 characters extracted.
   Successfully retrieved 102944 CJK characters.
2. Writing data to cjk_characters.csv...
   Successfully generated 'cjk_characters.csv' with 102944 entries.


## バックエンドAPIの開発 (Flask/FastAPI)

### サブタスク:
FastAPIを使用して、RESTful APIエンドポイント経由でCJK文字データを提供するPython Webアプリケーションを作成してください。このAPIは、起動時に `data_processor.py` を使用してデータを読み込みます。エンドポイントには、 `/api/characters` （部首や画数によるオプションのフィルター付き）や `/api/radicals`などを含めてください。


**推論 (Reasoning)**:
バックエンドAPIの開発を開始するために、新しいPythonファイル `api.py` を作成します。指示に従い、必要なFastAPIのセットアップ（インポート、アプリの初期化、起動時のデータ読み込み、フィルタリング機能付きの `/api/characters` エンドポイント、 `/api/radicals` エンドポイント、およびCORSミドルウェア）を実装します。これにより、必要なコンテンツを含む `api.py` ファイルを作成するという最初の指示に直接対応します。


In [6]:
%%writefile api.py
from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import data_processor
from typing import Optional, List

app = FastAPI(
    title="CJK Character API",
    description="API for CJK Unified Ideographs based on Unihan data.",
    version="1.0.0"
)

# CJKデータを格納するグローバル変数
cjk_data = []

# 開発用のCORS設定（全オリジンを許可）
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], # 全オリジンの許可
    allow_credentials=True,
    allow_methods=["*"], # 全メソッドの許可
    allow_headers=["*"], # 全ヘッダーの許可
)

@app.on_event("startup")
async def load_cjk_data():
    global cjk_data
    print("API: Loading CJK data on startup...")
    data = data_processor.get_cjk_data()
    if data:
        # 起動時のデータソートによるフィルタリングの最適化
        data.sort(key=lambda x: (x['rad'], x['str'], x['cp']))
        cjk_data = data
        print(f"API: Loaded {len(cjk_data)} CJK characters.")
    else:
        print("API: Failed to load CJK data.")

@app.get("/api/characters", response_model=List[dict], summary="Get CJK Characters")
async def get_characters(
    radical: Optional[int] = Query(None, description="Filter by Kangxi radical number (1-214)"),
    strokes: Optional[int] = Query(None, description="Filter by additional stroke count")
):
    """
    Retrieve a list of CJK characters.

    You can filter the results by `radical` number and `strokes` count.
    """
    if not cjk_data:
        return []

    filtered_characters = cjk_data

    if radical is not None:
        filtered_characters = [char for char in filtered_characters if char['rad'] == radical]

    if strokes is not None:
        filtered_characters = [char for char in filtered_characters if char['str'] == strokes]

    return filtered_characters

@app.get("/api/radicals", response_model=List[str], summary="Get Kangxi Radicals")
async def get_radicals():
    """
    Retrieve the list of 214 Kangxi Radicals.
    """
    return data_processor.KANGXI_RADICALS

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)

Writing api.py


FastAPIバックエンド用の `api.py` スクリプトが正常に作成されました。

**FastAPIアプリケーションの実行方法:**

1.  **新しいターミナルまたはコマンドプロンプトを開きます。** (Google Colab の場合は、 `ランタイム > ランタイムのタイプを変更` and で `T4 GPU` を選択した後、 `ターミナル` セクションからターミナルを使用できます。)
2.  `api.py` が保存されているディレクトリ（例：`/content/`）に移動します。
3.  次のコマンドを実行します： `uvicorn api:app --host 0.0.0.0 --port 8000`

サーバーが起動すると、 `http://localhost:8000/docs` で API ドキュメント（Swagger UI）にアクセスできます（外部実行でパブリック IP が提供されている場合は、`localhost` をその IP に置き換えてください）。

**API エンドポイントの例:**

*   **全文字の取得:** `http://localhost:8000/api/characters`
*   **部首 1 でフィルタリング:** `http://localhost:8000/api/characters?radical=1`
*   **部首 1 かつ 4 画でフィルタリング:** `http://localhost:8000/api/characters?radical=1&strokes=4`
*   **康熙部首の一覧取得:** `http://localhost:8000/api/radicals`

**注意:** Colab のセルで `!python api.py` を直接実行すると、そのセルが無限にブロックされます。サーバーは別のプロセスまたはターミナルで実行することをお勧めします。

## Reactフロントエンドアプリケーションの開発

### サブタスク:
新しい React.js プロジェクトをセットアップし、Python API からデータを受信（コンシューム）するための準備を行ってください。
