# 03. Gold Layer: 有病率・治療パターン分析 (SQL版)

## 概要
Silverデータ（RA患者マスタ・定義別サマリー）と、有病率の分母となるBronzeの患者データを用いて、論文の主要結果を再現します。
本ノートブックはDuckDBを使用してSQLで処理を実行します。

## 分析内容
1. **有病率の推定** (Table 1相当)
2. **年齢層別有病率と性別比** (Table 2相当)
3. **年齢層別薬剤使用パターン** (Table 3相当)
4. **手術・検査実施率** (Table 4相当)
5. **結果の可視化と論文との比較**

In [None]:
import duckdb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['font.family'] = 'DejaVu Sans'
import os

# Medallion-layer data directories (paths are relative to the current
# working directory).
BRONZE_DIR, SILVER_DIR, GOLD_DIR = (
    "../data/bronze",
    "../data/silver",
    "../data/gold",
)

# Make sure the Gold output directory exists before anything is written to it.
os.makedirs(GOLD_DIR, exist_ok=True)

# DuckDB connection shared by every query in this notebook.
con = duckdb.connect()

print("Gold Layer分析を開始します... (SQL版)")

## 1. データの読み込み

In [None]:
# Register each input Parquet file as a DuckDB view so that later cells can
# query it by name.
view_sources = {
    "ra_master": f"{SILVER_DIR}/ra_patients_def3.parquet",
    "ra_definitions": f"{SILVER_DIR}/ra_definitions_summary.parquet",
    "patients": f"{BRONZE_DIR}/patients.parquet",
}
for view_name, parquet_path in view_sources.items():
    con.execute(
        f"CREATE OR REPLACE VIEW {view_name} AS SELECT * FROM read_parquet('{parquet_path}')"
    )

# Row counts as a quick sanity check of the loaded data.
ra_count, total_count = (
    con.execute(f"SELECT COUNT(*) FROM {view_name}").fetchone()[0]
    for view_name in ("ra_master", "patients")
)

print(f"RA患者数 (Definition 3): {ra_count:,}")
print(f"総患者数: {total_count:,}")

## 2. Table 1相当: RA定義別有病率

In [None]:
# Reference values from Table 1 of the paper.
paper_table1 = {
    'Definition 0': {'n_patients': 1116122, 'prevalence': 0.88},
    'Definition 2': {'n_patients': 869340, 'prevalence': 0.69},
    'Definition 3': {'n_patients': 825772, 'prevalence': 0.65},
    'Definition 4': {'n_patients': 583137, 'prevalence': 0.46}
}

# Denominator of the reproduced prevalence: number of rows in `patients`.
total_population = con.execute("SELECT COUNT(*) FROM patients").fetchone()[0]

# Patient count and prevalence for each RA definition.
df_definitions = con.execute("""
    SELECT 
        definition,
        n_patients,
        prevalence_pct
    FROM ra_definitions
""").fetchdf()

# Build the comparison table (reproduced vs. paper).
table1_columns = [
    'Definition',
    'N_patients (Reprod)',
    'Prevalence % (Reprod)',
    'Prevalence % (Paper)',
]
table1_data = []
for _, row in df_definitions.iterrows():
    def_name = row['definition'].replace('def_', 'Definition ')
    paper_val = paper_table1.get(def_name)

    table1_data.append({
        'Definition': def_name,
        'N_patients (Reprod)': int(row['n_patients']),
        'Prevalence % (Reprod)': f"{row['prevalence_pct']:.2f}",
        # Always a string: mixing float values with the 'N/A' placeholder
        # would give a mixed-type object column, which cannot be written
        # to Parquet when the Gold tables are saved.
        'Prevalence % (Paper)': (
            f"{paper_val['prevalence']:.2f}" if paper_val is not None else 'N/A'
        ),
    })

# Passing the column list keeps the schema stable even if no definition
# rows were returned.
df_table1 = pd.DataFrame(table1_data, columns=table1_columns)
print("=" * 70)
print("Table 1: RA定義別の患者数と有病率")
print("=" * 70)
print(df_table1.to_string(index=False))
print(f"\n注: 論文は日本人口約1.27億人を分母、再現は{total_population:,}人を分母")

## 3. Table 2相当: 年齢層別有病率と性別比

In [None]:
# Display order of the age groups.
age_group_order = ["16-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-84", "85+"]

# Reference values from Table 2 of the paper.
paper_table2 = {
    "16-19": {"n": 4389, "pct": 0.5, "female_pct": 64.0, "fm_ratio": 1.77, "prevalence": 0.03},
    "20-29": {"n": 12253, "pct": 1.5, "female_pct": 79.8, "fm_ratio": 3.94, "prevalence": 0.07},
    "30-39": {"n": 39519, "pct": 4.8, "female_pct": 80.7, "fm_ratio": 3.61, "prevalence": 0.27},
    "40-49": {"n": 83519, "pct": 10.1, "female_pct": 80.7, "fm_ratio": 4.18, "prevalence": 0.44},
    "50-59": {"n": 123201, "pct": 14.9, "female_pct": 79.3, "fm_ratio": 3.84, "prevalence": 0.79},
    "60-69": {"n": 218326, "pct": 26.4, "female_pct": 76.9, "fm_ratio": 3.33, "prevalence": 1.23},
    "70-79": {"n": 236137, "pct": 28.6, "female_pct": 74.3, "fm_ratio": 2.89, "prevalence": 1.63},
    "80-84": {"n": 50558, "pct": 6.1, "female_pct": 74.2, "fm_ratio": 2.88, "prevalence": 1.14},
    "85+": {"n": 57870, "pct": 7.0, "female_pct": 77.7, "fm_ratio": 3.49, "prevalence": 0.89}
}

# Paper values shown on the all-ages row. Kept separate so that
# paper_table2 continues to hold age groups only.
paper_table2_total = {"pct": 100.0, "prevalence": 0.65}


def _fmt_or_na(value, digits):
    """Format *value* with *digits* decimals, or return 'N/A' if it is missing.

    None and NaN both count as missing (SQL NULLs in numeric columns are
    expected to reach the DataFrame as one of these). A genuine 0 is
    formatted normally rather than being treated as missing.
    """
    if pd.isna(value):
        return 'N/A'
    return f"{value:.{digits}f}"


# Per-age-group statistics computed in SQL.
df_table2_raw = con.execute("""
    WITH ra_stats AS (
        SELECT 
            age_group,
            COUNT(*) AS n_ra,
            SUM(CASE WHEN sex = '2' THEN 1 ELSE 0 END) AS n_female,
            SUM(CASE WHEN sex = '1' THEN 1 ELSE 0 END) AS n_male
        FROM ra_master
        GROUP BY age_group
    ),
    pop_stats AS (
        SELECT 
            age_group,
            COUNT(*) AS n_all
        FROM patients
        GROUP BY age_group
    ),
    total_ra AS (
        SELECT COUNT(*) AS total FROM ra_master
    )
    SELECT 
        rs.age_group,
        rs.n_ra,
        ROUND(rs.n_ra * 100.0 / tr.total, 1) AS pct_of_total,
        ROUND(rs.n_female * 100.0 / rs.n_ra, 1) AS female_pct,
        ROUND(rs.n_female * 1.0 / NULLIF(rs.n_male, 0), 2) AS fm_ratio,
        ROUND(rs.n_ra * 100.0 / NULLIF(ps.n_all, 0), 2) AS prevalence
    FROM ra_stats rs
    LEFT JOIN pop_stats ps ON rs.age_group = ps.age_group
    CROSS JOIN total_ra tr
    ORDER BY 
        CASE rs.age_group 
            WHEN '16-19' THEN 1 WHEN '20-29' THEN 2 WHEN '30-39' THEN 3
            WHEN '40-49' THEN 4 WHEN '50-59' THEN 5 WHEN '60-69' THEN 6
            WHEN '70-79' THEN 7 WHEN '80-84' THEN 8 WHEN '85+' THEN 9
        END
""").fetchdf()

# All-ages row, appended below the per-age-group rows.
total_stats = con.execute("""
    SELECT 
        'Total' AS age_group,
        COUNT(*) AS n_ra,
        100.0 AS pct_of_total,
        ROUND(SUM(CASE WHEN sex = '2' THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 1) AS female_pct,
        ROUND(SUM(CASE WHEN sex = '2' THEN 1 ELSE 0 END) * 1.0 / 
              NULLIF(SUM(CASE WHEN sex = '1' THEN 1 ELSE 0 END), 0), 2) AS fm_ratio,
        ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM patients), 2) AS prevalence
    FROM ra_master
""").fetchdf()

df_table2_raw = pd.concat([df_table2_raw, total_stats], ignore_index=True)

# Format the reproduced values next to the paper's values. Every formatted
# column is a string, so no column mixes floats and text; a mixed column
# cannot be written to Parquet when the Gold tables are saved.
table2_columns = [
    'Age Group', 'N', '% of Total', 'Female %', 'F/M Ratio',
    'Prevalence %', '(Paper) %', '(Paper) Prevalence',
]
table2_data = []
for _, row in df_table2_raw.iterrows():
    ag = row['age_group']
    paper = paper_table2_total if ag == 'Total' else paper_table2.get(ag, {})

    table2_data.append({
        'Age Group': ag,
        'N': int(row['n_ra']),
        '% of Total': _fmt_or_na(row['pct_of_total'], 1),
        'Female %': _fmt_or_na(row['female_pct'], 1),
        # Missing when the group has no male patients (NULLIF in the SQL).
        'F/M Ratio': _fmt_or_na(row['fm_ratio'], 2),
        # Missing when the age group has no match in `patients`.
        'Prevalence %': _fmt_or_na(row['prevalence'], 2),
        '(Paper) %': _fmt_or_na(paper.get('pct'), 1),
        '(Paper) Prevalence': _fmt_or_na(paper.get('prevalence'), 2),
    })

df_table2 = pd.DataFrame(table2_data, columns=table2_columns)

print("=" * 90)
print("Table 2: 年齢層別RA患者数、性別比、有病率")
print("=" * 90)
print(df_table2.to_string(index=False))

## 4. Table 3相当: 年齢層別薬剤使用パターン

In [None]:
# Reference values from Table 3 of the paper (excerpt).
paper_table3_mtx = {
    "16-19": 59.9, "20-29": 60.9, "30-39": 64.7, "40-49": 69.9,
    "50-59": 73.1, "60-69": 70.9, "70-79": 60.4, "80-84": 50.5, "85+": 38.2
}

paper_table3_bdmard = {
    "16-19": 50.9, "20-29": 42.9, "30-39": 34.8, "40-49": 30.9,
    "50-59": 27.9, "60-69": 22.4, "70-79": 17.8, "80-84": 15.0, "85+": 13.7
}

# Drug flag columns of ra_master whose mean is reported as a usage rate (%).
drug_columns = ['MTX', 'SSZ', 'BUC', 'TAC', 'IGT', 'LEF',
                'TNFI', 'IL6I', 'ABT', 'JAKi', 'CS', 'bDMARDs']

# Aggregate expressions shared by the per-age-group query and the all-ages
# query, so the two result sets cannot drift apart.
drug_rate_select = ",\n        ".join(
    ["COUNT(*) AS N"]
    + [f"ROUND(AVG({col}) * 100, 1) AS {col}" for col in drug_columns]
    + ["ROUND(AVG(TNFI) / NULLIF(AVG(ABT), 0), 1) AS TNFI_ABT_ratio"]
)

# Usage rates per age group.
df_table3_raw = con.execute(f"""
    SELECT 
        age_group,
        {drug_rate_select}
    FROM ra_master
    GROUP BY age_group
    ORDER BY 
        CASE age_group 
            WHEN '16-19' THEN 1 WHEN '20-29' THEN 2 WHEN '30-39' THEN 3
            WHEN '40-49' THEN 4 WHEN '50-59' THEN 5 WHEN '60-69' THEN 6
            WHEN '70-79' THEN 7 WHEN '80-84' THEN 8 WHEN '85+' THEN 9
        END
""").fetchdf()

# All-ages row, appended below the per-age-group rows.
total_drugs = con.execute(f"""
    SELECT 
        'Total' AS age_group,
        {drug_rate_select}
    FROM ra_master
""").fetchdf()

df_table3_raw = pd.concat([df_table3_raw, total_drugs], ignore_index=True)

# Display copy with presentation column names.
df_table3 = df_table3_raw.copy()
df_table3.columns = ['Age Group', 'N', 'MTX', 'SSZ', 'BUC', 'TAC', 'IGT', 'LEF', 
                      'TNFI', 'IL6I', 'ABT', 'JAKi', 'CS', 'bDMARDs', 'TNFI/ABT']

print("=" * 120)
print("Table 3: 年齢層別薬剤使用率 (%)")
print("=" * 120)
display_cols = ['Age Group', 'N', 'MTX', 'SSZ', 'BUC', 'TNFI', 'IL6I', 'ABT', 'JAKi', 'CS', 'bDMARDs', 'TNFI/ABT']
print(df_table3[display_cols].to_string(index=False))

print("\n【論文との比較（MTX使用率）】")
# Look up by age group with a fallback: an age group with no RA patients has
# no row in the query result, and a positional `.values[0]` would raise
# IndexError for it.
mtx_by_age_group = df_table3.set_index('Age Group')['MTX']
for ag in age_group_order:
    reprod = mtx_by_age_group.get(ag, 'N/A')
    paper = paper_table3_mtx.get(ag, 'N/A')
    print(f"  {ag}: 再現={reprod}%, 論文={paper}%")

## 5. Table 4相当: 手術・検査実施率

In [None]:
# Reference values from Table 4 of the paper.
paper_table4 = {
    'TJR': 0.93,
    'ARTHROPLASTY': 0.32,
    'SYNOVECTOMY': 0.13,
    'Total_Surgery': 1.35
}

# paper_table4 keys whose name differs from the reproduced column. Without
# this mapping 'Total_Surgery' never matches a column and the paper's overall
# surgery rate is silently left out of the comparison.
paper_key_to_column = {'Total_Surgery': 'any_RA_surgery'}

# Procedure flag columns of ra_master whose mean is reported as a rate (%).
procedure_columns = ['TJR', 'ARTHROPLASTY', 'SYNOVECTOMY', 'ULTRASOUND', 'BMD', 'any_RA_surgery']

# Aggregate expressions shared by the per-age-group query and the all-ages
# query.
procedure_rate_select = ",\n        ".join(
    ["COUNT(*) AS N"]
    + [f"ROUND(AVG({col}) * 100, 2) AS {col}" for col in procedure_columns]
)

# Surgery / examination rates per age group.
df_table4_raw = con.execute(f"""
    SELECT 
        age_group,
        {procedure_rate_select}
    FROM ra_master
    GROUP BY age_group
    ORDER BY 
        CASE age_group 
            WHEN '16-19' THEN 1 WHEN '20-29' THEN 2 WHEN '30-39' THEN 3
            WHEN '40-49' THEN 4 WHEN '50-59' THEN 5 WHEN '60-69' THEN 6
            WHEN '70-79' THEN 7 WHEN '80-84' THEN 8 WHEN '85+' THEN 9
        END
""").fetchdf()

# All-ages row, appended below the per-age-group rows.
total_procs = con.execute(f"""
    SELECT 
        'Total' AS age_group,
        {procedure_rate_select}
    FROM ra_master
""").fetchdf()

df_table4_raw = pd.concat([df_table4_raw, total_procs], ignore_index=True)

# Display copy with presentation column names.
df_table4 = df_table4_raw.copy()
df_table4.columns = ['Age Group', 'N', 'TJR', 'ARTHROPLASTY', 'SYNOVECTOMY', 'ULTRASOUND', 'BMD', 'any_RA_surgery']

print("=" * 100)
print("Table 4: 年齢層別手術・検査実施率 (%)")
print("=" * 100)
print(df_table4.to_string(index=False))

print("\n【論文との比較（全年齢）】")
is_total_row = df_table4['Age Group'] == 'Total'
for proc, paper_val in paper_table4.items():
    column = paper_key_to_column.get(proc, proc)
    if column in df_table4.columns:
        reprod_val = df_table4.loc[is_total_row, column].values[0]
        print(f"  {proc}: 再現={reprod_val}%, 論文={paper_val}%")

## 6. 結果の可視化

In [None]:
# Per-age-group values for the charts. The result is re-indexed onto
# age_group_order so that it always has exactly one row per expected age
# group, in display order: a group with no RA patients becomes a NaN row
# and any unexpected group label is dropped. Without this, a missing or extra
# group makes the bar/plot calls fail on mismatched lengths.
df_viz = (
    con.execute("""
        SELECT 
            age_group,
            COUNT(*) AS n,
            AVG(MTX) * 100 AS mtx_rate,
            AVG(bDMARDs) * 100 AS bdmard_rate,
            AVG(TNFI) / NULLIF(AVG(ABT), 0) AS tnfi_abt_ratio
        FROM ra_master
        GROUP BY age_group
    """)
    .fetchdf()
    .set_index('age_group')
    .reindex(age_group_order)
    .reset_index()
)

x = np.arange(len(age_group_order))
width = 0.35


def _plot_rate_comparison(ax, reproduced, paper, ylabel, title):
    """Draw side-by-side bars of reproduced vs. paper rates per age group."""
    ax.bar(x - width/2, reproduced, width, label='Reproduced', color='steelblue', alpha=0.7)
    ax.bar(x + width/2, paper, width, label='Paper', color='coral', alpha=0.7)
    ax.set_xticks(x)
    ax.set_xticklabels(age_group_order, rotation=45)
    ax.set_xlabel('Age Group')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(axis='y', alpha=0.3)


fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# 1. Number of RA patients per age group (0 for groups without patients).
ax1 = axes[0, 0]
ax1.bar(range(len(age_group_order)), df_viz['n'].fillna(0).values, color='steelblue', alpha=0.7)
ax1.set_xticks(range(len(age_group_order)))
ax1.set_xticklabels(age_group_order, rotation=45)
ax1.set_xlabel('Age Group')
ax1.set_ylabel('Number of RA Patients')
ax1.set_title('Age Distribution of RA Patients (Definition 3)')
ax1.grid(axis='y', alpha=0.3)

# 2. MTX usage rate per age group, compared with the paper.
ax2 = axes[0, 1]
mtx_reprod = df_viz['mtx_rate'].values
mtx_paper = [paper_table3_mtx.get(ag, 0) for ag in age_group_order]
_plot_rate_comparison(ax2, mtx_reprod, mtx_paper, 'MTX Usage Rate (%)', 'MTX Usage by Age Group')

# 3. bDMARDs usage rate per age group, compared with the paper.
ax3 = axes[1, 0]
bdmard_reprod = df_viz['bdmard_rate'].values
bdmard_paper = [paper_table3_bdmard.get(ag, 0) for ag in age_group_order]
_plot_rate_comparison(ax3, bdmard_reprod, bdmard_paper, 'bDMARDs Usage Rate (%)', 'bDMARDs Usage by Age Group')

# 4. TNFI/ABT ratio across age groups. An undefined ratio (no ABT use, or
# no patients in the group) is plotted as 0.
ax4 = axes[1, 1]
tnfi_abt_ratio = df_viz['tnfi_abt_ratio'].fillna(0).values
ax4.plot(age_group_order, tnfi_abt_ratio, 'o-', color='darkgreen', linewidth=2, markersize=8)
ax4.set_xlabel('Age Group')
ax4.set_ylabel('TNFI/ABT Ratio')
ax4.set_title('TNFI to ABT Usage Ratio by Age')
ax4.tick_params(axis='x', rotation=45)
ax4.grid(alpha=0.3)

plt.tight_layout()
plt.savefig(f"{GOLD_DIR}/analysis_results.png", dpi=150, bbox_inches='tight')
plt.show()

print(f"\n図を保存しました: {GOLD_DIR}/analysis_results.png")

## 7. 主要結果のサマリー

In [None]:
# Compute all headline indicators in a single SQL pass. The result is one
# tuple whose positions follow the SELECT list below.
summary_stats = con.execute("""
    SELECT 
        COUNT(*) AS total_ra,
        (SELECT COUNT(*) FROM patients) AS total_pop,
        ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM patients), 2) AS prevalence,
        ROUND(SUM(CASE WHEN sex = '2' THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 1) AS female_ratio,
        ROUND(SUM(CASE WHEN age >= 65 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 1) AS elderly_65_ratio,
        ROUND(SUM(CASE WHEN age >= 85 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 1) AS elderly_85_ratio,
        ROUND(AVG(MTX) * 100, 1) AS mtx_rate,
        ROUND(AVG(bDMARDs) * 100, 1) AS bdmard_rate,
        ROUND(AVG(CS) * 100, 1) AS cs_rate,
        ROUND(AVG(any_RA_surgery) * 100, 2) AS surgery_rate
    FROM ra_master
""").fetchone()

# Row labels and the paper's values, in matching order.
metric_labels = [
    'Total RA Patients (Definition 3)',
    'Prevalence (%)',
    'Female Ratio (%)',
    'Age >= 65 years (%)',
    'Age >= 85 years (%)',
    'MTX Usage (%)',
    'bDMARDs Usage (%)',
    'Corticosteroid Usage (%)',
    'RA Surgery Rate (%)',
]
paper_values = ['825,772', '0.65', '76.3', '60.8', '7.0', '63.4', '22.9', '~45', '1.35']

# Reproduced values: the patient count gets thousands separators; positions
# 2..9 are shown as-is. Position 1 (total population) is not displayed.
reproduced_values = [f"{summary_stats[0]:,}"]
reproduced_values.extend(f"{value}" for value in summary_stats[2:10])

# Summary table.
summary_data = {
    'Metric': metric_labels,
    'Reproduced': reproduced_values,
    'Paper': paper_values,
}

df_summary = pd.DataFrame(summary_data)

banner = "=" * 70
print(banner)
print("主要結果サマリー: 再現 vs 論文")
print(banner)
print(df_summary.to_string(index=False))

## 8. Goldデータの保存

In [None]:
# Result tables to persist, keyed by output file stem.
gold_tables = {
    "table1_definitions": df_table1,
    "table2_age_distribution": df_table2,
    "table3_medication": df_table3,
    "table4_procedures": df_table4,
    "summary": df_summary,
}

# Write every table as Parquet first ...
for file_stem, frame in gold_tables.items():
    frame.to_parquet(f"{GOLD_DIR}/{file_stem}.parquet", index=False)

# ... then as CSV as well, for easy manual inspection.
for file_stem, frame in gold_tables.items():
    frame.to_csv(f"{GOLD_DIR}/{file_stem}.csv", index=False)

print("Goldデータを保存しました:")
for file_stem in gold_tables:
    print(f"  - {file_stem}.parquet/csv")
print("  - analysis_results.png")

## 9. 論文の主要な発見との対応

In [None]:
# Horizontal rule used around the section titles.
rule = "=" * 70

print(rule)
print("論文の主要な発見と再現結果の対応")
print(rule)

# One entry per key finding of the paper: the claim, the paper's figure, and
# either the reproduced figure (taken from summary_stats by position) or a
# pointer to where it can be checked.
findings = [
    {
        "finding": "1. RA有病率は0.65%",
        "paper": "825,772人、有病率0.65%",
        "reproduced": f"{summary_stats[0]:,}人、有病率{summary_stats[2]}%"
    },
    {
        "finding": "2. 女性が76.3%を占める",
        "paper": "76.3%",
        "reproduced": f"{summary_stats[3]}%"
    },
    {
        "finding": "3. 65歳以上が60.8%",
        "paper": "60.8%",
        "reproduced": f"{summary_stats[4]}%"
    },
    {
        "finding": "4. 70-79歳群で最高の有病率1.63%",
        "paper": "1.63%",
        "reproduced": "(計算済み - Table 2参照)"
    },
    {
        "finding": "5. MTX使用率は年齢とともに減少",
        "paper": "40-49歳: 69.9% -> 85歳以上: 38.2%",
        "reproduced": "(グラフで確認可能)"
    },
    {
        "finding": "6. bDMARDs使用率は若年で高い",
        "paper": "16-19歳: 50.9% -> 85歳以上: 13.7%",
        "reproduced": "(グラフで確認可能)"
    },
    {
        "finding": "7. TNFI/ABT比は年齢とともに減少",
        "paper": "16-19歳: 24.0:1 -> 85歳以上: 1.7:1",
        "reproduced": "(グラフで確認可能)"
    }
]

# Print each finding as a three-line entry preceded by a blank line.
for entry in findings:
    print(
        f"\n{entry['finding']}",
        f"  論文: {entry['paper']}",
        f"  再現: {entry['reproduced']}",
        sep="\n",
    )

print("\n" + rule)
print("Gold Layer 分析完了 (SQL版)")
print(rule)

## 10. 勘所のまとめ

### このノートブックで学べること

#### データフローの勘所
1. **Bronze -> Silver**: 生データに対して「定義」を適用して分析対象を絞り込む
   - ICD-10コードによる疾患同定
   - 処方月数による患者定義の精緻化

2. **Silver -> Gold**: 分析単位での集計と指標算出
   - 年齢群別の集計
   - 使用率・実施率の計算

#### NDBデータ解析の勘所
1. **患者同定**: 診断コード（ICD-10）だけでなく、治療薬処方も組み合わせる
2. **処方月数**: 「2ヶ月以上」などの閾値で患者定義を厳格化
3. **年齢層別分析**: 高齢者の治療パターンは若年者と異なる
4. **性別比**: RAは女性に多い（F/M比で評価）

#### 論文再現のポイント
1. 論文の「定義」を正確に理解・実装する
2. 複数の定義を比較して妥当性を確認
3. 結果を論文の表と照合して検証

In [None]:
# Close the DuckDB connection that was opened at the start of the notebook.
con.close()