# Statistical Significance Test (Wilcoxon Signed-Rank)
This notebook runs paired Wilcoxon signed-rank tests comparing Model v2 against Baseline C on the semantic and valid-style scores from the same evaluation set.


In [1]:
from scipy import stats
import json
import numpy as np
from pathlib import Path

# The evaluation output is parsed as one JSON document (despite the .jsonl
# extension) and is indexed first by model name, then by metric name.
evaluate_results_path = Path("./outputs/evaluate_result.jsonl")
evaluate_result = json.loads(
    evaluate_results_path.read_text(encoding="utf-8")
)

# Scores for Model v2: semantic metric first, then valid-style metric.
scores_v2_semantic, scores_v2_style_valid = (
    evaluate_result["Modelv2"][metric] for metric in ("semantic", "valid_style")
)

# Scores for Baseline C, in the same metric order.
scores_baseC_semantic, scores_baseC_style_valid = (
    evaluate_result["BaselineC"][metric] for metric in ("semantic", "valid_style")
)


def check_significance(name, list1, list2, alpha=0.05):
    """Run a paired Wilcoxon signed-rank test and print a short report.

    The Wilcoxon signed-rank test is used because both score lists are
    measured on the same test items (paired samples) and are not assumed to
    be normally distributed.

    Besides SciPy's statistic and p-value, a Z-value is computed by hand from
    the normal approximation of the statistic, with a 0.5 continuity
    correction. Zero differences are dropped first, which matches SciPy's
    default ``zero_method="wilcox"``.

    Args:
        name: Label printed in the report header.
        list1: Scores of the first system; converted to a float array.
        list2: Scores of the second system, paired element-wise with list1.
        alpha: Significance threshold compared against the p-value
            (defaults to 0.05).

    Returns:
        None. The statistic, Z-value, p-value and verdict are printed.

    Raises:
        ValueError: If the inputs are empty or do not have the same shape.
    """
    list1 = np.asarray(list1, dtype=float)
    list2 = np.asarray(list2, dtype=float)

    # A paired test needs one-to-one pairs; without this check NumPy would
    # silently broadcast e.g. a length-1 input against a longer one.
    if list1.shape != list2.shape:
        raise ValueError(
            f"Paired samples must have the same shape, "
            f"got {list1.shape} and {list2.shape}"
        )
    if list1.size == 0:
        raise ValueError("Paired samples must not be empty")

    # Non-zero paired differences; zeros carry no sign information.
    d = list1 - list2
    d = d[d != 0]
    n = len(d)

    if n == 0:
        # Every pair is identical. SciPy cannot rank an empty set of
        # differences (it raises or yields NaN depending on the version), so
        # report the degenerate "no difference at all" outcome directly.
        stat, p_value, Z = 0.0, 1.0, 0.0
    else:
        # With the default two-sided alternative the statistic is the smaller
        # of the positive and negative rank sums.
        stat, p_value = stats.wilcoxon(list1, list2)

        # Mean and standard deviation of the statistic under the null
        # hypothesis (normal approximation, no tie correction).
        mu_W = n * (n + 1) / 4
        sigma_W = np.sqrt(n * (n + 1) * (2 * n + 1) / 24)

        # Continuity correction: shift the statistic 0.5 towards its mean.
        # The statistic never exceeds the mean, so the corrected numerator is
        # clamped at 0 to keep the correction from flipping the sign of Z.
        Z = min(stat - mu_W + 0.5, 0.0) / sigma_W

    print(f"--- {name} ---")
    print(f"Statistic: {stat}")
    print(f"Z-value: {Z:.4f}")
    print(f"P-value: {p_value}")

    if p_value < alpha:
        print("Result: Significant Difference (显著差异)")
    else:
        print("Result: No Significant Difference (无显著差异)")
    print("\n")

print("=== Significance Test: Model v2 vs Baseline C ===\n")

# One report per metric, always comparing Model v2 (first) to Baseline C (second).
for metric_label, v2_scores, baseline_scores in (
    ("Semantic Score", scores_v2_semantic, scores_baseC_semantic),
    ("Valid Style Score", scores_v2_style_valid, scores_baseC_style_valid),
):
    check_significance(metric_label, v2_scores, baseline_scores)

=== Significance Test: Model v2 vs Baseline C ===

--- Semantic Score ---
Statistic: 39519.0
Z-value: -17.0665
P-value: 2.6321583467485274e-65
Result: Significant Difference (显著差异)


--- Valid Style Score ---
Statistic: 96719.0
Z-value: -7.4291
P-value: 1.0929604330162161e-13
Result: Significant Difference (显著差异)


