# AB test 통계 검정 코드 template
## abtest_auto() -> 이 함수는 AB test group 수에 따른 통계 검정 방식 자동 대응

1. AB test group 2 개인 경우 -> Z-test, chi-square, bayesian
2. AB test group 3 개 이상인 경우 -> chi-square, 사후 pairwise Z-test(Holm / FDR 다중보정), bayesian

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import chi2_contingency, beta
from statsmodels.stats.proportion import proportions_ztest
from statsmodels.stats.multitest import multipletests

# ✅ Fix the pandas float display format to 4 decimal places.
#    This applies to the current session only; it must be set again after Python restarts.
pd.set_option('display.float_format', '{:.4f}'.format)

def _show(frame):
    """Render *frame* with the notebook's ``display`` when it exists, otherwise ``print`` it."""
    try:
        render = display
    except NameError:
        # ``display`` is not imported in this file; it only exists when the
        # interactive shell injects it, so fall back to plain printing.
        render = print
    render(frame)


def _validate_abtest_inputs(groups, successes, trials, samples):
    """Coerce the inputs to a list / 1-D arrays and reject inconsistent data early."""
    groups = list(groups)
    successes = np.asarray(successes)
    trials = np.asarray(trials)

    if successes.ndim != 1 or trials.ndim != 1:
        raise ValueError("successes and trials must be one-dimensional")
    if not (len(groups) == len(successes) == len(trials)):
        raise ValueError("groups, successes and trials must have the same length")
    if len(groups) < 2:
        raise ValueError("at least two groups are required for an A/B test")
    if len(set(groups)) != len(groups):
        # Posterior samples are keyed by group name; duplicates would overwrite each other.
        raise ValueError("group names must be unique")
    if np.any(trials <= 0):
        raise ValueError("every group needs a positive number of trials")
    if np.any(successes < 0) or np.any(successes > trials):
        raise ValueError("successes must satisfy 0 <= successes <= trials")
    if samples < 1:
        raise ValueError("samples must be a positive integer")
    return groups, successes, trials


def abtest_auto(groups, successes, trials, metric_name="Metric", samples=100_000):
    """Run and print frequentist and Bayesian tests for a conversion-rate A/B test.

    The frequentist part depends on the number of groups:

    * exactly 2 groups: two-proportion Z-test, then a chi-square test on the
      2x2 contingency table;
    * 3 or more groups: chi-square test on the k x 2 table, then pairwise
      Z-tests whose p-values are corrected with Holm and with
      Benjamini-Hochberg (FDR), both at alpha=0.05.

    The Bayesian part is common to both cases: each group's conversion rate is
    sampled from Beta(1 + successes, 1 + failures), the sampled densities are
    plotted, and the probability that each group is best plus the pairwise
    superiority probabilities are reported.

    Args:
        groups: Sequence of unique group labels (at least two).
        successes: Per-group success counts; list or array, same length as ``groups``.
        trials: Per-group trial counts; list or array, same length as ``groups``.
        metric_name: Title printed at the top of the report.
        samples: Number of posterior draws per group.

    Returns:
        None. All results are printed / displayed / plotted.

    Raises:
        ValueError: If the inputs are inconsistent (fewer than two groups,
            mismatched lengths, duplicate labels, non-positive trials,
            successes outside ``[0, trials]``, or ``samples < 1``).
    """
    # Validate before producing any output so that bad input fails fast.
    groups, successes, trials = _validate_abtest_inputs(groups, successes, trials, samples)

    n_groups = len(groups)
    failures = trials - successes
    conversion_rates = successes / trials
    separator = f"\n{'='*60}"
    # Every unordered pair (i, j) with i < j, in the order the reports list them.
    index_pairs = [(i, j) for i in range(n_groups) for j in range(i + 1, n_groups)]

    print("\n" + "#"*70)
    print(f"📊 {metric_name}\n" + "#"*70)

    group_summary = pd.DataFrame({
        'Group': groups,
        'Successes': successes,
        'Trials': trials,
        'Conversion Rate': pd.Series(conversion_rates).map(lambda x: f"{x*100:.2f}%")
    })
    print("\n✅ Group Summary (Successes, Trials, Conversion Rate)")
    _show(group_summary)
    print(separator)

    # Rows = groups, columns = (successes, failures).
    contingency = np.column_stack([successes, failures])

    if n_groups == 2:
        # Two-proportion Z-test.
        zstat, pval = proportions_ztest(successes, trials)
        print("\n✅ Z-test (2 groups)")
        print(f"- Z statistic: {zstat:.4f}")
        print(f"- p-value: {pval:.4f}")
        print(separator)

        # Chi-square test on the 2x2 table. NOTE: scipy applies Yates' continuity
        # correction by default when dof == 1, so this p-value is not expected to
        # match the Z-test p-value exactly.
        chi2, chi_p, _, _ = chi2_contingency(contingency)
        print("\n✅ Chi-square Test (2x2)")
        print(f"- Chi2 statistic: {chi2:.4f}")
        print(f"- p-value: {chi_p:.4f}")
        print(separator)

    else:
        # Omnibus chi-square test across all groups.
        chi2, p_value, _, _ = chi2_contingency(contingency)
        print("\n✅ Chi-square Test")
        print(f"- Chi2 statistic: {chi2:.4f}")
        print(f"- p-value: {p_value:.4f}")
        print(separator)

        # Post-hoc pairwise Z-tests; the raw p-values feed both corrections below.
        pairwise_results = [
            proportions_ztest(successes[[i, j]], trials[[i, j]])[1]
            for i, j in index_pairs
        ]
        pairs = [f"{groups[i]} vs {groups[j]}" for i, j in index_pairs]

        # Holm-Bonferroni: the conservative family-wise correction.
        reject, pvals_corrected, _, _ = multipletests(pairwise_results, alpha=0.05, method='holm')
        results_df = pd.DataFrame({
            'Comparison': pairs,
            'Uncorrected p-value': pairwise_results,
            'Corrected p-value (Holm)': pvals_corrected,
            'Significant': reject
        })
        print("\n✅ Pairwise Z-test (Holm-Bonferroni):")
        _show(results_df)
        print(separator)

        # Benjamini-Hochberg: the more permissive false-discovery-rate correction.
        reject_fdr, pvals_fdr_corrected, _, _ = multipletests(pairwise_results, alpha=0.05, method='fdr_bh')
        results_df_fdr = pd.DataFrame({
            'Comparison': pairs,
            'Uncorrected p-value': pairwise_results,
            'Corrected p-value (FDR)': pvals_fdr_corrected,
            'Significant (FDR)': reject_fdr
        })
        print("\n✅ Pairwise Z-test (FDR - Benjamini-Hochberg):")
        _show(results_df_fdr)
        print(separator)

    # Bayesian analysis (shared by both branches): Beta(1, 1) prior updated with
    # the observed counts. One row of ``draws`` per group, drawn in group order.
    draws = np.vstack([
        np.random.beta(1 + success, 1 + fail, samples)
        for success, fail in zip(successes, failures)
    ])
    posterior_samples = dict(zip(groups, draws))

    plt.figure(figsize=(12, 7))
    for group in groups:
        sns.kdeplot(posterior_samples[group], label=f'Group {group}')
    plt.title('Posterior Distributions of Conversion Rates', fontsize=16)
    plt.xlabel('Conversion Rate')
    plt.ylabel('Density')
    plt.legend()
    plt.grid(True)
    plt.show()
    print(separator)

    # Probability that each group is best: for every draw pick the group with the
    # highest sampled rate (argmax keeps the first group on ties) and count wins.
    winners = draws.argmax(axis=0)
    win_counts = np.bincount(winners, minlength=n_groups)

    print("\n✅ Bayesian: Probability that each group is best")
    for group, count in zip(groups, win_counts):
        prob = count / samples
        print(f"Probability that {group} is the best: {prob:.2%}")
    print(separator)

    # Pairwise superiority: share of draws in which the first group beats the second.
    pairwise_bayes = []
    for i, j in index_pairs:
        g1, g2 = groups[i], groups[j]
        prob = np.mean(posterior_samples[g1] > posterior_samples[g2])
        pairwise_bayes.append({
            'Comparison': f"{g1} > {g2}",
            'P(Group1 > Group2)': prob,
            'P(Group1 < Group2)': 1 - prob
        })
    df_bayes = pd.DataFrame(pairwise_bayes)
    print("\n✅ Bayesian Pairwise Superiority Probabilities:")
    _show(df_bayes)
    print(separator)



# ✅ Usage example: uncomment and adapt as shown below ✅
# groups = ['A', 'B']
# groups = ['A', 'B', 'C']  # alternative: three groups (the arrays below then need three values each)
# TOTAL_PCID = np.array([385312, 42586])
# SELL_HOME = np.array([32007, 40703])
# abtest_auto(groups, SELL_HOME, TOTAL_PCID, metric_name="Total PCID to Sell Home CVR")
