# In [6]:
import json
import scipy.stats as st
import numpy as np

def compute_accuracy_with_ci(predictions, confidence=0.95):
    """
    Compute accuracy and confidence intervals using both Wald and Wilson methods.

    Every entry of ``predictions`` counts as one trial and the sum of the
    entries is used as the number of correct predictions, so an entry is
    expected to be 1 for a correct prediction and 0 otherwise.

    :param predictions: Sized iterable of binary outcomes (0 or 1)
    :param confidence: Confidence level, strictly between 0 and 1
        (default is 0.95 for 95% CI)
    :return: Dictionary with keys ``"accuracy"`` (float), ``"wald_ci"`` and
        ``"wilson_ci"`` (each a ``(lower, upper)`` tuple of floats clipped
        to the range [0, 1])
    :raises ValueError: If ``predictions`` is empty, contains a value other
        than 0 or 1, or if ``confidence`` is not strictly between 0 and 1
    """
    n = len(predictions)
    if n == 0:
        raise ValueError("Prediction list cannot be empty")

    # Outside (0, 1) the normal quantile is nan, infinite or negative, which
    # would silently yield nan or inverted intervals instead of an error.
    if not 0 < confidence < 1:
        raise ValueError("Confidence level must be strictly between 0 and 1")

    # Anything other than 0/1 would push the estimated proportion outside
    # [0, 1] and make both interval formulas meaningless.
    if any(p not in (0, 1) for p in predictions):
        raise ValueError("Predictions must contain only 0 or 1")

    correct_predictions = sum(predictions)
    # float() keeps the result a plain Python float even for NumPy inputs.
    p_hat = float(correct_predictions) / n

    # Two-sided z-score for the requested confidence level
    z = float(st.norm.ppf(1 - (1 - confidence) / 2))
    z_squared = z * z

    # Wald Confidence Interval (normal approximation around p_hat).
    # Note: it collapses to a single point when p_hat is exactly 0 or 1.
    wald_margin = z * float(np.sqrt((p_hat * (1 - p_hat)) / n))
    wald_ci = (max(0.0, p_hat - wald_margin), min(1.0, p_hat + wald_margin))

    # Wilson Score Interval
    denominator = 1 + (z_squared / n)
    center_adjusted_probability = p_hat + (z_squared / (2 * n))
    adjusted_standard_error = float(
        np.sqrt((p_hat * (1 - p_hat) / n) + (z_squared / (4 * n**2)))
    )

    lower_bound = (center_adjusted_probability - z * adjusted_standard_error) / denominator
    upper_bound = (center_adjusted_probability + z * adjusted_standard_error) / denominator
    # Clip only to absorb floating-point overshoot at the boundaries.
    wilson_ci = (max(0.0, lower_bound), min(1.0, upper_bound))

    return {
        "accuracy": p_hat,
        "wald_ci": wald_ci,
        "wilson_ci": wilson_ci
    }

# Example usage: 30 binary outcomes (18 ones), reported at the default
# 95% confidence level and printed as indented JSON.
predictions = [
    0, 1, 0, 0, 1, 1, 1, 1, 0, 1,
    0, 1, 0, 0, 1, 0, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
]
result = compute_accuracy_with_ci(predictions)
print(json.dumps(result, indent=4))


{
    "accuracy": 0.6,
    "wald_ci": [
        0.4246954918846837,
        0.7753045081153163
    ],
    "wilson_ci": [
        0.4232036025332248,
        0.7540937188319814
    ]
}
