Chi-square test of independence

In [4]:
import numpy as np
from scipy.stats import chi2, chi2_contingency

# Pearson chi-square test of independence on a contingency table.
# The statistic, degrees of freedom and p-value are first derived step by
# step with NumPy, then recomputed with scipy.stats.chi2_contingency, and
# finally the two sets of results are compared.

# Sample data: observed frequencies in a 4x4 contingency table
observed = np.array([[15, 20, 25, 30],
                     [30, 25, 20, 15],
                     [25, 30, 15, 20],
                     [20, 15, 30, 25]])

# Step 1: Calculate the marginal sums
n_i = observed.sum(axis=1)  # Row sums
n_j = observed.sum(axis=0)  # Column sums
n = observed.sum()  # Total sum

# Step 2: Calculate the expected frequencies assuming independence
# E[i, j] = (row total i) * (column total j) / (grand total)
expected = np.outer(n_i, n_j) / n

# Step 3: Calculate the Chi-Square test statistic by hand
# Sum over all cells of (observed - expected)^2 / expected
chi2_statistic_by_hand = ((observed - expected)**2 / expected).sum()

# Degrees of freedom: (rows - 1) * (columns - 1)
df_by_hand = (observed.shape[0] - 1) * (observed.shape[1] - 1)

# Step 4: Calculate the p-value by hand
# Use the survival function rather than 1 - cdf: it evaluates the upper
# tail directly and avoids the cancellation error of subtracting from 1.
p_value_by_hand = chi2.sf(chi2_statistic_by_hand, df_by_hand)

# Step 5: Using scipy to perform the chi-square test
# correction=False disables Yates' continuity correction, which scipy would
# otherwise apply when there is a single degree of freedom (2x2 tables); the
# manual statistic above is uncorrected, so this keeps the comparison
# like-for-like for any table shape.
chi2_statistic_lib, p_value_lib, df_lib, expected_lib = chi2_contingency(
    observed, correction=False
)

# Print the results
print("Chi-Square statistic (by hand):", chi2_statistic_by_hand)
print("Degrees of freedom (by hand):", df_by_hand)
print("P-value (by hand):", p_value_by_hand)
print("\nChi-Square statistic (scipy):", chi2_statistic_lib)
print("Degrees of freedom (scipy):", df_lib)
print("P-value (scipy):", p_value_lib)
print("\nExpected frequencies (scipy):\n", expected_lib)

# Step 6: Check if the manual calculation matches the library output
# Floating-point quantities are compared with a tolerance; the integer
# degrees of freedom must match exactly.
if (np.isclose(chi2_statistic_by_hand, chi2_statistic_lib) and
        df_by_hand == df_lib and
        np.isclose(p_value_by_hand, p_value_lib) and
        np.allclose(expected, expected_lib)):
    print("\nThe manual calculation matches the scipy output!")
else:
    print("\nThere is a discrepancy between the manual calculation and the scipy output.")

Chi-Square statistic (by hand): 22.22222222222222
Degrees of freedom (by hand): 9
P-value (by hand): 0.008200748379270095

Chi-Square statistic (scipy): 22.22222222222222
Degrees of freedom (scipy): 9
P-value (scipy): 0.008200748379270048

Expected frequencies (scipy):
 [[22.5 22.5 22.5 22.5]
 [22.5 22.5 22.5 22.5]
 [22.5 22.5 22.5 22.5]
 [22.5 22.5 22.5 22.5]]

The manual calculation matches the scipy output!
