Shapiro-Wilks Test

In [10]:
import numpy as np
from scipy.stats import shapiro
from scipy.stats import norm

In [11]:
# Sample data
data = np.array([12.7, 15.1, 14.2, 10.3, 18.4, 11.2, 17.5])
data_sorted = np.sort(data)
mean_data = np.mean(data)
print("Data :", data)
print("Sorted data: ", data_sorted)

Data : [12.7 15.1 14.2 10.3 18.4 11.2 17.5]
Sorted data:  [10.3 11.2 12.7 14.2 15.1 17.5 18.4]


Get a(i) coefficients (element of Shapiro-Wilks statistic):

In [12]:
# Step 3: Get the expected values under normal distribution
n = len(data_sorted)
expected_values = norm.ppf((np.arange(1, n + 1) - 0.375) / (n + 0.25))

# Step 4: Compute the a(i) coefficients (using a specific approximation)
a_i = expected_values / np.sqrt(np.sum(expected_values ** 2))

Get the Shapiro-Wilks statistic:

In [13]:
# Step 5: Compute W statistic manually
numerator = (np.sum(a_i * data_sorted)) ** 2
denominator = np.sum((data_sorted - mean_data) ** 2)
W_hand = numerator / denominator

In [14]:
# Step 6: Compute W and p-value using scipy for comparison
W_scipy, p_value = shapiro(data)

# Step 7: Compare the results
print(f"Manual calculation of W: {W_hand:.5f}")
print(f"Scipy calculation of W: {W_scipy:.5f}")
print(f"Scipy p-value: {p_value:.5f}")

# Step 8: Verify whether the values match
if np.isclose(W_hand, W_scipy, atol=1e-5):
    print("The manually calculated W matches the scipy calculated W.")
else:
    print("There is a discrepancy between the manually calculated W and the scipy calculated W.")

Manual calculation of W: 0.97292
Scipy calculation of W: 0.95368
Scipy p-value: 0.76307
There is a discrepancy between the manually calculated W and the scipy calculated W.
