Kolmogorov–Smirnov test (one-sample and two-sample)

In [1]:
import numpy as np
import scipy.stats as stats

def one_sample_ks_statistic(cdf_values):
    """Return the one-sample Kolmogorov-Smirnov statistic D_n.

    The empirical distribution function (EDF) is a step function that jumps
    from (i-1)/n to i/n at the i-th order statistic, so the largest gap to
    the hypothesised CDF F can sit on either side of a jump:

        D+ = max_i ( i/n - F(x_(i)) )
        D- = max_i ( F(x_(i)) - (i-1)/n )
        D_n = max(D+, D-)

    Comparing F only against i/n (the top of each step) misses D- and can
    under-report the statistic.

    Parameters
    ----------
    cdf_values : array_like
        One-dimensional array holding the hypothesised CDF evaluated at the
        sample points, with the sample sorted in ascending order.

    Returns
    -------
    float
        The two-sided statistic max(D+, D-).

    Raises
    ------
    ValueError
        If ``cdf_values`` is empty.
    """
    cdf_values = np.asarray(cdf_values, dtype=float)
    size = cdf_values.size
    if size == 0:
        raise ValueError("cdf_values must contain at least one observation")
    # steps[i] = i/n for i = 0..n; steps[1:] are the step tops, steps[:-1] the step bottoms.
    steps = np.arange(size + 1) / size
    d_plus = np.max(steps[1:] - cdf_values)
    d_minus = np.max(cdf_values - steps[:-1])
    return max(d_plus, d_minus)


# Step 1: Generate Samples
np.random.seed(42)  # legacy global seed keeps the draws (and the printed output) reproducible
sample1 = np.random.normal(0, 1, 100)  # Sample from a standard normal distribution
sample2 = np.random.normal(0.5, 1, 100)  # Sample from a normal distribution with a different mean

# Step 2: Sort the Samples
sample1_sorted = np.sort(sample1)
sample2_sorted = np.sort(sample2)

# Step 3: Compute the Empirical Distribution Function (EDF) for Sample1
# edf_sample1[i] is the EDF value just after the (i+1)-th order statistic.
n = len(sample1_sorted)
edf_sample1 = np.arange(1, n+1) / n

# Step 4: Compute the Theoretical CDF for a Standard Normal Distribution
cdf_theoretical = stats.norm.cdf(sample1_sorted, 0, 1)

# Step 5: Compute the One-Sample K-S Test Statistic by Hand
# Both one-sided gaps (above and below each EDF jump) are needed; see the helper.
D_n = one_sample_ks_statistic(cdf_theoretical)
print(f"Manual One-Sample K-S Test Statistic: {D_n}")

# Step 6: Compute the Empirical Distribution Function (EDF) for Sample2
m = len(sample2_sorted)
edf_sample2 = np.arange(1, m+1) / m

# Step 7: Combine both samples and compute the Two-Sample K-S Test Statistic by Hand
# searchsorted(..., side='right') counts the observations <= x, i.e. n * EDF(x).
# Both EDFs only change at pooled sample points, so evaluating there finds the supremum.
combined_sample = np.sort(np.concatenate((sample1_sorted, sample2_sorted)))
edf_sample1_combined = np.searchsorted(sample1_sorted, combined_sample, side='right') / n
edf_sample2_combined = np.searchsorted(sample2_sorted, combined_sample, side='right') / m

D_nm = np.max(np.abs(edf_sample1_combined - edf_sample2_combined))
print(f"Manual Two-Sample K-S Test Statistic: {D_nm}")

# Step 8: Compare with scipy library results

# One-Sample K-S test using scipy
D_n_scipy, p_value_1 = stats.kstest(sample1, 'norm')
print(f"scipy One-Sample K-S Test Statistic: {D_n_scipy}, p-value: {p_value_1}")

# Two-Sample K-S test using scipy
D_nm_scipy, p_value_2 = stats.ks_2samp(sample1, sample2)
print(f"scipy Two-Sample K-S Test Statistic: {D_nm_scipy}, p-value: {p_value_2}")

# Step 9: Check if manual and scipy results match
if np.isclose(D_n, D_n_scipy) and np.isclose(D_nm, D_nm_scipy):
    print("Manual and scipy results match.")
else:
    print("Manual and scipy results do not match.")

Manual One-Sample K-S Test Statistic: 0.10357070563896065
Manual Two-Sample K-S Test Statistic: 0.34
scipy One-Sample K-S Test Statistic: 0.10357070563896065, p-value: 0.2180555337851624
scipy Two-Sample K-S Test Statistic: 0.34, p-value: 1.605714367105695e-05
Manual and scipy results match.
