In [6]:
import numpy as np
import statsmodels.api as sm
from scipy.stats import norm

In [7]:
# Observed data for the two groups being compared.

# Group 1: sample size, number of successes, and the resulting sample proportion
n1, successes1 = 100, 50
p1 = successes1 / n1

# Group 2: sample size, number of successes, and the resulting sample proportion
n2, successes2 = 120, 60
p2 = successes2 / n2

# Pooled proportion: all successes divided by all observations across both groups
p_combined = sum((successes1, successes2)) / sum((n1, n2))

In [8]:
# Two-proportion z-test computed by hand.

# Standard error of (p1 - p2), built from the pooled proportion p_combined
# and the two sample sizes.
se = np.sqrt(p_combined * (1 - p_combined) * (1/n1 + 1/n2))

# z statistic: the observed difference in proportions expressed in units of
# the standard error. The numerator is a difference (not a sum) because the
# quantity being tested is p1 - p2; swapping the groups only flips the sign
# of z, which the abs() below makes irrelevant for the two-sided p-value.
z_manual = (p1 - p2) / se

# Two-sided p-value: twice the upper-tail probability beyond |z|.
# norm.sf is the survival function (1 - cdf); calling it directly avoids the
# loss of precision that `1 - norm.cdf(...)` suffers when |z| is large and
# the cdf is very close to 1.
p_value_manual = 2 * norm.sf(abs(z_manual))

In [9]:
# Cross-check the hand calculation against statsmodels.
# Both arrays hold one entry per group, in the same order (group 1, group 2).
count = np.array((successes1, successes2))  # successes per group
nobs = np.array((n1, n2))                   # observations per group

# proportions_ztest is called with its default options and returns the test
# statistic and the p-value, unpacked here in that order.
z_stat, p_value_lib = sm.stats.proportions_ztest(count=count, nobs=nobs)

In [10]:
# Report the hand-computed statistic and p-value, one per line
print(
    f"Manual Z-value: {z_manual}",
    f"Manual p-value: {p_value_manual}",
    sep="\n",
)

# Report the statsmodels statistic and p-value for side-by-side comparison
print(
    f"Library Z-value: {z_stat}",
    f"Library p-value: {p_value_lib}",
    sep="\n",
)

Manual Z-value: 0.0
Manual p-value: 1.0
Library Z-value: 0.0
Library p-value: 1.0
