# Comparison of a Z-test (parametric) and a Simulation Approach

## Average Male Height 
The average height of men in the United States is roughly 70 inches (5 feet 10 inches), with a standard deviation of 3 inches. Assume we have a population of men who received a growth hormone that, on average, made them grow an inch taller.  

Could we reliably detect this difference with a sample size of 30? How about 50, 100, or 150?

In [None]:
import numpy as np
import scipy.stats as stats

# Untreated population of US men (heights in inches): the null hypothesis
mu, sigma = 70, 3  # mean and standard deviation
sample_size = 150  # how many treated men are measured

# Average extra height attributed to the growth hormone (inches)
shift = 1

# Frozen normal distributions for the untreated and the treated population
normal_dist = stats.norm(mu, sigma)
normal_dist_shifted = stats.norm(mu + shift, sigma)

# Draw `sample_size` heights from the shifted (treated) population
sample = normal_dist_shifted.rvs(size=sample_size)

# Average height observed in that sample
sample_mean = sample.mean()

print(f"Sample Mean: {sample_mean}")

## Z-score approach

In [None]:
# Classical one-sample Z-test: is the observed sample mean larger than mu?
# The population standard deviation is treated as known, so the sample mean
# has standard deviation sigma / sqrt(n) -- the standard error.
standard_error = sigma / np.sqrt(sample_size)

# How many standard errors the observed mean lies above the hypothesised mean
z_score = (sample_mean - mu) / standard_error

# One-tailed (right-tailed) p-value: upper-tail area of the standard normal
# beyond the z-score, taken from its survival function
p_value_z = stats.norm(loc=0, scale=1).sf(z_score)

print(f"Z-score: {z_score}")
print(f"P-value (Z-test): {p_value_z}")

## Simulation approach

In [None]:
# Simulation approach (Monte Carlo test under the null hypothesis).
# NOTE: this is not a permutation test -- nothing is shuffled. Every simulated
# experiment draws a brand-new sample from the unshifted population.
num_resamples = 10000

# One row per simulated experiment, one column per measured man. Drawing the
# whole matrix in a single call replaces a slow Python-level loop; the mean of
# each row is one simulated sample mean under the null hypothesis.
resampled_means = normal_dist.rvs(size=(num_resamples, sample_size)).mean(axis=1)

# Calculate the p-value (proportion of simulated means >= observed sample mean).
# The smallest nonzero value this estimate can take is 1 / num_resamples, so a
# printed 0.0 only means "smaller than the simulation can resolve".
p_value_resampling = np.count_nonzero(resampled_means >= sample_mean) / num_resamples

print(f"P-value (Simulation): {p_value_resampling}")

## Plot the results

In [None]:
# Optional: Visualize the results
import matplotlib.pyplot as plt

# Two side-by-side panels: the drawn sample (left) and the simulated
# distribution of sample means under the null hypothesis (right)
fig, (ax_sample, ax_null) = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: raw heights in the sample, with the observed sample mean and the
# untreated population mean marked for comparison
ax_sample.hist(sample, bins=10, alpha=0.6, label='Sample')
ax_sample.axvline(sample_mean, color='red', linestyle='dashed', linewidth=2, label=f'Sample Mean: {sample_mean:.2f}')
ax_sample.axvline(mu, color='green', linestyle='dashed', linewidth=2, label=f'Population Mean: {mu}')
ax_sample.set_title('Sample Distribution')
ax_sample.set_xlabel('Value')
ax_sample.set_ylabel('Frequency')
ax_sample.legend()

# Right panel: simulated means, with the observed sample mean marked so its
# position in the tail is visible
ax_null.hist(resampled_means, bins=30, alpha=0.6, label='Simulation Means')
ax_null.axvline(sample_mean, color='red', linestyle='dashed', linewidth=2, label=f'Sample Mean: {sample_mean:.2f}')
ax_null.set_title('Simulation Means Distribution (Null Hypothesis)')
ax_null.set_xlabel('Simulation Mean')
ax_null.set_ylabel('Frequency')
ax_null.legend()

fig.tight_layout()
plt.show()