In [None]:
import numpy as np
from scipy.stats import ttest_1samp

# --- Settings for the one-sample t-test simulation ---
# Error-rate ceilings that the selected sample size has to satisfy.
alpha_target = 0.05  # Type I error (false rejection of H0)
beta_target = 0.2  # Type II error (missed effect); same as power >= 0.8

# Normal data-generating model.
sigma = 1  # standard deviation of each observation
mu_null = 0  # population mean when H0 holds
mu_alt = 0.5  # population mean when H1 holds

# Monte Carlo replications per candidate sample size. 10000 is the
# alternative setting: less simulation noise at the cost of run time.
n_sim = 500
def simulate_type_errors(n):
    """Estimate Type I and Type II error rates of a one-sample t-test.

    Draws ``n_sim`` samples of size ``n`` from N(``mu_null``, ``sigma``) and
    from N(``mu_alt``, ``sigma``), tests every sample against ``mu_null``
    with scipy's one-sample t-test (two-sided by default) and compares the
    p-values with ``alpha_target``.  All settings are read from the
    module-level parameters at call time.

    Args:
        n: Number of observations in each simulated sample.

    Returns:
        Tuple ``(type_I_error, type_II_error)``: the fraction of H0 samples
        that were rejected, and the fraction of H1 samples that were not.
    """
    # Each row is one simulated sample. axis=1 tests all rows in a single
    # vectorized call instead of one scipy call per row.
    data_H0 = np.random.normal(mu_null, sigma, size=(n_sim, n))
    pvals_H0 = ttest_1samp(data_H0, mu_null, axis=1).pvalue
    type_I_error = np.mean(pvals_H0 < alpha_target)

    data_H1 = np.random.normal(mu_alt, sigma, size=(n_sim, n))
    pvals_H1 = ttest_1samp(data_H1, mu_null, axis=1).pvalue
    # Not rejecting H0 although the data come from H1 is a Type II error.
    type_II_error = np.mean(pvals_H1 >= alpha_target)

    return type_I_error, type_II_error

# Search for the smallest n meeting both error constraints.
# NOTE: both rates are Monte Carlo estimates. The estimated Type I rate
# scatters around alpha_target, so an adequate n can fail the check purely by
# chance; raise n_sim for a more stable answer.
for n in range(2, 200):
    type_I, type_II = simulate_type_errors(n)
    if type_I <= alpha_target and type_II <= beta_target:
        print(f"Minimum n = {n}")
        print(f"Type I error = ~{type_I:.3f}, Type II error = ~{type_II:.3f}")
        break
else:
    # No candidate triggered the break: say so instead of ending silently.
    print("No n in range(2, 200) met both error constraints")


In [None]:
import numpy as np
from scipy.stats import ttest_ind

# --- Settings for the two-sample t-test simulation ---
# Monte Carlo replications per candidate sample size. 10000 is the
# alternative setting: less simulation noise at the cost of run time.
n_sim = 500

# Error-rate ceilings that the selected sample size has to satisfy.
alpha_target = 0.05  # Type I error (false rejection of H0)
beta_target = 0.2  # Type II error (missed effect); same as power >= 0.8

# Normal data-generating model.
mu_null = 0  # mean under H0
mu_alt = 0.5  # mean under H1
sigma = 1  # standard deviation

def simulate_type_errors(n):
    """Estimate Type I and Type II error rates of a two-sample t-test.

    For each of ``n_sim`` replications two groups of ``n`` normal
    observations are drawn and compared with scipy's independent-samples
    t-test (pooled variance, ``equal_var=True``).  Under H0 both groups have
    mean ``mu_null``; under H1 the second group has mean ``mu_alt``.  All
    settings are read from the module-level parameters at call time.

    Args:
        n: Number of observations per group.

    Returns:
        Tuple ``(type_I_error, type_II_error)``: the fraction of H0
        replications that were rejected, and the fraction of H1 replications
        that were not.
    """
    # Each row is one replication. axis=1 pairs row i of one group with row i
    # of the other and tests all pairs in a single vectorized call.
    # H0: same mean in both groups.
    group1_H0 = np.random.normal(mu_null, sigma, size=(n_sim, n))
    group2_H0 = np.random.normal(mu_null, sigma, size=(n_sim, n))
    pvals_H0 = ttest_ind(group1_H0, group2_H0, axis=1, equal_var=True).pvalue
    type_I_error = np.mean(pvals_H0 < alpha_target)

    # H1: the second group is shifted to mu_alt.
    group1_H1 = np.random.normal(mu_null, sigma, size=(n_sim, n))
    group2_H1 = np.random.normal(mu_alt, sigma, size=(n_sim, n))
    pvals_H1 = ttest_ind(group1_H1, group2_H1, axis=1, equal_var=True).pvalue
    # Not rejecting H0 although the means differ is a Type II error.
    type_II_error = np.mean(pvals_H1 >= alpha_target)

    return type_I_error, type_II_error

# Search for the smallest n (per group) meeting both error constraints.
# NOTE: both rates are Monte Carlo estimates. The estimated Type I rate
# scatters around alpha_target, so an adequate n can fail the check purely by
# chance; raise n_sim for a more stable answer.
for n in range(2, 200):
    type_I, type_II = simulate_type_errors(n)
    if type_I <= alpha_target and type_II <= beta_target:
        print(f"Minimum n = {n}")
        print(f"Type I error = ~{type_I:.3f}, Type II error = ~{type_II:.3f}")
        break
else:
    # No candidate triggered the break: say so instead of ending silently.
    print("No n in range(2, 200) met both error constraints")


In [None]:
import numpy as np
from scipy.stats import chi2_contingency
import matplotlib.pyplot as plt

# --- Settings for the chi-square simulation ---
n_sim = 500  # Monte Carlo replications per candidate sample size

# Error-rate ceilings that the selected sample size has to satisfy.
alpha_target = 0.05  # Type I error (false rejection of H0)
beta_target = 0.2  # Type II error (missed effect); same as power >= 0.8

def _simulate_pvalues(p_group1, p_group2, n, n_sims):
    """Return chi-square p-values for ``n_sims`` simulated 2x2 tables."""
    pvals = np.empty(n_sims)
    for i in range(n_sims):
        # Binary outcomes (0 or 1), n per group; only the number of 1s matters.
        ones1 = np.random.binomial(1, p_group1, n).sum()
        ones2 = np.random.binomial(1, p_group2, n).sum()

        # Rows are the groups, columns the counts of 0s and 1s. Float dtype
        # so that 0.5 can be added below.
        table = np.array([
            [n - ones1, ones1],
            [n - ones2, ones2],
        ], dtype=float)

        # Pad every cell by 0.5 so no expected frequency can be zero, which
        # chi2_contingency rejects with a ValueError. This is a zero-cell
        # adjustment, not Yates' continuity correction (that one stays off
        # via correction=False).
        table += 0.5

        _, p, _, _ = chi2_contingency(table, correction=False)
        pvals[i] = p
    return pvals


def simulate_type_errors(n, *, p_null=0.5, p_alt=0.7):
    """
    Simulate Type I and Type II error rates for a chi-square test of independence
    on a 2x2 table, with n samples per group.

    ``n_sim`` and ``alpha_target`` are read from the module-level parameters
    at call time.

    Args:
        n: Number of binary observations per group.
        p_null: Success probability of both groups under H0, and of the
            first group under H1.
        p_alt: Success probability of the second group under H1.

    Returns:
        Tuple ``(type_I_error, type_II_error)``: the fraction of H0 tables
        that were rejected, and the fraction of H1 tables that were not.
    """
    # ----- Type I Error: both groups share the same success probability -----
    pvals_H0 = _simulate_pvalues(p_null, p_null, n, n_sim)
    type_I_error = np.mean(pvals_H0 < alpha_target)

    # ----- Type II Error: the second group is shifted to p_alt -----
    pvals_H1 = _simulate_pvalues(p_null, p_alt, n, n_sim)
    type_II_error = np.mean(pvals_H1 >= alpha_target)

    return type_I_error, type_II_error

# Search for minimum sample size per group.
# The range starts at 2: simulate_type_errors pads every table cell by 0.5,
# so zero counts in small samples do not break the chi-square test.
for n in range(2, 200):
    type_I, type_II = simulate_type_errors(n)
    if type_I <= alpha_target and type_II <= beta_target:
        print(f"Minimum n per group = {n}")
        print(f"Type I error ≈ {type_I:.3f}, Type II error ≈ {type_II:.3f}")
        break
else:
    # No candidate triggered the break: say so instead of ending silently.
    print("No n in range(2, 200) met both error constraints")



In [None]:
import numpy as np
from scipy.stats import chi2_contingency
import matplotlib.pyplot as plt

# --- Settings for the chi-square error-rate plot ---
# Reference levels drawn as dashed horizontal lines in the plot.
alpha_target = 0.05
beta_target = 0.2

# Monte Carlo replications per sample size.
# NOTE(review): the original comment read "fewer sims for speed during
# plotting", yet 2000 is more than the 500 used in the other cells; confirm
# the intended value.
n_sim = 2000

def _simulate_pvalues(p_group1, p_group2, n, n_sims):
    """Return chi-square p-values for ``n_sims`` simulated 2x2 tables."""
    pvals = np.empty(n_sims)
    for i in range(n_sims):
        # Binary outcomes (0 or 1), n per group; only the number of 1s matters.
        ones1 = np.random.binomial(1, p_group1, n).sum()
        ones2 = np.random.binomial(1, p_group2, n).sum()

        # Rows are the groups, columns the counts of 0s and 1s.
        table = np.array([
            [n - ones1, ones1],
            [n - ones2, ones2],
        ], dtype=float)

        # Pad every cell by 0.5 so no expected frequency can be zero, which
        # chi2_contingency rejects with a ValueError.
        table += 0.5

        _, p, _, _ = chi2_contingency(table, correction=False)
        pvals[i] = p
    return pvals


def simulate_type_errors(n, *, p_null=0.5, p_alt=0.7):
    """Return Type I and Type II error rates for chi-square test.

    Simulates 2x2 tables with ``n`` binary observations per group and applies
    the chi-square test of independence without Yates' correction.
    ``n_sim`` and ``alpha_target`` are read from the module-level parameters
    at call time.

    Args:
        n: Number of binary observations per group.
        p_null: Success probability of both groups under H0, and of the
            first group under H1.
        p_alt: Success probability of the second group under H1.

    Returns:
        Tuple ``(type_I, type_II)``: the fraction of H0 tables that were
        rejected, and the fraction of H1 tables that were not.
    """
    # H0: both groups share the same success probability.
    pvals_H0 = _simulate_pvalues(p_null, p_null, n, n_sim)
    type_I = np.mean(pvals_H0 < alpha_target)

    # H1: the second group is shifted to p_alt.
    pvals_H1 = _simulate_pvalues(p_null, p_alt, n, n_sim)
    type_II = np.mean(pvals_H1 >= alpha_target)

    return type_I, type_II

# Estimate both error rates for every sample size on the grid.
ns = range(5, 100)
typeI_errors, typeII_errors = [], []

for n in ns:
    type_I, type_II = simulate_type_errors(n)
    typeI_errors.append(type_I)
    typeII_errors.append(type_II)

# Draw the estimated rates (solid) against their targets (dashed).
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(ns, typeI_errors, label='Type I Error', color='red')
ax.plot(ns, typeII_errors, label='Type II Error', color='blue')
ax.axhline(alpha_target, color='red', linestyle='--', label='Target α')
ax.axhline(beta_target, color='blue', linestyle='--', label='Target β')
ax.set_xlabel("Sample size per group (n)")
ax.set_ylabel("Error rate")
ax.set_title("Monte Carlo Error Rates vs Sample Size")
ax.legend()
ax.grid(True)
plt.show()
