## Week 9 notebook

In [9]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import scipy.stats as stats

In [76]:
def simulate_power_analysis(A=1, B=1, C=10, D=1000, n_simulations=1000, seed=None):
    """
    Simulate multiple datasets and estimate the probability of detecting
    a nonzero effect of X on Y (|t-value| > 1.96).

    Each simulated dataset has D rows generated as::

        W = Normal(0, 1)
        X = W + Normal(0, B)
        Y = A * X - W + Normal(0, C)

    Y is then regressed on a constant, X and W with OLS, and the t-value and
    the coefficient of X are recorded for that simulation.

    Parameters
    ----------
    A : float
        Coefficient multiplying X in the equation that generates Y.
    B : float
        Standard deviation of the noise added to W to form X.
    C : float
        Standard deviation of the noise added when generating Y.
    D : int
        Number of observations per simulated dataset. Must be at least 4 so
        that the three-parameter regression (constant, X, W) has at least one
        residual degree of freedom.
    n_simulations : int
        Number of datasets to simulate. Must be at least 1.
    seed : int, numpy.random.Generator or None, optional
        When None (the default) the draws come from NumPy's global random
        state, so ``np.random.seed`` keeps controlling the results exactly as
        before. Any other value is handed to ``np.random.default_rng`` and the
        resulting generator is used instead, making the run reproducible
        without touching global state.

    Returns
    -------
    probability : float
        Fraction of simulations in which |t-value of X| > 1.96.
    significant_ts : list
        Absolute t-values of X from the simulations that exceeded 1.96.
    coef_skewness : float
        Skewness of the estimated X coefficients across all simulations,
        as computed by ``scipy.stats.skew``.

    Raises
    ------
    ValueError
        If ``n_simulations`` is smaller than 1 or ``D`` is smaller than 4.
    """
    # Fail early with a clear message instead of an incidental
    # ZeroDivisionError (n_simulations == 0) or meaningless t-values (tiny D).
    if n_simulations < 1:
        raise ValueError(f"n_simulations must be at least 1, got {n_simulations}")
    if D < 4:
        raise ValueError(
            f"D must be at least 4 to fit a constant, X and W, got {D}"
        )

    # Both the np.random module and a Generator expose normal(loc, scale, size),
    # so the sampling code below is the same for either source of randomness.
    rng = np.random if seed is None else np.random.default_rng(seed)

    # Detection threshold applied to |t|.
    t_threshold = 1.96

    t_values = []
    coefficients = []

    for _ in range(n_simulations):
        # Generate data for each simulation
        W = rng.normal(0, 1, D)
        X = W + rng.normal(0, B, D)
        Y = A * X - W + rng.normal(0, C, D)

        # Create dataframe and run regression of Y on [const, X, W]
        df_sim = pd.DataFrame({'Y': Y, 'X': X, 'W': W})
        X_w_const = sm.add_constant(df_sim[['X', 'W']])
        model = sm.OLS(df_sim['Y'], X_w_const).fit()

        # Store the t-value and the coefficient for X
        t_values.append(model.tvalues['X'])
        coefficients.append(model.params['X'])

    # Keep the absolute t-values that clear the threshold; their share is the
    # estimated detection probability.
    significant_ts = [abs_t for abs_t in map(abs, t_values) if abs_t > t_threshold]
    probability = len(significant_ts) / n_simulations

    # Calculate skewness of the estimated X coefficients (not of the t-values)
    coef_skewness = stats.skew(coefficients)

    return probability, significant_ts, coef_skewness

# Run the power analysis
# NOTE: no random seed is set here, so the printed numbers change from run to
# run; call np.random.seed(<int>) before this line for repeatable results.
N_SIMULATIONS = 1000  # used for both the call and the report so they cannot drift apart
probability, sig_ts, skewness = simulate_power_analysis(A=2, B=1, C=10, D=100, n_simulations=N_SIMULATIONS)

print(f"Number of simulations with |t-value| > 1.96: {len(sig_ts)} out of {N_SIMULATIONS}")
print(f"Probability of detecting nonzero effect: {probability:.3f} or {probability*100:.1f}%")
print(f"Skewness of coefficent: {skewness:.3f}")

Number of simulations with |t-value| > 1.96: 519 out of 1000
Probability of detecting nonzero effect: 0.519 or 51.9%
Skewness of coefficent: -0.129
