In [3]:
import numpy as np
import statsmodels.api as sm

# Single simulated event study: fit a market model on the pre-event window
# and standardise the prediction error on the shocked day.
# NOTE: no random seed is set in this cell, so unless an earlier cell seeded
# the generator the printed value changes from run to run.
num = 1000
event_time = num // 2  # observations [0, event_time) form the estimation window

day = np.arange(num)

# Market return: standard-normal noise plus a linear drift day / num.
R_market = np.random.normal(0, 1, num) + day / num

# Target return: intercept 2, unit loading on the market, standard-normal
# noise, and a one-off shock of +2 at index event_time + 1.
shock = (day == event_time + 1) * 2
R_target = 2 + R_market + np.random.normal(0, 1, num) + shock

# Design matrix [1, R_market] for the whole sample; its first event_time rows
# are the estimation-window regressors.
X_full = sm.add_constant(R_market)
results = sm.OLS(R_target[:event_time], X_full[:event_time]).fit()

alpha, beta = results.params  # intercept and market slope

# Prediction errors over the full sample, including the out-of-sample days.
resid = R_target - results.predict(X_full)

# Scale by the in-sample residual std; ddof=2 accounts for the two fitted
# parameters.
resid_std = resid[:event_time].std(ddof=2)
print(resid[event_time + 1] / resid_std)

2.999817842866599


In [6]:
# Monte Carlo estimate of how often the +2 shock is flagged as significant.
# NOTE: no random seed is set in this cell, so unless an earlier cell seeded
# the generator the estimate varies slightly between runs.
n_simulations = 10000

# Quantities that are the same in every simulated sample.
num = 1000
event_time = num // 2
drift = np.arange(num) / num
shock = (np.arange(num) == event_time + 1) * 2  # +2 at event_time + 1

t_stats = np.empty(n_simulations)

for sim in range(n_simulations):
    R_market = np.random.normal(0, 1, num) + drift
    R_target = 2 + R_market + np.random.normal(0, 1, num) + shock

    # Market model estimated on the pre-event window only.
    X_full = sm.add_constant(R_market)
    results = sm.OLS(R_target[:event_time], X_full[:event_time]).fit()

    resid = R_target - results.predict(X_full)

    # Standardised prediction error on the shocked day.
    t_stat = resid[event_time + 1] / resid[:event_time].std(ddof=2)
    t_stats[sim] = t_stat

# Two-tailed test at the 5% level (critical value 1.96).
detected_count = int((np.abs(t_stats) > 1.96).sum())

detection_probability = detected_count / n_simulations
print(f"Detection probability (5% significance): {detection_probability:.4f}")
print(f"This is approximately {detection_probability * 100:.2f}%")

Detection probability (5% significance): 0.5130
This is approximately 51.30%


In [7]:
# Placebo tests on a fixed dataset: pretend the event happened at every other
# date and count how often the test (wrongly) reports a significant shock.
num = 1000
true_event_time = int(num / 2) # 500. Shock at 501.

# Build the fixed dataset ONCE. Reseeding and regenerating it inside the loop
# produced exactly the same arrays on every iteration, so hoisting it leaves
# the data, the results, and the final RNG state unchanged while removing
# ~990 redundant regenerations.
np.random.seed(0) # Fixed dataset

R_market = np.random.normal(0, 1, num) + np.arange(num) / num

# Shock is always at true_event_time + 1 (501)
R_target = 2 + R_market + np.random.normal(0, 1, num) + (np.arange(num) == true_event_time + 1) * 2

# Design matrix [1, R_market] for the full sample; the first t rows are the
# regressors of the estimation window ending at fictitious event time t.
X_full = sm.add_constant(R_market)

detected_count = 0
valid_tests = 0

# Iterate over fictitious event times
# We need some history to train (e.g., start at 10)
# We need to test at t+1, so t+1 < num => t < num - 1
for t in range(10, num - 1):
    # Skip the actual event time (we want placebo tests on non-event times)
    if t == true_event_time:
        continue

    # Use t as the fictitious event_time
    event_time = t

    results = sm.OLS(R_target[:event_time], X_full[:event_time]).fit()

    resid = R_target - results.predict(X_full)

    # Test at event_time + 1, scaling by the in-sample residual std
    # (ddof=2 for the two fitted parameters).
    t_stat = resid[event_time + 1] / resid[:event_time].std(ddof=2)

    if abs(t_stat) > 1.96:
        detected_count += 1
    valid_tests += 1

print(f"Fraction of placebo tests detecting event: {detected_count / valid_tests:.4f}")
print(f"This is approximately {detected_count / valid_tests * 100:.2f}%")

Fraction of placebo tests detecting event: 0.0466
This is approximately 4.66%


In [9]:
# Simulation: for each simulated dataset, what share of nearby placebo dates
# produces a larger (signed) t-statistic than the real event date?
n_simulations = 1000

num = 1000
true_event_time = num // 2  # 500; the shock sits at index 501

# Placebo event times: the 20 dates before the true event time (480..499)
# and the 20 dates after it (501..520). Each is tested at t + 1.
placebo_times = [
    *range(true_event_time - 20, true_event_time),
    *range(true_event_time + 1, true_event_time + 21),
]
total_placebo = len(placebo_times)

drift = np.arange(num) / num
shock = (np.arange(num) == true_event_time + 1) * 2  # +2 at true_event_time + 1

fractions = []

for i in range(n_simulations):
    # Seeding with the loop index gives a different, reproducible dataset
    # for every simulation.
    np.random.seed(i)

    R_market = np.random.normal(0, 1, num) + drift
    R_target = 2 + R_market + np.random.normal(0, 1, num) + shock

    # Full-sample design matrix [1, R_market]; row slices give each
    # estimation window.
    X_full = sm.add_constant(R_market)

    # 1. Actual event: train on [:true_event_time], test at true_event_time + 1.
    results = sm.OLS(R_target[:true_event_time], X_full[:true_event_time]).fit()
    resid = R_target - results.predict(X_full)
    actual_t_stat = resid[true_event_time + 1] / resid[:true_event_time].std(ddof=2)

    # 2. Placebo events: train on [:t], test at t + 1.
    placebo_stats = []
    for t in placebo_times:
        results_p = sm.OLS(R_target[:t], X_full[:t]).fit()
        resid_p = R_target - results_p.predict(X_full)
        t_stat_p = resid_p[t + 1] / resid_p[:t].std(ddof=2)
        placebo_stats.append(t_stat_p)

    # Signed (one-sided) comparison against the real event's statistic.
    placebo_higher_count = int(np.sum(np.array(placebo_stats) > actual_t_stat))
    fractions.append(placebo_higher_count / total_placebo)

print(f"Average fraction of placebo tests with higher t-value: {np.mean(fractions):.4f}")
print(f"This is approximately {np.mean(fractions) * 100:.2f}%")

Average fraction of placebo tests with higher t-value: 0.0728
This is approximately 7.28%


In [10]:
# Debug: inspect one dataset (seed 0) to see how the placebo t-statistics are
# distributed relative to the actual event's t-statistic.
np.random.seed(0)

num = 1000
true_event_time = num // 2  # 500; the shock sits at index 501

idx = np.arange(num)
R_market = np.random.normal(0, 1, num) + idx / num
R_target = 2 + R_market + np.random.normal(0, 1, num) + (idx == true_event_time + 1) * 2

# Full-sample design matrix [1, R_market]; row slices give each estimation window.
X_full = sm.add_constant(R_market)

# Actual event: train on [:true_event_time], test at true_event_time + 1.
results = sm.OLS(R_target[:true_event_time], X_full[:true_event_time]).fit()
resid = R_target - results.predict(X_full)
actual_t_stat = resid[true_event_time + 1] / resid[:true_event_time].std(ddof=2)

print(f"Actual event t-statistic: {actual_t_stat:.4f}")

# All 40 placebo dates: 20 before and 20 after the true event time.
placebo_times = [
    *range(true_event_time - 20, true_event_time),
    *range(true_event_time + 1, true_event_time + 21),
]

placebo_t_stats = np.empty(len(placebo_times))
for k, t in enumerate(placebo_times):
    results_p = sm.OLS(R_target[:t], X_full[:t]).fit()
    resid_p = R_target - results_p.predict(X_full)
    t_stat_p = resid_p[t + 1] / resid_p[:t].std(ddof=2)
    placebo_t_stats[k] = t_stat_p

print(f"\nPlacebo t-stats - Mean: {placebo_t_stats.mean():.4f}, Std: {placebo_t_stats.std():.4f}")
print(f"Number of placebos > actual: {np.sum(placebo_t_stats > actual_t_stat)}")
print(f"Fraction: {np.sum(placebo_t_stats > actual_t_stat) / len(placebo_t_stats):.4f}")

Actual event t-statistic: 1.9268

Placebo t-stats - Mean: -0.0477, Std: 0.9789
Number of placebos > actual: 1
Fraction: 0.0250


In [11]:
# Try with ABSOLUTE values (magnitude comparison): share of placebo dates
# whose |t| exceeds the real event's |t|, averaged over simulated datasets.
n_simulations = 1000
fractions_abs = []

num = 1000
true_event_time = int(num / 2)

# Placebo event times: 20 before (480..499) and 20 after (501..520) the true
# event time. They do not depend on the simulated data, so build them once.
placebo_times = list(range(true_event_time - 20, true_event_time)) + \
                list(range(true_event_time + 1, true_event_time + 21))

for i in range(n_simulations):
    # A different, reproducible dataset per simulation.
    np.random.seed(i)

    R_market = np.random.normal(0, 1, num) + np.arange(num) / num
    R_target = 2 + R_market + np.random.normal(0, 1, num) + (np.arange(num) == true_event_time + 1) * 2

    # Full-sample design matrix [1, R_market], built once per dataset instead
    # of once per placebo date; row slices give each estimation window.
    X_full = sm.add_constant(R_market)

    # Actual event: train on [:true_event_time], test at true_event_time + 1.
    results = sm.OLS(R_target[:true_event_time], X_full[:true_event_time]).fit()
    resid = R_target - results.predict(X_full)
    actual_t_stat = abs(resid[true_event_time + 1] / resid[:true_event_time].std(ddof=2))

    placebo_higher_count = 0

    for t in placebo_times:
        # Placebo event: train on [:t], test at t + 1.
        results_p = sm.OLS(R_target[:t], X_full[:t]).fit()
        resid_p = R_target - results_p.predict(X_full)
        t_stat_p = abs(resid_p[t + 1] / resid_p[:t].std(ddof=2))

        if t_stat_p > actual_t_stat:
            placebo_higher_count += 1

    # Divide by the actual number of placebo dates rather than a hard-coded
    # 40, so the fraction stays correct if the placebo window is resized.
    fractions_abs.append(placebo_higher_count / len(placebo_times))

print(f"Average fraction (absolute values): {np.mean(fractions_abs):.4f}")
print(f"This is approximately {np.mean(fractions_abs) * 100:.2f}%")

Average fraction (absolute values): 0.1461
This is approximately 14.61%


In [13]:
def make_error(corr_const, num, scale=5.0):
    """Simulate a first-order autoregressive error series.

    The series follows ``e[n] = corr_const * e[n-1] + (1 - corr_const) * z[n]``
    where each ``z[n]`` is drawn from ``N(0, sigma)`` using NumPy's global
    random state. ``sigma`` is chosen so that the stationary standard
    deviation of ``e`` equals ``scale``:
    ``Var(e) = (1 - c)**2 * sigma**2 / (1 - c**2)``.

    Parameters
    ----------
    corr_const : float
        Autoregressive coefficient; must satisfy ``-1 < corr_const < 1``.
    num : int
        Number of values to generate.
    scale : float, optional
        Target stationary standard deviation of the series (default 5.0,
        the value that was previously hard-coded).

    Returns
    -------
    numpy.ndarray
        One-dimensional array holding the ``num`` simulated errors.

    Raises
    ------
    ValueError
        If ``corr_const`` is not strictly between -1 and 1. Outside that
        range the innovation scale is undefined (division by zero at +/-1,
        square root of a negative number beyond it).
    """
    # The chained comparison is False for NaN as well, so NaN is rejected too.
    if not -1 < corr_const < 1:
        raise ValueError(
            f"corr_const must be strictly between -1 and 1, got {corr_const!r}"
        )

    # Innovation std that yields a stationary std of `scale`.
    sigma = scale / np.sqrt((1 - corr_const) ** 2 / (1 - corr_const ** 2))

    err = []

    # NOTE(review): the starting state is drawn with the innovation std
    # `sigma`, not the stationary std `scale`; for corr_const > 0 sigma is
    # larger than scale, so the first values have inflated variance until the
    # recursion settles. Kept as-is so existing seeded results are unchanged.
    prev = np.random.normal(0, sigma)

    for _ in range(num):
        prev = corr_const * prev + (1 - corr_const) * np.random.normal(0, sigma)
        err.append(prev)

    return np.array(err)

In [14]:
# Placebo tests with CORRELATED errors (corr_const = 0.9): same procedure as
# the independent-error placebo test, but the target's noise comes from
# make_error.
num = 1000
true_event_time = int(num / 2) # 500. Shock at 501.

# Build the fixed dataset ONCE. Reseeding and regenerating it inside the loop
# produced exactly the same arrays on every iteration, so hoisting it leaves
# the data, the results, and the final RNG state unchanged while removing
# ~990 redundant regenerations (each with a Python-level loop in make_error).
np.random.seed(0) # Fixed dataset

R_market = np.random.normal(0, 1, num) + np.arange(num) / num

# Use make_error with corr_const = 0.9 instead of independent normal errors
# NOTE(review): make_error targets an error std of 5 while the independent
# case drew N(0, 1) noise, so the two runs differ in noise scale as well as
# in autocorrelation.
errors = make_error(0.9, num)
R_target = 2 + R_market + errors + (np.arange(num) == true_event_time + 1) * 2

# Design matrix [1, R_market] for the full sample; the first t rows are the
# regressors of the estimation window ending at fictitious event time t.
X_full = sm.add_constant(R_market)

detected_count = 0
valid_tests = 0

# Iterate over fictitious event times
for t in range(10, num - 1):
    # Skip the actual event time (we want placebo tests on non-event times)
    if t == true_event_time:
        continue

    # Use t as the fictitious event_time
    event_time = t

    results = sm.OLS(R_target[:event_time], X_full[:event_time]).fit()

    resid = R_target - results.predict(X_full)

    # Test at event_time + 1, scaling by the in-sample residual std
    # (ddof=2 for the two fitted parameters).
    t_stat = resid[event_time + 1] / resid[:event_time].std(ddof=2)

    if abs(t_stat) > 1.96:
        detected_count += 1
    valid_tests += 1

print(f"Fraction of placebo tests detecting event (correlated errors): {detected_count / valid_tests:.4f}")
print(f"This is approximately {detected_count / valid_tests * 100:.2f}%")
print(f"\nComparison:")
# NOTE(review): the independent-error figure below is a hard-coded copy of an
# earlier cell's printed result; it will go stale if that cell changes.
print(f"  Independent errors: 4.66%")
print(f"  Correlated errors:  {detected_count / valid_tests * 100:.2f}%")

Fraction of placebo tests detecting event (correlated errors): 0.0435
This is approximately 4.35%

Comparison:
  Independent errors: 4.66%
  Correlated errors:  4.35%
