### week 11

In [7]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats import t

question 1

In [8]:
num = 1000 
 
event_time = int(num / 2) 
 
R_market = np.random.normal(0, 1, num) + np.arange(num) / num 
 
R_target = 2 + R_market + np.random.normal(0, 1, num) + (np.arange(num) == int(num / 2) + 1) * 2 
 
results = sm.OLS(R_target[:event_time], sm.add_constant(R_market[:event_time])).fit() 
 
alpha, beta = results.params 
 
resid = R_target - results.predict(sm.add_constant(R_market)) 
 
print(resid[event_time + 1] / resid[:event_time].std(ddof = 2)) 

0.8884447879856584


In [10]:
# params

num_simulations = 10000  # repeat many times for stable probability estimate

# monte carlo simulations
detected = 0
for _ in range(num_simulations):
    R_market = np.random.normal(0, 1, num) + np.arange(num) / num
    R_target = 2 + R_market + np.random.normal(0, 1, num)
    # event effect (+2) at time event_time + 1
    R_target[event_time + 1] += 2
    
    # pre-event regression
    results = sm.OLS(R_target[:event_time], sm.add_constant(R_market[:event_time])).fit()
    
    # residuals
    resid = R_target - results.predict(sm.add_constant(R_market))
    
    # t statistic at event_time + 1
    t_stat = resid[event_time + 1] / resid[:event_time].std(ddof=2)
    
    # check if it would be significant at 5% two-sided level
    # (critical value for large sample ~ 1.96)
    if abs(t_stat) > 1.96:
        detected += 1

# probability
power = detected / num_simulations
print(f"probability of detecting event: {power:.3f}")

probability of detecting event: 0.507


question 2

In [11]:
num = 1000
event_time = int(num / 2)
num_simulations = 1  # only need one dataset since seed is fixed per loop

# fixed dataset with one true event
np.random.seed(0)
R_market = np.random.normal(0, 1, num) + np.arange(num) / num
R_target = 2 + R_market + np.random.normal(0, 1, num)
R_target[event_time + 1] += 2  # true event here

# placebo tests at all possible times
t_stats = []
for fake_event in range(10, num - 10):  # avoid edge cases with too little data
    np.random.seed(0)  # make sure same dataset is used for every test

    # fit using data up to the fake event
    results = sm.OLS(R_target[:fake_event], sm.add_constant(R_market[:fake_event])).fit()
    resid = R_target - results.predict(sm.add_constant(R_market))
    
    # t statistic at the fake event + 1
    t_stat = resid[fake_event + 1] / resid[:fake_event].std(ddof=2)
    t_stats.append(t_stat)

# compute fraction of placebo tests that would be significant
crit_val = 1.96
fraction_detected = np.mean(np.abs(t_stats) > crit_val)

print(f"fraction of placebo tests detecting a (false) event: {fraction_detected:.3f}")


fraction of placebo tests detecting a (false) event: 0.047


question 3

In [12]:
num = 1000
event_time = int(num / 2)
num_simulations = 2000  # number of datasets
window = 20  # 20 before and 20 after

fractions = []

for sim in range(num_simulations):
    # new dataset each time
    np.random.seed(sim)
    R_market = np.random.normal(0, 1, num) + np.arange(num) / num
    R_target = 2 + R_market + np.random.normal(0, 1, num)
    R_target[event_time + 1] += 2  # real event

    # fit pre-event regression
    results = sm.OLS(R_target[:event_time], sm.add_constant(R_market[:event_time])).fit()
    resid = R_target - results.predict(sm.add_constant(R_market))

    # true event t-value
    true_t = resid[event_time + 1] / resid[:event_time].std(ddof=2)

    # placebo tests 20 before and after
    t_placebos = []
    for fake_event in range(event_time - window, event_time + window):
        if fake_event == event_time:
            continue  # skip actual event
        results_fake = sm.OLS(R_target[:fake_event], sm.add_constant(R_market[:fake_event])).fit()
        resid_fake = R_target - results_fake.predict(sm.add_constant(R_market))
        t_fake = resid_fake[fake_event + 1] / resid_fake[:fake_event].std(ddof=2)
        t_placebos.append(t_fake)

    # fraction of placebo tests with higher t than actual event
    fraction_higher = np.mean(np.abs(t_placebos) > abs(true_t))
    fractions.append(fraction_higher)

print(f"average fraction of placebo t-values higher than actual event: {np.mean(fractions):.3f}")


average fraction of placebo t-values higher than actual event: 0.145


question 4

In [13]:
# placebo test with corr_const = 0.9
import numpy as np
import statsmodels.api as sm

def make_error(corr_const, num):
    sigma = 5 * 1 / np.sqrt((1 - corr_const)**2 / (1 - corr_const**2))
    err = list()
    prev = np.random.normal(0, sigma)
    for n in range(num):
        prev = corr_const * prev + (1 - corr_const) * np.random.normal(0, sigma)
        err.append(prev)
    return np.array(err)

# parameters
num = 1000
event_time = int(num / 2)

# fixed dataset (placebo uses same dataset across fake events)
np.random.seed(0)
R_market = np.random.normal(0, 1, num) + np.arange(num) / num
# use autocorrelated errors for R_target
err = make_error(0.9, num)
R_target = 2 + R_market + err
# real event at event_time + 1
R_target[event_time + 1] += 2

# run placebo tests at all possible times (avoid edges)
t_stats = []
for fake_event in range(10, num - 10):
    # fit using data up to the fake event
    results = sm.OLS(R_target[:fake_event], sm.add_constant(R_market[:fake_event])).fit()
    resid = R_target - results.predict(sm.add_constant(R_market))
    # test statistic at fake_event + 1
    t_stat = resid[fake_event + 1] / resid[:fake_event].std(ddof=2)
    t_stats.append(t_stat)

crit_val = 1.96
fraction_detected = np.mean(np.abs(t_stats) > crit_val)

fraction_detected, len(t_stats)


(np.float64(0.04387755102040816), 980)

~ 0.05