# Homework 11

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

In [2]:
num = 1000 
event_time = int(num / 2) 
R_market = np.random.normal(0, 1, num) + np.arange(num) / num 
R_target = 2 + R_market + np.random.normal(0, 1, num) + (np.arange(num) == int(num / 2) + 1) * 2 
results = sm.OLS(R_target[:event_time], sm.add_constant(R_market[:event_time])).fit() 
alpha, beta = results.params 
resid = R_target - results.predict(sm.add_constant(R_market)) 
print(resid[event_time + 1] / resid[:event_time].std(ddof = 2)) 

2.03864374593788


In [3]:
from scipy import stats

# ensure event_time is available (don't overwrite if already defined)
try:
    event_time
except NameError:
    event_time = int(num / 2)

n_sims = 5000
alpha = 0.05
df = event_time - 2
crit_two = stats.t.ppf(1 - alpha/2, df)
crit_one = stats.t.ppf(1 - alpha, df)

count_two = 0
count_one = 0
rng = np.random.default_rng(12345)

for _ in range(n_sims):
    R_market = rng.normal(0, 1, num) + np.arange(num) / num
    noise = rng.normal(0, 1, num)
    R_target = 2 + R_market + noise + (np.arange(num) == (int(num / 2) + 1)) * 2

    res = sm.OLS(R_target[:event_time], sm.add_constant(R_market[:event_time])).fit()
    resid = R_target - res.predict(sm.add_constant(R_market))
    tstat = resid[event_time + 1] / resid[:event_time].std(ddof=2)

    if abs(tstat) > crit_two:
        count_two += 1
    if tstat > crit_one:
        count_one += 1

print("Estimated power (two-sided, alpha=0.05):", count_two / n_sims)
print("Estimated power (one-sided, alpha=0.05):", count_one / n_sims)

Estimated power (two-sided, alpha=0.05): 0.5216
Estimated power (one-sided, alpha=0.05): 0.6474


In [4]:
from scipy import stats

alpha = 0.05
count_two = 0
count_one = 0
t_tests = 0

# allowable fictitious event times: at least 3 observations to estimate (df = t-2 >= 1),
# and we need t+1 < num so we can evaluate the next observation
for t_fict in range(3, num - 1):
    np.random.seed(0)  # ensure the same dataset each iteration (placebo on fixed data)
    R_market = np.random.normal(0, 1, num) + np.arange(num) / num
    noise = np.random.normal(0, 1, num)
    # real event remains at the original single time (int(num/2)+1)
    R_target = 2 + R_market + noise + (np.arange(num) == (int(num / 2) + 1)) * 2

    # train on data up to the fictitious event time
    res_fict = sm.OLS(R_target[:t_fict], sm.add_constant(R_market[:t_fict])).fit()
    resid_fict = R_target - res_fict.predict(sm.add_constant(R_market))

    std_pre = resid_fict[:t_fict].std(ddof=2)
    if std_pre == 0:
        continue

    # compute the test statistic for the observation immediately after the fictitious event
    tstat = resid_fict[t_fict + 1] / std_pre

    df_fict = t_fict - 2
    crit_two = stats.t.ppf(1 - alpha/2, df_fict)
    crit_one = stats.t.ppf(1 - alpha, df_fict)

    if abs(tstat) > crit_two:
        count_two += 1
    if tstat > crit_one:
        count_one += 1
    t_tests += 1

print("Number of placebo tests run:", t_tests)
print("Fraction detecting an event (two-sided, alpha=0.05):", count_two / t_tests)
print("Fraction detecting an event (one-sided, alpha=0.05):", count_one / t_tests)


Number of placebo tests run: 996
Fraction detecting an event (two-sided, alpha=0.05): 0.04618473895582329
Fraction detecting an event (one-sided, alpha=0.05): 0.05923694779116466


In [5]:
n_runs = 1000  # number of times to repeat the experiment for averaging
num_placebos = 20
actual_event = int(num / 2) + 1

fractions = []

for run in range(n_runs):
    # Generate a new dataset for each run
    rng = np.random.default_rng(run)
    R_market = rng.normal(0, 1, num) + np.arange(num) / num
    noise = rng.normal(0, 1, num)
    R_target = 2 + R_market + noise + (np.arange(num) == actual_event) * 2

    # Actual event t-value
    res_actual = sm.OLS(R_target[:actual_event-1], sm.add_constant(R_market[:actual_event-1])).fit()
    resid_actual = R_target - res_actual.predict(sm.add_constant(R_market))
    std_pre_actual = resid_actual[:actual_event-1].std(ddof=2)
    tstat_actual = resid_actual[actual_event] / std_pre_actual

    # Placebo t-values
    tstats_placebo = []
    placebo_times = list(range(actual_event - num_placebos, actual_event)) + \
                    list(range(actual_event + 1, actual_event + 1 + num_placebos))
    for t_fict in placebo_times:
        res_fict = sm.OLS(R_target[:t_fict], sm.add_constant(R_market[:t_fict])).fit()
        resid_fict = R_target - res_fict.predict(sm.add_constant(R_market))
        std_pre = resid_fict[:t_fict].std(ddof=2)
        if std_pre == 0 or t_fict + 1 >= num:
            continue
        tstat_fict = resid_fict[t_fict + 1] / std_pre
        tstats_placebo.append(tstat_fict)

    # Fraction of placebo t-values greater than actual event t-value
    frac = np.mean(np.array(tstats_placebo) > tstat_actual)
    fractions.append(frac)

print("Average fraction of placebo tests with higher t-value than actual event:", np.mean(fractions))

Average fraction of placebo tests with higher t-value than actual event: 0.07790000000000001


In [6]:
def make_error(corr_const, num): 
    sigma = 5 * 1 / np.sqrt((1 - corr_const)**2 / (1 - corr_const**2)) 
    err = list() 
    prev = np.random.normal(0, sigma) 
    for n in range(num): 
        prev = corr_const * prev + (1 - corr_const) * np.random.normal(0, sigma) 
        err.append(prev) 
    return np.array(err) 

In [7]:
# Use make_error with corr_const=0.9 for autocorrelated errors in R_target
corr_const = 0.9
count_two = 0
count_one = 0
t_tests = 0
alpha = 0.05

for t_fict in range(3, num - 1):
    np.random.seed(0)  # ensure reproducibility
    R_market = np.random.normal(0, 1, num) + np.arange(num) / num
    noise = make_error(corr_const, num)
    R_target = 2 + R_market + noise + (np.arange(num) == (int(num / 2) + 1)) * 2

    res_fict = sm.OLS(R_target[:t_fict], sm.add_constant(R_market[:t_fict])).fit()
    resid_fict = R_target - res_fict.predict(sm.add_constant(R_market))

    std_pre = resid_fict[:t_fict].std(ddof=2)
    if std_pre == 0:
        continue

    tstat = resid_fict[t_fict + 1] / std_pre

    df_fict = t_fict - 2
    crit_two = stats.t.ppf(1 - alpha/2, df_fict)
    crit_one = stats.t.ppf(1 - alpha, df_fict)

    if abs(tstat) > crit_two:
        count_two += 1
    if tstat > crit_one:
        count_one += 1
    t_tests += 1

print("Number of placebo tests run (autocorrelated errors):", t_tests)
print("Fraction detecting an event (two-sided, alpha=0.05):", count_two / t_tests)
print("Fraction detecting an event (one-sided, alpha=0.05):", count_one / t_tests)
print("With autocorrelated errors, we expect more false positives in placebo tests due to violation of independence.")

Number of placebo tests run (autocorrelated errors): 996
Fraction detecting an event (two-sided, alpha=0.05): 0.04216867469879518
Fraction detecting an event (one-sided, alpha=0.05): 0.03614457831325301
With autocorrelated errors, we expect more false positives in placebo tests due to violation of independence.
