In [30]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
import seaborn as sns
import statsmodels.api as sm

# Event Studies

In [41]:
# Simulated series: level 2 before the event at t0, level 1 from t0 onward, plus N(0, 1) noise.
t = np.arange(1000)
t0 = 500
Y = np.where(t < t0, 2, 1) + np.random.normal(0, 1, size=t.size)

# Regressors: intercept (level before the event), After (level shift at t0),
# t (trend before the event) and t_After = (t - t0) * After (extra trend after the event).
After = (t >= t0).astype(int)
t_After = After * (t - t0)

design = {"After": After, "t_After": t_After, "t": t}
X = sm.add_constant(pd.DataFrame(design))
results = sm.OLS(Y, X).fit()
results.tvalues

const      21.468782
After      -6.221490
t_After    -0.329245
t          -0.329274
dtype: float64

In [3]:
# Scenario: after a new grocery store opens up, does our (old) store reduce the price of a pound of apples?
# First experiment: yes, it reduces (the simulated level drops from 2 to 1 at the event time t0).
t = np.arange(1000)
t0 = 500
Y = 2 * (t < t0) + 1 * (t >= t0) + np.random.normal(0, 1, (1000,))
# Test: regress Y on a post-event dummy (After), a post-event trend (t_After) and a common trend (t).
# The dummy is built from t0 rather than a repeated literal so it always matches the simulated break.
After = 1 * (t >= t0)
t_After = (t - t0) * After
X = sm.add_constant(pd.DataFrame({"After": After, "t_After": t_After, "t": t}))
results = sm.OLS(Y, X).fit()
results.params

const      1.869648
After     -0.969933
t_After   -0.001333
t          0.000493
dtype: float64

In [4]:
# t-statistics of the coefficients for the fit currently stored in `results`.
results.tvalues

const      20.731726
After      -7.593641
t_After    -3.011822
t           1.574741
dtype: float64

In [46]:
# Now let's define a function to do this for us.
def event_study(num_times, before_const, after_const, before_slope, after_slope, sigma, rng=None):
    """Simulate one time series with a break at its midpoint and fit an event-study OLS.

    Observations are taken at t = 0, ..., num_times - 1 and the event occurs at
    t0 = num_times // 2.  The simulated outcome is

        t <  t0:  Y = before_const + before_slope * t + noise
        t >= t0:  Y = after_const  + before_slope * t
                      + (after_slope - before_slope) * (t - t0) + noise

    Y is then regressed on a constant and the columns "After" (1 when t >= t0),
    "t_After" ((t - t0) * After) and "t".  Under this data-generating process the
    coefficients estimate:

        const   -> before_const
        After   -> after_const - before_const
        t_After -> after_slope - before_slope
        t       -> before_slope

    Parameters
    ----------
    num_times : int
        Number of time periods to simulate.
    before_const, after_const : float
        Level terms applied before / from the event time.
    before_slope, after_slope : float
        Trend per period before / after the event.
    sigma : float
        Standard deviation of the normal noise.
    rng : optional
        Object exposing ``normal(loc, scale, size)`` (for example
        ``np.random.default_rng(seed)``).  When None (the default) the global
        ``np.random`` state is used.

    Returns
    -------
    The fitted results object returned by ``sm.OLS(Y, X).fit()``.
    """
    t = np.arange(num_times)
    t0 = num_times // 2
    before = t < t0
    after = t >= t0

    # Fall back to the module-level generator so existing callers (and np.random.seed) keep working.
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, sigma, num_times)

    Y = (
        before_const * before
        + before_slope * t
        + after_const * after
        + (after_slope - before_slope) * (t - t0) * after
        + noise
    )

    # Test: build the event-study design matrix and fit it.
    After = 1 * after
    t_After = (t - t0) * After
    X = sm.add_constant(pd.DataFrame({"After": After, "t_After": t_After, "t": t}))
    results = sm.OLS(Y, X).fit()
    return results

# Very large sample: level 2.0 -> 1.0 with a pre-event slope of 0.01 and a post-event slope of 0.
# Display the coefficient standard errors.
results = event_study(
    num_times=1000000,
    before_const=2.0,
    after_const=1.0,
    before_slope=0.01,
    after_slope=0,
    sigma=1,
)
results.bse

const      2.825656e-03
After      3.996087e-03
t_After    1.384285e-08
t          9.788375e-09
dtype: float64

In [16]:
# Small level change (2.0 -> 1.9) with no trend on either side, on a large sample.
results = event_study(
    num_times=100000,
    before_const=2.0,
    after_const=1.9,
    before_slope=0,
    after_slope=0,
    sigma=1,
)
results.params

const      1.996522e+00
After     -9.923754e-02
t_After   -4.533106e-07
t          9.778994e-08
dtype: float64

In [17]:
# t-statistics of the coefficients for the fit currently stored in `results`.
results.tvalues

const      223.104221
After       -7.841301
t_After     -1.033992
t            0.315450
dtype: float64

In [10]:
# Small level change (2.0 -> 1.9) combined with a trend that switches from 0 to 1 after the event.
results = event_study(
    num_times=1000,
    before_const=2.0,
    after_const=1.9,
    before_slope=0,
    after_slope=1,
    sigma=1,
)
results.params

const      1.967959
After     -0.044452
t_After    1.000094
t         -0.000088
dtype: float64

# Differences-in-differences

In [52]:
# Scenario: a new grocery store opens up in town A but not in town B.  Do the stores in town A
# reduce the price of a pound of apples because of this event, or for other reasons?

# First experiment: yes, it reduces (town A: 2 -> 1 at t0; town B stays at 1.5 throughout).
t = np.arange(1000)
t0 = 500
Y_A = np.where(t < t0, 2, 1) + np.random.normal(0, 1, size=t.size)
Y_B = 1.5 + np.random.normal(0, 1, size=t.size)
Y = np.hstack([Y_A, Y_B])
# Town A's observations are stacked on top of town B's; the indicators use the same ordering.
group_A = np.repeat([1.0, 0.0], t.size)
After = np.concatenate([(t >= t0).astype(int)] * 2)
Treated = After * group_A
design = {"alpha_group": group_A, "alpha_time": After, "beta_1": Treated}
X = sm.add_constant(pd.DataFrame(design))
results = sm.OLS(Y, X).fit()
results.params

const          1.531035
alpha_group    0.418678
alpha_time    -0.010358
beta_1        -0.960591
dtype: float64

In [53]:
def DID(num_times, before_const_A, after_const_A, before_const_B, after_const_B, sigma, rng=None):
    """Simulate two groups around an event and fit a difference-in-differences OLS.

    Each group is observed at t = 0, ..., num_times - 1 with the event at
    t0 = num_times // 2.  Group g in {A, B} has outcome ``before_const_g + noise``
    for t < t0 and ``after_const_g + noise`` for t >= t0.  The two series are
    stacked (A first, then B) and regressed on a constant plus

        alpha_group : 1 for group A rows, 0 for group B rows
        alpha_time  : 1 when t >= t0, else 0
        beta_1      : alpha_group * alpha_time

    Under this data-generating process the coefficients estimate:

        const       -> before_const_B
        alpha_group -> before_const_A - before_const_B
        alpha_time  -> after_const_B - before_const_B
        beta_1      -> (after_const_A - before_const_A) - (after_const_B - before_const_B)

    Parameters
    ----------
    num_times : int
        Number of time periods simulated per group.
    before_const_A, after_const_A : float
        Group A level before / from the event time.
    before_const_B, after_const_B : float
        Group B level before / from the event time.
    sigma : float
        Standard deviation of the normal noise in both groups.
    rng : optional
        Object exposing ``normal(loc, scale, size)`` (for example
        ``np.random.default_rng(seed)``).  When None (the default) the global
        ``np.random`` state is used.

    Returns
    -------
    The fitted results object returned by ``sm.OLS(Y, X).fit()``.
    """
    t = np.arange(num_times)
    t0 = num_times // 2
    before = t < t0
    after = t >= t0

    # Fall back to the module-level generator so existing callers (and np.random.seed) keep working.
    sampler = np.random if rng is None else rng

    # Group A's noise is drawn before group B's.
    Y_A = before_const_A * before + after_const_A * after + sampler.normal(0, sigma, num_times)
    Y_B = before_const_B * before + after_const_B * after + sampler.normal(0, sigma, num_times)

    Y = np.hstack((Y_A, Y_B))
    group_A = np.hstack((np.ones(num_times), np.zeros(num_times)))
    After = np.tile(1 * after, 2)
    X = sm.add_constant(pd.DataFrame({"alpha_group": group_A, "alpha_time": After, "beta_1": group_A * After}))
    results = sm.OLS(Y, X).fit()
    return results

# Both groups move up by the same amount (A: 1.0 -> 2.0, B: 2.0 -> 3.0).
results = DID(
    num_times=1000,
    before_const_A=1.0,
    after_const_A=2.0,
    before_const_B=2.0,
    after_const_B=3.0,
    sigma=1,
)
results.params

const          1.948211
alpha_group   -0.966374
alpha_time     1.094004
beta_1        -0.140652
dtype: float64

In [22]:
# t-statistics of the coefficients for the fit currently stored in `results`.
# NOTE(review): this cell's execution count is out of order with its neighbours -- confirm which fit it reflects.
results.tvalues

const          44.911507
alpha_group   -16.342896
alpha_time     16.012549
beta_1         -0.105436
dtype: float64

In [23]:
# Group A rises by more than group B (A: 1.0 -> 3.0, B: 2.0 -> 3.0).
results = DID(
    num_times=1000,
    before_const_A=1.0,
    after_const_A=3.0,
    before_const_B=2.0,
    after_const_B=3.0,
    sigma=1,
)
results.params

const          1.982108
alpha_group   -1.070624
alpha_time     0.971736
beta_1         1.183515
dtype: float64