In [14]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import timedelta
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import xgboost as xgb
import matplotlib.pyplot as plt

In [15]:
DIS_earnings_dates = [
    "2026-05-05",
    "2026-02-03",
    "2025-11-12",
    "2025-08-05",
    "2025-05-07",
    "2025-02-05",
    "2024-11-14",
    "2024-08-07",
    "2024-05-07",
    "2024-02-07",
    "2023-11-08",
    "2023-08-09",
    "2023-05-10",
    "2023-02-08",
    "2022-11-08",
    "2022-08-10",
    "2022-05-11",
    "2022-02-09",
    "2021-11-10",
    "2021-08-12",
    "2021-05-13",
    "2021-02-11",
    "2020-11-12",
    "2020-08-04",
    "2020-05-05",
    "2020-02-04",
    "2019-11-07",
    "2019-08-06",
    "2019-05-08",
    "2019-02-05",
    "2018-11-08",
    "2018-08-07",
    "2018-05-08",
    "2018-02-06",
    "2017-11-09",
    "2017-08-08",
    "2017-05-09",
    "2017-02-07",
    "2016-11-10",
    "2016-08-09",
    "2016-05-10",
    "2016-02-09",
    "2015-11-05",
    "2015-08-04",
    "2015-05-05",
    "2015-02-03",
    "2014-11-06",
    "2014-08-05",
    "2014-05-06",
    "2014-02-05",
    "2013-11-07",
    "2013-08-06",
    "2013-05-07",
    "2013-02-05",
    "2012-11-08",
    "2012-08-07",
    "2012-05-08",
    "2012-02-07",
    "2011-11-10",
    "2011-08-09",
    "2011-05-10",
    "2011-02-08",
    "2010-11-11",
    "2010-08-10",
    "2010-05-11",
    "2010-02-09",
]

DIS_earnings_dates = pd.to_datetime(DIS_earnings_dates)

In [16]:
start_date = DIS_earnings_dates[-1] - pd.Timedelta(days=30)
end_date   = DIS_earnings_dates[0] + pd.Timedelta(days=30)

print(f"Fetching data from {start_date} to {end_date}")

Fetching data from 2010-01-10 00:00:00 to 2026-06-04 00:00:00


In [17]:
tickers = ['DIS', '^GSPC']
data = yf.download(
    tickers,
    start=start_date.strftime('%Y-%m-%d'),
    end=end_date.strftime('%Y-%m-%d'),
    progress=False,
    auto_adjust=False,
)

dis_prices = data['Adj Close']['DIS'].rename('DIS_adj')
spx_prices = data['Adj Close']['^GSPC'].rename('SPX_adj')

prices = pd.concat([dis_prices, spx_prices], axis=1).dropna()
prices.tail()

Unnamed: 0_level_0,DIS_adj,SPX_adj
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-05-23,109.720001,5802.819824
2025-05-27,112.360001,5921.540039
2025-05-28,111.519997,5888.549805
2025-05-29,112.019997,5912.169922
2025-05-30,113.040001,5911.689941


In [18]:
returns = prices.pct_change().dropna()
returns.rename(columns={'DIS_adj':'DIS_ret', 'SPX_adj':'SPX_ret'}, inplace=True)
returns.tail()

Unnamed: 0_level_0,DIS_ret,SPX_ret
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-05-23,-0.012688,-0.006708
2025-05-27,0.024061,0.020459
2025-05-28,-0.007476,-0.005571
2025-05-29,0.004484,0.004011
2025-05-30,0.009106,-8.1e-05


In [19]:
window_size = 20
event_list = []

for t0 in DIS_earnings_dates:
    if t0 not in returns.index:
        # If earnings_date falls on a non‐trading day, pick the next trading day
        continue
    for d in range(1, window_size+1):
        day_idx = t0 + pd.Timedelta(days=d)
        # If that day is not a trading day, skip or roll forward
        if day_idx not in returns.index:
            continue
        excess_ret = returns.loc[day_idx, 'DIS_ret'] - returns.loc[day_idx, 'SPX_ret']
        event_list.append({
            'earnings_date': t0,
            'offset_day': d,
            'date': day_idx,
            'excess_ret': excess_ret
        })

events_df = pd.DataFrame(event_list)
events_df.head(n=10)

Unnamed: 0,earnings_date,offset_day,date,excess_ret
0,2025-05-07,1,2025-05-08,0.02388
1,2025-05-07,2,2025-05-09,0.008512
2,2025-05-07,5,2025-05-12,0.01039
3,2025-05-07,6,2025-05-13,0.000807
4,2025-05-07,7,2025-05-14,0.012982
5,2025-05-07,8,2025-05-15,-0.010507
6,2025-05-07,9,2025-05-16,0.003688
7,2025-05-07,12,2025-05-19,-0.007577
8,2025-05-07,13,2025-05-20,0.001217
9,2025-05-07,14,2025-05-21,-0.000775


In [20]:
summary = events_df.groupby('offset_day')['excess_ret'].agg(
    mean_excess = 'mean',
    std_excess  = 'std',
    count       = 'count'
).reset_index()
summary['se_excess'] = summary['std_excess'] / np.sqrt(summary['count'])
# t‐statistic for mean != 0
summary['t_stat'] = summary['mean_excess'] / summary['se_excess']
# approximate two‐sided p‐value
from scipy.stats import t
df_counts = summary['count'] - 1
summary['p_value'] = (1 - t.cdf(np.abs(summary['t_stat']), df=df_counts)) * 2

summary.round(4)



Unnamed: 0,offset_day,mean_excess,std_excess,count,se_excess,t_stat,p_value
0,1,0.0007,0.0436,62,0.0055,0.1194,0.9053
1,2,-0.0016,0.0139,47,0.002,-0.7969,0.4296
2,3,0.002,0.0106,33,0.0018,1.0643,0.2952
3,4,-0.0011,0.0143,14,0.0038,-0.2968,0.7713
4,5,0.0007,0.0111,29,0.0021,0.3506,0.7285
5,6,0.0006,0.0128,60,0.0017,0.3654,0.7161
6,7,-0.0003,0.0091,62,0.0012,-0.2674,0.79
7,8,-0.0016,0.0089,62,0.0011,-1.4399,0.155
8,9,-0.0012,0.0084,47,0.0012,-0.976,0.3342
9,10,0.0012,0.0077,33,0.0013,0.9133,0.3679


The highest average abnormal return is on offset_day 3 (mean_excess = 0.0020), but its t‐stat (1.06) and p-value (0.295) show it is not statistically significant