In [1]:
# Updated code that cleans up the data by removing outliers using Z-scores

import yfinance as yf
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

# Fetch the price history for the chosen ticker over the fixed date range.
ticker = 'AAPL'
stock_data = yf.download(ticker, start='2020-01-01', end='2025-01-01')

# Re-label every row with a 'yyyy-mm-dd' string, on both the closing-price
# selection and the full frame, so later lookups can match plain date strings.
data = stock_data['Close']
date_labels = pd.to_datetime(stock_data.index).strftime('%Y-%m-%d')
data.index = date_labels
stock_data.index = date_labels

# Earnings windows: each pair is (earnings report date, the day after the report).
earnings_windows = [
    ('2020-01-28', '2020-01-29'),
    ('2020-04-30', '2020-05-01'),
    ('2020-07-30', '2020-07-31'),
    ('2020-10-29', '2020-10-30'),
    ('2021-01-27', '2021-01-28'),
    ('2021-04-28', '2021-04-29'),
    ('2021-07-27', '2021-07-28'),
    ('2021-10-28', '2021-10-29'),
    ('2022-01-27', '2022-01-28'),
    ('2022-04-28', '2022-04-29'),
    ('2022-07-28', '2022-07-29'),
    ('2022-10-27', '2022-10-28'),
    ('2023-02-02', '2023-02-03'),
    ('2023-05-04', '2023-05-05'),
    ('2023-08-03', '2023-08-04'),
    ('2023-11-02', '2023-11-03'),
    ('2024-02-01', '2024-02-02'),
    ('2024-05-02', '2024-05-03'),
    ('2024-08-01', '2024-08-02'),
    ('2024-10-31', '2024-11-01'),
]

# Flatten the pairs in order, then normalise every entry to a 'yyyy-mm-dd'
# string so it can be compared against the string-labelled price index.
earnings_dates = pd.to_datetime(
    [day for window in earnings_windows for day in window]
).strftime('%Y-%m-%d')

# Remove the earnings-window rows and any missing prices from the closing prices.
data = data[~data.index.isin(earnings_dates)].dropna()

# The index holds 'yyyy-mm-dd' strings at this point. `asfreq` builds a daily
# DatetimeIndex between the first and last label and reindexes onto it, so
# string labels would match nothing and every value would come back as NaN.
# Convert the labels to timestamps first so the existing prices are preserved.
data.index = pd.to_datetime(data.index)
data = data.asfreq('D')  # Calendar-daily grid; days without a row become NaN


# Open-to-close move of each row, expressed as a percentage of the opening price.
open_price = stock_data['Open']
close_price = stock_data['Close']
stock_data['Daily Return'] = (close_price - open_price) / open_price * 100

# Partition the rows by whether their date label appears in `earnings_dates`.
on_earnings_date = stock_data.index.isin(earnings_dates)
earnings_data = stock_data[on_earnings_date]
non_earnings_data = stock_data[~on_earnings_date]

# Keep only the rows whose return lies inside the closed interval [-3, 3].
daily_return = stock_data['Daily Return']
filtered_data = stock_data[(daily_return >= -3) & (daily_return <= 3)]


# Standardise the returns and keep only the rows whose absolute Z-score is below 3.
# nan_policy='omit' makes the mean/std ignore missing returns; with the default
# ('propagate') a single NaN return turns every Z-score into NaN, which would
# silently discard all rows. Rows whose own return is NaN still get a NaN score,
# fail the `< 3` comparison, and are therefore dropped.
z_scores = stats.zscore(stock_data['Daily Return'], nan_policy='omit')
abs_z_scores = np.abs(z_scores)
filtered_entries = (abs_z_scores < 3)  # Boolean mask: True for rows to keep
outliers_cleaned_stock_data = stock_data[filtered_entries]

def _return_stats(frame):
    """Summarise the 'Daily Return' column of *frame*.

    Returns a 3-tuple: the column mean, its standard deviation, and the mean
    divided by the standard deviation. The ratio is a plain quotient of those
    two figures; no other adjustment is applied.
    """
    returns = frame['Daily Return']
    mean_return = returns.mean()
    return_spread = returns.std()
    return mean_return, return_spread, mean_return / return_spread


# Full sample.
(avg_daily_return_actual,
 std_daily_return_actual,
 sharpe_ratio_actual) = _return_stats(stock_data)

# Rows whose return falls inside the fixed [-3, 3] band.
(avg_daily_return_filtered,
 std_daily_return_filtered,
 sharpe_ratio_filtered) = _return_stats(filtered_data)

# Rows on the listed earnings dates, and all remaining rows.
(avg_daily_return_earnings,
 std_daily_return_earnings,
 sharpe_ratio_earnings) = _return_stats(earnings_data)

(avg_daily_return_non_earnings,
 std_daily_return_non_earnings,
 sharpe_ratio_non_earnings) = _return_stats(non_earnings_data)

# Rows that survived the Z-score outlier removal.
(avg_daily_return_outliers_cleaned,
 std_daily_return_outliers_cleaned,
 sharpe_ratio_outliers_cleaned) = _return_stats(outliers_cleaned_stock_data)



# One row per sample: (label shown in the report, mean, std dev, mean/std ratio).
report_rows = [
    ("Actual", avg_daily_return_actual,
     std_daily_return_actual, sharpe_ratio_actual),
    ("Earnings", avg_daily_return_earnings,
     std_daily_return_earnings, sharpe_ratio_earnings),
    ("Non-Earnings", avg_daily_return_non_earnings,
     std_daily_return_non_earnings, sharpe_ratio_non_earnings),
    ("Filtered", avg_daily_return_filtered,
     std_daily_return_filtered, sharpe_ratio_filtered),
    ("Outliers Cleaned", avg_daily_return_outliers_cleaned,
     std_daily_return_outliers_cleaned, sharpe_ratio_outliers_cleaned),
]

for row_number, (label, mean_value, std_value, ratio_value) in enumerate(report_rows):
    if row_number:
        print()  # Blank separator between groups; none after the final group
    print(f"Avg Daily Return ({label})%: {mean_value:.6f}")
    print(f"Std Dev Daily Return ({label}): {std_value:.6f}")
    print(f"Sharpe Ratio ({label}): {ratio_value:.6f}")

[*********************100%***********************]  1 of 1 completed


Avg Daily Return (Actual)%: 0.118113
Std Dev Daily Return (Actual): 1.540278
Sharpe Ratio (Actual): 0.076683

Avg Daily Return (Earnings)%: 0.503767
Std Dev Daily Return (Earnings): 1.987397
Sharpe Ratio (Earnings): 0.253481

Avg Daily Return (Non-Earnings)%: 0.105448
Std Dev Daily Return (Non-Earnings): 1.522764
Sharpe Ratio (Non-Earnings): 0.069248

Avg Daily Return (Filtered)%: 0.114973
Std Dev Daily Return (Filtered): 1.213359
Sharpe Ratio (Filtered): 0.094756

Avg Daily Return (Outliers Cleaned)%: 0.095451
Std Dev Daily Return (Outliers Cleaned): 1.421043
Sharpe Ratio (Outliers Cleaned): 0.067169
