In [None]:
# ## Section 1: Import Libraries and Define Helper Function

import yfinance as yf
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import mplfinance as mpf

In [11]:
#Download Data
def a_rm(x):
    """Removes NA values from a vector."""
    return [val for val in np.array(x).flatten() if pd.notna(val)]

# ## Section 2: Download Dow Jones Industrial Average (DJI) Data

dji_ticker = "^DJI"
dji = yf.Ticker(dji_ticker)
DJI_2023_04_13_2025_04_12 = dji.history(period="max", interval="1d", start="2023-04-10", end="2025-04-10")

print(f"### Head of DJI Data ({dji_ticker}) ###")
print(DJI_2023_04_13_2025_04_12.head(10))
print(f"\n### Tail of DJI Data ({dji_ticker}) ###")
print(DJI_2023_04_13_2025_04_12.tail(10))

# ## Section 3: Data Pre-treatment - Time Difference

start_date_spx = pd.to_datetime("2023-04-13").date()
end_date_spx = pd.to_datetime("2025-04-12").date()
time_difference_spx = end_date_spx - start_date_spx
print(f"\n### Time Difference for SPX Data ###")
print(f"Time difference of {time_difference_spx.days} days")

# ## Section 4: Download S&P 500 (SPX) Data

spx_ticker = "^SPX"
spx = yf.Ticker(spx_ticker)
SPX_2023_04_13_2025_04_12_df = spx.history(period="max", interval="1d", start="2023-04-13", end="2025-04-12")

print(f"\n### Head of SPX Data ({spx_ticker}) ###")
print(SPX_2023_04_13_2025_04_12_df.head(3))
print(f"\n### Tail of SPX Data ({spx_ticker}) ###")
print(SPX_2023_04_13_2025_04_12_df.tail(3))
print(f"\n### Info of SPX Data ({spx_ticker}) ###")
SPX_2023_04_13_2025_04_12_df.info()

### Head of DJI Data (^DJI) ###
                                   Open          High           Low  \
Date                                                                  
2023-04-10 00:00:00-04:00  33425.250000  33590.238281  33343.429688   
2023-04-11 00:00:00-04:00  33586.750000  33781.289062  33586.750000   
2023-04-12 00:00:00-04:00  33764.210938  33895.429688  33593.320312   
2023-04-13 00:00:00-04:00  33668.968750  34054.988281  33605.171875   
2023-04-14 00:00:00-04:00  33981.710938  34082.941406  33730.851562   
2023-04-17 00:00:00-04:00  33930.460938  33991.230469  33796.910156   
2023-04-18 00:00:00-04:00  33965.160156  34018.621094  33791.890625   
2023-04-19 00:00:00-04:00  33889.828125  33957.980469  33814.648438   
2023-04-20 00:00:00-04:00  33740.601562  33875.390625  33677.738281   
2023-04-21 00:00:00-04:00  33793.601562  33858.828125  33688.570312   

                                  Close     Volume  Dividends  Stock Splits  
Date                                 

In [15]:
volume_col_spx = SPX_2023_04_13_2025_04_12_df.pop('Volume')
SPX_2023_04_13_2025_04_12_df.insert(len(SPX_2023_04_13_2025_04_12_df.columns), 'Volume', volume_col_spx)
print(f"\n### SPX Data with Volume Column Relocated ###")
print(SPX_2023_04_13_2025_04_12_df.head(3))


### SPX Data with Volume Column Relocated ###
                                  Open         High          Low        Close  \
Date                                                                            
2023-04-13 00:00:00-04:00  4100.040039  4150.259766  4099.399902  4146.220215   
2023-04-14 00:00:00-04:00  4140.109863  4163.189941  4113.200195  4137.640137   
2023-04-17 00:00:00-04:00  4137.169922  4151.720215  4123.180176  4151.319824   

                           Dividends  Stock Splits      Volume  
Date                                                            
2023-04-13 00:00:00-04:00        0.0           0.0  3596590000  
2023-04-14 00:00:00-04:00        0.0           0.0  3575690000  
2023-04-17 00:00:00-04:00        0.0           0.0  3611180000  


In [14]:

# ## Section 6: Data Pre-treatment - Calculate Returns and Log Returns

Data_df = SPX_2023_04_13_2025_04_12_df
print(f"\n### Sum of NA in SPX Adj Close: {Data_df['Adj Close'].isnull().sum()} ###")
print(Data_df.columns)

adj_close_ret = np.concatenate(([np.nan], np.diff(Data_df['Adj Close'], lag=1, n=1)))
print(f"\n### Head of Adjusted Close Returns ###\n{adj_close_ret[:10]}")

adj_close_ret_alt = np.concatenate(([np.nan], Data_df['Adj Close'].values[1:] - Data_df['Adj Close'].values[:-1]))
print(f"\n### Head of Alternative Adjusted Close Returns ###\n{adj_close_ret_alt[:10]}")

identical_ret = np.array_equal(adj_close_ret, adj_close_ret_alt, equal_nan=True)
print(f"\n### Are Adjusted Close Returns Identical? ### {identical_ret}")
print(f"### First 30 comparisons of Adjusted Close Returns: ###\n{adj_close_ret[:30] == adj_close_ret_alt[:30]}")

adj_close_ret_rate = np.concatenate(([np.nan], adj_close_ret[1:] / Data_df['Adj Close'].values[:-1]))
print(f"\n### Head of Adjusted Close Return Rate ###\n{adj_close_ret_rate[:6]}")

adj_close_ret_rate_perc = 100 * adj_close_ret_rate
print(f"\n### Head of Adjusted Close Return Rate Percentage ###\n{adj_close_ret_rate_perc[:6]}")

adj_close_log = np.log(Data_df['Adj Close'])
print(f"\n### Head of Log of Adjusted Close Price ###\n{adj_close_log[:10]}")

adj_close_log_ret = np.concatenate(([np.nan], np.diff(adj_close_log, lag=1, n=1)))
print(f"\n### Head of Log Returns of Adjusted Close Price ###\n{adj_close_log_ret[:10]}")

adj_close_log_ret_alt = np.concatenate(([np.nan], Data_df['adj_close_log'].values[1:] - Data_df['adj_close_log'].values[:-1]))
print(f"\n### Head of Alternative Log Returns of Adjusted Close Price ###\n{adj_close_log_ret_alt[:10]}")

identical_log_ret = np.array_equal(adj_close_log_ret, adj_close_log_ret_alt, equal_nan=True)
print(f"\n### Are Log Returns Identical? ### {identical_log_ret}")

adj_close_log_ret_perc = 100 * adj_close_log_ret
print(f"\n### Head of Percentage Log Returns of Adjusted Close Price ###\n{adj_close_log_ret_perc[:10]}")

SPX_2023_04_13_2025_04_12_df['adj_close_ret'] = adj_close_ret
SPX_2023_04_13_2025_04_12_df['adj_close_ret_rate'] = adj_close_ret_rate
SPX_2023_04_13_2025_04_12_df['adj_close_ret_rate_perc'] = adj_close_ret_rate_perc
SPX_2023_04_13_2025_04_12_df['adj_close_log'] = adj_close_log
SPX_2023_04_13_2025_04_12_df['adj_close_log_ret'] = adj_close_log_ret
SPX_2023_04_13_2025_04_12_df['adj_close_log_ret_perc'] = adj_close_log_ret_perc

print(f"\n### Head of SPX Dataframe with New Columns ###")
print(SPX_2023_04_13_2025_04_12_df.head(3))
print(f"\n### Tail of SPX Dataframe with New Columns ###")
print(SPX_2023_04_13_2025_04_12_df.tail(3))
print(f"\n### Info of SPX Dataframe with New Columns ###")

KeyError: 'Adj Close'