In [2]:
# Import the essential libraries for data manipulation and financial data sourcing
import pandas as pd
import numpy as np
import yfinance as yf
import os
import pandas_datareader.data as web 

print("Libraries imported successfully.")


Libraries imported successfully.


In [None]:
# --- Define our investment universe and the time period for our analysis ---
tickers = ['AAPL', 'MSFT', 'JPM', 'JNJ', 'XOM', 'PG', 'GOOGL', 'AMZN', 'UNH', 'TSLA']
start_date = "2019-01-01"
end_date = "2023-12-31"

# --- Define file paths for our data pipeline ---
# This is a best practice to avoid "magic strings" in your code.
DATA_DIR = 'data'
RAW_PRICES_FILE = os.path.join(DATA_DIR, 'raw_adj_close_prices.csv')

# --- Create the data directory if it doesn't exist ---
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)

print("Universe and file paths defined.")


In [None]:
# --- Phase 1: Data Acquisition ---
# We will check if we already have the raw data. If not, we download it.
# This prevents us from hitting the yfinance API every time we run the script.

if not os.path.exists(RAW_PRICES_FILE):
    print("Raw price data not found. Downloading from yfinance...")
    
    # Download the dividend and split-adjusted prices
    adj_close_prices = yf.download(tickers, 
                                   start=start_date, 
                                   end=end_date, 
                                   auto_adjust=True)['Close']
    
    # Save the raw downloaded data
    adj_close_prices.to_csv(RAW_PRICES_FILE)
    print(f"Data downloaded and saved to {RAW_PRICES_FILE}")
else:
    print(f"Loading raw price data from {RAW_PRICES_FILE}...")
    adj_close_prices = pd.read_csv(RAW_PRICES_FILE, index_col='Date', parse_dates=True)
    print("Data loaded successfully.")

# Display the first 5 rows of our starting data
print("\nAdjusted Close Prices:")
print(adj_close_prices.head())


In [4]:
# The data is already the adjusted close prices, so we just need to handle missing values.
# using ffill() we propagate the last valid observation forward.
adj_close_prices = adj_close_prices.ffill()
adj_close_prices.head()


Ticker,AAPL,AMZN,GOOGL,JNJ,JPM,MSFT,PG,TSLA,UNH,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [6]:
# --- Calculate Returns ---

# Calculate daily returns using the percentage change method
daily_returns = adj_close_prices.pct_change()

# Resample the daily prices to get end-of-month prices
# 'M' stands for Month-End frequency
monthly_prices = adj_close_prices.resample('ME').last()

# Calculate monthly returns from the month-end prices
monthly_returns = monthly_prices.pct_change()

# Drop the first row of each series, as it will be NaN (no prior day/month to compare to)
daily_returns = daily_returns.dropna(axis=0, how='all')
monthly_returns = monthly_returns.dropna(axis=0, how='all')

print("Daily Returns:")
daily_returns.head()

print("\nMonthly Returns:")
monthly_returns.head()

Daily Returns:

Monthly Returns:


Ticker,AAPL,AMZN,GOOGL,JNJ,JPM,MSFT,PG,TSLA,UNH,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [5]:
# For this toy project, I'll use a simple, constant proxy for the risk-free rate.
# In a more advanced version, I would pull this from FRED.
# Let's assume an annualized risk-free rate of 2.5%.
annual_rf_rate = 0.025

# Convert the annual rate to daily and monthly rates using simple division
daily_rf_rate = annual_rf_rate / 252
monthly_rf_rate = annual_rf_rate / 12

# Calculate excess returns by subtracting the risk-free rate
daily_excess_returns = daily_returns - daily_rf_rate
monthly_excess_returns = monthly_returns - monthly_rf_rate

print("Daily Excess Returns:")
daily_excess_returns.head()

NameError: name 'daily_returns' is not defined

In [33]:
print("\nMonthly Excess Returns:")
monthly_excess_returns.head()


Monthly Excess Returns:


Ticker,AAPL,AMZN,GOOGL,JNJ,JPM,MSFT,PG,TSLA,UNH,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-02-28,0.042693,-0.047989,-0.001497,0.031477,0.006226,0.075274,0.019478,0.039803,-0.105636,0.088356
2019-03-31,0.094942,0.083852,0.042602,0.02097,-0.032076,0.05067,0.053726,-0.127193,0.02262,0.020314
2019-04-30,0.054353,0.079775,0.01667,0.008003,0.153088,0.10526,0.028495,-0.149193,-0.059472,-0.008519
2019-05-31,-0.126296,-0.080697,-0.0792,-0.066903,-0.089029,-0.051564,-0.035611,-0.226349,0.035373,-0.110439
2019-06-30,0.128436,0.064708,-0.023502,0.059907,0.053032,0.081034,0.063411,0.204765,0.011526,0.08072


In [12]:
os.getcwd()

'c:\\Users\\hnade\\OneDrive\\0.Git\\Quantitative-Active-Management'

In [41]:
# --- Save our monthly returns CSVs ---
# We'll save them as CSV files in a new 'data' directory.

if not os.path.exists('data'):
    os.makedirs('data')

# Save the key final DataFrames
monthly_prices.to_csv('data/monthly_prices.csv')
monthly_excess_returns.to_csv('data/monthly_excess_returns.csv')

print("Clean data files have been saved to the '/data' directory.")


Clean data files have been saved to the '/data' directory.


In [13]:
daily_returns = adj_close_prices.pct_change()
daily_returns.head()

Ticker,AAPL,AMZN,GOOGL,JNJ,JPM,MSFT,PG,TSLA,UNH,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [29]:
adj_close_prices.resample('ME').last().pct_change()

Ticker,AAPL,AMZN,GOOGL,JNJ,JPM,MSFT,PG,TSLA,UNH,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-31,,,,,,,,,,
2019-02-28,0.044777,-0.045906,0.000586,0.033561,0.008309,0.077358,0.021561,0.041886,-0.103553,0.090439
2019-03-31,0.097025,0.085936,0.044685,0.023053,-0.029992,0.052754,0.055809,-0.125109,0.024704,0.022397
2019-04-30,0.056436,0.081859,0.018753,0.010086,0.155171,0.107343,0.030579,-0.147109,-0.057389,-0.006436
2019-05-31,-0.124213,-0.078613,-0.077117,-0.06482,-0.086945,-0.049481,-0.033528,-0.224266,0.037456,-0.108356
2019-06-30,0.130519,0.066792,-0.021419,0.06199,0.055115,0.083117,0.065494,0.206848,0.013609,0.082803
2019-07-31,0.076394,-0.014179,0.125046,-0.065049,0.044913,0.017244,0.083488,0.081223,0.020491,-0.029623
2019-08-31,-0.016461,-0.048474,-0.022714,-0.006897,-0.052931,0.015037,0.018553,-0.066222,-0.060279,-0.067624
2019-09-30,0.072961,-0.022733,0.025711,0.007946,0.071272,0.008486,0.034517,0.067639,-0.066902,0.031104
2019-10-31,0.110684,0.023475,0.03084,0.020559,0.069935,0.031216,0.00744,0.307427,0.162802,-0.043053
