In [4]:
import pandas as pd
import numpy as np
import statsmodels.api as sm # The library for running regressions
import os
import matplotlib.pyplot as plt
import seaborn as sns

# Sanity check: this line is only reached if every import above succeeded.
print("Libraries imported successfully.")

Libraries imported successfully.


In [5]:
# --- Load the processed data from previous notebooks ---
DATA_DIR = 'data'
RETURNS_FILE = os.path.join(DATA_DIR, 'monthly_excess_returns.csv')
EXPOSURES_FILE = os.path.join(DATA_DIR, 'factor_exposures.csv')

# Monthly excess returns: rows are dates (parsed from the 'Date' column),
# columns are stock tickers.
monthly_excess_returns = pd.read_csv(RETURNS_FILE, index_col='Date', parse_dates=True)
# Factor exposures: rows are stock tickers (first CSV column), columns are factors.
# For this illustrative notebook, the exposures are assumed constant through time.
# A real model would have a different X matrix for each month.
X = pd.read_csv(EXPOSURES_FILE, index_col=0)

# Align the data: keep only the stocks that appear in BOTH files.
# X has no date dimension, so every date in the returns file is kept.
# Using the intersection (rather than X.index directly) avoids a KeyError when
# the exposures file lists a ticker that has no return history, and restricting
# X as well guarantees both objects describe exactly the same set of stocks.
common_dates = monthly_excess_returns.index
common_stocks = X.index.intersection(monthly_excess_returns.columns)
if common_stocks.empty:
    raise ValueError(
        f"No stocks in common between {RETURNS_FILE} and {EXPOSURES_FILE}."
    )
X = X.loc[common_stocks]
monthly_excess_returns = monthly_excess_returns.loc[common_dates, common_stocks]

print("Data from Notebooks 1 & 2 loaded successfully.")
print("\nFactor Exposures (X):")
print(X.head())
print("\nMonthly Excess Returns (r):")
print(monthly_excess_returns.head())

Data from Notebooks 1 & 2 loaded successfully.

Factor Exposures (X):
           Size     Value  Momentum
AAPL   0.673549 -0.791782 -0.229023
AMZN   0.072566  0.292714  0.913754
GOOGL  0.461882 -0.475909 -0.093175
JNJ   -2.439123  0.588061 -2.641663
JPM   -1.547600  2.597559 -1.382237

Monthly Excess Returns (r):
                AAPL      AMZN     GOOGL       JNJ       JPM      MSFT  \
Date                                                                     
2019-02-28  0.042977 -0.047706 -0.001214  0.031760  0.006509  0.075558   
2019-03-31  0.095126  0.084036  0.042785  0.021153 -0.031892  0.050854   
2019-04-30  0.054336  0.079759  0.016653  0.007987  0.153071  0.105243   
2019-05-31 -0.126313 -0.080713 -0.079217 -0.066920 -0.089045 -0.051581   
2019-06-30  0.128719  0.064992 -0.023219  0.060190  0.053316  0.081318   

                  PG      TSLA       UNH       XOM  
Date                                                
2019-02-28  0.019761  0.040086 -0.105353  0.088640  
2019-03

In [None]:
# --- Run Fama-MacBeth Cross-Sectional Regressions ---
# For each month, regress that month's cross-section of stock excess returns on
# the factor exposures (r = X b + u). The fitted coefficients are that month's
# factor returns (b); the residuals are the stock-specific returns (u).

# One entry is appended per month, in the same order as monthly_excess_returns.index,
# so the lists can be turned into DataFrames indexed by date afterwards.
factor_returns_list = []
residuals_list = []

# Add a constant to the exposure matrix (necessary for statsmodels).
# This acts as the regression intercept.
X_with_const = sm.add_constant(X)
n_regressors = X_with_const.shape[1]

# Loop through each month in our returns data
for date, returns_for_month in monthly_excess_returns.iterrows():
    # 'returns_for_month' is a Series of N stock returns for a single date (our Y variable)
    # 'X_with_const' is our N x K matrix of factor exposures (our X variables)

    # Drop any stocks with missing returns for this specific month.
    # dropna() returns a new Series; the row yielded by iterrows() is not
    # modified in place (pandas advises against mutating what is being iterated).
    valid_returns = returns_for_month.dropna()

    # With fewer observations than regressors the cross-sectional regression is
    # under-identified (and with zero observations OLS raises). Record NaNs for
    # such months so the output rows still line up with the returns index.
    if len(valid_returns) < n_regressors:
        factor_returns_list.append(pd.Series(np.nan, index=X_with_const.columns))
        residuals_list.append(pd.Series(np.nan, index=returns_for_month.index))
        continue

    # Keep only the exposure rows for stocks that have a return this month.
    X_aligned = X_with_const.reindex(valid_returns.index)

    # Run the cross-sectional regression for this month
    # We use OLS for simplicity. A real model would use GLS.
    model = sm.OLS(valid_returns, X_aligned)
    results = model.fit()

    # Store the estimated coefficients (the factor returns, 'b')
    factor_returns_list.append(results.params)

    # Store the residuals (the specific returns, 'u')
    residuals_list.append(results.resid)

# Convert the lists of results into DataFrames (one row per month)
factor_returns = pd.DataFrame(factor_returns_list, index=monthly_excess_returns.index)
specific_returns = pd.DataFrame(residuals_list, index=monthly_excess_returns.index)

print("Fama-MacBeth regressions complete.")
print("\nTime Series of Factor Returns (b):")
print(factor_returns.head())
print("\nTime Series of Specific Returns (u):")
print(specific_returns.head())


Unnamed: 0_level_0,AAPL,AMZN,GOOGL,JNJ,JPM,MSFT,PG,TSLA,UNH,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-02-28,0.042977,-0.047706,-0.001214,0.03176,0.006509,0.075558,0.019761,0.040086,-0.105353,0.08864
2019-03-31,0.095126,0.084036,0.042785,0.021153,-0.031892,0.050854,0.053909,-0.127009,0.022804,0.020496
2019-04-30,0.054336,0.079759,0.016653,0.007987,0.153071,0.105243,0.028478,-0.149209,-0.059489,-0.008536
2019-05-31,-0.126313,-0.080713,-0.079217,-0.06692,-0.089045,-0.051581,-0.035627,-0.226366,0.035356,-0.110456
2019-06-30,0.128719,0.064992,-0.023219,0.06019,0.053316,0.081318,0.063694,0.205048,0.011809,0.081004
2019-07-31,0.074495,-0.016079,0.123146,-0.066949,0.043012,0.015344,0.081588,0.079323,0.018591,-0.031523
2019-08-31,-0.018061,-0.050074,-0.024314,-0.008497,-0.054531,0.013437,0.016953,-0.067822,-0.061879,-0.069224
2019-09-30,0.071162,-0.024533,0.023911,0.006146,0.069472,0.006687,0.032717,0.065839,-0.068702,0.029304
2019-10-31,0.109184,0.021975,0.02934,0.01906,0.068436,0.029717,0.00594,0.305927,0.161301,-0.044553
2019-11-30,0.076354,0.012387,0.034779,0.047289,0.053555,0.058262,-0.020877,0.046495,0.106319,0.019247


In [None]:
# Inspection aid: walk the returns table one month at a time and show, for each
# month, its date followed by the cross-section of stock returns for that date.
# The literal 'next' line marks the boundary between consecutive months.
for date, returns_for_month in monthly_excess_returns.iterrows():
    print(date, returns_for_month, 'next', sep='\n')

2019-02-28 00:00:00
AAPL     0.042977
AMZN    -0.047706
GOOGL   -0.001214
JNJ      0.031760
JPM      0.006509
MSFT     0.075558
PG       0.019761
TSLA     0.040086
UNH     -0.105353
XOM      0.088640
Name: 2019-02-28 00:00:00, dtype: float64
2019-03-31 00:00:00
AAPL     0.095126
AMZN     0.084036
GOOGL    0.042785
JNJ      0.021153
JPM     -0.031892
MSFT     0.050854
PG       0.053909
TSLA    -0.127009
UNH      0.022804
XOM      0.020496
Name: 2019-03-31 00:00:00, dtype: float64
2019-04-30 00:00:00
AAPL     0.054336
AMZN     0.079759
GOOGL    0.016653
JNJ      0.007987
JPM      0.153071
MSFT     0.105243
PG       0.028478
TSLA    -0.149209
UNH     -0.059489
XOM     -0.008536
Name: 2019-04-30 00:00:00, dtype: float64
2019-05-31 00:00:00
AAPL    -0.126313
AMZN    -0.080713
GOOGL   -0.079217
JNJ     -0.066920
JPM     -0.089045
MSFT    -0.051581
PG      -0.035627
TSLA    -0.226366
UNH      0.035356
XOM     -0.110456
Name: 2019-05-31 00:00:00, dtype: float64
2019-06-30 00:00:00
AAPL     0.1