In [68]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.stats import mstats
import os

In [69]:
# --- Step 1: Load Data ---
# Build the workbook path from its components instead of concatenating with a
# hard-coded '/' so the separator is correct on every OS.
file_path = os.path.join(os.getcwd(), 'data', 'Factor_data.xlsx')
df = pd.read_excel(file_path, sheet_name='ws_fund', index_col=0)

# Restrict the analysis to the most recent rows of the sheet.
# .copy() gives an independent frame, so the column assignments made in later
# cells never write into a slice of the full sheet.
LOOKBACK_ROWS = 36
df = df.tail(LOOKBACK_ROWS).copy()

# Optional preprocessing, currently disabled. Kept as '#' comments rather than a
# bare triple-quoted string: a string literal is an expression, so as the last
# statement of the cell it was being echoed back as the cell's output.
#
# # Winsorize and standardize factors
# for col in ['Quality', 'Momentum', 'Value', 'Low_vol', 'Market']:
#     df[col] = mstats.winsorize(df[col], limits=[0.05, 0.05])
#     df[col] = (df[col] - df[col].mean()) / df[col].std()

In [70]:
# --- Step 2: Market-neutralise the style factors ---
# Each factor is regressed on the market return (with an intercept) and the OLS
# residual is kept as that factor's market-neutral return series.
# NOTE(review): residuals exclude the fitted intercept, so every *_mn series
# averages ~0 in-sample — confirm that is intended before using the *_mn
# columns anywhere the factor means matter (e.g. return attribution).

# The design matrix is identical for every factor, so build it once.
x_market = sm.add_constant(df['Market'])

market_neutral_factors = {}
for factor in ['Quality', 'Momentum', 'Value', 'Low_vol']:
    # Deliberately NOT bound to the name `model`: that name is used for the main
    # return regression in a later cell, and re-running this cell afterwards
    # would silently replace it with the last factor-on-market fit.
    market_fit = sm.OLS(df[factor], x_market).fit()

    # Get residuals (market-neutral factor returns)
    market_neutral_factors[factor] = market_fit.resid

# Add market-neutral factors to DataFrame
for factor, neutral_series in market_neutral_factors.items():
    df[f'{factor}_mn'] = neutral_series

# Now use ['Quality_mn', 'Momentum_mn', 'Value_mn', 'Low_vol_mn'] as your factors in further analysis

print(df.columns)

Index(['Net Return', 'Benchmark Return', 'Quality', 'Momentum', 'Value',
       'Low_vol', 'Market', 'Quality_mn', 'Momentum_mn', 'Value_mn',
       'Low_vol_mn'],
      dtype='object')


In [71]:
# --- Step 3: Prepare Regression Variables ---
# Dependent variable: the fund's net return.
y = df['Net Return']  # or df['Benchmark Return']

# Regressors: the four raw style factors plus an intercept column ('const').
# Market-neutral alternative:
# X = df[['Quality_mn', 'Momentum_mn', 'Value_mn', 'Low_vol_mn']] #, 'Market']]
X = sm.add_constant(df[['Quality', 'Momentum', 'Value', 'Low_vol']])

# --- Step 4: Run Regression ---
# Ordinary least squares of y on X; the full statsmodels summary is printed.
model = sm.OLS(y, X).fit()
print(model.summary())

# --- Step 5: Get Betas (Factor Sensitivities) ---
# Every fitted coefficient except the intercept is a factor loading.
betas = model.params.drop('const')
print(betas)

                            OLS Regression Results                            
Dep. Variable:             Net Return   R-squared:                       0.087
Model:                            OLS   Adj. R-squared:                 -0.031
Method:                 Least Squares   F-statistic:                    0.7391
Date:                Mon, 15 Sep 2025   Prob (F-statistic):              0.573
Time:                        00:28:51   Log-Likelihood:                 135.44
No. Observations:                  36   AIC:                            -260.9
Df Residuals:                      31   BIC:                            -253.0
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0097      0.001      8.978      0.0

In [72]:
# --- Step 6: Calculate Factor Variances and Covariances ---
# Take the factor panel straight from the regression design matrix (minus the
# intercept) instead of repeating the column list. The covariance matrix then
# always describes exactly the regressors the betas were estimated on, so
# switching X to the *_mn factors needs no second edit here.
factor_returns = X.drop(columns='const')
factor_cov = factor_returns.cov()  # pairwise sample covariances (pandas default ddof=1)
factor_var = factor_returns.var()  # per-factor sample variance (pandas default ddof=1)

factor_cov

Unnamed: 0,Quality,Momentum,Value,Low_vol
Quality,0.002098,0.002402,0.002119,0.001406
Momentum,0.002402,0.003658,0.002803,0.00177
Value,0.002119,0.002803,0.00319,0.001678
Low_vol,0.001406,0.00177,0.001678,0.001283


In [73]:
# --- Step 7: Calculate Risk Attribution ---
# Split the factor-explained variance into one number per factor: the factor's
# own variance term plus every covariance term it shares with another factor.
factor_names = list(factor_returns.columns)

risk_contributions = {}
for factor in factor_names:
    loading = betas[factor]

    # Own-variance piece: beta_i^2 * Var(f_i)
    var_term = loading**2 * factor_var[factor]

    # Cross pieces: beta_i * beta_j * Cov(f_i, f_j) for every other factor j.
    # Skipping by name is the same as skipping by position here because the
    # names come from the frame's own columns.
    cov_term = 0
    for other_factor in factor_names:
        if other_factor == factor:
            continue
        cov_term += loading * betas[other_factor] * factor_cov.loc[factor, other_factor]

    risk_contributions[factor] = var_term + cov_term

# Residual (unexplained) risk: variance of the regression residuals
residual_var = model.resid.var()

In [74]:
# --- Step 8: Summarize Results ---
# Total variance = sum of the per-factor contributions + residual variance.
explained_risk = sum(risk_contributions.values())
total_risk = explained_risk + residual_var

print("\nRisk Attribution (Absolute):")
for name, amount in risk_contributions.items():
    share = amount / total_risk
    print(f"{name}: {amount:.6f} ({share:.2%} of total risk)")

residual_share = residual_var / total_risk
print(f"Residual: {residual_var:.6f} ({residual_share:.2%} of total risk)")
print(f"Total Portfolio Variance: {total_risk:.6f}")


Risk Attribution (Absolute):
Quality: 0.000000 (0.05% of total risk)
Momentum: 0.000000 (0.81% of total risk)
Value: 0.000002 (5.38% of total risk)
Low_vol: 0.000001 (2.48% of total risk)
Residual: 0.000033 (91.29% of total risk)
Total Portfolio Variance: 0.000036


In [75]:
# --- Step 9: Attribution Calculation ---
# Decompose the average of y as:
#   alpha + sum_k(beta_k * mean(factor_k)) + mean(residual)
betas = model.params.drop('const')
alpha = model.params['const']
avg_factors = X.drop('const', axis=1).mean()

# Factor contributions (both Series are aligned on factor name)
factor_contributions = betas * avg_factors

# Alpha contribution: the regression intercept
alpha_contribution = alpha

# Residual (unexplained) contribution: mean of the regression residuals
residuals = model.resid
residual_contribution = residuals.mean()

# Total (should match average net return)
total_contribution = factor_contributions.sum() + alpha_contribution + residual_contribution

# --- Step 10: Display Results ---
# The row count is read from the data rather than hard-coded, so the title
# stays correct if the look-back window chosen at load time changes.
print(f"Return Attribution (last {len(y)} rows):")
print(f"Alpha: {alpha_contribution:.6%}")
for factor in betas.index:
    print(f"{factor}: {factor_contributions[factor]:.6%}")
print(f"Residual: {residual_contribution:.6%}")
print(f"Total: {total_contribution:.6%} (should match average Net Return: {y.mean():.6%})")

Return Attribution (last 36 rows):
Alpha: 0.973041%
Quality: 0.021804%
Momentum: -0.036270%
Value: 0.117367%
Low_vol: -0.077844%
Residual: 0.000000%
Total: 0.998097% (should match average Net Return: 0.998097%)
