In [2]:
import pandas as pd
vix_daily = pd.read_csv("Data/VIXCLS-2.csv", parse_dates=["observation_date"], index_col="observation_date")
vix_daily.rename(columns={"observation_date": "date"}, inplace=True)
vix_daily.rename(columns={"VIXCLS": "VIX"}, inplace=True)
vix = vix_daily.resample('ME').mean()
vix.index = vix.index.to_period('M').to_timestamp('M')
vix.index.name ="date"
returns_dataframe = pd.read_pickle('Data/hml_df.pkl')
returns = pd.DataFrame(returns_dataframe['hml_return'])
returns.index = pd.to_datetime(returns.index)
returns.index = returns.index.to_period('M').to_timestamp('M')

credit_spread_daily = pd.read_csv("Data/BAA10Y.csv", parse_dates=['observation_date'], index_col='observation_date')
credit_spread = credit_spread_daily.resample('ME').mean()
credit_spread.index = credit_spread.index.to_period('M')
credit_spread = credit_spread/100
credit_spread.index.name = 'date'
credit_spread.index = credit_spread.index.to_timestamp()
credit_spread.index = credit_spread.index.to_period('M').to_timestamp('M')
credit_spread.rename(columns={"BAA10Y": "Credit_Spread"}, inplace=True)


df = pd.concat([returns, vix, credit_spread], axis=1, join="inner")
df

Unnamed: 0_level_0,hml_return,VIX,Credit_Spread
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1990-01-31,-0.000529,23.347273,0.017386
1990-02-28,-0.003830,23.262632,0.016711
1990-03-31,-0.024993,20.062273,0.016182
1990-04-30,-0.017695,21.403500,0.015160
1990-05-31,-0.033597,18.097727,0.016532
...,...,...,...
2018-02-28,-0.008893,22.464737,0.016500
2018-03-31,0.006576,19.023810,0.017976
2018-04-30,0.012575,18.267619,0.018000
2018-05-31,-0.031393,14.124545,0.018477


In [4]:
import pandas as pd
import statsmodels.api as sm


# Define a high risk aversion dummy based on VIX (e.g., above median)
df['high_vix'] = (df['VIX'] > df['VIX'].median()).astype(int)

# Interaction term
df['value_x_vix'] = df['hml_return'] * df['high_vix']

# High credit spread dummy (above median)
df['high_cs'] = (df['Credit_Spread'] > df['Credit_Spread'].median()).astype(int)

# Interaction with value premium
df['value_x_cs'] = df['hml_return'] * df['high_cs']

# Regression: value premium on high_vix dummy and interaction
#X = sm.add_constant(df[['high_vix', 'value_x_vix']])
X = sm.add_constant(df[['high_vix', 'value_x_vix', 'high_cs', 'value_x_cs']])
y = df['hml_return']

model = sm.OLS(y, X).fit(cov_type="HC3")
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:             hml_return   R-squared:                       0.810
Model:                            OLS   Adj. R-squared:                  0.807
Method:                 Least Squares   F-statistic:                     490.5
Date:                Sat, 05 Jul 2025   Prob (F-statistic):          4.41e-139
Time:                        21:16:03   Log-Likelihood:                 1032.9
No. Observations:                 342   AIC:                            -2056.
Df Residuals:                     337   BIC:                            -2037.
Df Model:                           4                                         
Covariance Type:                  HC3                                         
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.0040      0.001      2.909      

In [6]:
high_vix_returns = df.loc[df['high_vix'] == 1, 'hml_return']
low_vix_returns = df.loc[df['high_vix'] == 0, 'hml_return']

print("Mean Value Premium when VIX is High:", high_vix_returns.mean())
print("Mean Value Premium when VIX is Low:", low_vix_returns.mean())


Mean Value Premium when VIX is High: 0.0006650352689024049
Mean Value Premium when VIX is Low: 0.0036391369210514914


The average difference in raw returns between just “high VIX” vs. “low VIX” months (ignoring factor levels) is indeed very small (~0.1%).
However, the effect of the value premium on returns is much stronger in the high VIX regime.
In other words, risk aversion regimes don’t change the baseline returns much directly, but they amplify or dampen how much the value factor translates into actual returns.