<a href="https://colab.research.google.com/github/siv4gurunathan/trading_journal/blob/main/research_methods.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
# Load the market-return workbook, parse the Date column into timestamps,
# order the rows chronologically, and use Date as the index.
df = (
    pd.read_excel("Market return_updated.xlsx")
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date')
    .set_index('Date')
)

print("Data Loaded Successfully")
# Rescale the risk-free column by 1/100.
# NOTE(review): presumably RF is quoted in percent while the return columns
# are decimals -- confirm against the workbook.
df['RF'] = df['RF'].div(100)

# Tickers whose columns are analysed throughout the rest of the notebook.
etfs = ['XLE', 'XLF', 'XLP', 'XLK']

# Excess return = raw column minus the risk-free column, first for every ETF
# and then for the SPX market series.
for etf in etfs:
    df[f'{etf}_excess'] = df[etf].sub(df['RF'])
df['SPX_excess'] = df['SPX'].sub(df['RF'])

print("Excess returns created")
# Single-factor (CAPM) regressions: each ETF's excess return is regressed on
# the SPX excess return plus an intercept, with HC3 robust standard errors.
def _capm_row(etf):
    """Fit the market-only regression for one ETF and return its summary row."""
    exog = sm.add_constant(df['SPX_excess'])
    fit = sm.OLS(df[etf + '_excess'], exog).fit(cov_type='HC3')
    return {
        'ETF': etf,
        'Market_Beta': fit.params['SPX_excess'],
        'p_value': fit.pvalues['SPX_excess'],
        'R_squared': fit.rsquared,
    }


# One row per ETF, in the order given by `etfs`.
capm_results = [_capm_row(etf) for etf in etfs]
capm_df = pd.DataFrame(capm_results)

print("\n=== CAPM Results (Using SPX) ===")
print(capm_df)
# Two-factor regressions: each ETF's excess return on the SPX excess return
# and SENT_ORTH (plus an intercept), with HC3 robust standard errors.
# NOTE(review): SENT_ORTH is presumably an orthogonalised sentiment series
# supplied by the workbook -- it is not constructed in this notebook.
results = []

for ticker in etfs:
    design = sm.add_constant(df[['SPX_excess', 'SENT_ORTH']])
    fit = sm.OLS(df[f'{ticker}_excess'], design).fit(cov_type='HC3')

    coefs = fit.params
    results.append(dict(
        ETF=ticker,
        Alpha=coefs['const'],                       # regression intercept
        Market_Beta=coefs['SPX_excess'],
        Behavioural_Beta=coefs['SENT_ORTH'],        # loading on sentiment
        Sentiment_pvalue=fit.pvalues['SENT_ORTH'],
        R_squared=fit.rsquared,
    ))

betas_df = pd.DataFrame(results)

print("\n=== Behavioural Beta Results ===")
print(betas_df)
# Predictive regressions: does SENT_ORTH in one row help explain each ETF's
# excess return in the NEXT row?  Work on a copy so `df` keeps every row.
df_future = df.copy()

# shift(-1) pulls the following row's excess return up by one position, so
# row t holds SENT_ORTH at t alongside the return at t+1.
lead_cols = []
for etf in etfs:
    df_future[etf + '_lead'] = df_future[etf + '_excess'].shift(-1)
    lead_cols.append(etf + '_lead')

# Drop only the rows that are missing a value these regressions actually use
# (the final row, whose lead is undefined, and any gap in SENT_ORTH).  A bare
# dropna() would also discard rows with NaNs in unrelated columns and
# silently shrink the estimation sample.
df_future = df_future.dropna(subset=['SENT_ORTH'] + lead_cols)

predict_results = []

for etf in etfs:
    X = sm.add_constant(df_future['SENT_ORTH'])
    y = df_future[etf + '_lead']

    # HC3 = heteroskedasticity-robust covariance for the coefficient tests.
    model = sm.OLS(y, X).fit(cov_type='HC3')

    predict_results.append({
        'ETF': etf,
        'Predict_Coeff': model.params['SENT_ORTH'],
        'p_value': model.pvalues['SENT_ORTH']
    })

predict_df = pd.DataFrame(predict_results)

print("\n=== Predictive Regression Results ===")
print(predict_df)
# Order the ETFs by their estimated sentiment loading (ascending) and take the
# two extremes: first row = lowest loading, last row = highest loading.
sorted_betas = betas_df.sort_values('Behavioural_Beta')
low_etf, high_etf = sorted_betas['ETF'].iloc[[0, -1]]

print("\nLow behavioural beta ETF:", low_etf)
print("High behavioural beta ETF:", high_etf)

# High-minus-low excess-return spread, stored on the main frame.
df['Spread'] = df[f'{high_etf}_excess'].sub(df[f'{low_etf}_excess'])

# Intercept-only regression: the single coefficient is the mean spread, so
# its test statistic checks whether that mean differs from zero (HC3 errors).
intercept_only = sm.add_constant(np.ones(len(df)))
spread_model = sm.OLS(df['Spread'], intercept_only).fit(cov_type='HC3')

print("\n=== High-Low Spread Test ===")
print(spread_model.summary())
# Report the sample standard deviation of the sentiment series (header line,
# then the value on its own line).
print("\nStandard Deviation of Sentiment:", df['SENT_ORTH'].std(), sep="\n")
# --- Sentiment x volatility interaction ------------------------------------
# High_Vol is 1 on rows where the absolute SPX excess return exceeds the
# full-sample standard deviation of SPX excess returns, else 0.
df['High_Vol'] = (df['SPX_excess'].abs() > df['SPX_excess'].std()).astype(int)

# The interaction term does not depend on the ETF, so build it once instead
# of recomputing it on every loop iteration.
df['Interaction'] = df['SENT_ORTH'] * df['High_Vol']

for etf in etfs:
    X = sm.add_constant(df[['SPX_excess','SENT_ORTH','Interaction']])
    y = df[etf + '_excess']

    # HC3 = heteroskedasticity-robust covariance for the coefficient tests.
    model = sm.OLS(y, X).fit(cov_type='HC3')
    print(etf)
    print(model.summary())

# --- Multi-factor robustness check ------------------------------------------
# Re-estimate the sentiment coefficient with SMB, HML, RMW and CMA added as
# controls alongside the SPX excess return.
# The accumulator is initialised here at top level. It previously sat inside
# the loop above, where it was reset on every iteration and was only defined
# if that loop had run at least once.
ff_results = []

for etf in etfs:
    X = df[['SPX_excess','SMB','HML','RMW','CMA','SENT_ORTH']]
    X = sm.add_constant(X)
    y = df[etf + '_excess']

    model = sm.OLS(y, X).fit(cov_type='HC3')

    ff_results.append({
        'ETF': etf,
        'Sentiment_Coeff': model.params['SENT_ORTH'],
        'Sentiment_pvalue': model.pvalues['SENT_ORTH'],
        'R_squared': model.rsquared
    })

ff_df = pd.DataFrame(ff_results)

print("\n=== FF5 + SPX + Sentiment Robustness ===")
print(ff_df)




Data Loaded Successfully
Excess returns created

=== CAPM Results (Using SPX) ===
   ETF  Market_Beta       p_value  R_squared
0  XLE     1.059716  5.302877e-23   0.398879
1  XLF     1.180328  1.893576e-93   0.702324
2  XLP     0.547586  8.885279e-42   0.454607
3  XLK     1.284772  1.009251e-98   0.742609

=== Behavioural Beta Results ===
   ETF     Alpha  Market_Beta  Behavioural_Beta  Sentiment_pvalue  R_squared
0  XLE  0.001189     1.090227          0.012547          0.034946   0.411909
1  XLF -0.000646     1.192162          0.004867          0.157598   0.705106
2  XLP  0.002750     0.552486          0.002015          0.522289   0.456041
3  XLK  0.002507     1.269994         -0.006077          0.228401   0.746481

=== Predictive Regression Results ===
   ETF  Predict_Coeff   p_value
0  XLE       0.003148  0.629537
1  XLF      -0.005702  0.275771
2  XLP      -0.003492  0.290200
3  XLK      -0.016139  0.079060

Low behavioural beta ETF: XLK
High behavioural beta ETF: XLE

=== High-Low