In [45]:
! pip install pymssql
! pip install  SQLAlchemy==1.4.17



## Momentum-Based 150/50 Strategy
This strategy capitalizes on historical momentum and volatility measures, targeting stocks expected to continue their trend based on past performance.

**Selected Features:**
- **`mom12m`**: 12-month momentum, to capture long-term upward or downward trends in stock prices.
- **`idiovol`**: Idiosyncratic return volatility, to measure the risk associated with each stock that is not explained by market movements.
- **`betasq`**: Beta squared, to account for the non-linear effects of market movements on stock prices, providing a measure of systematic risk.
- **`retvol`**: Return volatility, giving a straightforward measure of total stock return variability.
- **`bm`**: Book-to-market ratio, used as a value indicator, where higher values might indicate undervalued stocks.

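This version of the 150/50 strategy combines momentum indicators with volatility measures to select stocks: the portfolio is 150% long the stocks with the highest predicted returns and 50% short the stocks with the lowest predicted returns.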

In [46]:
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.preprocessing import QuantileTransformer, OneHotEncoder
from sklearn.compose import make_column_transformer, ColumnTransformer
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
from pandas_datareader.data import DataReader
import statsmodels.formula.api as smf
import plotly.graph_objects as go


In [47]:
# Database Connection Setup
import getpass
import os
from urllib.parse import quote_plus

# Connection settings come from environment variables so that the password is
# never stored in the notebook. Non-secret settings fall back to the values
# this notebook was written against; the password is prompted for when the
# GHZ_DB_PASSWORD variable is not set.
server = os.environ.get("GHZ_DB_SERVER", "mssql-82792-0.cloudclusters.net:16272")
username = os.environ.get("GHZ_DB_USER", "user")
database = os.environ.get("GHZ_DB_NAME", "ghz")
password = os.environ.get("GHZ_DB_PASSWORD") or getpass.getpass("Database password: ")

# URL-quote the credentials so characters such as '@', ':' or '/' in a
# username/password cannot corrupt the connection URL.
connection_string = (
    f"mssql+pymssql://{quote_plus(username)}:{quote_plus(password)}@{server}/{database}"
)
conn = create_engine(connection_string).connect()

In [48]:
# Data Retrieval
query = """
SELECT date, ticker, mom12m, idiovol, betasq, retvol, bm, siccd, ret
FROM data
ORDER BY date, ticker
"""

# Read the panel, parse the date column, discard rows with any missing value,
# and key the frame by (date, ticker).
df = (
    pd.read_sql(query, conn)
    .assign(date=lambda raw: pd.to_datetime(raw["date"]))
    .dropna()
    .set_index(["date", "ticker"])
)

df.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,mom12m,idiovol,betasq,retvol,bm,siccd,ret
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2000-01-01,AA,0.783092,0.04591,0.413558,0.024615,0.43876,3334,-0.160392
2000-01-01,AABC,0.077586,0.054532,0.33024,0.051763,1.107577,6020,-0.029528
2000-01-01,AAC,0.241379,0.084462,5.146754,0.0139,1.893766,6153,0.056338
2000-01-01,AACE,0.095833,0.051541,0.74486,0.020628,0.33986,6090,-0.067568
2000-01-01,AAG,-0.294565,0.027907,0.132426,0.014818,0.703278,3675,-0.100694


In [49]:
# Feature Engineering
features = ["mom12m", "idiovol", "betasq", "retvol", "bm"]
qt = QuantileTransformer(output_distribution="normal")

# Keep the untransformed return; `ret` itself is replaced by its
# per-date normal score below.
df["actual"] = df["ret"]


def _to_normal_scores(cross_section):
  """Quantile-transform one date's rows, keeping the original labels."""
  scores = qt.fit_transform(cross_section)
  return pd.DataFrame(
    scores,
    index=cross_section.index,
    columns=cross_section.columns,
  )


# The transformer is refit separately on every date's cross-section.
scaled_cols = features + ["ret"]
grouped = df.groupby("date", group_keys=False)
df[scaled_cols] = grouped[scaled_cols].apply(_to_normal_scores)


In [50]:
# get industry
# SIC-range lookup table, indexed by industry name; its `start`/`end`
# columns are used by `industry()` to classify a SIC code.
sic_table_path = "siccodes12.csv"
inds = pd.read_csv(sic_table_path, index_col="industry")
ind_names = inds.index.unique().tolist()

def industry(sic, table=None):
  """Return the industry whose [start, end] SIC range contains `sic`.

  Parameters
  ----------
  sic : number
      SIC code to classify.
  table : DataFrame, optional
      Lookup table indexed by industry name with numeric `start` and `end`
      columns. Defaults to the notebook-level `inds` table.

  Returns
  -------
  The index label of the first matching row, or "Other" when no range
  contains `sic`.
  """
  if table is None:
    table = inds
  # Explicit empty-match check instead of a bare `except`, so genuine errors
  # (e.g. a missing lookup table) are no longer silently reported as "Other".
  in_range = ((table.start <= sic) & (sic <= table.end)).to_numpy()
  hits = table.index[in_range]
  return hits[0] if len(hits) else "Other"

# Classify each distinct SIC code once, then join the labels onto the panel.
unique_codes = df.siccd.unique()
codes = pd.DataFrame({
  "siccd": unique_codes,
  "industry": [industry(code) for code in unique_codes],
})

# Drop any `industry` column left by a previous run of this cell so the merge
# does not create `industry_x` / `industry_y`; `validate` guards against a
# SIC code mapping to more than one industry row.
df = (
  df.reset_index()
  .drop(columns="industry", errors="ignore")
  .merge(codes, on="siccd", validate="many_to_one")
  .set_index(["date", "ticker"])
)

# Re-running the cell must not register the feature twice.
if "industry" not in features:
  features.append("industry")

df

Unnamed: 0_level_0,Unnamed: 1_level_0,mom12m,idiovol,betasq,retvol,bm,siccd,ret,actual,industry
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-01-01,AA,1.062898,-0.705817,-0.608728,-0.675865,-0.231823,3334,-1.162112,-0.160392,Manufacturing
2000-01-01,KLU,0.717119,0.011258,0.789798,0.474865,-0.598009,3334,-1.574704,-0.219512,Manufacturing
2000-01-01,MXM,-0.468278,-1.129827,-0.384658,-0.986843,-2.097024,3334,-1.329090,-0.182216,Manufacturing
2000-02-01,AA,1.159989,-0.706181,-0.615931,-1.019566,-0.230006,3334,-0.193957,-0.013453,Manufacturing
2000-02-01,KLU,0.802226,0.017631,0.793211,0.409666,-0.597599,3334,-0.475214,-0.052083,Manufacturing
...,...,...,...,...,...,...,...,...,...,...
2022-02-01,LDOS,-0.733008,-0.955518,-0.400652,-0.462821,-0.394491,9711,1.253396,0.138513,Other
2022-03-01,LDOS,-0.013781,-0.933381,-0.437733,0.040330,-0.389770,9711,0.597236,0.064218,Other
2021-03-01,SYX,1.135179,0.168609,-1.247953,0.836376,-0.745520,5046,1.090863,0.143490,Shops
2021-04-01,SYX,0.478346,0.216044,-1.103486,-0.909831,-0.738148,5046,0.260943,0.038911,Shops


In [51]:
# Define the Model
# Step 1: one-hot encode `industry`, passing all other columns through.
# `handle_unknown="ignore"` encodes an industry that was absent from the
# training window as all zeros instead of raising at prediction time.
transform1 = make_column_transformer(
    (OneHotEncoder(handle_unknown="ignore"), ["industry"]),
    remainder="passthrough"
)
# Step 2: expand to all degree-2 polynomial terms; the expansion includes a
# constant column, which is why the regression itself fits no intercept.
transform2 = PolynomialFeatures(degree=2)
model = LinearRegression(fit_intercept=False)
pipe = make_pipeline(transform1, transform2, model)

In [52]:
# Walk-forward evaluation: for each pair of consecutive boundaries, fit on all
# rows dated before the first boundary and predict the rows dated from the
# first boundary up to (but excluding) the second.
dates = ["2005-01", "2010-01", "2015-01", "2020-01", "3000-01"]
date_level = df.index.get_level_values("date")
fold_predictions = []

for train_date, end_date in zip(dates, dates[1:]):
  before_cutoff = date_level < train_date
  before_end = date_level < end_date
  train = df[before_cutoff]
  test = df[before_end & ~before_cutoff]

  Xtrain, ytrain = train[features], train["ret"]
  Xtest, ytest = test[features], test["ret"]

  pipe.fit(Xtrain, ytrain)
  print(f"Train set score: {pipe.score(Xtrain, ytrain)}")

  pred = pd.Series(pipe.predict(Xtest), index=test.index)
  fold_predictions.append(pred)
  print(f"Test set score: {pipe.score(Xtest, ytest)}")

# Stitch the out-of-sample folds together in chronological order.
predictions = pd.concat(fold_predictions)
predict_df = predictions.to_frame("pred_ret")



Train set score: 0.012012442517548072
Test set score: 0.001746569132200615
Train set score: 0.009313560535481269
Test set score: 0.005705647380521706
Train set score: 0.00881012584166696
Test set score: 0.01039237729520015
Train set score: 0.00930926387607156
Test set score: 0.005391262878212899


In [53]:
def get_stock_returns(predict_df, actual_returns_df, num_stocks, best=True):
    """Average realised return, per date, of the stocks ranked best or worst by prediction.

    Parameters
    ----------
    predict_df : single-column frame of predicted returns whose index has a
        "date" level.
    actual_returns_df : realised returns sharing the same index labels.
    num_stocks : number of stocks kept on each date.
    best : when truthy keep the highest predictions, otherwise the lowest.

    Returns
    -------
    Series of mean realised returns indexed by 'YYYY-MM' strings.
    """
    # Rank within each date; ties are broken by order of appearance.
    per_date = predict_df.groupby("date", group_keys=False)
    ranks = per_date.rank(ascending=not best, method="first")

    # Blank out everything outside the top `num_stocks`, then drop the blanks.
    selected_stocks = predict_df.where(ranks <= num_stocks).dropna()

    # Pair each selected prediction with its realised return.
    paired = pd.concat([selected_stocks, actual_returns_df], axis=1, join="inner")
    paired.columns = ["pred_ret", "ret"]

    mean_rets = paired.groupby("date")["ret"].mean()

    # Relabel the dates as year-month strings.
    mean_rets.index = pd.to_datetime(mean_rets.index).strftime("%Y-%m")
    return mean_rets

# `predict_df` holds the predicted returns and df["actual"] the realised,
# untransformed returns.
long_num_stocks = 100
short_num_stocks = 100

# Mean realised return of the top-ranked (long) and bottom-ranked (short) names.
best_rets = get_stock_returns(
    predict_df, df["actual"], num_stocks=long_num_stocks, best=True
)
worst_rets = get_stock_returns(
    predict_df, df["actual"], num_stocks=short_num_stocks, best=False
)

In [54]:
import yfinance as yf
from pandas.tseries.offsets import BMonthEnd

import datetime as dt


# Download SPY from one month before the first strategy month (2005-01) so that
# the 2005-01 monthly return can be computed.
# The previous `start=2017` was not interpreted as the year 2017 — the recorded
# output contains SPY returns from 2005-01 — so the window was right only by
# accident (presumably the integer is read as an epoch timestamp; verify
# against the installed yfinance version).
spy = yf.download("SPY", start="2004-12-01")["Adj Close"]
spy = pd.DataFrame(spy)
# Month-end price -> month-over-month return, labelled 'YYYY-MM' to match
# the strategy return series.
spy_monthly = spy.resample('M').last().pct_change()
spy_monthly.index = spy_monthly.index.strftime('%Y-%m')
spy_monthly.columns = ['Monthly Returns']

# Keep only the months present in all three series.
rets = pd.concat([spy_monthly, best_rets, worst_rets], axis=1, join='inner')
rets.columns = ["spy", "best", "worst"]

# 150% long the best-ranked basket, 50% short the worst-ranked basket.
rets["150/50"] = 1.5*rets.best - 0.5*rets.worst
rets

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0,spy,best,worst,150/50
2005-01,-0.022420,-0.009738,-0.154537,0.062662
2005-02,0.020904,0.039748,-0.051752,0.085499
2005-03,-0.018292,-0.001664,-0.093340,0.044173
2005-04,-0.018736,-0.019529,-0.129736,0.035574
2005-05,0.032225,0.043681,0.030732,0.050155
...,...,...,...,...
2021-11,-0.008035,-0.011630,-0.135818,0.050465
2021-12,0.046248,0.061546,-0.116578,0.150608
2022-01,-0.052741,-0.043053,-0.174039,0.022441
2022-02,-0.029517,-0.006553,0.005350,-0.012504


In [55]:
from pandas_datareader import DataReader as pdr


# Fama-French research factors, converted from percent to decimal and
# relabelled with string dates so they align with `rets`.
ff = pdr("F-F_Research_Data_Factors", "famafrench", start=2005)[0] / 100
ff.index = ff.index.astype(str)
rf = ff["RF"]
mkt = ff["Mkt-RF"] + rf

# Strategy return next to the market return and the risk-free rate.
df_sharpe = pd.concat(
    {"ret": rets["150/50"], "mkt": mkt, "rf": rf},
    axis=1,
)


The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.


The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.



In [56]:
# Monthly return in excess of the risk-free rate, and its summary statistics.
xret = df_sharpe["ret"] - df_sharpe["rf"]
xret_mean = xret.mean()
xret_std = xret.std()
# Scale the monthly ratio by sqrt(12) to annualise it.
sharpe = np.sqrt(12) * xret_mean / xret_std

print(f"Annualized Sharpe ratio of Momentum-Based 150-50 strategy is {sharpe:.2%}")
print(f'Mean return of Momentum-Based 150-50 strategy is, {xret_mean:.4}')
print(f'Std dev of Momentum-Based 150-50 strategy is, {xret_std:.4}')

Annualized Sharpe ratio of Momentum-Based 150-50 strategy is 73.71%
Mean return of Momentum-Based 150-50 strategy is, 0.01232
Std dev of Momentum-Based 150-50 strategy is, 0.05789


In [57]:
import plotly.graph_objects as go


def _accumulation_trace(column, label):
    """Line trace of the growth of $1 compounded at the monthly returns in rets[column]."""
    return go.Scatter(
        x=rets.index.to_list(),
        y=(1 + rets[column]).cumprod(),
        mode="lines",
        name=label,
        hovertemplate="%{x}<br>accumulation = $%{y:.2f}<extra></extra>",
    )


trace1 = _accumulation_trace("150/50", "150-50 strategy return")
trace2 = _accumulation_trace("spy", "SPY 500 return")

# Strategy first, benchmark second.
fig = go.Figure(data=[trace1, trace2])

layout_options = {
    "title": "Comparison of Momentum-Based 150-50 Strategy vs SPY 500 Returns",
    "yaxis_title": "",
    "xaxis_title_font": {"size": 18},
    "template": "plotly_white",
    "yaxis_tickprefix": "$",
    "yaxis_tickformat": ".2f",
    "height": 600,
    "width": 1000,
    "legend": {"x": 0.01, "y": 0.99},
    "font_size": 16,
}
fig.update_layout(**layout_options)
fig.show()

In [58]:
# Jensen's alpha
import statsmodels.formula.api as smf


# Market excess return and risk-free rate, in decimals, keyed by string dates.
ff = pdr("F-F_Research_Data_Factors", "famafrench", start=2005)[0]/100
ff.index = ff.index.astype(str)
mkt_rf, rf = ff["Mkt-RF"], ff["RF"]

# One row per month with strategy, market and risk-free returns; months that
# are missing any of them are dropped.
df_ja = (
    pd.concat({"ret": rets['150/50'], "mkt_rf": mkt_rf, "rf": rf}, axis=1)
    .assign(
        ret_rf=lambda d: d.ret - d.rf,
        mkt=lambda d: d.mkt_rf + d.rf,
    )
    .rename_axis("date")
    .reset_index()
    .dropna()
)

# Regress strategy excess return on market excess return.
result = smf.ols("ret_rf~mkt_rf", df_ja).fit()
beta = result.params["mkt_rf"]

# Split the strategy return into the part explained by market exposure
# (risk-free rate plus beta times market excess return) and the remainder.
mkt = df_ja.rf + beta*df_ja.mkt_rf
active = df_ja.ret - mkt

# Jensen's alpha is the regression intercept: the average monthly excess
# return not explained by the portfolio's market beta.
alpha_ja = result.params['Intercept']
print(f'Jensen\'s alpha of Momentum-Based 150-50 strategy is {alpha_ja:.3}')

Jensen's alpha of Momentum-Based 150-50 strategy is 0.00806



The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.


The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.



In [59]:
def _growth_trace(returns, component):
    """Trace of $1 compounded at `returns`, labelled with the component name."""
    return go.Scatter(
        x=df_ja.date,
        y=(1 + returns).cumprod(),
        name="150-50 strategy " + component,
        hovertemplate="%{x}<br>" + component + "=$%{y:.2f}<extra></extra>",
    )


trace1 = _growth_trace(df_ja.ret, "total")
trace2 = _growth_trace(active, "active")
trace3 = _growth_trace(mkt, "market")

# Drawing order: total, market, active.
fig = go.Figure(data=[trace1, trace3, trace2])

layout_options = {
    "title": "Momentum-Based 150-50 Strategy Accumulation",
    "yaxis_title": "Accumulation",
    "xaxis_title_font": {"size": 18},
    "template": "plotly_white",
    "yaxis_tickprefix": "$",
    "yaxis_tickformat": ".2f",
    "height": 600,
    "width": 1000,
    "legend": {"x": 0.01, "y": 0.99},
    "font_size": 16,
}
fig.update_layout(**layout_options)

fig.show()

In [60]:
# Fama-French alpha
# Five-factor data in decimals, keyed by string dates so the strategy returns
# align on assignment.
df_ff = pdr("F-F_Research_Data_5_Factors_2x3", "famafrench", start=2005)[0]/100
df_ff.index = df_ff.index.astype(str)
df_ff["ret"] = rets['150/50']
df_ff["ret_rf"] = df_ff.ret - df_ff.RF
# Keep only months that have both factor data and a strategy return.
df_ff = df_ff.dropna()
df_ff.index.name = "date"
df_ff = df_ff.reset_index()
df_ff = df_ff.rename(columns={"Mkt-RF": "mkt_rf", "RF": "rf"})

result_ff = smf.ols("ret_rf~mkt_rf+SMB+HML+CMA+RMW", df_ff).fit()

# Look the loadings up by factor name rather than by integer position:
# positional access on a label-indexed Series is deprecated in pandas and
# would silently pick the wrong factor if the formula terms were reordered.
betas_ff = result_ff.params.drop("Intercept")
mkt_ff = df_ff.rf + betas_ff["mkt_rf"]*df_ff.mkt_rf
smb = betas_ff["SMB"]*df_ff.SMB
hml = betas_ff["HML"]*df_ff.HML
cma = betas_ff["CMA"]*df_ff.CMA
rmw = betas_ff["RMW"]*df_ff.RMW
# Return left over after removing the risk-free rate and every factor exposure.
active_ff = df_ff.ret - mkt_ff - smb - hml - cma - rmw

# The regression intercept is the Fama-French alpha: the average monthly
# excess return not explained by the five factor exposures.
alpha_ff = result_ff.params['Intercept']
print(f'Fama-French alpha of Momentum-Based 150-50 strategy is {alpha_ff:.3}')

Fama-French alpha of Momentum-Based 150-50 strategy is 0.00385



The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.


The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.

