![QuantConnect Logo](https://cdn.quantconnect.com/web/i/icon.png)
<hr>

In [1]:
# Research notebook for random forest algorithm
# Adapted from Jansen 2020
import warnings
# NOTE: 'ignore' with no category is a blanket filter -- it matches every
# warning category, so deprecation notices from the libraries imported
# below are hidden as well.
warnings.filterwarnings('ignore')

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  
from talib import RSI, BBANDS, MACD, NATR, ATR
from scipy.stats import spearmanr, norm
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.pipeline import Pipeline
from AlgorithmImports import *
import seaborn as sns
# Short alias for pd.IndexSlice.
idx = pd.IndexSlice
# QuantBook instance; the cells below call AddEquity, SetStartDate and History on it.
qb = QuantBook()

In [5]:
assets = ["SHY", "TLT", "SHV", "TLH", "EDV", "BIL",
          "SPTL", "TBT", "TMF", "TMV", "TBF", "VGSH", "VGIT",
          "VGLT", "SCHO", "SCHR", "SPTS", "GOVT", 'SPY']

# Subscribe to every ticker (in list order) and keep its Symbol, keyed by ticker.
symbols = {
    ticker: qb.AddEquity(ticker, Resolution.Minute).Symbol
    for ticker in assets
}

qb.SetStartDate(2013, 1, 1)

# qb.AddUniverse(CoarseSelectionFunction, FineSelectionFunction)
# Daily history for all subscribed securities (2*360 passed as the History
# count argument); rows containing any missing value are discarded.
df = qb.History(qb.Securities.Keys, 2*360, resolution=Resolution.Daily).dropna()

df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,close,high,low,open,volume
symbol,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SHY SGNKIKYGE9NP,2010-02-24,73.144832,73.171102,73.109805,73.127319,935580.0
SHY SGNKIKYGE9NP,2010-02-25,73.179859,73.197372,73.127319,73.136075,1205229.0
SHY SGNKIKYGE9NP,2010-02-26,73.241156,73.258669,73.214886,73.241156,820380.0
SHY SGNKIKYGE9NP,2010-02-27,73.267426,73.284939,73.223642,73.223642,1024528.0
SHY SGNKIKYGE9NP,2010-03-02,73.254276,73.28057,73.227982,73.245511,959268.0


In [6]:
# Compute RSI per symbol (RSI is called with its default parameters).
# group_keys=False keeps the result on df's own (symbol, time) index. Without
# it, newer pandas versions prepend the group key as an extra index level and
# the column assignment no longer lines up with df.
df['rsi'] = df.groupby('symbol', group_keys=False).close.apply(RSI)

In [7]:
# Compute Bollinger Bands
def compute_bb(close):
    """Return the Bollinger Bands of log1p(close) as a three-column frame.

    BBANDS is called on ``np.log1p(close)`` with ``timeperiod=20``; the three
    arrays it returns are stored, in order, under the columns ``bb_high``,
    ``bb_mid`` and ``bb_low``. The result is indexed like ``close``.
    """
    band_names = ('bb_high', 'bb_mid', 'bb_low')
    band_values = BBANDS(np.log1p(close), timeperiod=20)
    return pd.DataFrame(dict(zip(band_names, band_values)), index=close.index)
# Bands are computed per symbol; group_keys=False keeps the result on the same
# (symbol, time) index as df instead of prepending the group key.
bb_df = df.groupby('symbol', group_keys=False).close.apply(compute_bb)
# Attach the bands as new COLUMNS, aligned on the index. A plain
# pd.concat([df, bb_df]) stacks along axis=0, which appends the bands as extra
# rows and duplicates every index label. Dropping any existing band columns
# first makes the cell safe to re-run.
df = df.drop(columns=bb_df.columns, errors='ignore').join(bb_df)


In [42]:
def compute_atr(stock_data):
    """Return the z-scored 14-period ATR for one security's bars.

    ATR is computed from the ``high``, ``low`` and ``close`` columns of
    ``stock_data`` with ``timeperiod=14``; the series is then centred on its
    own mean and divided by its own standard deviation.
    """
    raw_atr = ATR(stock_data.high, stock_data.low, stock_data.close, timeperiod=14)
    center, spread = raw_atr.mean(), raw_atr.std()
    return raw_atr.sub(center).div(spread)
# Normalised ATR per symbol; group_keys=False keeps the (symbol, time) index
# of df on the result.
by_symbol = df.groupby('symbol', group_keys=False)
atr_series = by_symbol.apply(compute_atr)
# De-duplicate on the index LABEL, not on the ATR value. A value-based
# drop_duplicates() also discards legitimately repeated ATR readings, which
# then come back as NaN once the column is aligned to df. Keeping the first
# entry per label is a no-op when the index is already unique.
df['atr'] = atr_series[~atr_series.index.duplicated(keep='first')]
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,close,high,low,open,volume,rsi,bb_high,bb_mid,bb_low,atr
symbol,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
SHY SGNKIKYGE9NP,2010-02-24,73.144832,73.171102,73.109805,73.127319,935580.0,,,,,
SHY SGNKIKYGE9NP,2010-02-25,73.179859,73.197372,73.127319,73.136075,1205229.0,,,,,
SHY SGNKIKYGE9NP,2010-02-26,73.241156,73.258669,73.214886,73.241156,820380.0,,,,,
SHY SGNKIKYGE9NP,2010-02-27,73.267426,73.284939,73.223642,73.223642,1024528.0,,,,,
SHY SGNKIKYGE9NP,2010-03-02,73.254276,73.28057,73.227982,73.245511,959268.0,,,,,
