# Machine Learning Techniques: Fixed Income ETF

In [1]:
#technical indicators

In [2]:
#libraries needed for my research 
import pandas_datareader.data as web
import datetime as dt
import math
import numpy as np
import pandas as pd
import ta
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
import matplotlib.pyplot as plt
from ta.utils import dropna
import warnings
warnings.filterwarnings('ignore')

In [3]:
#Technical Indicators mainly for fixed income etfs (AGG, LQD, JNK)
ticker = input('Ticker: ')
end = dt.datetime.now()
start = end - dt.timedelta(days=365*15)
df = web.DataReader(ticker,'yahoo',start,end)
data1 = ta.add_all_ta_features(df, "Open", "High", "Low", "Close", "Volume", fillna=True).reset_index()
#shifting adjusted close one day down for forecasting reasons: I want to predict tomorrows prices using sklearn
#data['forecast'] = data['Adj Close']

Ticker: AGG


In [4]:
data1.columns

Index(['Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close',
       'volume_adi', 'volume_obv', 'volume_cmf', 'volume_fi', 'momentum_mfi',
       'volume_em', 'volume_sma_em', 'volume_vpt', 'volume_nvi', 'volume_vwap',
       'volatility_atr', 'volatility_bbm', 'volatility_bbh', 'volatility_bbl',
       'volatility_bbw', 'volatility_bbp', 'volatility_bbhi',
       'volatility_bbli', 'volatility_kcc', 'volatility_kch', 'volatility_kcl',
       'volatility_kcw', 'volatility_kcp', 'volatility_kchi',
       'volatility_kcli', 'volatility_dcl', 'volatility_dch', 'trend_macd',
       'trend_macd_signal', 'trend_macd_diff', 'trend_sma_fast',
       'trend_sma_slow', 'trend_ema_fast', 'trend_ema_slow', 'trend_adx',
       'trend_adx_pos', 'trend_adx_neg', 'trend_vortex_ind_pos',
       'trend_vortex_ind_neg', 'trend_vortex_ind_diff', 'trend_trix',
       'trend_mass_index', 'trend_cci', 'trend_dpo', 'trend_kst',
       'trend_kst_sig', 'trend_kst_diff', 'trend_ichimoku_conv',
       '

In [5]:
#correlation matrix
correlationMatrix = data1.corr()
correlationMatrix = correlationMatrix[correlationMatrix>.1]
sig = correlationMatrix['Adj Close'].dropna()
sig.sort_values(ascending=False)

Adj Close                  1.000000
momentum_kama              0.892984
trend_ema_slow             0.892919
trend_ema_fast             0.892014
High                       0.891081
trend_sma_fast             0.890554
volatility_kcc             0.890525
Close                      0.890511
others_cr                  0.890511
volatility_kch             0.890500
volatility_bbm             0.890278
Open                       0.890067
trend_sma_slow             0.890062
volume_vwap                0.890056
volatility_bbh             0.889391
Low                        0.889194
trend_ichimoku_conv        0.888356
trend_ichimoku_a           0.887795
volatility_kcl             0.887718
trend_ichimoku_base        0.883792
trend_ichimoku_b           0.877243
volatility_bbl             0.874731
trend_visual_ichimoku_a    0.864260
trend_visual_ichimoku_b    0.852307
volume_nvi                 0.831937
volume_obv                 0.831189
volume_adi                 0.797612
trend_psar_down            0

In [6]:
print("COVARIANCE MATRIX")
covarianceMatrix = data1.cov()
covarianceMatrix = covarianceMatrix[covarianceMatrix > 1]
sigCov = (covarianceMatrix['Adj Close'].dropna())
sigCov.sort_values(ascending = False)

COVARIANCE MATRIX


volume_obv                 1.849398e+09
volume_adi                 3.382252e+08
Volume                     2.013170e+07
volume_fi                  1.827121e+05
volume_vpt                 6.181377e+03
volume_nvi                 1.223460e+03
Adj Close                  2.149561e+02
trend_cci                  1.260245e+02
trend_aroon_ind            9.482125e+01
trend_psar_up              6.490456e+01
volatility_kcl             6.191417e+01
Low                        6.183496e+01
Close                      6.121145e+01
Open                       6.110816e+01
trend_ichimoku_conv        6.099643e+01
trend_psar_down            6.089284e+01
volatility_kcc             6.086062e+01
momentum_kama              6.084841e+01
High                       6.076618e+01
volume_vwap                6.076500e+01
trend_ema_fast             6.073421e+01
trend_sma_fast             6.071679e+01
volatility_bbl             6.068842e+01
trend_ichimoku_a           6.059800e+01
volatility_bbm             6.040041e+01


In [7]:
end = dt.datetime.now()
start = end - dt.timedelta(days=365*15)
df = web.DataReader(ticker,'yahoo',start,end)
data = ta.add_all_ta_features(df, "Open", "High", "Low", "Close", "Volume", fillna=True).reset_index()
data = data[['Date','trend_ichimoku_base','volume_obv','momentum_kama','trend_ema_slow','volume_vpt','Open','High','Low','Close','Volume','Adj Close']]
data['forecast'] = data['Adj Close'].shift(-1)
data['Daily Return'] = (data['forecast'].pct_change())*100
data['target'] = int(False)
data.loc[data['Daily Return'] > 0, 'target']=int(True)
data

Unnamed: 0,Date,trend_ichimoku_base,volume_obv,momentum_kama,trend_ema_slow,volume_vpt,Open,High,Low,Close,Volume,Adj Close,forecast,Daily Return,target
0,2005-07-21,101.975002,135000.0,101.800003,101.800003,-6162.681776,102.150002,102.160004,101.790001,101.800003,135000.0,62.432251,62.524311,,0
1,2005-07-22,101.955002,260100.0,101.861283,101.811114,-5939.768776,101.949997,102.129997,101.750000,101.949997,125100.0,62.524311,62.524311,0.000000,0
2,2005-07-25,101.959999,357500.0,101.897368,101.821401,184.324518,101.949997,102.169998,101.779999,101.949997,97400.0,62.524311,62.591789,0.107923,1
3,2005-07-26,101.959999,470500.0,101.964208,101.839075,121.923191,101.879997,102.059998,101.879997,102.059998,113000.0,62.591789,62.554951,-0.058855,0
4,2005-07-27,101.959999,397500.0,101.978701,101.850996,79.009005,101.910004,102.080002,101.900002,102.000000,73000.0,62.554951,62.738960,0.294157,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3769,2020-07-13,117.655003,648154800.0,118.180740,117.944708,-4103.569365,118.459999,118.529999,118.370003,118.480003,4803200.0,118.480003,118.669998,0.160360,1
3770,2020-07-14,117.790001,652141500.0,118.233363,117.998434,5987.936622,118.580002,118.739998,118.580002,118.669998,3986700.0,118.669998,118.680000,0.008429,1
3771,2020-07-15,118.005001,655545100.0,118.274191,118.048920,6679.954938,118.500000,118.699997,118.500000,118.680000,3403600.0,118.680000,118.760002,0.067410,1
3772,2020-07-16,118.040001,659072000.0,118.372427,118.101593,2664.346119,118.830002,118.889999,118.739998,118.760002,3526900.0,118.760002,118.845001,0.071572,1


In [8]:
df = data.drop(['Date','Daily Return','Adj Close','forecast'], axis=1)
df = df.dropna()
df

Unnamed: 0,trend_ichimoku_base,volume_obv,momentum_kama,trend_ema_slow,volume_vpt,Open,High,Low,Close,Volume,target
0,101.975002,135000.0,101.800003,101.800003,-6162.681776,102.150002,102.160004,101.790001,101.800003,135000.0,0
1,101.955002,260100.0,101.861283,101.811114,-5939.768776,101.949997,102.129997,101.750000,101.949997,125100.0,0
2,101.959999,357500.0,101.897368,101.821401,184.324518,101.949997,102.169998,101.779999,101.949997,97400.0,1
3,101.959999,470500.0,101.964208,101.839075,121.923191,101.879997,102.059998,101.879997,102.059998,113000.0,0
4,101.959999,397500.0,101.978701,101.850996,79.009005,101.910004,102.080002,101.900002,102.000000,73000.0,1
...,...,...,...,...,...,...,...,...,...,...,...
3769,117.655003,648154800.0,118.180740,117.944708,-4103.569365,118.459999,118.529999,118.370003,118.480003,4803200.0,1
3770,117.790001,652141500.0,118.233363,117.998434,5987.936622,118.580002,118.739998,118.580002,118.669998,3986700.0,1
3771,118.005001,655545100.0,118.274191,118.048920,6679.954938,118.500000,118.699997,118.500000,118.680000,3403600.0,1
3772,118.040001,659072000.0,118.372427,118.101593,2664.346119,118.830002,118.889999,118.739998,118.760002,3526900.0,1


In [9]:
train,test = sklearn.model_selection.train_test_split(df, test_size=0.2, random_state = 222)

In [10]:
train,val = sklearn.model_selection.train_test_split(train, test_size=0.2, random_state = 222)

In [11]:
trainy = train['target']
del train['target']
valy = val['target']
del val['target']
testy = test['target']
del test['target']

In [12]:
#Support Vector Machine 
from sklearn import svm
model1 = sklearn.svm.SVC()
model1.fit(train,trainy)
model1.score(val,valy)

0.5016556291390728

In [13]:
#Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
model2 = sklearn.ensemble.RandomForestClassifier()
model2.fit(train,trainy)
model2.score(val,valy)

0.5132450331125827

In [14]:
#Logistic Regression
from sklearn.linear_model import LogisticRegression
model3 = sklearn.linear_model.LogisticRegression()
model3.fit(train,trainy)
model3.score(val,valy)

0.5033112582781457

In [15]:
#scores of each model (generally: higher the more accurate)
mod1prob = model1.score(test,testy)
mod2prob = model2.score(test,testy)
mod3prob = model3.score(test,testy)
print(model1.score(test,testy))
print(model2.score(test,testy))
print(model3.score(test,testy))

0.5602649006622517
0.5099337748344371
0.5721854304635762


In [16]:
#columns to use below to predict tomorrow's return (whether it will be a gain (1) or a loss (0))
p1 = data1[val.columns[0]].iloc[-1]
p2 = data1[val.columns[1]].iloc[-1]
p3 = data1[val.columns[2]].iloc[-1]
p4 = data1[val.columns[3]].iloc[-1]
p5 = data1[val.columns[4]].iloc[-1]
p6 = data1[val.columns[5]].iloc[-1]
p7 = data1[val.columns[6]].iloc[-1]
p8 = data1[val.columns[7]].iloc[-1]
p9 = data1[val.columns[8]].iloc[-1]
p10 = data1[val.columns[9]].iloc[-1]

prediction = model3.predict([[p1,p2,p3,p4,p5,p6,p7,p8,p9,p10]])
#prediction = model3.predict([[p1,p2,p3,p4,p5,p6,p7,p8,p9]])
#prediction = model3.predict([[p1,p2,p3,p4]])
if prediction == 1:
    print(f'{ticker} => Buy: There is a {mod3prob.round(4)*100}% confidence of having a positive daily return.')
else:
    print(f'{ticker} => Sell: There is a {mod3prob.round(4)*100}% confidence of having a negative daily return.')

AGG => Buy: There is a 57.220000000000006% confidence of having a positive daily return.


# Fixed Income ETF Screener

In [18]:
from bs4 import BeautifulSoup as bs
import requests

In [19]:
##stocks that have a 1 or 2 (buy) average recommendation from analysts usually beat the market 
#this is a little different but grouped the top rated according to ETFdb Free Trial
screener = pd.read_excel('fixed_income_etf.xlsx')
screener = screener[['Symbol','YTD Price Change','ETFdb Pro Tools']]
screener = screener.sort_values('ETFdb Pro Tools')
#gets the etfs with the highest rating of A- or higher 
etfs = screener.head(80)

In [20]:
start = '2019-01-01'
end = '2020-01-01'
failed = []
passed = []
data = pd.DataFrame()
for ticker in etfs[['Symbol']].values:
    try:
        data[ticker] = web.DataReader(ticker, data_source= "yahoo", start = start, end=end)["Adj Close"]
        passed.append(ticker)
    except (IOError, KeyError):
        msg = 'Failed to read symbol: {0!r}, replacing with NaN.'
        failed.append(ticker)

In [21]:
#AQR study on momentum:
#suggested that securities with a positive return from the previos year usually repeats in the next year
#dataframe screener for all the stocks that have had positive returns in the past time window
data = data.dropna(axis=1)
returns = ((data.iloc[-1] - data.iloc[0])/data.iloc[0])*100
returns = pd.DataFrame(returns)
returns.columns.name = 'Ticker'
returns = returns[(returns[0] > 0)]
returns = returns.sort_values(0,ascending=False)
returns

Ticker,0
UJB,28.786511
ILTB,19.122348
BLV,18.600559
PCY,17.658411
LQD,17.005247
...,...
SCHO,3.486927
SHY,3.368372
MINT,3.335264
SHV,2.351239


In [22]:
def fundamental_metric(soup, metric):
    return soup.find(text = metric).find_next(class_='snapshot-td2').text

def get_fundamental_data(df_fund):
    for symbol in df_fund.index:
        try:
            url = ("http://finviz.com/quote.ashx?t=" + symbol)
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}
            soup = bs(requests.get(url, headers=headers).content)            
            for m in df_fund.columns:                
                df_fund.loc[symbol,m] = fundamental_metric(soup,m)                
        except Exception as e:
            print (symbol, 'not found')
    return df_fund

In [23]:
stock_list = returns.index.values
metric = ['Price','Dividend','RSI (14)','SMA20','SMA50','SMA200','Perf YTD','ATR']

In [24]:
df_fund = pd.DataFrame(index=stock_list,columns=metric)
df_fund = get_fundamental_data(df_fund)
df_fund = df_fund.sort_values('RSI (14)')
df_fund['SMA20'] = df_fund['SMA20'].str.replace('%', '')
df_fund['SMA50'] = df_fund['SMA50'].str.replace('%', '')
df_fund['SMA200'] = df_fund['SMA200'].str.replace('%', '')
df_fund['Perf YTD'] = df_fund['Perf YTD'].str.replace('%', '')
df_fund = df_fund.apply(pd.to_numeric, errors='coerce')

EMSH not found


# THESE BUY/LONG AND SELL/SHORT SIGNALS ARE JUST FOR RSI

# Can put the tickers in the ML models at the top of the notebook

In [25]:
df_fund = df_fund.sort_values('RSI (14)',ascending = True).dropna(axis='index')
buy = df_fund.index.values[0:10]
sell = df_fund.index.values[-11:-1]
print(f'Possible Buy: {buy}')
print(f'Possible Sell/Short: {sell}')
df_fund

Possible Buy: ['VMBS' 'IBDL' 'MBB' 'SCHO' 'VGSH' 'SHV' 'SHY' 'IBDM' 'SCHR' 'HYLD']
Possible Sell/Short: ['PZT' 'LQD' 'TFI' 'BIV' 'MUB' 'ILTB' 'MINT' 'BLV' 'PZA' 'ITM']


Unnamed: 0,Price,Dividend,RSI (14),SMA20,SMA50,SMA200,Perf YTD,ATR
VMBS,54.28,1.37,39.76,-0.16,-0.34,1.02,2.07,0.09
IBDL,25.26,0.60,45.40,-0.08,-0.15,-0.03,-0.16,0.05
MBB,110.68,2.75,46.84,-0.01,-0.18,1.29,2.40,0.16
SCHO,51.59,1.00,47.70,0.00,-0.05,1.03,2.20,0.02
VGSH,62.17,1.19,48.35,0.00,-0.03,1.05,2.20,0.03
...,...,...,...,...,...,...,...,...
MINT,101.70,2.31,77.48,0.09,0.44,0.50,0.09,0.05
BLV,114.98,3.42,77.69,2.39,4.36,9.28,14.68,0.75
PZA,26.85,0.72,78.10,0.86,1.72,1.99,1.28,0.07
ITM,51.38,1.10,81.57,0.68,1.72,2.67,2.23,0.14


In [26]:
#RSI (14) can be a buy signal (lower numbers "usually lower than 30" indicate a buy signal bc security is oversold)
#can plug these tickers up top in the machine learning models for next trading day prediction
###EXAMPLE
buy[0]

'VMBS'