In [1]:
#Libraries
import pandas as pd
import random
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import pandas_datareader.data as web
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
import warnings
warnings.filterwarnings('ignore')

#XMPT is a municipal bond etf that is mainly made up of Nuveen Muni funds

#Main ETF function to get the neccessary long term data
def etf(ticker='xmpt',end=dt.datetime.now(),start = '2000-01-01'): 
    etf = web.DataReader(ticker,'yahoo',start,end)[['Close','High','Low']]
    etf['Target'] = etf['Close'].shift(-1)
    etf['sm15'] = etf['Close'].rolling(window=15).mean()
    etf = etf.drop('Close',axis=1)
    etf = etf.dropna()
    return etf

#Macro possible drivers of fixed income performance
def macro(end=dt.datetime.now(),start = '2000-01-01'):
    symbols_list = 'CPIAUCSL','DGS10','FEDFUNDS','USD3MTD156N','TEDRATE','UNRATE','PAYEMS','IC4WSA','VIXCLS','BAMLCC0A0CMTRIV','BAMLHYH0A0HYM2TRIV'
    symbols = []
    for ticker in symbols_list: 
        try:
            r = web.DataReader(ticker,'fred',start,end)
            r[f'{ticker}'] = r[f'{ticker}']
            r1 = r[[f'{ticker}']]
            symbols.append(r1)
        except:
            msg = 'Failed to read symbol: {0!r}, replacing with NaN.'

    df = pd.concat(symbols, sort=False, axis=1)
    df = df.fillna(method='ffill')
    df = df.dropna()
    return df

#Function to combine the ETF and macro driver dataframes
def combine(etf,df):
    data = pd.concat([etf,df],axis=1)
    data = data.dropna()
    return data

def main(df,ticker):
    #resampling data based on quarterly
    df = df.resample('M',convention='start').asfreq()
    df = df.dropna(inplace=False)
    #splitting the data set into train, test, and validation
    train,test = sklearn.model_selection.train_test_split(df, test_size=0.2, random_state = 222)
    train,val = sklearn.model_selection.train_test_split(train, test_size=0.2, random_state = 222)
    trainy = train['Target']
    del train['Target']
    valy = val['Target']
    del val['Target']
    testy = test['Target']
    del test['Target']
    #AdaBoost Regressor
    from sklearn.ensemble import AdaBoostRegressor
    from sklearn.datasets import make_regression
    model = AdaBoostRegressor(random_state=0, n_estimators=100)
    model.fit(train, trainy)
    score = model.score(val,valy)
    modprob = model.score(test,testy)
    #prediction
    pred = web.DataReader('ILTB','yahoo','2000-01-01',dt.datetime.now())[['High','Low','Close']]
    pred['sm15'] = pred['Close'].rolling(window=15).mean()
    pred = pred.drop('Close',axis=1)
    pred = pred.resample('M').first()
    pred = pred.resample('M',convention='start').asfreq()
    pred = pred.dropna(inplace=False)
    #High
    p1 = pred.iloc[-1][0]
    #Low
    p2 = pred.iloc[-1][1]
    #15 Day Moving Average
    p3 = pred.iloc[-1][2]
    #3 month LIBOR, 3 month libor to treasury spread, unemployment rate, nonfarm employees
    #4 week moving average of initial claims, VIX, BAML US Corporate Index, BAML US HY Index
    symbols_list = 'CPIAUCSL','DGS10','FEDFUNDS','USD3MTD156N','TEDRATE','UNRATE','PAYEMS','IC4WSA','VIXCLS','BAMLCC0A0CMTRIV','BAMLHYH0A0HYM2TRIV'
    symbols = []
    for ticker in symbols_list: 
        try:
            r = web.DataReader(ticker,'fred','2020-01-01',dt.datetime.now())
            r[f'{ticker}'] = r[f'{ticker}']
            r1 = r[[f'{ticker}']]
            symbols.append(r1)
        except:
            msg = 'Failed to read symbol: {0!r}, replacing with NaN.'

    macro = pd.concat(symbols, sort=False, axis=1)
    macro = macro.fillna(method='ffill')
    macro = macro.dropna()
    p4,p5,p6,p7 = macro.iloc[-1][0],macro.iloc[-1][1],macro.iloc[-1][2],macro.iloc[-1][3]
    p8,p9,p10,p11 = macro.iloc[-1][4],macro.iloc[-1][5],macro.iloc[-1][6],macro.iloc[-1][7]
    p12,p13,p14 = macro.iloc[-1][8],macro.iloc[-1][9],macro.iloc[-1][10]
    prediction = model.predict([[p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14]])
    print1 = (f'Model Validation Data Score: {score}')
    print2 = (f'Model Test Data Score: {modprob}')
    print3 = (f'Next Close Price: ${prediction[0]}')
    return print1, print2, print3,df

ticker='XMPT'
main_df = combine(etf = etf(ticker,end=dt.datetime.now(),start = '2000-01-01'), 
        df = macro(end=dt.datetime.now(),start = '2000-01-01')) 
df = main_df
if __name__ == '__main__':
    main(df, ticker)
    
main(df, ticker)

('Model Validation Data Score: 0.9713301859488833',
 'Model Test Data Score: 0.9908084353877905',
 'Next Close Price: $29.60600051879883',
                  High        Low     Target       sm15  CPIAUCSL  DGS10  \
 2011-08-31  25.610001  25.450001  25.350000  25.028666   226.106   2.23   
 2011-09-30  25.809999  25.809999  25.809999  25.580000   226.597   1.92   
 2011-10-31  25.790001  25.730000  25.809999  25.553333   226.750   2.17   
 2011-11-30  26.260000  26.260000  26.200001  26.036667   227.169   2.08   
 2012-01-31  27.879999  27.879999  28.080000  27.460000   227.842   1.83   
 ...               ...        ...        ...        ...       ...    ...   
 2020-04-30  24.379999  24.150000  24.400000  24.902666   255.902   0.64   
 2020-06-30  26.290001  26.110001  26.240000  26.151333   257.214   0.66   
 2020-07-31  27.459999  27.370001  27.500000  27.054667   258.723   0.55   
 2020-08-31  27.150000  26.809999  27.180000  27.333333   259.681   0.72   
 2020-09-30  27.000000  2