In [1]:
import pandas as pd
import yfinance as yf

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.style.use('seaborn')

from add_indicators import bollinger_bands, rsi, macd, add_all_indicators

In [2]:
def show_all():
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', None)

def add_change(df):
    df['% Change'] = df['Close'].diff()
    df['% Change'] = df['% Change']/df['Close']
    df['% Change'] = df['% Change'].shift(periods=-1)
    
def add_volume(df):
    df['% Volume'] = df['Volume'].diff()
    df['% Volume'] = df['% Volume']/df['Volume']
    #df['% Volume'] = df['% Volume'].shift(periods=-1)

def preprocess(df):
    add_change(df)
    add_volume(df)
    add_all_indicators(df)
    return df.dropna()

def create_model(model, train_X, train_y):
    model.fit(train_X, train_y)
    return model

def run_predictions(name, model, val_X, val_y):
    model_predictions = model.predict(val_X)
    mae = mean_absolute_error(val_y, model_predictions)
    score = r2_score(val_y, model_predictions)
    return name, mae, score

def plot(start, end, df):
    plt.plot(df['% Change'].iloc[start:end], label='% Change')
    plt.plot(df['rfr'].iloc[start:end], label='rfr')
    plt.plot(df['xgb'].iloc[start:end], label='xgb')
    plt.title('Results')
    plt.legend()
    plt.show()
    
def show_results(results):
    for r in results:
        df[r[0]] = r[1].predict(df[features])
        print('Model: ', r[0])
        print('% MAE: ', r[2])
        print('Score: ', r[3])
        print()

In [5]:
ticker = yf.Ticker("spy")
df = ticker.history(period="1y", interval="1d")

df = preprocess(df)

#features = ['Close', 'bb_bbl', 'bb_bbh', 'rsi']
#features = ['bb_bbl', 'bb_bbh', 'rsi', 'macd', 'macd_diff', 'macd_signal']
#features = ['Close', 'bb_bbl', 'bb_bbh', 'rsi', 'macd', 'macd_diff', 'macd_signal', '% Volume']
'''
features = ['Close', '% Volume',
            'bb_bbh', 'bb_bbl', 'bb_avg', 'bb_bbh_ind', 'bb_bbl_ind',
            'bb_pband', 'bb_wband', 'rsi', 'macd', 'macd_diff', 'macd_signal',
            'don_h', 'don_l', 'don_m', 'don_p', 'don_w',
            'ema_9', 'sma_5', 'sma_10', 'sma_15', 'sma_30', 'sma_50']

features = ['Close',# '% Volume',
            'bb_bbh', 'bb_bbl', 'bb_avg',
            'macd', 'macd_signal',
            'don_h', 'don_l', 'don_m', 'don_p', 'don_w']
'''
features = ['Close', '% Volume',
            'bb_bbh', 'bb_bbl', 'bb_avg', 'bb_bbh_ind', 'bb_bbl_ind',
            'bb_pband', 'bb_wband', 'rsi', 'macd', 'macd_diff', 'macd_signal',
            'don_h', 'don_l', 'don_m', 'don_p', 'don_w',
            'ema_9', 'sma_5', 'sma_10', 'sma_15', 'sma_30', 'sma_50']

X = df[features].copy()
y = df['% Change'].copy()

train_X, val_X, train_y, val_y = train_test_split(X, y, random_state = 0)

In [7]:
from tpot import TPOTRegressor

tpot = TPOTRegressor(generations=15, population_size=100, verbosity=2, n_jobs=4)
tpot.fit(train_X, train_y)
print(tpot.score(val_X, val_y))
tpot.export('test_pipeline.py')

Optimization Progress:   0%|          | 0/1600 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: -0.00014290829287786207

Generation 2 - Current best internal CV score: -0.00014290829287786207

Generation 3 - Current best internal CV score: -0.0001426016678061029

Generation 4 - Current best internal CV score: -0.0001426016678061029

Generation 5 - Current best internal CV score: -0.0001426016678061029

Generation 6 - Current best internal CV score: -0.0001426016678061029

Generation 7 - Current best internal CV score: -0.0001426016678061029

Generation 8 - Current best internal CV score: -0.0001426016678061029

Generation 9 - Current best internal CV score: -0.0001426016678061029

Generation 10 - Current best internal CV score: -0.0001426016678061029

Generation 11 - Current best internal CV score: -0.0001426016678061029

Generation 12 - Current best internal CV score: -0.0001426016678061029

Generation 13 - Current best internal CV score: -0.0001426016678061029

Generation 14 - Current best internal CV score: -0.0001426016678061029