In [1]:
import pandas as pd
import yfinance as yf

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.style.use('seaborn')

from add_indicators import bollinger_bands, rsi, macd, add_all_indicators

from sklearn import preprocessing

In [2]:
def show_all():
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', None)

def add_change(df):
    df['% Change'] = df['Close'].diff()
    df['% Change'] = df['% Change']/df['Close']
    df['% Change'] = df['% Change'].shift(periods=-1)
    
def add_volume(df):
    df['% Volume'] = df['Volume'].diff()
    df['% Volume'] = df['% Volume']/df['Volume']
    #df['% Volume'] = df['% Volume'].shift(periods=-1)

def preprocess(df):
    add_change(df)
    add_volume(df)
    add_all_indicators(df)
    return df.dropna()

def create_model(model, train_X, train_y):
    model.fit(train_X, train_y)
    return model

def run_predictions(name, model, val_X, val_y):
    model_predictions = model.predict(val_X)
    mae = mean_absolute_error(val_y, model_predictions)
    score = r2_score(val_y, model_predictions)
    return name, mae, score

def plot(start, end, df):
    plt.plot(df['% Change'].iloc[start:end], label='% Change')
    plt.plot(df['rfr'].iloc[start:end], label='rfr')
    plt.plot(df['xgb'].iloc[start:end], label='xgb')
    plt.title('Results')
    plt.legend()
    plt.show()
    
def show_results(results):
    for r in results:
        df[r[0]] = r[1].predict(df[features])
        print('Model: ', r[0])
        print('% MAE: ', r[2])
        print('Score: ', r[3])
        print()

In [23]:
ticker = yf.Ticker("spy")
df = ticker.history(period="1y", interval="1d")

df = preprocess(df)

#print(df.columns)
'''
print(df[['Close', 'ema_9']].head())
df[['Close', 'ema_9']] = preprocessing.StandardScaler().fit_transform(df[['Close', 'ema_9']])
print(df[['Close', 'ema_9']].head())
'''
print(df[['Close', 'ema_9']].head())
df[['Close', 'ema_9']] = preprocessing.StandardScaler().fit_transform(df[['Close', 'ema_9']])
print(df[['Close', 'ema_9']].head())

features = ['Close', '% Volume', 'rsi',
            'ema_9', 'sma_5', 'sma_10', 'sma_15', 'sma_30']

X = df[features].copy()
y = df['% Change'].copy()

                 Close       ema_9
Date                              
2020-05-08  288.766968  277.845413
2020-05-11  288.826202  278.942658
2020-05-12  283.069458  279.935156
2020-05-13  278.063110  280.249768
2020-05-14  281.390808  280.030360
               Close     ema_9
Date                          
2020-05-08 -1.807362 -1.991067
2020-05-11 -1.805324 -1.953432
2020-05-12 -2.003433 -1.919390
2020-05-13 -2.175718 -1.908599
2020-05-14 -2.061201 -1.916124


In [27]:
train_X, val_X, train_y, val_y = train_test_split(X, y, random_state = 0)

results = []
fitted_models = []
for n, model in [('rfr', RandomForestRegressor()), ('xgb', XGBRegressor())]:
    fitted_model = create_model(model, train_X, train_y)
    name, mae, score = run_predictions(n, fitted_model, val_X, val_y)
    results.append([name, fitted_model, mae, score])
    df[n] = fitted_model.predict(df[features])
    fitted_models.append(fitted_model)

show_results(results)

Model:  rfr
% MAE:  0.01055386842063081
Score:  -0.06793241113221127

Model:  xgb
% MAE:  0.010470797461438693
Score:  -0.07861419383316726

