In [1]:
import pandas as pd
import yfinance as yf

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.style.use('seaborn')

from add_indicators import bollinger_bands, rsi, macd, add_all_indicators

In [2]:
def show_all():
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', None)

def add_change(df):
    df['% Change'] = df['Close'].diff()
    df['% Change'] = df['% Change']/df['Close']
    df['% Change'] = df['% Change'].shift(periods=-1)

# shows volume change from previous period to current period, lagging
def add_volume(df):
    df['% Volume'] = df['Volume'].diff()
    df['% Volume'] = df['% Volume']/df['Volume']
    #df['% Volume'] = df['% Volume'].shift(periods=-1)

def preprocess(df):
    add_change(df)
    add_volume(df)
    add_all_indicators(df)
    return df.dropna()

def create_model(model, train_X, train_y):
    model.fit(train_X, train_y)
    return model

def run_predictions(name, model, val_X, val_y):
    model_predictions = model.predict(val_X)
    mae = mean_absolute_error(val_y, model_predictions)
    score = r2_score(val_y, model_predictions)
    return name, mae, score

def plot(start, end, df):
    plt.plot(df['% Change'].iloc[start:end], label='% Change')
    plt.plot(df['rfr'].iloc[start:end], label='rfr')
    plt.plot(df['xgb'].iloc[start:end], label='xgb')
    plt.title('Results')
    plt.legend()
    plt.show()
    
def show_results(results):
    for r in results:
        df[r[0]] = r[1].predict(df[features])
        print('Model: ', r[0])
        print('% MAE: ', r[2])
        print('Score: ', r[3])
        print()

In [3]:
ticker = yf.Ticker("spy")
df = ticker.history(period="1y", interval="1d")

df = preprocess(df)

features = ['Close', '% Volume', 'rsi',
            'ema_9', 'sma_5', 'sma_10', 'sma_15', 'sma_30']

X = df[features].copy()
y = df['% Change'].copy()

X_train, X_val, y_train, y_val = train_test_split(X, y, random_state = 0)

In [7]:
model = XGBRegressor(objective='reg:squarederror')
model.fit(X_train, y_train)

model_predictions = model.predict(X_val)
mae = mean_absolute_error(y_val, model_predictions)
score = r2_score(y_val, model_predictions)

print('MAE: {}'.format(mae))
print('R2: {}'.format(score))

MAE: 0.009588287908595172
R2: 0.1360563233722275
