In [None]:
# Based on "Stock Market Forecasting Using Machine Learning Algorithms"
# by Shunrong Shen, Haomiao Jiang, Tongda Zhang
# https://pdfs.semanticscholar.org/b68e/8d2f4d2c709bb5919b82effcb6a7bbd3db37.pdf
# Data from yahoo.com, macrotrends.net, investing.com (2001-01-01 to 2020-01-24)

# Get Data from SQL Server
import pandas as pd
import numpy as np
import pyodbc 
import matplotlib.pyplot as plt
import sklearn.decomposition
import random
from sklearn.model_selection import train_test_split

# Number of consecutive rows held out for each profit-return evaluation period.
NUM_DAYS = 30

# NOTE(review): driver, server and database are hard-coded for a single
# workstation; consider moving them to configuration.
connection = pyodbc.connect("Driver={SQL Server Native Client 11.0};"
                      "Server=DESKTOP-2JHG1EA\\SQLEXPRESS;"
                      "Database=Sandbox;"
                      "Trusted_Connection=yes;")
try:
    # Rows come back ordered by FeatureDate, so positional slices taken later
    # are contiguous in that ordering.
    df = pd.read_sql(
        'SELECT * FROM [Sandbox].[dbo].[PredictionDataSetSp500] ORDER BY [FeatureDate]',
        connection)
finally:
    # The frame is fully loaded into memory; release the database handle
    # even if the query fails.
    connection.close()

# Save some random periods of data for profit return % calculation
def get_random_period(df, num_days=None):
    """Hold out one random block of consecutive rows from ``df``.

    Args:
        df: DataFrame to sample from; rows are selected by position.
        num_days: Number of consecutive rows to hold out. When ``None``
            (the default) the module-level ``NUM_DAYS`` is used.

    Returns:
        A ``(remaining, random_period)`` tuple. ``remaining`` contains every
        row of ``df`` that is not in the held-out block, re-indexed from 0.
        ``random_period`` is the held-out slice and keeps its original index
        labels.

    Raises:
        ValueError: If ``df`` does not have more than ``num_days`` rows.
    """
    if num_days is None:
        num_days = NUM_DAYS

    row_count = df.shape[0]
    if row_count <= num_days:
        raise ValueError(
            'Need more than ' + str(num_days) + ' rows to hold out a period, got '
            + str(row_count))

    # randint is inclusive on both ends; the start is drawn from
    # 1..row_count - num_days, so the block always fits inside the frame.
    random_index = random.randint(1, row_count - num_days)
    period_end = random_index + num_days
    random_period = df.iloc[random_index:period_end]

    # Slices are half-open, so these two pieces are exactly the rows outside
    # [random_index, period_end); no neighbouring row is dropped.
    before_period = df.iloc[:random_index]
    after_period = df.iloc[period_end:]

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    remaining = pd.concat([before_period, after_period], ignore_index=True)
    return remaining, random_period

# Seed the stdlib RNG so the same hold-out periods are drawn on every run,
# matching the fixed random_state used for train_test_split.
random.seed(42)

# Carve five hold-out periods out of df one after another; each draw removes
# its rows from df before the next draw.
holdout_periods = []
for _ in range(5):
    df, held_out_period = get_random_period(df)
    holdout_periods.append(held_out_period)

(random_period_1,
 random_period_2,
 random_period_3,
 random_period_4,
 random_period_5) = holdout_periods

# Split data
def get_x_and_y(df):
    """Extract the feature frame and binary target from ``df``.

    Args:
        df: DataFrame containing the ``Y_Index_GSPC`` column and the seven
            ``X_Index_*`` feature columns listed below.

    Returns:
        A ``(X, y)`` tuple. ``X`` is a DataFrame with the seven feature
        columns; ``y`` is a NumPy array holding 1 where ``Y_Index_GSPC`` is
        strictly positive and 0 otherwise.
    """
    feature_columns = [
        'X_Index_SSMI',
        'X_Index_N225',
        'X_Index_AXJO',
        'X_Index_HSI',
        'X_Index_N100',
        'X_Index_FTSE',
        'X_Index_GDAXI',
    ]
    # Binary label: 1 when the target value is positive, 0 for zero/negative.
    y = np.where(df['Y_Index_GSPC'] > 0, 1, 0)
    X = df[feature_columns]
    return X, y
    
# Build features/labels from the rows left after the hold-out periods were
# removed, then keep 20% aside for scoring with a fixed seed.
X, y = get_x_and_y(df)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Use AutoML
from tpot import TPOTClassifier

# random_state pins TPOT's stochastic pipeline search so a re-run of the
# notebook yields the same fitted pipeline, consistent with the seeded
# train/test split.
model = TPOTClassifier(
    verbosity=2,
    n_jobs=-1,
    config_dict='TPOT light',
    random_state=42,
)
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on the held-back test split.
test_score = model.score(X_test, y_test)
print('Score', test_score)
print('')

# Calculate Profit Return
def print_profit_return(model, random_period, period):
    """Print buy-and-hold vs. model-driven returns over one hold-out period.

    Starting from the same initial capital, two balances are compounded row
    by row using ``Y_Index_GSPC``:

    * "actual": every row's change is applied.
    * "model": a row's change is applied only when the model predicts 1 for
      that row; otherwise the balance is left unchanged.

    Args:
        model: Fitted estimator exposing ``predict``.
        random_period: DataFrame slice with the feature columns and
            ``Y_Index_GSPC``.
        period: Label printed as the heading of the report.

    Returns:
        None. The results are written to stdout.
    """
    X_random_period, _ = get_x_and_y(random_period)
    predictions = model.predict(X_random_period)

    capital_initial = 10000
    capital_actual = capital_initial
    capital_model = capital_initial

    # NOTE(review): the change is applied as capital * change, i.e. it is
    # treated as a fraction (0.01 == 1%) - confirm the column's units.
    daily_changes = random_period['Y_Index_GSPC'].to_numpy()
    for percent_change, prediction in zip(daily_changes, predictions):
        capital_actual += capital_actual * percent_change
        if prediction == 1:
            capital_model += capital_model * percent_change

    thirty_day_return = round(((capital_actual / capital_initial) - 1) * 100, 2)
    model_thirty_day_return = round(((capital_model / capital_initial) - 1) * 100, 2)

    print(period, 'Results:')
    print('Actual Capital: $' + str(round(capital_actual, 2)))
    print('Actual Returns Monthly: ' + str(thirty_day_return) + '%')
    print('Model Capital: $' + str(round(capital_model, 2)))
    print('Model Returns Monthly: ' + str(model_thirty_day_return) + '%')
    print('')

# Report the simulated returns for each of the five hold-out periods, in order.
labelled_periods = (
    ('Period 1', random_period_1),
    ('Period 2', random_period_2),
    ('Period 3', random_period_3),
    ('Period 4', random_period_4),
    ('Period 5', random_period_5),
)
for period_label, period_frame in labelled_periods:
    print_profit_return(model, period_frame, period_label)
