In [None]:
# Based on "Stock Market Forecasting Using Machine Learning Algorithms"
# by Shunrong Shen, Haomiao Jiang, Tongda Zhang
# https://pdfs.semanticscholar.org/b68e/8d2f4d2c709bb5919b82effcb6a7bbd3db37.pdf
# Data from yahoo.com and investing.com
# !pip install TPOT

# ===== 0 Utils and Consts
import time
import csv
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tpot import TPOTClassifier

# =====

# Input/output locations for the notebook.
# NOTE(review): PATH_ROOT is a machine-specific absolute directory; adjust it
# before running on another machine.
PATH_ROOT = r'C:/Users/Motohiro/Downloads/'
# CSV that is loaded with pd.read_csv().
PATH_FILE = f'{PATH_ROOT}world_SPY_a.csv'
# Destination passed to model.export().
PATH_EXPORT = f'{PATH_ROOT}model_world_SPY_a_v3.py'

# =====

# Utils
# Reference point for the stopwatch helpers below. perf_counter() is a
# monotonic clock, so elapsed values cannot be skewed (or turn negative) when
# the system wall clock is adjusted, unlike time.time().
start_time = time.perf_counter()


def watch_restart():
    """Reset the stopwatch so the next watch_print() measures from now."""
    global start_time
    start_time = time.perf_counter()


def watch_print(title):
    """Print ``title`` followed by the seconds elapsed since the last restart.

    Args:
        title: Label printed in front of the elapsed time.
    """
    # Only reads the module-level start_time, so no ``global`` is required.
    elapsed = time.perf_counter() - start_time
    print(title, round(elapsed, 4), 'seconds')

In [None]:
# Reset the stopwatch so the report below covers only this cell.
watch_restart()

# ===== 1.0 Get Data from CSV
# Read the CSV at PATH_FILE into a DataFrame and list its column names.
df = pd.read_csv(PATH_FILE)
print(df.columns)

# Report how long the steps above took.
watch_print('Get Data')

In [None]:
# Split data
# Default feature columns used when the caller does not supply any.
# 'XSensexdiffNorm' is currently left out of the feature set.
_FEATURE_COLUMNS = (
    'XFtsediffNorm',
    'XStoxxdiffNorm',
    'XGdaxidiffNorm',
    'XSsmidiffNorm',
    'XN225diffNorm',
    'XAxjodiffNorm',
    'XHsidiffNorm',
)
# Default column whose sign defines the label.
_TARGET_COLUMN = 'YSpydiffActual'


def get_x_and_y(df, feature_columns=None, target_column=_TARGET_COLUMN):
    """Split a frame into a feature matrix and binary labels.

    Args:
        df: DataFrame holding the feature columns and the target column.
        feature_columns: Names of the columns to use as features. Defaults to
            the seven columns listed in ``_FEATURE_COLUMNS``.
        target_column: Column whose sign defines the label. Defaults to
            ``'YSpydiffActual'``.

    Returns:
        A tuple ``(X, y)``: ``X`` is ``df`` restricted to the feature columns,
        ``y`` is a NumPy array holding 1 where the target is > 0 and 0
        otherwise.

    Raises:
        KeyError: If a requested column is missing from ``df``.
    """
    columns = list(_FEATURE_COLUMNS if feature_columns is None else feature_columns)
    # A NaN target compares as False against 0, so such rows get label 0.
    y = np.where(df[target_column] > 0, 1, 0)
    X = df[columns]
    return X, y

# Build features/labels, then hold out 20% of the rows for testing.
# random_state pins the split so it is repeatable between runs.
X, y = get_x_and_y(df)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Quick look at the inputs and at the dimensions of each partition.
print('X', X.head())
print('y', y)
for _name, _part in (
    ('X_train', X_train),
    ('X_test', X_test),
    ('y_train', y_train),
    ('y_test', y_test),
):
    print(_name + ' shape', _part.shape)

In [None]:
# Pipeline search using TPOT's built-in 'TPOT light' configuration.
# NOTE(review): generations=10000 is a very large search budget and no time
# limit is set here — confirm this is intended.
model = TPOTClassifier(
    generations=10000,
    verbosity=2,
    n_jobs=-1,
    config_dict='TPOT light',
    # Seed the stochastic search so runs on the same data are repeatable;
    # same seed value as the train/test split.
    random_state=42,
)
model.fit(X_train, y_train)
# Evaluate the fitted model on the held-out split.
print('Score', model.score(X_test, y_test))

In [None]:
# Export the fitted TPOT model to the file at PATH_EXPORT.
model.export(PATH_EXPORT)