In [60]:
!pip install torch pandas sklearn yfinance boto3




In [49]:
import numpy as np
import argparse

from models.GRU_model import train as gru_train, predict as gru_predict
from models.LSTM_model import train as lstm_train, predict as lstm_predict
from dataset_QQQ import Data as dataset_QQQ, Config as Config_QQQ
from dataset_top10 import Data as dataset_top10, Config as Config_top10

In [50]:
def main(config):
    """Train and/or predict with a GRU/LSTM model and report the results.

    Steps:
      1. Build the dataset selected by ``config.dataset_type``
         (``"qqq"`` or ``"top10"``).
      2. If ``config.do_train`` is set, fetch the train/validation split and
         run the training routine selected by ``config.model_type``
         (``"gru"`` or ``"lstm"``).
      3. If ``config.do_predict`` is set, run the matching predict routine on
         the test data, rescale the result with ``dataset.std`` and
         ``dataset.mean``, print the squared error divided by ``std ** 2``
         and print the last ``config.predict_day`` predictions per label.

    Args:
        config: Configuration object. Attributes read here: ``dataset_type``,
            ``model_type``, ``do_train``, ``do_predict``,
            ``label_in_feature_index``, ``label_columns``, ``predict_day``.

    Returns:
        None. Results are printed.

    Raises:
        ValueError: If ``config.dataset_type`` or ``config.model_type`` is not
            one of the supported values.
    """
    if config.dataset_type == "qqq":
        dataset = dataset_QQQ(config)
    elif config.dataset_type == "top10":
        dataset = dataset_top10(config)
    else:
        raise ValueError(
            "Unsupported dataset_type {!r}; expected 'qqq' or 'top10'".format(config.dataset_type))

    # Fail early with a clear message instead of hitting an unbound name later.
    if config.model_type not in ("gru", "lstm"):
        raise ValueError(
            "Unsupported model_type {!r}; expected 'gru' or 'lstm'".format(config.model_type))

    if config.do_train:
        train_X, valid_X, train_Y, valid_Y = dataset.get_train_and_valid_data()
        if config.model_type == "gru":
            gru_train(config, [train_X, train_Y, valid_X, valid_Y])
        else:
            lstm_train(config, [train_X, train_Y, valid_X, valid_Y])

    # Everything below needs `pred_result`, so it only runs when prediction
    # was requested.
    if not config.do_predict:
        return

    test_X = dataset.get_test_data()
    if config.model_type == "gru":
        pred_result = gru_predict(config, test_X)
    else:
        pred_result = lstm_predict(config, test_X)

    label_index = config.label_in_feature_index
    label_data = dataset.data[dataset.train_num + dataset.start_num_in_test:, label_index]
    # Map the model output back to the scale of the raw data.
    predict_data = pred_result * dataset.std[label_index] + dataset.mean[label_index]

    label_name = [dataset.data_column_name[i] for i in label_index]
    label_column_num = len(config.label_columns)

    # Prediction row i is compared with the label `predict_day` rows later;
    # the trailing `predict_day` predictions have no label to compare against.
    loss = np.mean((label_data[config.predict_day:] - predict_data[:-config.predict_day]) ** 2, axis=0)
    loss_norm = loss / (dataset.std[label_index] ** 2)
    print("The mean squared error of stock {} is ".format(label_name) + str(loss_norm))

    for i in range(label_column_num):
        print("The predicted stock {} for the next {} day(s) is: ".format(label_name[i], config.predict_day) + str(np.squeeze(predict_data[-config.predict_day:, i])))
 

In [51]:
# Build the top-10 configuration and tag it so main() selects the top-10 dataset.
config = Config_top10()
config.dataset_type = "top10"

# Collect every attribute whose name does not start with an underscore,
# then echo the settings before running.
config_dict = {
    key: getattr(config, key)
    for key in dir(config)
    if not key.startswith("_")
}
for key, value in config_dict.items():
    print(key, ' : ', value)

# Run the pipeline with the configuration printed above.
main(config)

batch_size  :  64
data_save_path  :  ./dataset/
dataset_type  :  top10
do_continue_train  :  False
do_predict  :  True
do_train  :  True
do_train_visualized  :  False
dropout_rate  :  0.2
epoch  :  100
etf  :  QQQ
feature_columns  :  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
hidden_size  :  128
input_size  :  22
label_columns  :  [20, 21]
label_in_feature_index  :  [20, 21]
layers  :  2
learning_rate  :  0.001
model_save_path  :  ./saved_model/
model_type  :  gru
output_size  :  2
patience  :  5
predict_day  :  1
random_seed  :  42
shuffle_train_data  :  True
tickers  :  ['AAPL', 'MSFT', 'AMZN', 'TSLA', 'GOOG', 'FB', 'GOOGL', 'NVDA', 'PYPL', 'ADBE', 'QQQ', 'QQQ']
time_step  :  20
train_data_rate  :  0.95
valid_data_rate  :  0.15
Epoch 0/100
The train loss is 0.166469. The valid loss is 0.047024.
Epoch 1/100
The train loss is 0.036278. The valid loss is 0.021532.
Epoch 2/100
The train loss is 0.020114. The valid loss is 0.013128.
Epoch 3/100
The trai

In [59]:
import boto3

# Destination S3 bucket for the uploaded file.
bucket = "stockpricestorage"

# Upload the local requirements file through the client that backs the
# boto3 S3 resource; the object key mirrors the local file name.
s3 = boto3.resource('s3')
s3.meta.client.upload_file(
    Filename='./requirements.txt',
    Bucket=bucket,
    Key='requirements.txt',
)