In [17]:
#imports
from LSTM import LSTM
import DataPreparationLSTM
import os
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
import math

In [2]:
# Load the 5-minute SPY bars and parse the timestamp column.
spy_raw = pd.read_csv('data/SPY_data_5min.csv')
spy_raw['time'] = pd.to_datetime(spy_raw['time'])

# Keep only rows whose time of day lies in the window 09:30 to 16:00
# (both bounds inclusive).
session_open = pd.to_datetime('09:30').time()
session_close = pd.to_datetime('16:00').time()
time_of_day = spy_raw['time'].dt.time
in_session = (time_of_day >= session_open) & (time_of_day <= session_close)

# Build the model input as a NEW frame instead of mutating a filtered slice
# in place: this avoids SettingWithCopyWarning and keeps the cell idempotent
# (re-running it always starts again from the CSV).
input_data = (
    spy_raw.loc[in_session]
    .drop(columns=['Unnamed: 0', 'money', 'open', 'high', 'low'])
    # 'time' is reduced to an 'HH:MM' string, i.e. only the time of day is kept.
    .assign(time=lambda frame: frame['time'].dt.strftime('%H:%M'))
)


In [45]:
# Network hyperparameter grids; every combination of these values is
# enumerated into `configs` further down.
learning_rates = [0.01]

# Candidate values for the `layer_one` ... `layer_four` arguments of LSTM.
# NOTE(review): 0 presumably disables a layer - confirm against the LSTM class.
layer_one = [20, 40]
layer_two = [20, 40]
layer_three = [0]
layer_four = [0]

In [46]:
# Values passed as `future=` to DataPreparationLSTM in the grid search below.
# NOTE(review): presumably the forecast horizon - confirm the unit.
futures = [1, 5, 20]

In [47]:
# Values passed as `lag=` to DataPreparationLSTM in the grid search below.
# NOTE(review): presumably the number of past observations per sample - confirm.
lags = [20, 30]

In [48]:
# Values passed as `semi_variance=` to DataPreparationLSTM in the grid search
# below; both settings are tried.
sv = [False, True]

In [49]:
# Cartesian product of the network hyperparameter grids. Each entry is
# [learning_rate, layer_one, layer_two, layer_three, layer_four]; the
# right-most grid varies fastest, matching a nested-loop enumeration.
configs = [
    [rate, units_first, units_second, units_third, units_fourth]
    for rate in learning_rates
    for units_first in layer_one
    for units_second in layer_two
    for units_third in layer_three
    for units_fourth in layer_four
]

In [None]:
# Grid search: for every data configuration (future, lag, semi_variance),
# train one LSTM per network configuration and per fold, pick the network
# configuration with the highest mean (train R^2 + test R^2) over the folds,
# and write its error measures and hyperparameters to a text file.
# NOTE(review): no random seed is set in this cell; if LSTM training is
# stochastic the selected configuration may differ between runs - confirm.
models = {}

# Data-level grid: one [future, lag, semi_variance] entry per combination.
config_2 = [
    [future_value, lag_value, semi_variance_flag]
    for future_value in futures
    for lag_value in lags
    for semi_variance_flag in sv
]

# Created once up front so an unwritable location fails before any training.
output_directory = "output/LSTM_outputdata"
os.makedirs(output_directory, exist_ok=True)

for conf in config_2:
    # Distinct local names: the module-level grids (`futures`, `lags`, `sv`)
    # must stay lists so that this cell can be re-run.
    future, lag, use_semi_variance = conf

    prepared_data = DataPreparationLSTM.DataPreparationLSTM(
        df=input_data,
        future=future,
        lag=lag,
        min_max_scaler=True,
        log_transform=True,
        semi_variance=use_semi_variance,
        jump_detect=True,
        period_train=[
            pd.to_datetime("2006-01-01", format="%Y-%m-%d"),
            pd.to_datetime("2010-12-31", format="%Y-%m-%d"),
        ],
        period_validation=[
            pd.to_datetime("2011-01-01", format="%Y-%m-%d"),
            pd.to_datetime("2015-12-31", format="%Y-%m-%d"),
        ],
        period_test=[
            pd.to_datetime("2016-01-01", format="%Y-%m-%d"),
            pd.to_datetime("2018-12-31", format="%Y-%m-%d"),
        ],
    )
    prepared_data.prepare_all()

    best_config = None
    best_params = None
    # Start at -inf rather than 0.0: R^2 can be negative, and the best
    # configuration must still be selected when every score is <= 0.
    best_val = -math.inf

    # The loop targets deliberately do NOT reuse the names `layer_one` ...
    # `layer_four`; doing so would overwrite the module-level grid lists with
    # ints and break any re-run of the cell that builds `configs`.
    for (learning_rate, units_one, units_two, units_three, units_four) in configs:

        folds = prepared_data.splits.copy()
        errors = []
        for (x_train, y_train, validation_x, validation_y) in folds:
            model = LSTM(
                train_matrix=x_train,
                train_y=y_train,
                test_matrix=validation_x,
                test_y=validation_y,
                epochs=70,
                learning_rate=learning_rate,
                layer_one=units_one,
                layer_two=units_two,
                layer_three=units_three,
                layer_four=units_four,
            )

            model.train_lstm()
            model.predict_lstm()

            # Map predictions and targets through the data preparation's
            # back_transformation before computing the accuracy measures.
            # All four are computed first and only then written back to the
            # model, so every transformation sees the untouched attribute.
            prediction_train_normal = prepared_data.back_transformation(model.prediction_train)
            prediction_test_normal = prepared_data.back_transformation(model.prediction_test)
            train_y_normal = prepared_data.back_transformation(model.train_y)
            test_y_normal = prepared_data.back_transformation(model.test_y)

            model.prediction_train = prediction_train_normal
            model.prediction_test = prediction_test_normal
            model.test_y = test_y_normal
            model.train_y = train_y_normal

            model.make_accuracy_measures()

            errors.append([model.train_accuracy, model.test_accuracy])

        if not errors:
            # No folds were produced: there is nothing to average, so skip
            # this configuration instead of dividing by zero.
            continue

        # Mean over folds of (train R^2 + test R^2). An explicit accumulator
        # is used so the builtin `sum` is neither shadowed nor required.
        mean_r_squared = 0.0
        for train_acc, test_acc in errors:
            mean_r_squared += train_acc["RSquared"] + test_acc["RSquared"]
        mean_r_squared /= len(errors)

        # A NaN score compares False here and is therefore never selected.
        if mean_r_squared > best_val:
            best_val = mean_r_squared
            best_config = [
                learning_rate, units_one, units_two, units_three, units_four,
                conf[0], conf[1], conf[2],
            ]
            best_params = errors

    if best_config is None:
        # Every configuration was skipped or scored NaN; report it and move on
        # rather than failing while indexing into None.
        print(
            f"No valid configuration for future={future}, lag={lag}, "
            f"semi_variance={use_semi_variance}; no output file written."
        )
        continue

    output_file_path = os.path.join(
        output_directory, f'LSTM_{future}_{lag}_{use_semi_variance}.txt'
    )

    with open(output_file_path, 'w') as output_file:
        output_file.write("Error rates : \n")
        output_file.write(f"{best_params} \n")
        # The header is written once (it used to be emitted twice by mistake).
        output_file.write("Configuration : \n")
        output_file.write(f"Learning rate : {best_config[0]}\t")
        output_file.write(f"Layer 1 : {best_config[1]}\t")
        output_file.write(f"Layer 2 : {best_config[2]}\t")
        output_file.write(f"Layer 3 : {best_config[3]}\t")
        output_file.write(f"Layer 4 : {best_config[4]}\t\n")

