In [None]:
# Standard library
import os
import random
import warnings
from itertools import product

# TF_CPP_MIN_LOG_LEVEL is only honoured when it is set before TensorFlow is
# imported, so it has to come ahead of the `import tensorflow` line below.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Third-party
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import yfinance as yf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import SimpleRNN, LSTM, GRU
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

warnings.filterwarnings('ignore')

# Seed every random number generator the notebook touches so runs are repeatable.
seed = 1999
np.random.seed(seed)
tf.random.set_seed(seed)
random.seed(seed)
keras.utils.set_random_seed(seed)

In [None]:
def get_ready_data(dataname='eurusd_tech', n_past=5, val_size=252, test_size=30):
    '''
    Load a prepared dataset and turn it into scaled, windowed train/validation/test arrays.

    The CSV is read from ``data/<folder>/<dataname>.csv``; the folder is picked from the second
    ``_``-separated token of ``dataname`` (``tech``, ``macro`` or ``both``). The first CSV column
    is dropped, and column 0 of what remains is used as the prediction target.

    Parameters:
        * dataname: file stem such as ``'eurusd_tech'``.
        * n_past: number of consecutive past rows that make up one input window.
        * val_size: number of rows (located just before the test rows) used for validation.
        * test_size: number of trailing rows used for testing.

    Output:
        * scaler, X_train, y_train, X_val, y_val, X_test, y_test
          ``scaler`` is a MinMaxScaler fitted on the training rows only. Every ``X_*`` has shape
          (samples, n_past, n_features); every ``y_*`` holds the scaled first column of the row
          that immediately follows the matching window.

    Raises:
        * ValueError: unknown ``dataname``, ``n_past`` < 1, or a split too short to build a window.
    '''
    folders = {'tech': 'technical_df', 'macro': 'macro_df', 'both': 'both_df'}

    name_parts = dataname.split('_')
    typedata = name_parts[1] if len(name_parts) > 1 else None
    if typedata not in folders:
        raise ValueError('Check the dataname.')
    if n_past < 1:
        raise ValueError('n_past must be at least 1.')

    dataframe = pd.read_csv(f'data/{folders[typedata]}/{dataname}.csv').iloc[:, 1:]
    dataset = dataframe.values.astype('float32')

    train_size = len(dataset) - (test_size + val_size)

    # Explicit start offsets: `dataset[-test_size:]` would return the whole array for test_size=0.
    train_df = dataset[:train_size]
    val_df = dataset[train_size:train_size + val_size]
    test_df = dataset[train_size + val_size:]

    for split_name, split in (('train', train_df), ('validation', val_df), ('test', test_df)):
        if len(split) <= n_past:
            raise ValueError(
                f'The {split_name} split has {len(split)} rows; more than n_past={n_past} are needed.')

    # Fit on the training rows only and reuse those statistics for the other splits.
    # Refitting on validation/test data leaks their range into the scaling and would leave
    # the returned scaler describing the test rows instead of the training rows.
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_df_scaled = scaler.fit_transform(train_df)
    val_df_scaled = scaler.transform(val_df)
    test_df_scaled = scaler.transform(test_df)

    def df_to_X_y(dataset, n_past):
        # Window i covers rows [i, i + n_past); its target is column 0 of row i + n_past.
        # The last valid start index is len(dataset) - n_past - 1, hence range(len - n_past).
        dataX, dataY = [], []
        for i in range(len(dataset) - n_past):
            dataX.append(dataset[i:(i + n_past), :])
            dataY.append(dataset[i + n_past, 0])
        return np.array(dataX), np.array(dataY)

    X_train, y_train = df_to_X_y(train_df_scaled, n_past)
    X_val, y_val = df_to_X_y(val_df_scaled, n_past)
    X_test, y_test = df_to_X_y(test_df_scaled, n_past)

    return scaler, X_train, y_train, X_val, y_val, X_test, y_test

def descaled(arr, fitted_scaler=None):
    '''
    Map scaled target values back to their real (unscaled) values.

    The scaler works on full feature rows, so the values are written into column 0 of a
    zero-filled matrix with one column per scaler feature, inverse-transformed, and column 0
    is returned.

    Parameters:
        * arr: scaled target values; any shape is accepted and flattened to 1-D.
        * fitted_scaler: fitted scaler exposing ``n_features_in_`` and ``inverse_transform``.
          When omitted, the notebook-level ``scaler`` variable is used.

    Output:
        * 1-D numpy array with the descaled values.
    '''
    if fitted_scaler is None:
        # Backward-compatible default: the scaler created at notebook level.
        fitted_scaler = scaler

    values = np.asarray(arr, dtype=float).ravel()

    # Take the width from the scaler itself so it can never disagree with the scaler in use.
    extended = np.zeros((len(values), fitted_scaler.n_features_in_))
    extended[:, 0] = values
    return fitted_scaler.inverse_transform(extended)[:, 0]

In [None]:
def RNN_model(dataname, epochs_list, batch_size_list, learning_rate_list, units_list, units2_list, activation1_list, activation2_list):
    '''
    Grid-search a two-layer SimpleRNN regressor on the dataset called ``dataname``.

    Every combination of the supplied hyper-parameter lists is trained on the training split
    (the validation split is passed to ``fit`` for monitoring) and then scored on the test split
    in the original, unscaled units.

    Parameters:
        * dataname: dataset name forwarded to ``get_ready_data``.
        * epochs_list, batch_size_list, learning_rate_list: training settings to try.
        * units_list, units2_list: sizes of the first and second recurrent layer.
        * activation1_list, activation2_list: activations of the first and second recurrent layer.

    Output:
        * list with one dict per combination, holding the hyper-parameters plus
          'mse', 'rmse' and 'mae' measured on the test split.

    NOTE(review): the scores come from the test split, so picking the "best" row from the
    returned list gives an optimistic estimate of out-of-sample error.
    '''
    grid = list(product(epochs_list, batch_size_list, learning_rate_list, units_list,
                        units2_list, activation1_list, activation2_list))
    total_combinations = len(grid)
    results = []

    scaler, X_train, y_train, X_val, y_val, X_test, y_test = get_ready_data(dataname)
    n_steps, n_features = X_train.shape[1], X_train.shape[2]

    def to_original_scale(scaled_target):
        # Use the scaler loaded above (not a notebook-level one): pad the target to the
        # scaler's full feature width, invert, and keep the target column.
        padded = np.zeros((len(scaled_target), n_features))
        padded[:, 0] = scaled_target
        return scaler.inverse_transform(padded)[:, 0]

    # The targets are identical for every combination, so descale them once.
    y_test_rnn = to_original_scale(y_test)

    for it, (epochs, batch_size, learning_rate, units, units2, activation1, activation2) in enumerate(grid, start=1):
        print(f'Iteration {it}/{total_combinations}:')

        # Drop the graph/state of the previous model so memory does not grow across the search.
        keras.backend.clear_session()

        model_rnn = Sequential()
        model_rnn.add(SimpleRNN(units, activation=activation1, return_sequences=True, input_shape=(n_steps, n_features)))
        model_rnn.add(SimpleRNN(units2, activation=activation2, return_sequences=False))
        model_rnn.add(Dense(1))
        model_rnn.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')

        model_rnn.fit(
            X_train,
            y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, y_val),
            verbose=0)

        # Prediction i is the forecast for y_test[i]; compare them element by element.
        # Shifting one series against the other would score the forecast for step t+1
        # against the actual value at step t.
        y_pred_rnn = to_original_scale(model_rnn.predict(X_test, verbose=0).flatten())

        mse = mean_squared_error(y_test_rnn, y_pred_rnn)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_test_rnn, y_pred_rnn)

        results.append({
            'epochs': epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'units': units,
            'units2': units2,
            'activation1': activation1,
            'activation2': activation2,
            'mse': mse,
            'rmse': rmse,
            'mae': mae
        })

    return results

In [None]:
# Candidate values for the grid search, one list per hyper-parameter.

# Training settings
epochs_list = [50, 100]
batch_size_list = [16, 32, 64]
learning_rate_list = [0.001, 0.01, 0.1]

# Layer sizes (first / second recurrent layer)
units_list = [16, 32, 64]
units2_list = list(units_list)

# Activations (first / second recurrent layer)
activation1_list = ['relu', 'sigmoid', 'tanh']
activation2_list = list(activation1_list)

In [None]:
dataname = 'eurusd_tech'

# Create the output folder before the long grid search so the final `to_csv`
# cannot fail on a missing directory after all models have been trained.
os.makedirs('data/hyper_parameters', exist_ok=True)

# Keep the notebook-level split/scaler in sync with `dataname`
# (`descaled` reads the notebook-level `scaler`).
scaler, X_train, y_train, X_val, y_val, X_test, y_test = get_ready_data(dataname)

tech_rnn = RNN_model(
    dataname,
    epochs_list,
    batch_size_list,
    learning_rate_list,
    units_list,
    units2_list,
    activation1_list,
    activation2_list)

# Best (lowest test MSE) combination first.
tech_rnn_df = pd.DataFrame(tech_rnn).sort_values(by='mse')
tech_rnn_df.to_csv('data/hyper_parameters/tech_rnn_df.csv')

In [None]:
dataname = 'eurusd_macro'

# Create the output folder before the long grid search so the final `to_csv`
# cannot fail on a missing directory after all models have been trained.
os.makedirs('data/hyper_parameters', exist_ok=True)

# Keep the notebook-level split/scaler in sync with `dataname`
# (`descaled` reads the notebook-level `scaler`).
scaler, X_train, y_train, X_val, y_val, X_test, y_test = get_ready_data(dataname)

macro_rnn = RNN_model(
    dataname,
    epochs_list,
    batch_size_list,
    learning_rate_list,
    units_list,
    units2_list,
    activation1_list,
    activation2_list)

# Best (lowest test MSE) combination first.
macro_rnn_df = pd.DataFrame(macro_rnn).sort_values(by='mse')
macro_rnn_df.to_csv('data/hyper_parameters/macro_rnn_df.csv')

In [None]:
dataname = 'eurusd_both'

# Create the output folder before the long grid search so the final `to_csv`
# cannot fail on a missing directory after all models have been trained.
os.makedirs('data/hyper_parameters', exist_ok=True)

# Keep the notebook-level split/scaler in sync with `dataname`
# (`descaled` reads the notebook-level `scaler`).
scaler, X_train, y_train, X_val, y_val, X_test, y_test = get_ready_data(dataname)

both_rnn = RNN_model(
    dataname,
    epochs_list,
    batch_size_list,
    learning_rate_list,
    units_list,
    units2_list,
    activation1_list,
    activation2_list)

# Best (lowest test MSE) combination first.
both_rnn_df = pd.DataFrame(both_rnn).sort_values(by='mse')
both_rnn_df.to_csv('data/hyper_parameters/both_rnn_df.csv')

In [None]:
def LSTM_model(dataname, epochs_list, batch_size_list, learning_rate_list, units_list, units2_list, activation1_list, activation2_list):
    '''
    Grid-search a two-layer LSTM regressor on the dataset called ``dataname``.

    Every combination of the supplied hyper-parameter lists is trained on the training split
    (the validation split is passed to ``fit`` for monitoring) and then scored on the test split
    in the original, unscaled units.

    Parameters:
        * dataname: dataset name forwarded to ``get_ready_data``.
        * epochs_list, batch_size_list, learning_rate_list: training settings to try.
        * units_list, units2_list: sizes of the first and second recurrent layer.
        * activation1_list, activation2_list: activations of the first and second recurrent layer.

    Output:
        * list with one dict per combination, holding the hyper-parameters plus
          'mse', 'rmse' and 'mae' measured on the test split.

    NOTE(review): the scores come from the test split, so picking the "best" row from the
    returned list gives an optimistic estimate of out-of-sample error.
    '''
    grid = list(product(epochs_list, batch_size_list, learning_rate_list, units_list,
                        units2_list, activation1_list, activation2_list))
    total_combinations = len(grid)
    results = []

    scaler, X_train, y_train, X_val, y_val, X_test, y_test = get_ready_data(dataname)
    n_steps, n_features = X_train.shape[1], X_train.shape[2]

    def to_original_scale(scaled_target):
        # Use the scaler loaded above (not a notebook-level one): pad the target to the
        # scaler's full feature width, invert, and keep the target column.
        padded = np.zeros((len(scaled_target), n_features))
        padded[:, 0] = scaled_target
        return scaler.inverse_transform(padded)[:, 0]

    # The targets are identical for every combination, so descale them once.
    y_test_lstm = to_original_scale(y_test)

    for it, (epochs, batch_size, learning_rate, units, units2, activation1, activation2) in enumerate(grid, start=1):
        print(f'Iteration {it}/{total_combinations}:')

        # Drop the graph/state of the previous model so memory does not grow across the search.
        keras.backend.clear_session()

        model_lstm = Sequential()
        model_lstm.add(LSTM(units, activation=activation1, return_sequences=True, input_shape=(n_steps, n_features)))
        model_lstm.add(LSTM(units2, activation=activation2, return_sequences=False))
        model_lstm.add(Dense(1))
        model_lstm.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')

        model_lstm.fit(
            X_train,
            y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, y_val),
            verbose=0)

        # Prediction i is the forecast for y_test[i]; compare them element by element.
        # Shifting one series against the other would score the forecast for step t+1
        # against the actual value at step t.
        y_pred_lstm = to_original_scale(model_lstm.predict(X_test, verbose=0).flatten())

        mse = mean_squared_error(y_test_lstm, y_pred_lstm)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_test_lstm, y_pred_lstm)

        results.append({
            'epochs': epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'units': units,
            'units2': units2,
            'activation1': activation1,
            'activation2': activation2,
            'mse': mse,
            'rmse': rmse,
            'mae': mae
        })

    return results

In [None]:
dataname = 'eurusd_tech'

# Create the output folder before the long grid search so the final `to_csv`
# cannot fail on a missing directory after all models have been trained.
os.makedirs('data/hyper_parameters', exist_ok=True)

# Keep the notebook-level split/scaler in sync with `dataname`
# (`descaled` reads the notebook-level `scaler`).
scaler, X_train, y_train, X_val, y_val, X_test, y_test = get_ready_data(dataname)

tech_lstm = LSTM_model(
    dataname,
    epochs_list,
    batch_size_list,
    learning_rate_list,
    units_list,
    units2_list,
    activation1_list,
    activation2_list)

# Best (lowest test MSE) combination first.
tech_lstm_df = pd.DataFrame(tech_lstm).sort_values(by='mse')
tech_lstm_df.to_csv('data/hyper_parameters/tech_lstm_df.csv')

In [None]:
dataname = 'eurusd_macro'

# Create the output folder before the long grid search so the final `to_csv`
# cannot fail on a missing directory after all models have been trained.
os.makedirs('data/hyper_parameters', exist_ok=True)

# Keep the notebook-level split/scaler in sync with `dataname`
# (`descaled` reads the notebook-level `scaler`).
scaler, X_train, y_train, X_val, y_val, X_test, y_test = get_ready_data(dataname)

macro_lstm = LSTM_model(
    dataname,
    epochs_list,
    batch_size_list,
    learning_rate_list,
    units_list,
    units2_list,
    activation1_list,
    activation2_list)

# Best (lowest test MSE) combination first.
macro_lstm_df = pd.DataFrame(macro_lstm).sort_values(by='mse')
macro_lstm_df.to_csv('data/hyper_parameters/macro_lstm_df.csv')

In [None]:
dataname = 'eurusd_both'

# Create the output folder before the long grid search so the final `to_csv`
# cannot fail on a missing directory after all models have been trained.
os.makedirs('data/hyper_parameters', exist_ok=True)

# Keep the notebook-level split/scaler in sync with `dataname`
# (`descaled` reads the notebook-level `scaler`).
scaler, X_train, y_train, X_val, y_val, X_test, y_test = get_ready_data(dataname)

both_lstm = LSTM_model(
    dataname,
    epochs_list,
    batch_size_list,
    learning_rate_list,
    units_list,
    units2_list,
    activation1_list,
    activation2_list)

# Best (lowest test MSE) combination first.
both_lstm_df = pd.DataFrame(both_lstm).sort_values(by='mse')
both_lstm_df.to_csv('data/hyper_parameters/both_lstm_df.csv')

In [None]:
def GRU_model(dataname, epochs_list, batch_size_list, learning_rate_list, units_list, units2_list, activation1_list, activation2_list):
    '''
    Grid-search a two-layer GRU regressor on the dataset called ``dataname``.

    Every combination of the supplied hyper-parameter lists is trained on the training split
    (the validation split is passed to ``fit`` for monitoring) and then scored on the test split
    in the original, unscaled units.

    Parameters:
        * dataname: dataset name forwarded to ``get_ready_data``.
        * epochs_list, batch_size_list, learning_rate_list: training settings to try.
        * units_list, units2_list: sizes of the first and second recurrent layer.
        * activation1_list, activation2_list: activations of the first and second recurrent layer.

    Output:
        * list with one dict per combination, holding the hyper-parameters plus
          'mse', 'rmse' and 'mae' measured on the test split.

    NOTE(review): the scores come from the test split, so picking the "best" row from the
    returned list gives an optimistic estimate of out-of-sample error.
    '''
    grid = list(product(epochs_list, batch_size_list, learning_rate_list, units_list,
                        units2_list, activation1_list, activation2_list))
    total_combinations = len(grid)
    results = []

    scaler, X_train, y_train, X_val, y_val, X_test, y_test = get_ready_data(dataname)
    n_steps, n_features = X_train.shape[1], X_train.shape[2]

    def to_original_scale(scaled_target):
        # Use the scaler loaded above (not a notebook-level one): pad the target to the
        # scaler's full feature width, invert, and keep the target column.
        padded = np.zeros((len(scaled_target), n_features))
        padded[:, 0] = scaled_target
        return scaler.inverse_transform(padded)[:, 0]

    # The targets are identical for every combination, so descale them once.
    y_test_gru = to_original_scale(y_test)

    for it, (epochs, batch_size, learning_rate, units, units2, activation1, activation2) in enumerate(grid, start=1):
        print(f'Iteration {it}/{total_combinations}:')

        # Drop the graph/state of the previous model so memory does not grow across the search.
        keras.backend.clear_session()

        model_gru = Sequential()
        model_gru.add(GRU(units, activation=activation1, return_sequences=True, input_shape=(n_steps, n_features)))
        model_gru.add(GRU(units2, activation=activation2, return_sequences=False))
        model_gru.add(Dense(1))
        model_gru.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')

        model_gru.fit(
            X_train,
            y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, y_val),
            verbose=0)

        # Prediction i is the forecast for y_test[i]; compare them element by element.
        # Shifting one series against the other would score the forecast for step t+1
        # against the actual value at step t.
        y_pred_gru = to_original_scale(model_gru.predict(X_test, verbose=0).flatten())

        mse = mean_squared_error(y_test_gru, y_pred_gru)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_test_gru, y_pred_gru)

        results.append({
            'epochs': epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'units': units,
            'units2': units2,
            'activation1': activation1,
            'activation2': activation2,
            'mse': mse,
            'rmse': rmse,
            'mae': mae
        })

    return results

In [None]:
dataname = 'eurusd_tech'

# Create the output folder before the long grid search so the final `to_csv`
# cannot fail on a missing directory after all models have been trained.
os.makedirs('data/hyper_parameters', exist_ok=True)

# Keep the notebook-level split/scaler in sync with `dataname`
# (`descaled` reads the notebook-level `scaler`).
scaler, X_train, y_train, X_val, y_val, X_test, y_test = get_ready_data(dataname)

tech_gru = GRU_model(
    dataname,
    epochs_list,
    batch_size_list,
    learning_rate_list,
    units_list,
    units2_list,
    activation1_list,
    activation2_list)

# Best (lowest test MSE) combination first.
tech_gru_df = pd.DataFrame(tech_gru).sort_values(by='mse')
tech_gru_df.to_csv('data/hyper_parameters/tech_gru_df.csv')

In [None]:
dataname = 'eurusd_macro'

# Create the output folder before the long grid search so the final `to_csv`
# cannot fail on a missing directory after all models have been trained.
os.makedirs('data/hyper_parameters', exist_ok=True)

# Keep the notebook-level split/scaler in sync with `dataname`
# (`descaled` reads the notebook-level `scaler`).
scaler, X_train, y_train, X_val, y_val, X_test, y_test = get_ready_data(dataname)

macro_gru = GRU_model(
    dataname,
    epochs_list,
    batch_size_list,
    learning_rate_list,
    units_list,
    units2_list,
    activation1_list,
    activation2_list)

# Best (lowest test MSE) combination first.
macro_gru_df = pd.DataFrame(macro_gru).sort_values(by='mse')
macro_gru_df.to_csv('data/hyper_parameters/macro_gru_df.csv')

In [None]:
dataname = 'eurusd_both'

# Create the output folder before the long grid search so the final `to_csv`
# cannot fail on a missing directory after all models have been trained.
os.makedirs('data/hyper_parameters', exist_ok=True)

# Keep the notebook-level split/scaler in sync with `dataname`
# (`descaled` reads the notebook-level `scaler`).
scaler, X_train, y_train, X_val, y_val, X_test, y_test = get_ready_data(dataname)

both_gru = GRU_model(
    dataname,
    epochs_list,
    batch_size_list,
    learning_rate_list,
    units_list,
    units2_list,
    activation1_list,
    activation2_list)

# Best (lowest test MSE) combination first.
both_gru_df = pd.DataFrame(both_gru).sort_values(by='mse')
both_gru_df.to_csv('data/hyper_parameters/both_gru_df.csv')