In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import datetime

import keras
from keras.models import Sequential
from keras.layers import Dense, InputLayer, LSTM, Dropout
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score
import keras_tuner
from keras import regularizers

# 240 days data

In [2]:
class MyHyperModel(keras_tuner.HyperModel):
    def build(self,hp):
        model = Sequential()
        model.add(InputLayer(input_shape=(240,1 )))
        hp_units = hp.Choice('units', values=[32,64,128])
        hp_lr = hp.Choice('learning_rate', values=[1e-6,1e-7,1e-8])
        hp_regulariers = hp.Choice('regularies', values = ['l1','l2', 'None'])
        if hp_regulariers == 'l1':
            regularizers = keras.regularizers.L1(hp_lr)
        elif hp_regulariers == 'l2':
            regularizers = keras.regularizers.L2(hp_lr)
        elif hp_regulariers == 'None':
            regularizers = None
        
        model.add(LSTM(units=hp_units,kernel_regularizer=regularizers,name='dense_0'))
        model.add(Dropout(hp.Choice('dropout_rate',values =[0.5])))
        model.add(Dense(1, activation='sigmoid'))
        
        hp_optimizer = hp.Choice('optimizer', values=['sgd', 'rmsprop', 'adam',"adamax"])
        if hp_optimizer == 'sgd':
            optimizer = keras.optimizers.SGD(learning_rate=hp_lr)
        elif hp_optimizer == 'rmsprop':
            optimizer = keras.optimizers.RMSprop(learning_rate=hp_lr)
        elif hp_optimizer == 'adam':
            optimizer = keras.optimizers.Adam(learning_rate=hp_lr)
        elif hp_optimizer == 'adamax':
            optimizer = keras.optimizers.Adamax(learning_rate=hp_lr)
        else:
            raise ValueError("Invalid optimizer choice")

        model.compile(
            optimizer = optimizer
            ,loss=keras.losses.BinaryCrossentropy()
            , metrics=['accuracy'])
        return model


    def fit(self, hp, model, *args, **kwargs):
            return model.fit(
                *args,
                batch_size=hp.Choice("batch_size", [16,32,64,128]),
                **kwargs,
            )

In [3]:
tuner = keras_tuner.BayesianOptimization(MyHyperModel(),
            objective='val_accuracy', #overwrite=True,
            max_trials=30, directory='lstm', seed=100)
print(tuner.search_space_summary())

Reloading Tuner from lstm/untitled_project/tuner0.json
Search space summary
Default search space size: 6
units (Choice)
{'default': 32, 'conditions': [], 'values': [32, 64, 128], 'ordered': True}
learning_rate (Choice)
{'default': 1e-06, 'conditions': [], 'values': [1e-06, 1e-07, 1e-08], 'ordered': True}
regularies (Choice)
{'default': 'l1', 'conditions': [], 'values': ['l1', 'l2', 'None'], 'ordered': False}
dropout_rate (Choice)
{'default': 0.5, 'conditions': [], 'values': [0.5], 'ordered': True}
optimizer (Choice)
{'default': 'sgd', 'conditions': [], 'values': ['sgd', 'rmsprop', 'adam', 'adamax'], 'ordered': False}
batch_size (Choice)
{'default': 16, 'conditions': [], 'values': [16, 32, 64, 128], 'ordered': True}
None


In [5]:
timesteps = 240
num_input =1
num_classes=1
label = list(range(timesteps)) + ['target'] + ['ticker'] + ['target_date'] + ['sector']

training_data = []
training_label = []
testing_data =[]
testing_label =[]

accuracy_results = []


for i in range(5):
    # read the data
    path  = '/home/RDC/yeungwin/H:/yeungwin/DAX/data/'
    train = pd.read_csv(path+'Set_' + str(i) + '_Train.csv', index_col=0).dropna()
    test = pd.read_csv(path+'Set_' + str(i) + '_Test.csv', index_col=0).dropna()

    train.columns = label
    test.columns = label

    train_label = train.iloc[:, timesteps]
    train_data = train.iloc[:, :timesteps]
    test_label = test.iloc[:,timesteps]
    test_data = test.iloc[:, :timesteps]
    
    
     # reshape input
    #  data: (samples, timesteps, features)
    x_train = np.array(train_data).reshape((len(train_data), timesteps, num_input), order = 'F')
    x_test = np.array(test_data).reshape((len(test_data), timesteps, num_input), order = 'F')
    # label: (samples, target)
    y_train = np.array(train_label).reshape((len(train_label), num_classes))
    y_test = np.array(test_label).reshape((len(test_label), num_classes))
        
    print(x_train.shape)
    print(y_train.shape)
    print(x_test.shape)
    print(y_test.shape)
    
    print("-------------------------------------------------------------------------------------------------------")
    print("Training the model for Training Set " + str(i) + " from " +
    datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S'))
    print("-------------------------------------------------------------------------------------------------------")
        
    if i ==0:   
        tuner = keras_tuner.BayesianOptimization(MyHyperModel(),
            objective='val_accuracy', #overwrite=True,
            max_trials=30, directory='lstm', seed=100)
        early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience = 10, restore_best_weights=False)
        tuner.search(x_train,y_train, epochs =1000, validation_split=0.2, callbacks=[early_stop])

        # save the best model
        hypermodel = MyHyperModel()
        best_hp = tuner.get_best_hyperparameters()[0]
        best_model = hypermodel.build(best_hp)

        early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience = 10, restore_best_weights=False)
        result = best_model.fit(x_train,y_train, epochs=1000, validation_split =0.2, verbose =1, callbacks=[early_stop])
        print(tuner.get_best_hyperparameters()[0].get_config()["values"])

    else: 
        load_path = '/home/RDC/yeungwin/H:/yeungwin/DAX/5_LSTM/lstm_model_weight/lstm_weight.h5'
        print('Model restore from ' + load_path)
        cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=load_path,
                                                 save_weights_only=True,
                                                 verbose=1)
        early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', 
                    patience = 10)

        result = best_model.fit(
            x_train, 
            y_train, 
            epochs = 1000, 
            validation_split=0.2,
            verbose =1,
            callbacks=[cp_callback, early_stop]        
        ) 
            
    model_path ="/home/RDC/yeungwin/H:/yeungwin/DAX/5_LSTM/lstm_model_weight/lstm_weight.h5"    
    best_model.save_weights(model_path)
    print("Model saved to " + model_path)
    print("Training end: " + datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S'))
        
    ##make prediction
    pred_ff_test = best_model.predict(x_test)
    #pred = pred_ff_test.tolist()
    pred = pred_ff_test.reshape((1, len(pred_ff_test))).tolist()[0]
    output_data = pd.DataFrame({'y_prob': pred, 'y_true': test['target'], 'Ticker': test['ticker'],
                                    'Date': test['target_date'], 'Sector': test['sector'], })
    accuracy = accuracy_score(np.round(output_data['y_prob']), output_data['y_true'])
    print('Overall Accuracy for test set:'+ str(accuracy))
    output_data.to_csv('/home/RDC/yeungwin/H:/yeungwin/DAX/5_LSTM/lstm_prediction/lstm_prediction_period_'+str(i)+'.csv')
    print('Prediction for period ' + str(i) + ' successfully saved.')

(14717, 240, 1)
(14717, 1)
(7149, 240, 1)
(7149, 1)
-------------------------------------------------------------------------------------------------------
Training the model for Training Set 0 from 2024-01-09 02:51:21
-------------------------------------------------------------------------------------------------------
Reloading Tuner from lstm/untitled_project/tuner0.json
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
{'units': 32, 'learning_rate': 1e-07, 'regularies': 'l1', 'dropout_rate': 0.5, 'optimizer': 'adamax', 'batch_size': 16}
Model saved to /home/RDC/yeungwin/H:/yeungwin/DAX/5_LSTM/lstm_model_weight/lstm_weight.h5
Training end: 2024-01-09 02:55:27
Overall Accuracy for test set:0.5002098195551825
Prediction for period 0 successfully saved.
(14569, 240, 1)
(14569, 1)
(7500, 240, 1)
(7500, 1)
----------------------------------------------------------------------------------------

Prediction for period 3 successfully saved.
(14806, 240, 1)
(14806, 1)
(980, 240, 1)
(980, 1)
-------------------------------------------------------------------------------------------------------
Training the model for Training Set 4 from 2024-01-09 03:09:08
-------------------------------------------------------------------------------------------------------
Model restore from /home/RDC/yeungwin/H:/yeungwin/DAX/5_LSTM/lstm_model_weight/lstm_weight.h5
Epoch 1/1000
Epoch 1: saving model to /home/RDC/yeungwin/H:/yeungwin/DAX/5_LSTM/lstm_model_weight/lstm_weight.h5
Epoch 2/1000
Epoch 2: saving model to /home/RDC/yeungwin/H:/yeungwin/DAX/5_LSTM/lstm_model_weight/lstm_weight.h5
Epoch 3/1000
Epoch 3: saving model to /home/RDC/yeungwin/H:/yeungwin/DAX/5_LSTM/lstm_model_weight/lstm_weight.h5
Epoch 4/1000
Epoch 4: saving model to /home/RDC/yeungwin/H:/yeungwin/DAX/5_LSTM/lstm_model_weight/lstm_weight.h5
Epoch 5/1000
Epoch 5: saving model to /home/RDC/yeungwin/H:/yeungwin/DAX/5_LSTM/lstm_mode

In [3]:
tuner = keras_tuner.BayesianOptimization(MyHyperModel(),
            objective='val_accuracy', #overwrite=True,
            max_trials=30, directory='lstm', seed=100)


Reloading Tuner from lstm/untitled_project/tuner0.json
<keras_tuner.src.engine.hyperparameters.hyperparameters.HyperParameters object at 0x7f6262b0e290>


In [7]:
print(tuner.get_best_hyperparameters()[0].values)

{'units': 32, 'learning_rate': 1e-07, 'regularies': 'l1', 'dropout_rate': 0.5, 'optimizer': 'adamax', 'batch_size': 16}
