In [None]:
# The TCN layer used in this notebook comes from the keras-tcn package:
# https://github.com/philipperemy/keras-tcn

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import datetime

import keras
from keras.models import Sequential
from keras.layers import Dense,InputLayer, Dropout
from keras.callbacks import EarlyStopping
from tcn import TCN,tcn_full_summary
from sklearn.metrics import accuracy_score

import numpy as np
import tensorflow as tf
#from sklearn.model_selection import RandomizedSearchCV
#from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
import keras_tuner

# Global Model

In [3]:
class MyHyperModel(keras_tuner.HyperModel):
    """Keras Tuner hypermodel: one TCN layer, dropout, and a sigmoid output unit.

    Hyperparameters sampled with ``hp.Choice``: ``units``, ``kernel_size``,
    ``learning_rate`` and ``optimizer`` (in ``build``) plus ``batch_size``
    (in ``fit``).
    """

    def build(self, hp):
        """Build and compile the model for the hyperparameters held in ``hp``.

        Args:
            hp: keras_tuner ``HyperParameters`` object to sample from.

        Returns:
            A compiled ``Sequential`` model using binary cross-entropy loss
            and the ``accuracy`` metric.

        Raises:
            ValueError: if the sampled optimizer name is not one of the
                supported choices.
        """
        # Hyperparameters are registered in the order: units, kernel_size,
        # learning_rate, optimizer.
        n_filters = hp.Choice('units', values=[32, 64, 128])
        kernel_width = hp.Choice('kernel_size', values=[2, 4, 8, 16])

        network = Sequential()
        stack = (
            InputLayer(input_shape=(240, 1)),
            TCN(nb_filters=n_filters, kernel_size=kernel_width, name='dense_0'),
            Dropout(0.5),
            Dense(1, activation='sigmoid'),
        )
        for layer in stack:
            network.add(layer)

        step_size = hp.Choice('learning_rate', values=[1e-6, 1e-7, 1e-8])
        optimizer_name = hp.Choice('optimizer', values=['sgd', 'rmsprop', 'adam', "adamax"])

        # Map each choice to its optimizer class; only the selected one is instantiated.
        optimizer_classes = {
            'sgd': keras.optimizers.SGD,
            'rmsprop': keras.optimizers.RMSprop,
            'adam': keras.optimizers.Adam,
            'adamax': keras.optimizers.Adamax,
        }
        optimizer_cls = optimizer_classes.get(optimizer_name)
        if optimizer_cls is None:
            raise ValueError("Invalid optimizer choice")

        network.compile(
            optimizer=optimizer_cls(learning_rate=step_size),
            loss=keras.losses.BinaryCrossentropy(),
            metrics=['accuracy'],
        )
        return network

    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` with a batch size sampled from ``hp``.

        Positional and keyword arguments are forwarded unchanged to
        ``model.fit``; the return value of ``model.fit`` is returned.
        """
        chosen_batch_size = hp.Choice("batch_size", [16, 32, 64, 128, 256])
        return model.fit(*args, batch_size=chosen_batch_size, **kwargs)


In [None]:
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
timesteps = 240      # number of leading feature columns in every CSV row
num_input = 1        # features per timestep
num_classes = 1      # width of the label array (single sigmoid output)
# Column names assigned to every loaded CSV: 240 step columns + 4 metadata columns.
label = list(range(timesteps)) + ['target'] + ['ticker'] + ['target_date'] + ['sector']

data_path = '/SP500/data/'
weight_path = '/SP500/7_TCN/weight/tcn_weight.h5'
prediction_dir = '/SP500/7_TCN/tcn_pred/'
n_periods = 5               # Set_0 ... Set_4
max_epochs = 1000           # upper bound; early stopping normally ends training sooner
validation_fraction = 0.2
banner = "-------------------------------------------------------------------------------------------------------"

# Not populated in this cell; kept because other cells may rely on these names.
training_data = []
training_label = []
testing_data = []
testing_label = []

accuracy_results = {}
hyperparameter_records = []


def _read_period(split, period):
    """Load ``Set_<period>_<split>.csv``, drop rows with NaNs and apply ``label`` as column names."""
    frame = pd.read_csv(data_path + 'Set_' + str(period) + '_' + split + '.csv', index_col=0).dropna()
    frame.columns = label
    return frame


def _to_arrays(frame):
    """Split a frame into (feature frame, label series, x array, y array).

    x has shape (samples, timesteps, num_input); y has shape (samples, num_classes).
    """
    features = frame.iloc[:, :timesteps]
    targets = frame.iloc[:, timesteps]
    x = np.array(features).reshape((len(features), timesteps, num_input), order='F')
    y = np.array(targets).reshape((len(targets), num_classes))
    return features, targets, x, y


def _early_stopping():
    """Return a fresh EarlyStopping callback (val_loss, patience 10, final weights kept)."""
    return tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=False)


def _timestamp():
    """Current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')


# ---------------------------------------------------------------------------
# Hyperparameters are tuned on period 0 only; the resulting model is then
# trained further on each subsequent period and evaluated on that period's
# test set.
# ---------------------------------------------------------------------------
for i in range(n_periods):
    # read the data
    train = _read_period('Train', i)
    test = _read_period('Test', i)

    train_data, train_label, x_train, y_train = _to_arrays(train)
    test_data, test_label, x_test, y_test = _to_arrays(test)

    print(x_train.shape)
    print(y_train.shape)
    print(x_test.shape)
    print(y_test.shape)

    print(banner)
    print("Training the model for Training Set " + str(i) + " from " + _timestamp())
    print(banner)

    if i == 0:
        # NOTE(review): `overwrite` is not set, so keras-tuner presumably resumes
        # from any earlier results stored under ./tcn — confirm this is intended.
        tuner = keras_tuner.BayesianOptimization(
            MyHyperModel(),
            objective='val_accuracy',
            max_trials=30,
            directory='tcn',
            seed=111,
        )
        early_stop = _early_stopping()
        tuner.search(
            x_train,
            y_train,
            epochs=max_epochs,
            validation_split=validation_fraction,
            callbacks=[early_stop],
        )

        # Rebuild a fresh model from the best hyperparameters found.
        hypermodel = MyHyperModel()
        best_hp = tuner.get_best_hyperparameters()[0]
        best_model = hypermodel.build(best_hp)
        best_hp_values = best_hp.get_config()["values"]
        print(best_hp_values)

        early_stop = _early_stopping()
        fit_callbacks = [early_stop]
    else:
        # `best_model` still holds the weights from the previous period in
        # memory; the checkpoint callback rewrites the weight file each epoch.
        load_path = weight_path
        print('Model restore from ' + load_path)
        cp_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=load_path,
            save_weights_only=True,
            verbose=1,
        )
        early_stop = _early_stopping()
        fit_callbacks = [cp_callback, early_stop]

    # Train through the hypermodel so the tuned `batch_size` is applied.
    # Calling best_model.fit() directly would silently use the Keras default
    # batch size instead of the value selected during the search.
    result = hypermodel.fit(
        best_hp,
        best_model,
        x_train,
        y_train,
        epochs=max_epochs,
        validation_split=validation_fraction,
        verbose=1,
        callbacks=fit_callbacks,
    )

    save_path = weight_path
    best_model.save_weights(save_path)
    print("Model saved to " + save_path)
    print("Training end: " + _timestamp())

    ##make prediction
    pred_ff_test = best_model.predict(x_test)
    # Flatten the (samples, 1) predictions to a plain list of Python floats.
    pred = pred_ff_test.reshape(-1).tolist()
    output_data = pd.DataFrame({
        'y_prob': pred,
        'y_true': test['target'],
        'Ticker': test['ticker'],
        'Date': test['target_date'],
        'Sector': test['sector'],
    })
    # accuracy_score expects (y_true, y_pred).
    accuracy = accuracy_score(output_data['y_true'], np.round(output_data['y_prob']))
    print('Overall Accuracy for test set:' + str(accuracy))

    output_data.to_csv(prediction_dir + 'tcn_prediction_period_' + str(i) + '.csv')
    print('Prediction for period ' + str(i) + ' successfully saved.')

