In [83]:
# Imports (External)
import numpy as np
import pandas as pd
import datetime
from datetime import datetime

from monthdelta import monthdelta
import xlrd
import xlsxwriter
from collections import OrderedDict
import pickle

import sys
sys.path.append('../')  

# Visualization/plotting imports
import matplotlib as mpl
import matplotlib.pyplot as plt

# Machine learning imports
import pywt
from pywt import wavedec, waverec
from scipy import signal
from statsmodels.robust import mad

import sklearn
import tensorflow as tf
import keras
from keras.layers import Input, Dense
from keras.models import Model, Sequential
from keras import optimizers
from keras import regularizers

# Internal Imports
from wsae_lstm.utils import pickle_load

In [84]:
dict_dataframes_index=pickle_load(path_filename="../data/pickled/clean_data_index_interval")
#dict_dataframes_index.keys()

In [85]:
dict_dataframes_index['csi300 index data'][1].drop(['date'],axis=1,inplace=True)

In [86]:
len(dict_dataframes_index['csi300 index data'][1].columns)
# features will be 19 (20-1=19), with date column removed

19

In [87]:
data = dict_dataframes_index['csi300 index data'][1]

# Stacked Autoencoders
## Notes from source article on Stacked Autoencoder design:

"The single-layer autoencoder maps the input daily variables into the first hidden vector." 

"In this paper, [the activate function] is set to be a sigmoid function as in Chen et al. [19]"

"As a result, the gradient descent algorithm is applied to complete parameter optimization as suggested in Yin et al. [62]"

"After training the first single-layer autoencoder, the reconstruction layer of the first single layer autoencoder is removed, and the hidden layer is reserved as the input layer of the second single-layer autoencoder." (Bao et al., 2017, p. 7-8)


"SAEs is the main part of the
model and is used to learn the deep features of financial time series in an unsupervised manner. Specifically, it is a neural network consisting of multiple single layer autoencoders in
which the output feature of each layer is wired to the inputs of the successive layer. The unsupervised training of SAEs is done one AE at a time by minimizing the error between the output
data and the input data. As a result, the SAEs model can successfully learn invariant and
abstract features [19]." (Bao et al., 2017, p.2)

## Implementation notes & summary:
**Stacked Autoencoder with 5 Layers that consists of 4-single layer autoencoders**

- Input variables for each dataset range from 18-25 features
- Depth of 5, hidden layer size of 10
- Gradient descent for parameter optimization
- Unsupervised training

## Hypotheses from meta-referenced articles for model & implementation details: 
- 100 Epochs for training (Chen et al., 2014)

In [107]:
input_dim = data.shape[1]
hidden_dim = 10  

# Layer 1 - Input Layer, SAE_1
input_data = Input(shape=(input_dim,))
encoded = Dense(hidden_dim, activation='sigmoid')(input_data)
decoded = Dense(input_dim, activation='sigmoid')(encoded)
# Layer 2 - SAE_2
encoded = Dense(hidden_dim, activation='sigmoid')(decoded)
decoded = Dense(input_dim, activation='sigmoid')(encoded)
# Layer 3 - SAE_3
encoded = Dense(hidden_dim, activation='sigmoid')(decoded)
decoded = Dense(input_dim, activation='sigmoid')(encoded)
# Layer 4 - SAE_4
encoded = Dense(hidden_dim, activation='sigmoid')(decoded)
decoded = Dense(input_dim, activation='sigmoid')(encoded)
# Layer 5 - SAE_5
encoded = Dense(hidden_dim, activation='sigmoid')(decoded)
decoded = Dense(input_dim, activation='sigmoid')(encoded)

autoencoder = Model(input_data, decoded)
autoencoder.compile(optimizer='sgd',loss='mse')
autoencoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_28 (InputLayer)        (None, 19)                0         
_________________________________________________________________
dense_153 (Dense)            (None, 10)                200       
_________________________________________________________________
dense_154 (Dense)            (None, 19)                209       
_________________________________________________________________
dense_155 (Dense)            (None, 10)                200       
_________________________________________________________________
dense_156 (Dense)            (None, 19)                209       
_________________________________________________________________
dense_157 (Dense)            (None, 10)                200       
_________________________________________________________________
dense_158 (Dense)            (None, 19)                209       
__________

In [108]:
autoencoder.fit(data, data, epochs=5,verbose=2)

Epoch 1/5
 - 0s - loss: 5511446505136931.0000
Epoch 2/5
 - 0s - loss: 5511446492855570.0000
Epoch 3/5
 - 0s - loss: 5511446556016854.0000
Epoch 4/5
 - 0s - loss: 5511446445484607.0000
Epoch 5/5
 - 0s - loss: 5511446547244453.0000


<keras.callbacks.History at 0x23522dd1160>

In [109]:
autoencoded_data = autoencoder.predict(data)

In [110]:
len(pd.DataFrame(autoencoded_data).columns)

19

In [111]:
pd.DataFrame(autoencoded_data).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,0.999643,0.999642,0.999636,0.999638,1.0,0.985372,0.636521,0.497739,0.788838,0.984568,0.99964,0.999638,0.999637,0.599553,0.999637,0.455108,0.667783,0.387989,1.0
1,0.999643,0.999642,0.999636,0.999638,1.0,0.985372,0.636521,0.497739,0.788838,0.984568,0.99964,0.999638,0.999637,0.599553,0.999637,0.455108,0.667783,0.387989,1.0
2,0.999643,0.999642,0.999636,0.999638,1.0,0.985372,0.636521,0.497739,0.788838,0.984568,0.99964,0.999638,0.999637,0.599553,0.999637,0.455108,0.667783,0.387989,1.0
3,0.999643,0.999642,0.999636,0.999638,1.0,0.985372,0.636521,0.497739,0.788838,0.984568,0.99964,0.999638,0.999637,0.599553,0.999637,0.455108,0.667783,0.387989,1.0
4,0.999643,0.999642,0.999636,0.999638,1.0,0.985372,0.636521,0.497739,0.788838,0.984568,0.99964,0.999638,0.999637,0.599553,0.999637,0.455108,0.667783,0.387989,1.0


In [92]:
# input_dim = data.shape[1]
# hidden_dim = 10
# learning_rate = 1e-7

# # Layer 1 - Input layer
# input_layer = Input(shape=(input_dim,))
# encoder = Dense(hidden_dim, activation="sigmoid", 
#                 activity_regularizer=regularizers.l1(learning_rate))(input_layer)
# decoder = Dense(input_dim, activation='sigmoid')(encoder)
# # Layer 2 - SAE_1
# encoder = Dense(input_dim, activation="sigmoid")(decoder)
# decoder = Dense(hidden_dim, activation='sigmoid')(encoder)
# # # Layer 3 - SAE_2
# # encoder = Dense(input_dim, activation="sigmoid")(decoder)
# # decoder = Dense(hidden_dim, activation='sigmoid')(encoder)
# # # layer 4 - SAE_3
# # encoder = Dense(input_dim, activation="sigmoid")(decoder)
# # decoder = Dense(hidden_dim, activation='sigmoid')(encoder)
# # # layer 5 - SAE_4
# # encoder = Dense(input_dim, activation="sigmoid")(decoder)
# # decoder = Dense(hidden_dim, activation='sigmoid')(encoder)



# autoencoder = Model(inputs=input_layer, outputs=decoder)
# autoencoder.compile(optimizer='sgd',
#                     loss='mean_squared_error',
#                     )

In [93]:
# autoencoder.summary()


In [94]:
# autoencoder.fit(data, data,
#                 epochs=10)

In [95]:

# input_dim = 19
# encoding_dim = 19

# autoencoder = Sequential()

# autoencoder.add(
#     Dense(encoding_dim, input_shape=(input_dim,), activation='sigmoid'))
# autoencoder.add(
#     Dense(10, activation='sigmoid')
# )
# autoencoder.add(
#     Dense(input_dim, activation='sigmoid')
# )
# autoencoder.summary()
# autoencoder.compile(optimizer='sgd',loss='mse')

In [96]:
# autoencoder.fit(data,data,epochs=10)