In [447]:
import numpy as np
import pandas as pd

from sklearn.svm import SVR

import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, MaxPooling1D, InputLayer
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [448]:
# Reading the dataset
data_csv = "dataset.csv"
df = pd.read_csv(data_csv)
print('Dataset shape: ', df.shape)
print(df.dtypes)
df.head()

Dataset shape:  (1088, 7)
Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume       float64
dtype: object


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-01-02,1204.300049,1227.300049,1204.300049,1225.0,1225.0,209.0
1,2014-01-03,1221.699951,1239.0,1221.699951,1238.400024,1238.400024,142.0
2,2014-01-06,1232.800049,1247.0,1221.900024,1237.800049,1237.800049,127.0
3,2014-01-07,1239.300049,1242.400024,1226.300049,1229.400024,1229.400024,73.0
4,2014-01-08,1227.5,1227.5,1218.599976,1225.300049,1225.300049,189.0


In [449]:
# Verifying null values and deleting name from dataset
null_columns=df.columns[df.isnull().any()]
print(df[df.isnull().any(axis=1)][null_columns].head())
# Drop the lines with null values
df = df.dropna()
# Drop Date column
# df.pop("Date")

print('Dataset shape: ', df.shape)

     Open  High  Low  Close  Adj Close  Volume
126   NaN   NaN  NaN    NaN        NaN     NaN
229   NaN   NaN  NaN    NaN        NaN     NaN
247   NaN   NaN  NaN    NaN        NaN     NaN
514   NaN   NaN  NaN    NaN        NaN     NaN
534   NaN   NaN  NaN    NaN        NaN     NaN
Dataset shape:  (1080, 7)


In [450]:
print("Minimum: {}\nMaximum: {}\nMean: {}\nMedian: {}\nSD: {}\nSkewness: {}\nKurtosis: {}".format(df["Low"].min(), df["High"].max(), 
df["Open"].mean(), df["Open"].median(), df["Open"].std(), df["Open"].skew(), df["Open"].kurtosis()))

Minimum: 1046.199951
Maximum: 1391.400024
Mean: 1240.0312965685187
Median: 1251.0
SD: 72.12056317647767
Skewness: -0.5470330704899712
Kurtosis: -0.35155548285018057


In [451]:
lastday_2017 = df.loc[df["Date"]=="2017-12-29"].index.values[0]
df = df["Close"].values

# Transforming the dataset to ln scale
df = np.log(df)

# # Split dataset into train and test
train_set = df[0:lastday_2017]
test_set = df[lastday_2017:]
print("Train: ", train_set.shape, "Test: ", test_set.shape)

Train:  (1006,) Test:  (74,)


# Regression models

In [452]:
# FFNN class
class FFNN:
    def __init__(self, input_dim, normalize_factor=0):
        self.normalize_factor = normalize_factor if normalize_factor > 0 else 1
        optimizer = Adam()
        h_n = 3 if input_dim == 4 or input_dim == 6 else 5
        self.model = Sequential()
        self.model.add(Dense(h_n, input_dim=input_dim))
        self.model.add(Dense(1))
        self.model.compile(loss="mean_squared_error", optimizer=optimizer, metrics=["accuracy", "mean_absolute_error"])
    
    def fit(self,x_train,y_train):
        self.model.fit(x_train/self.normalize_factor, y_train,
                        epochs=50,
                        batch_size=128,
                        verbose=0
                      )

    def predict(self, x_test):
        y_valid_pred = self.model.predict(x_test/self.normalize_factor)
        return y_valid_pred.flatten()

In [453]:
# LSTM class
class PLSTM:
    def __init__(self, input_dim, type, normalize_factor=0):
        self.normalize_factor = normalize_factor if normalize_factor > 0 else 1
        optimizer = Adam()
        self.model = Sequential()
        h_n1 = 100 if type in [1,3,4] else 200
        self.model.add(InputLayer(input_shape=(input_dim,)))
        self.model.add(LSTM(h_n1))
        if type>2:
            h_n2 = 50 if type == 3 else 100
            self.model.add(LSTM(h_n2))
            if type==4:
                self.model.add(Dense(32))
        self.model.add(Dense(1))
        self.model.compile(loss="mean_squared_error", optimizer=optimizer, metrics=["accuracy", "mean_absolute_error"])
    
    def fit(self,x_train,y_train):
        self.model.fit(x_train/self.normalize_factor, y_train,
                        epochs=50,
                        batch_size=128,
                        verbose=0
                      )

    def predict(self, x_test):
        y_valid_pred = self.model.predict(x_test/self.normalize_factor)
        return y_valid_pred.flatten()

In [454]:
# CNN-LSTM class
class CNNLSTM:
    def __init__(self, input_dim, type, normalize_factor=0):
        self.normalize_factor = normalize_factor if normalize_factor > 0 else 1
        optimizer = Adam()
        self.model = Sequential()
        self.h_n1 = 100 if type == 1 else 200
        self.filter1 = 32 if type == 1 else 64
        self.filter2 = 64 if type == 1 else 128
        self.model.add(Conv1D(self.filter1, 2))
        self.model.add(Conv1D(self.filter2, 2))
        self.model.add(MaxPooling1D(pool_size=2))
        self.model.add(LSTM(self.h_n1))
        if type==2:
            self.model.add(Dense(32))
        self.model.add(Dense(1))
        self.model.compile(loss="mean_squared_error", optimizer=optimizer, metrics=["accuracy", "mean_absolute_error"])
    
    def fit(self,x_train,y_train):
        self.model.fit(x_train/self.normalize_factor, y_train,
                        epochs=50,
                        batch_size=128,
                        verbose=0
                      )

    def predict(self, x_test):
        y_valid_pred = self.model.predict(x_test/self.normalize_factor)
        return y_valid_pred.flatten()

In [455]:
def create_models(input_dim):
    # SVR
    svr = SVR(kernel='rbf', C=1, tol=1e-3)

    # FFNN
    ffnn = FFNN(input_dim)

    # LSTM1
    lstm1 = PLSTM(input_dim, 1)

    # LSTM2
    lstm2 = PLSTM(input_dim, 2)

    # LSTM3
    lstm3 = PLSTM(input_dim, 3)

    # LSTM4
    lstm4 = PLSTM(input_dim, 4)

    # CNN-LSTM1
    cnnlstm1 = CNNLSTM(input_dim, 1)

    # CNN-LSTM2
    cnnlstm2 = CNNLSTM(input_dim, 2)

    labels = ["SVR", "FFNN", "LSTM1", "LSTM2", "LSTM3", "LSTM4", "CNN-LSTM1", "CNN-LSTM2"]
    models = [svr, ffnn, lstm1, lstm2, lstm3, lstm4, cnnlstm1, cnnlstm2]

    return labels, models

In [456]:
entries = [4, 6, 9]
for entry in entries:
    #Creating dataset
    train_x = []
    train_y = []
    for j in range(entry, len(train_set)):
        train_x.append(train_set[j-entry:j])
        train_y = np.append(train_y, train_set[j])
    train_x = np.array(train_x)
    print(train_x.shape)
    print(train_y.shape, "\n")

    labels, models = create_models(entry)
    for i in range(len(models)):
        print(labels[i])
        models[i].fit(train_x, train_y)

(1002, 4)
(1002,) 



ValueError: Input 0 of layer lstm_225 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 4)