Import Libraries

In [103]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D

In [104]:
# from google.colab import drive
# drive.mount('/gdrive')
# %cd /gdrive

In [105]:
# from google.colab import drive
# drive.mount('/content/drive')

Load Dataset

In [106]:
# Read the raw price history from disk; the first CSV column is used as the row index.
csv_path = 'NASDAQCompositeDataset.csv'
dataset = pd.read_csv(csv_path, index_col=0)
# Optional: flip the row order of the frame.
#dataset = dataset.iloc[::-1] # reverse the order of dataset


Dataset Preview

In [107]:
# Display the frame exactly as loaded, to inspect its columns and row count.
dataset

Unnamed: 0_level_0,Date,Price,Open,High,Low,Vol.,Change %
No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,9/6/2017,6393.31,6394.35,6407.40,6356.20,419.28M,0.28%
1,9/7/2017,6397.87,6402.94,6413.07,6379.81,433.05M,0.07%
2,9/8/2017,6360.19,6389.65,6391.41,6354.96,408.11M,-0.59%
3,9/11/2017,6432.26,6411.18,6439.11,6410.71,431.57M,1.13%
4,9/12/2017,6454.28,6448.81,6455.02,6429.54,404.69M,0.34%
...,...,...,...,...,...,...,...
1372,2/17/2023,11787.27,11777.50,11803.22,11673.21,966.30M,-0.58%
1373,2/21/2023,11492.30,11640.37,11684.14,11491.18,1.02B,-2.50%
1374,2/22/2023,11507.07,11517.20,11582.52,11445.17,942.98M,0.13%
1375,2/23/2023,11590.40,11636.93,11638.97,11432.58,903.17M,0.72%


Preprocessing Data

1. Filtering Data

In [108]:
# Discard the columns that are not modelled, leaving Date and Open.
dataset = dataset.drop(['Price', 'High', 'Low', 'Vol.', "Change %"], axis=1)
dataset['Date'] = pd.to_datetime(dataset['Date'])
# Open arrives as text with thousands separators. Strip the commas and convert to a
# numeric dtype immediately: numeric aggregations applied to this frame afterwards
# (such as a resample mean) need real numbers, not strings.
dataset['Open'] = pd.to_numeric(dataset['Open'].str.replace(',', '', regex=False))
dataset

Unnamed: 0_level_0,Date,Open
No,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2017-09-06,6394.35
1,2017-09-07,6402.94
2,2017-09-08,6389.65
3,2017-09-11,6411.18
4,2017-09-12,6448.81
...,...,...
1372,2023-02-17,11777.50
1373,2023-02-21,11640.37
1374,2023-02-22,11517.20
1375,2023-02-23,11636.93


2. Filling Missing Values for Missing Dates

In [109]:
# Re-index onto a daily calendar: days with no rows become NaN after the daily mean,
# and the forward fill then carries the last known value into those gaps.
dataset = (
    dataset
    .set_index('Date')
    .resample('D')
    .mean()
    .reset_index()
    .ffill()
)
dataset

Unnamed: 0,Date,Open
0,2017-09-06,6394.35
1,2017-09-07,6402.94
2,2017-09-08,6389.65
3,2017-09-09,6389.65
4,2017-09-10,6389.65
...,...,...
1993,2023-02-20,11777.50
1994,2023-02-21,11640.37
1995,2023-02-22,11517.20
1996,2023-02-23,11636.93


3. Reshape Data

4. Normalize Data with MinMaxScaler (Excluding Date)

In [113]:
from sklearn.preprocessing import MinMaxScaler

# Make sure Open is numeric, then view it as a single-column 2-D array, which is the
# layout the scaler is fitted on here.
dataset["Open"] = pd.to_numeric(dataset["Open"])
open_values = dataset[["Open"]].to_numpy()

# Rescale the Open values into the [0, 1] interval.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(open_values)
scaled_data

array([[0.0138385 ],
       [0.01470943],
       [0.01336198],
       ...,
       [0.53323614],
       [0.54537537],
       [0.52177722]])

5. Split Data

In [None]:
# dataset['Date'] = dataset.index
# dataset = dataset.set_index('Date')

6. Create Data Train

7. Feature Extraction with CNN

8. Flattening Feature Map

Split Dataset into 3 Different Time Ranges

In [None]:
# Boundary dates of the pandemic window; both endpoints belong to the "while" segment.
pandemic_start = "3/11/2020"
pandemic_end = "9/13/2022"
dates = dataset["Date"]

data_before_pandemic = dataset[dates < pandemic_start]
data_while_pandemic = dataset[dates.between(pandemic_start, pandemic_end)]
data_after_pandemic = dataset[dates > pandemic_end]

# Dataset before the pandemic

In [None]:
# Rows dated strictly before the pandemic start boundary.
data_before_pandemic

# Dataset during the pandemic

In [None]:
# Rows dated within the pandemic window, boundaries included.
data_while_pandemic

# Dataset after the pandemic

In [None]:
# Rows dated strictly after the pandemic end boundary.
data_after_pandemic

Normalize Dataset

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Scale the Open price only, as a single-column 2-D array.
# The windowing helper (create_dataset) reads column 0 of this array and the network
# emits one value per sample, so the scaler must be fitted on exactly that one
# feature. Date is deliberately left out: if it were scaled alongside Open it would
# occupy column 0, the model would learn timestamps instead of prices, and the
# scaler could not invert the single-column predictions.
# NOTE(review): the scaler is fitted on the whole series, i.e. before the train/test
# split, so the test period's min/max influence the training data. Consider fitting
# on the training rows only.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset["Open"] = pd.to_numeric(dataset["Open"])
dataset = scaler.fit_transform(dataset[["Open"]].to_numpy())


Split Dataset Into Train and Test Sets

In [None]:
# Chronological split: the first 80% of the rows train the model, the rest test it.
n_rows = len(dataset)
train_size = int(n_rows * 0.8)
test_size = n_rows - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]

Define a function to create a dataset with look back

In [None]:
def create_dataset(dataset, look_back=1):
    """Build (window, next value) training pairs from column 0 of ``dataset``.

    Every sample is a run of ``look_back`` consecutive column-0 values, and its
    target is the column-0 value that immediately follows that run.

    Args:
        dataset: 2-D array that can be indexed as ``dataset[rows, 0]``.
        look_back: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays holding ``len(dataset) - look_back - 1``
        samples (none if that count is not positive). Because of that bound, the
        final row of ``dataset`` is never used as a target.
    """
    n_samples = len(dataset) - look_back - 1
    windows = [dataset[start:start + look_back, 0] for start in range(n_samples)]
    targets = [dataset[start + look_back, 0] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

Reshape dataset into X=t and Y=t+1

In [None]:
# Number of past observations that make up one input window.
look_back = 3

# Turn each split into (window, next value) pairs.
trainX, trainY = create_dataset(train, look_back=look_back)
testX, testY = create_dataset(test, look_back=look_back)

Reshape input to be [samples, time steps, features]

In [None]:
# Append a trailing axis of size 1 so the inputs are (samples, time steps, features),
# matching the (look_back, 1) input shape declared for the model.
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], 1)
testX = testX.reshape(testX.shape[0], testX.shape[1], 1)

Create and Fit the CNN-LSTM model

In [None]:
# CNN-LSTM regressor: a 1-D convolution and max-pooling stage feeds an LSTM, and a
# single dense unit produces the one-step-ahead prediction.
model = Sequential([
    Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(look_back, 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    LSTM(100),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=64, verbose=2)

Make predictions

In [None]:
# Run the fitted model on both splits.
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# NaN-filled canvas shaped like the full series; the train predictions are written
# starting at row `look_back`, since the first window's target sits at that row.
trainPredictPlot = np.full_like(dataset, np.nan)
train_stop = len(trainPredict) + look_back
trainPredictPlot[look_back:train_stop, :] = trainPredict

# Same for the test predictions. The start offset skips the train rows plus the
# first test window; it relies on create_dataset's sample count.
testPredictPlot = np.full_like(dataset, np.nan)
test_start = len(trainPredict) + (look_back * 2) + 1
testPredictPlot[test_start:len(dataset) - 1, :] = testPredict


Invert predictions back to original scale

In [None]:
# Undo the min-max scaling explicitly, using the parameters of feature 0. That is the
# column create_dataset windows over, and therefore the quantity the model predicts.
# MinMaxScaler maps x -> x * scale_ + min_, so the inverse is (x - min_) / scale_.
# scaler.inverse_transform is not called directly because neither the single-column
# predictions nor the single-row targets are guaranteed to have as many columns as
# the scaler was fitted on, which makes the call fail on a shape mismatch.
feature_min = scaler.min_[0]
feature_scale = scaler.scale_[0]

trainPredict = (trainPredict - feature_min) / feature_scale
testPredict = (testPredict - feature_min) / feature_scale

# Targets are kept as a single row (shape (1, n)) because the RMSE step reads them
# back with [0].
trainY = (np.asarray([trainY]) - feature_min) / feature_scale
testY = (np.asarray([testY]) - feature_min) / feature_scale


Calculate root mean squared error (RMSE)

In [None]:
from sklearn.metrics import mean_squared_error

# RMSE between the targets (first row of each target array) and the predictions
# (first column of each prediction array).
trainScore = np.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
testScore = np.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))

print('Train RMSE: %.2f' % trainScore)
print('Test RMSE: %.2f' % testScore)