In [1]:
# IMPORTING IMPORTANT LIBRARIES
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np 
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
import preprocessing 

# FOR REPRODUCIBILITY
# Seeds NumPy's global random number generator only.
# NOTE(review): the notebook runs on the TensorFlow backend, which keeps its
# own random state — confirm whether a backend seed is also required for
# training runs to be repeatable.
np.random.seed(7)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [6]:
# IMPORTING DATASET
# Only columns 1-4 of the CSV are loaded; the code below addresses 'High',
# 'Low' and 'Close' by name.
# NOTE(review): presumably the four columns are Open/High/Low/Close — confirm
# against the CSV header.
dataset = pd.read_csv('apple_share_price.csv', usecols=[1,2,3,4])
# Reverse the row order (the original index labels travel with the rows).
# NOTE(review): presumably the file is stored newest-first — confirm.
dataset = dataset.reindex(index = dataset.index[::-1])

# TAKING DIFFERENT INDICATORS FOR PREDICTION
OHLC_avg_org = dataset.mean(axis = 1)  # row-wise mean over every loaded column
HLC_avg = dataset[['High', 'Low', 'Close']].mean(axis = 1)
close_val = dataset[['Close']]

# PREPARATION OF TIME SERIES DATASET
# Turn the 1-D series into an (n, 1) column vector before scaling.
OHLC_avg_rs = np.reshape(OHLC_avg_org.values, (len(OHLC_avg_org),1))
scaler = MinMaxScaler(feature_range=(0, 1))
# NOTE(review): the scaler is fitted on the whole series, test rows included,
# so the test range influences the normalization — confirm this is acceptable.
OHLC_avg = scaler.fit_transform(OHLC_avg_rs)  # normalize to [0, 1]

# TRAIN-TEST SPLIT
# The first 75% of the rows form the training set, the remainder the test set.
# The split position has its own name so train_OHLC/test_OHLC are only ever
# bound to arrays.
train_size = int(len(OHLC_avg) * 0.75)
train_OHLC, test_OHLC = OHLC_avg[:train_size, :], OHLC_avg[train_size:, :]
print("train_OHLC.len:%d test_OHLC.len:%d" % (len(train_OHLC), len(test_OHLC)))
print("train_OHLC:%r" % (train_OHLC))

# TIME-SERIES DATASET (FOR TIME T, VALUES FOR TIME T+1)
# NOTE(review): new_dataset is a project helper; its second argument is 0 here
# while step_size below is 1 — confirm what that parameter means.
trainX_ts, trainY = preprocessing.new_dataset(train_OHLC, 0)
testX_ts, testY = preprocessing.new_dataset(test_OHLC, 0)
print("trainX_ts(len:%d):%r" % (len(trainX_ts), trainX_ts))
print("trainY(len:%d):%r" % (len(trainY), trainY))

# RESHAPING TRAIN AND TEST DATA
# Insert a length-1 middle axis: (rows, cols) -> (rows, 1, cols), the 3-D
# layout passed to the LSTM model.
trainX = np.reshape(trainX_ts, (trainX_ts.shape[0], 1, trainX_ts.shape[1]))
testX = np.reshape(testX_ts, (testX_ts.shape[0], 1, testX_ts.shape[1]))
step_size = 1
print("trainX.reshape:%r" % (trainX))

train_OHLC.len:1248 test_OHLC.len:416
train_OHLC:array([[0.01530745],
       [0.0196717 ],
       [0.02058364],
       ...,
       [0.57970731],
       [0.57634184],
       [0.57117422]])
trainX_ts(len:1246):array([[0.01530745],
       [0.0196717 ],
       [0.02058364],
       ...,
       [0.61776967],
       [0.60029095],
       [0.57970731]])
trainY(len:1246):array([0.0196717 , 0.02058364, 0.02347143, ..., 0.60029095, 0.57970731,
       0.57634184])
trainX.reshape:array([[[0.01530745]],

       [[0.0196717 ]],

       [[0.02058364]],

       ...,

       [[0.61776967]],

       [[0.60029095]],

       [[0.57970731]]])


In [2]:
# LSTM MODEL
# Two stacked LSTM layers feeding a single linear output unit. The first LSTM
# returns its full output sequence so the second LSTM receives 3-D input.
model = Sequential([
    LSTM(units=32, return_sequences=True, input_shape=(1, step_size)),
    LSTM(units=16),
    Dense(units=1),
    Activation('linear'),
])

# MODEL COMPILING
# Mean squared error loss; other optimizers (SGD, adam, ...) can be swapped in
# here for comparison.
model.compile(optimizer='adagrad', loss='mean_squared_error')

# Training
# One sample per gradient update, five passes over the training data,
# one log line per epoch.
model.fit(x=trainX, y=trainY, batch_size=1, epochs=5, verbose=2)

train_OHLC.len:1248 test_OHLC.len:416
trainX.reshape:array([[[0.01530745]],

       [[0.0196717 ]],

       [[0.02058364]],

       ...,

       [[0.61776967]],

       [[0.60029095]],

       [[0.57970731]]])
Epoch 1/5
 - 6s - loss: 0.0046
Epoch 2/5
 - 6s - loss: 3.1900e-04
Epoch 3/5
 - 5s - loss: 2.7418e-04
Epoch 4/5
 - 5s - loss: 2.3755e-04
Epoch 5/5
 - 5s - loss: 2.1279e-04


<keras.callbacks.History at 0x7fe47ef26450>

In [None]:
# PREDICTION
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# DE-NORMALIZING FOR PLOTTING
# De-normalized targets and series are stored under their own names, so
# trainY, testY and OHLC_avg keep their normalized contents. This cell (and
# the training cell) can therefore be re-run without feeding already
# de-normalized data back through the scaler or the model.
trainPredict = scaler.inverse_transform(trainPredict)
trainY_actual = scaler.inverse_transform([trainY])  # shape (1, n_train_samples)
testPredict = scaler.inverse_transform(testPredict)
testY_actual = scaler.inverse_transform([testY])    # shape (1, n_test_samples)

# TRAINING RMSE
trainScore = math.sqrt(mean_squared_error(trainY_actual[0], trainPredict[:,0]))
print('Train RMSE: %.2f' % (trainScore))

# TEST RMSE
testScore = math.sqrt(mean_squared_error(testY_actual[0], testPredict[:,0]))
print('Test RMSE: %.2f' % (testScore))

# CREATING SIMILAR DATASET TO PLOT TRAINING PREDICTIONS
# NaN-filled array with the shape of the full series; NaN entries are not
# drawn, so only the rows covered by predictions show up in the plot.
trainPredictPlot = np.full_like(OHLC_avg, np.nan)
trainPredictPlot[step_size:len(trainPredict)+step_size, :] = trainPredict

# CREATING SIMILAR DATASET TO PLOT TEST PREDICTIONS
# NOTE(review): the start offset and the trailing -1 must leave exactly
# len(testPredict) rows; this depends on how many samples
# preprocessing.new_dataset drops from each split — confirm if that helper
# changes.
testPredictPlot = np.full_like(OHLC_avg, np.nan)
testPredictPlot[len(trainPredict)+(step_size*2)+1:len(OHLC_avg)-1, :] = testPredict

# DE-NORMALIZING MAIN DATASET
OHLC_avg_actual = scaler.inverse_transform(OHLC_avg)

# PLOT OF MAIN OHLC VALUES, TRAIN PREDICTIONS AND TEST PREDICTIONS
plt.plot(OHLC_avg_actual, 'g', label = 'original dataset')
plt.plot(trainPredictPlot, 'r', label = 'training set')
plt.plot(testPredictPlot, 'b', label = 'predicted stock price/test set')
# Legend is placed outside the axes, to the right of the plot area.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.xlabel('Time in Days')
plt.ylabel('OHLC Value of Apple Stocks')
plt.show()

In [None]:
# PREDICT FUTURE VALUES
# testPredict has already been de-normalized by the prediction cell, so the
# last predicted value is mapped back into the scaler's normalized range
# before it is fed to the model, and the model output is de-normalized again
# afterwards. (Dividing the value by itself would always feed the constant
# 1.0 to the model.)
last_val = testPredict[-1]
print("last_val:%r" % (last_val,))
last_val_scaled = scaler.transform(np.reshape(last_val, (1, 1)))
print("last_val_scaled:%r" % (last_val_scaled,))
# The model expects 3-D input: one sample, one time step, one feature.
next_val_scaled = model.predict(np.reshape(last_val_scaled, (1,1,1)))
next_val = scaler.inverse_transform(next_val_scaled)
print("next_val:%r" % (next_val,))
# print(...) with a single pre-formatted string behaves the same on Python 2
# and Python 3; .item() extracts the Python scalar from a one-element array.
print("Last Day Value: %s" % last_val.item())
print("Next Day Value: %s" % next_val.item())