# 利用 GRU 神經網路完成 Yahoo Finance 的股票預測

(1) 擷取資料

(2) 資料可視化

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from pandas import read_csv
# Load a single column (column index 4) of the CSV file with pandas.
# The pure-Python parser engine is selected and the first 8 lines of the
# file are skipped.
dataItem = read_csv(
    'yahoo_stock.csv',
    usecols=[4],
    engine='python',
    skiprows=8,
)
# Take the underlying NumPy values of the frame and cast them to float32.
data = dataItem.values.astype('float32')
# Show the dimensions of the loaded array.
print(data.shape)
# Draw the raw series as a line chart.
plt.plot(data)
plt.show()

(3) 製作訓練資料與標籤

In [None]:
def GetDataAndLabel(data,TimeStep):
    """Build sliding-window samples and next-step labels from a 2-D series.

    Only column 0 of ``data`` is used.  For every start index ``s`` in
    ``range(len(data) - TimeStep)`` the sample is ``data[s:s+TimeStep, 0]``
    and the label is the value that immediately follows the window,
    ``data[s+TimeStep, 0]``.

    Args:
        data: 2-D array indexed as ``data[row, 0]``.
        TimeStep: number of consecutive rows in each sample window.

    Returns:
        A ``(samples, labels)`` tuple of NumPy arrays.  When ``data`` has
        ``TimeStep`` rows or fewer, both arrays are empty.
    """
    window_count = len(data) - TimeStep
    samples = [data[start:start + TimeStep, 0]
               for start in range(window_count)]
    labels = [data[start + TimeStep, 0] for start in range(window_count)]
    return np.array(samples), np.array(labels)

(4) 資料數據歸一化

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Scale the series into the [0, 1] range.
# The scaler is fitted on the training portion only: the leading
# int(len(data) * 0.9) rows, i.e. the same boundary that the train/test
# split uses.  Fitting on the whole series would let the min/max of the
# held-out test rows leak into preprocessing.  As a consequence, scaled
# test values may fall slightly outside [0, 1].
# NOTE: keep the 0.9 ratio below in sync with the train/test split ratio.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data[:int(len(data) * 0.9)])
data = scaler.transform(data)

(5) 將資料切割為訓練集與測試集

In [None]:
# Split the series in order into a training part and a test part (9:1).
TrainDataNum = int(len(data) * 0.9)
TestDataNum = len(data) - TrainDataNum
# Rows [0, TrainDataNum) form the training set; every row from
# TrainDataNum onwards forms the test set.
trainData, testData = data[:TrainDataNum], data[TrainDataNum:]
# Length of each input window passed to GetDataAndLabel.
TimeStep = 6
traindataNew, trainLabelNew = GetDataAndLabel(trainData, TimeStep)
testdataNew, testLabelNew = GetDataAndLabel(testData, TimeStep)
# Report the shape of every windowed array.
for caption, array in (
    ("traindataNew.shape :", traindataNew),
    ("trainLabelNew.shape :", trainLabelNew),
    ("testdataNew.shape :", testdataNew),
    ("testLabelNew.shape :", testLabelNew),
):
    print(caption, array.shape)

(6) 修改資料維度

In [None]:
# Give the training and test inputs the layout
# [batch_size, time_steps, input_dim] by appending an axis of size 1.
traindataNew = traindataNew.reshape(
    (traindataNew.shape[0], traindataNew.shape[1], 1))
testdataNew = testdataNew.reshape(
    (testdataNew.shape[0], testdataNew.shape[1], 1))
# Show the resulting 3-D shapes.
for caption, array in (
    ("traindataNew.shape :", traindataNew),
    ("testdataNew.shape :", testdataNew),
):
    print(caption, array.shape)

(7) 建立網路模型

In [None]:
from tensorflow.keras.layers import GRU, Dense
from tensorflow import keras
# Two stacked GRU layers followed by a single-unit dense output layer.
model = keras.Sequential()
# The first GRU returns the full sequence so that the second GRU receives
# one vector per time step.
model.add(GRU(128, input_shape=(TimeStep, 1), return_sequences=True))
# Only the first layer needs an input shape; later layers infer theirs from
# the preceding layer, so none is passed here.
model.add(GRU(64))
model.add(Dense(1))
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
model.summary()

(8) 網路編譯與訓練

In [None]:
# Compile and train the model.
# The task is regression (the loss is mean squared error), so classification
# accuracy carries no information here; mean absolute error is tracked as
# the additional metric instead.
model.compile(loss='mean_squared_error',
              optimizer='adam', metrics=['mean_absolute_error'])
# hist.history keeps the per-epoch values of the loss and the metric.
hist = model.fit(traindataNew, trainLabelNew,
                 epochs=250, batch_size=64, verbose=1)

(9) 損失值可視化

In [None]:
# Plot the training loss recorded for each epoch.
loss = hist.history["loss"]
epochs = range(len(loss))
# Draw on an explicit figure/axes pair instead of the implicit pyplot state.
fig, ax = plt.subplots()
ax.plot(epochs, loss, 'r-', label="Training loss")
ax.set_title('Training Loss')
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.legend()
plt.show()

(10) 預測訓練集與測試集

In [None]:
# Run the trained model over the training inputs first and the test inputs
# second to obtain the (still scaled) predictions for both sets.
trainPredict, testPredict = [
    model.predict(inputs) for inputs in (traindataNew, testdataNew)
]

(11) 反歸一化數值與圖表可視化

In [None]:
# Map the scaled predictions and labels back to the original value range.
trainRealPredict = scaler.inverse_transform(trainPredict)
# The 1-D label vectors are wrapped in a list, so they are passed to the
# scaler as a single row.
trainY = scaler.inverse_transform([trainLabelNew])
testRealPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testLabelNew])

# The full series in its original value range, used as the reference curve.
originaldata = scaler.inverse_transform(data)

# Arrays shaped like the full series and filled with NaN, so matplotlib
# draws nothing at positions that have no prediction.
PredtrainingData = np.full_like(data, np.nan)
PredtestData = np.full_like(data, np.nan)

# Training prediction i is the forecast for row i + TimeStep, so the
# training predictions occupy rows
# [TimeStep, len(trainPredict) + TimeStep).
PredtrainingData[TimeStep:
             len(trainPredict) + TimeStep, :] = trainRealPredict
# The test windows are cut from the tail of the series and the last test
# prediction is the forecast for the final row, so the test predictions
# occupy the last len(testRealPredict) rows, i.e. they start at
# len(trainPredict) + TimeStep * 2.  (Starting one row earlier would shift
# every test prediction one step into the past.)
test_start = len(data) - len(testRealPredict)
PredtestData[test_start:, :] = testRealPredict

# Plot the original series.
plt.plot(originaldata, color='green', label="Original data")
# Plot the predictions on the training set.
plt.plot(PredtrainingData, color='red', label="Train data Predict")
# Plot the predictions on the test set.
plt.plot(PredtestData, color='blue', label="Test data Predict")
plt.legend()
plt.show()