In [20]:
import numpy as np
import math
import matplotlib.pyplot as plt  
from sklearn.metrics import mean_squared_error,mean_absolute_error
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, Bidirectional 
from keras.layers import Dropout
from keras.callbacks import EarlyStopping

import sys
import xlwt
from sklearn.preprocessing import MinMaxScaler
import datetime,pickle,os,glob

from tqdm import tqdm_notebook as tqdm

def getStationList():
    """Return the station names to train on.

    Loads the full station list from ``pickles/stationList.pickle`` and removes
    (and prints) every station for which a result workbook named
    ``LSTMresult-<station>.xls`` already exists in ``excelFiles/LSTM``.

    NOTE(review): the filtered list is computed but not returned; a hard-coded
    subset is returned instead. This looks like a deliberate way to train only
    a few stations -- confirm before returning the filtered list.

    Returns:
        list: the hard-coded station names.

    Raises:
        FileNotFoundError: if the pickle or the ``excelFiles/LSTM`` directory
            does not exist.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open('pickles/stationList.pickle', 'rb') as handle:
        stationList = pickle.load(handle)

    # Look the result files up by path instead of os.chdir()-ing into the
    # directory: the working directory is process-wide state, and an
    # interruption inside the loop would otherwise leave the kernel stranded
    # in excelFiles/LSTM and break every later relative path.
    resultDir = os.path.join('excelFiles', 'LSTM')
    if not os.path.isdir(resultDir):
        # Fail early, as os.chdir() did, so a missing output directory is
        # noticed before any training starts.
        raise FileNotFoundError("No such directory: " + repr(resultDir))

    replaceDict = ['.xls','LSTMresult-']
    for direct in glob.glob(os.path.join(resultDir, '*.xls')):
        fileName = os.path.basename(direct)
        for w in replaceDict:
            fileName = fileName.replace(w,'')

        if fileName in stationList:
            stationList.remove(fileName)
            print(fileName)

    partStation = ['忠明','臺南','鳳山','花蓮']
    return partStation
def transfromData(trainRaw, testRaw,windowSize):
    """Scale the raw series and cut them into supervised windows.

    Args:
        trainRaw: 1-D sequence of raw training values.
        testRaw: 1-D sequence of raw test values.
        windowSize: window size forwarded to ``splitXy``.

    Returns:
        tuple: ``(sc, X_train, y_train, X_test, y_test)`` where ``sc`` is the
        ``MinMaxScaler`` fitted on the training data.
    """
    sc = MinMaxScaler(feature_range = (0, 1))

    # Fit the scaler on the training data only and reuse it for the test data.
    # Re-fitting on the test set would scale it differently from what the
    # model was trained on, and would leave `sc` holding test statistics when
    # predictions are later inverse-transformed. Test values outside the
    # training range may therefore fall outside [0, 1].
    npTrain = sc.fit_transform(np.array(trainRaw).reshape(-1,1))
    npTest = sc.transform(np.array(testRaw).reshape(-1,1))

    X_train, y_train = splitXy(npTrain,windowSize)
    X_test, y_test = splitXy(npTest,windowSize)
    return sc, X_train, y_train, X_test, y_test

def splitXy(data, windowSize):
    """Cut a column vector into shuffled (input window, target) samples.

    Each sample uses six consecutive values ``data[i-6:i, 0]`` as input and
    ``data[i + windowSize - 7]`` as target, i.e. the value ``windowSize - 6``
    steps after the last input. ``i`` runs over
    ``range(6, data.shape[0] - windowSize)``.

    Samples are shuffled in place with NumPy's global random state.

    Args:
        data: 2-D array indexed as ``data[row, 0]``.
        windowSize: controls the target offset and the loop's upper bound.

    Returns:
        tuple: ``(X, y)`` with ``X`` reshaped to ``(samples, 6, 1)`` and ``y``
        holding one target row per sample.
    """
    targetOffset = windowSize - 7
    samples = [
        data[end - 6:end, 0].tolist() + [data[end + targetOffset]]
        for end in range(6, data.shape[0] - windowSize)
    ]
    np.random.shuffle(samples)

    # The first six entries of each sample are inputs; the last is the target.
    X = np.array([sample[:6] for sample in samples])
    y = np.array([sample[-1] for sample in samples])
    print(y.shape)
    # Add the trailing feature axis.
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))
    return X,y
def buildModel():
    """Build and compile the stacked-LSTM regressor.

    The network is three 50-unit LSTM layers, each followed by 20% dropout,
    and a single-unit dense output. The first layer expects inputs of shape
    ``(6, 1)``. The model is compiled with the Adam optimizer and mean squared
    error loss.

    Returns:
        The compiled ``Sequential`` model.
    """
    # A Bidirectional first layer was tried earlier and left disabled:
    #   Bidirectional(LSTM(units=50, return_sequences=True), input_shape=(X_train.shape[1], 1))
    layers = [
        LSTM(units=50, return_sequences=True, input_shape=(6, 1)),
        Dropout(0.2),
        LSTM(units=50, return_sequences=True),
        Dropout(0.2),
        # The last recurrent layer returns only its final state for the dense head.
        LSTM(units=50),
        Dropout(0.2),
        Dense(units=1),
    ]
    regressor = Sequential(layers)
    regressor.compile(optimizer='adam', loss='mean_squared_error')
    return regressor


def Visualize(scaler=None, model=None, inputs=None, targets=None):
    """Print the test MSE and plot the first 100 real vs. predicted values.

    Args:
        scaler: fitted scaler used to map values back to the original scale.
            Defaults to the global ``sc``.
        model: trained model exposing ``predict``. Defaults to the global
            ``regressor``.
        inputs: model inputs. Defaults to the global ``X_test``.
        targets: scaled true targets. Defaults to the global ``y_test``.

    Any argument left as ``None`` falls back to the notebook global of the
    original name, so a bare ``Visualize()`` call behaves as before. It raises
    ``NameError`` if that global was never defined.
    """
    # Passing the objects explicitly is preferred; the globals are kept only
    # as a backward-compatible fallback.
    scaler = sc if scaler is None else scaler
    model = regressor if model is None else model
    inputs = X_test if inputs is None else inputs
    targets = y_test if targets is None else targets

    predicted = scaler.inverse_transform(model.predict(inputs))
    originY = scaler.inverse_transform(targets)
    # scikit-learn metrics take (y_true, y_pred); the value is the same for MSE.
    print("MSE : ["+str(mean_squared_error(originY, predicted))+"]")
    # Visualising the results
    plt.plot(originY[:100], color = 'red', label = 'Real')
    plt.plot(predicted[:100], color = 'blue', label = 'Predicted ')
    plt.legend()
    plt.show()
def writeExcelHead(sheet1,epochs,station):
    """Write the sheet header: station name and a numbered first column.

    Args:
        sheet1: worksheet-like object exposing ``write(row, col, value)``.
        epochs: number of rows to label; rows ``1..epochs`` of column 0 get
            the values ``1..epochs``.
        station: value written to cell (row 0, column 1).
    """
    sheet1.write(0, 1, station)
    # The label written in each row equals the row index.
    for rowIndex in range(1, epochs + 1):
        sheet1.write(rowIndex, 0, rowIndex)

##get data##

def fetchData(station,windowSize):
    """Load a station's raw train/test pickles and turn them into model data.

    Reads ``pickles/<station>2017trainRaw.pickle`` and
    ``pickles/<station>2017testRaw.pickle`` and passes both to
    ``transfromData``.

    Args:
        station: station name used to build the pickle file names.
        windowSize: window size forwarded to ``transfromData``.

    Returns:
        tuple: ``(sc, X_train, y_train, X_test, y_test)`` as returned by
        ``transfromData``.
    """
    def _loadRaw(suffix):
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open('pickles/'+station+suffix, 'rb') as handle:
            return pickle.load(handle)

    trainRawData = _loadRaw('2017trainRaw.pickle')
    testRawData = _loadRaw('2017testRaw.pickle')

    scaler, trainX, trainY, testX, testY = transfromData(trainRawData,testRawData,windowSize)
    return scaler, trainX, trainY, testX, testY

def train(model,epochs,windowSize,station):
    """Train ``model`` on one station, evaluate it, and save it.

    Args:
        model: compiled Keras model exposing ``fit``, ``predict`` and ``save``.
        epochs: number of single-epoch ``fit`` calls to run.
        windowSize: window size forwarded to ``fetchData``; ``windowSize - 6``
            is used as the suffix of the saved model's file name.
        station: station name, used to locate the data and name the model file.

    Returns:
        tuple: ``(mse, mae)`` computed on the test set after mapping targets
        and predictions back through the scaler returned by ``fetchData``.
    """
    sc, X_train, y_train, X_test, y_test = fetchData(station,windowSize)

    # Create the output directory before training so a missing folder cannot
    # cause the save to fail after the whole training run has completed.
    modelDir = 'model/LSTM'
    os.makedirs(modelDir, exist_ok=True)

    # One fit() call per epoch so tqdm can show per-epoch progress.
    for i in tqdm(range(epochs)):
        model.fit(X_train, y_train,validation_split=0.2, epochs = 1, batch_size = 32,verbose=0)

    predicted = sc.inverse_transform(model.predict(X_test))
    originY = sc.inverse_transform (y_test)

    # scikit-learn metrics take (y_true, y_pred); both metrics are symmetric,
    # so the values are unchanged.
    mse = mean_squared_error(originY, predicted)
    mae = mean_absolute_error(originY, predicted)

    model.save(modelDir+'/LSTM'+station+str(windowSize-6)+'.h5')
    return mse,mae




# Number of single-epoch fit() calls made for every model.
epochs = 250
stationList = getStationList()
# Not used in this cell; kept in case another cell relies on it.
col=1
for station in tqdm(stationList):
    print("training : " +station)
    MSEs = []
    MAEs = []
    # One freshly built model per window size. train() names the saved model
    # with windowSize-6, so window sizes 7..30 map to suffixes 1..24.
    for windowSize in tqdm(range(7,31)):
        model = buildModel()
        mse,mae = train(model,epochs,windowSize,station)
        MSEs.append(mse)
        MAEs.append(mae)

    book = xlwt.Workbook(encoding="utf-8")
    sheet1 = book.add_sheet("Sheet1")
    # The sheet holds one row per window size, not per epoch, so label exactly
    # len(MSEs) rows (1..24, matching windowSize-6) instead of `epochs` rows.
    writeExcelHead(sheet1,len(MSEs),station)
    # Column 1 holds the MSE and column 2 the MAE for each window size.
    for row, (mseValue, maeValue) in enumerate(zip(MSEs, MAEs), start=1):
        sheet1.write(row,1,mseValue)
        sheet1.write(row,2,maeValue)
    book.save("excelFiles/LSTM/LSTMresult-"+station+".xls")

    print('check point at ' + str(datetime.datetime.now()))






松山


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

training : 松山


HBox(children=(IntProgress(value=0, max=24), HTML(value='')))

(22883, 1)
(3947, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22882, 1)
(3946, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22881, 1)
(3945, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22880, 1)
(3944, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22879, 1)
(3943, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22878, 1)
(3942, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22877, 1)
(3941, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22876, 1)
(3940, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22875, 1)
(3939, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22874, 1)
(3938, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22873, 1)
(3937, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22872, 1)
(3936, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22871, 1)
(3935, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22870, 1)
(3934, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22869, 1)
(3933, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22868, 1)
(3932, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22867, 1)
(3931, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22866, 1)
(3930, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22865, 1)
(3929, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22864, 1)
(3928, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22863, 1)
(3927, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22862, 1)
(3926, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22861, 1)
(3925, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

(22860, 1)
(3924, 1)


HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

check point at 2019-07-27 10:48:07.554681
training : 臺中


HBox(children=(IntProgress(value=0, max=24), HTML(value='')))

FileNotFoundError: [Errno 2] No such file or directory: 'pickles/臺中2017trainRaw.pickle'

In [19]:
# Display the station list currently held in the kernel.
# NOTE(review): this cell ran as In [19], before the training cell (In [20]) that
# sits above it, and its output lists far more stations than the four names
# getStationList returns as written. The output likely predates the current
# code; re-run after a kernel restart to confirm.
stationList

['二林',
 '南投',
 '埔里',
 '大里',
 '彰化',
 '忠明',
 '沙鹿',
 '竹山',
 '線西',
 '西屯',
 '豐原',
 '三重',
 '中壢',
 '中山',
 '古亭',
 '土城',
 '基隆',
 '士林',
 '大同',
 '大園',
 '平鎮',
 '新店',
 '新莊',
 '板橋',
 '林口',
 '桃園',
 '永和',
 '汐止',
 '淡水',
 '菜寮',
 '萬華',
 '萬里',
 '觀音',
 '陽明',
 '龍潭',
 '冬山',
 '宜蘭',
 '三義',
 '新竹',
 '湖口',
 '竹東',
 '苗栗',
 '頭份',
 '臺東',
 '花蓮',
 '關山',
 '金門',
 '馬公',
 '馬祖',
 '臺南',
 '臺西',
 '善化',
 '嘉義',
 '安南',
 '崙背',
 '斗六',
 '新港',
 '新營',
 '朴子',
 '麥寮',
 '仁武',
 '前金',
 '前鎮',
 '大寮',
 '小港',
 '屏東',
 '左營',
 '復興',
 '恆春',
 '林園',
 '楠梓',
 '橋頭',
 '潮州',
 '美濃',
 '鳳山']

In [None]:
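# Place names (presumably the cities to cover -- confirm): 基隆 Keelung, 松山 Songshan, 淡水 Tamsui, 新竹 Hsinchu, 苗栗 Miaoli, 台中 Taichung, 台南 Tainan, 高雄 Kaohsiung, 屏東 Pingtung, 花蓮 Hualien, 宜蘭 Yilan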

In [None]:
    # Station keys: '松山' plus the four names hard-coded as partStation in getStationList.
    # NOTE(review): key_list is not referenced by any visible cell -- confirm it is still needed.
    key_list = ['松山','忠明','臺南','鳳山','花蓮']