In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas.io import sql
from sqlalchemy import create_engine
from matplotlib.pyplot import savefig

import keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM

import math

%matplotlib inline

# RNN Explanation

In [None]:
# Data Collect
# NOTE(review): the connection URL and the SQL query are blank in this copy of
# the notebook; supply them (ideally from configuration) before running.
engine = create_engine('')

# Run the query inside a `with` block so the connection is released as soon as
# the rows are loaded instead of staying open for the lifetime of the kernel.
with engine.connect() as cnn:
    data = sql.read_sql("", cnn)
data.columns = ['idx','Location','Date','SO2','CO','O3','NO2','PM10','PM25']
data = data[['Location', 'Date', 'PM10']]

# Seoul rows only, in chronological order, indexed by date.
data_seoul = (
    data[data.Location.isin(['서울'])]
    .sort_values(['Date'], ascending=[True])
    .drop('Location', axis=1)
    .set_index(['Date'])
)
print(len(data_seoul))

# 30-row moving average. `pd.rolling_mean` was deprecated in pandas 0.18 and
# later removed; `.rolling(30).mean()` is its replacement. The first 29 rows
# have no complete window (NaN) and are dropped.
data_seoul = data_seoul.rolling(30).mean()
data_seoul = data_seoul[29:]

# Chronological split: the first 2864 smoothed rows train, the rest test.
data_seoul_train = data_seoul[:2864]
data_seoul_test = data_seoul[2864:]

# RNN Example
PM10_train = np.round(data_seoul_train.PM10.tolist(), 3)
PM10_test = np.round(data_seoul_test.PM10.tolist(), 3)

from scipy.linalg import toeplitz


def _window_matrix(series):
    """Return a matrix whose row ``i`` is ``series[i:]`` right-padded with zeros.

    The matrix is a left-right flipped Toeplitz matrix with ``len(series) - 1``
    rows and ``len(series)`` columns, so ``matrix[i, :k]`` is the length-``k``
    window starting at position ``i`` and ``matrix[i, k]`` is the value that
    immediately follows that window.
    """
    first_column = np.r_[series[-1], np.zeros(series.shape[0] - 2)]
    return np.fliplr(toeplitz(first_column, series[::-1]))


# Sliding-window matrices (later expanded to 3-D tensors for the RNN).
devision_train = _window_matrix(PM10_train)
# Fix: the test matrix used to be built from the reversed *training* series,
# so X_test / Y_test contained training values instead of held-out data.
devision_test = _window_matrix(PM10_test)

# Inputs: the first 10 values of every row, with a trailing feature axis.
# The last row of each matrix is dropped.
# NOTE(review): rows close to the end of each matrix still contain the zero
# padding inside the window and/or the target; confirm whether those samples
# should be trimmed as well.
X_train = devision_train[:-1, :10][:, :, np.newaxis]
X_test = devision_test[:-1, :10][:, :, np.newaxis]

# Targets: the value that directly follows each 10-step window.
Y_train = devision_train[:-1, 10]
Y_test = devision_test[:-1, 10]

# Debug helpers (uncomment to inspect shapes and the first two samples):
#print(X_train.shape, Y_train.shape)
#print("X training values", X_train[:2])
#print("Y training values", Y_train[:2])

# Two stacked panels: each shows one 10-step input window (blue) and the
# target value that follows it (red) for the first two training samples.
fig = plt.figure(figsize=(15, 6))
for panel_code, sample_idx, panel_title in (
    (211, 0, "First sample sequence"),
    (212, 1, "Second sample sequence"),
):
    plt.subplot(panel_code)
    # Sample k covers time steps k .. k+9 and its target sits at step k+10.
    window_steps = list(range(sample_idx, sample_idx + 10))
    plt.plot(window_steps, X_train[sample_idx].flatten(), 'bo-', label="input sequence")
    plt.plot([sample_idx + 10], Y_train[sample_idx], 'ro', label="target")
    plt.xlim(0, 13)
    plt.ylim(60, 70)
    plt.legend()
    plt.title(panel_title)
plt.tight_layout()
plt.savefig("Test.jpg", dpi=500, bbox_inches='tight')
plt.show()

In [None]:
# Display the last rows of `data_seoul` (as left by the data-collection cell,
# i.e. the 30-row rolling mean of the Seoul series).
data_seoul.tail()

In [None]:
import matplotlib

# Larger tick and legend fonts for the figures drawn afterwards.
matplotlib.rc('xtick', labelsize=15)
matplotlib.rc('ytick', labelsize=15)
matplotlib.rc('legend', fontsize=15)

# Rebuild the un-smoothed Seoul frame from `data`: Seoul rows only, in
# chronological order, indexed by date.
data_seoul = (
    data[data.Location.isin(['서울'])]
    .sort_values(['Date'], ascending=[True])
    .drop('Location', axis=1)
    .set_index(['Date'])
)

# 30-row moving average kept in a separate frame so the raw and smoothed
# series can be plotted independently. `pd.rolling_mean` was deprecated in
# pandas 0.18 and later removed; `.rolling(30).mean()` is its replacement.
data_seoul_re = data_seoul.rolling(30).mean()

In [None]:
# Raw Seoul series on a wide canvas.
ax = data_seoul.plot(figsize=(16, 10))

# Year labels: one year-end timestamp per year, formatted as 'YYYY'.
date_range = pd.date_range('2005', '2017', freq='A').map(lambda stamp: stamp.strftime('%Y'))

# One tick every 365 rows (12 positions), paired with the 12 year labels.
ax.set_xticks(np.arange(0, 4107, 365))
ax.set_xticklabels(date_range)

ax.set_ylim(0, 400)

In [None]:
# 30-row moving average of the Seoul series on a wide canvas.
ax = data_seoul_re.plot(figsize=(16, 10))

# Year labels: one year-end timestamp per year, formatted as 'YYYY'.
date_range = pd.date_range('2005', '2017', freq='A').map(lambda stamp: stamp.strftime('%Y'))

# One tick every 365 rows (12 positions), paired with the 12 year labels.
ax.set_xticks(np.arange(0, 4107, 365))
ax.set_xticklabels(date_range)
ax.set_ylim(0, 400)

# RNN Example: Air Quality Forecasting with Keras

In [None]:
# NOTE(review): the connection URL and the SQL query are blank in this copy of
# the notebook; supply them (ideally from configuration) before running.
engine = create_engine('')

# Run the query inside a `with` block so the connection is released as soon as
# the rows are loaded instead of staying open for the lifetime of the kernel.
with engine.connect() as cnn:
    data = sql.read_sql("", cnn)
data.columns = ['idx','Location','Date','SO2','CO','O3','NO2','PM10','PM25']
# PM10 is deliberately listed first: it becomes column 0 of `dataset` below.
data = data[['Location', 'Date', 'PM10', 'SO2', 'CO', 'O3', 'NO2']]

# Seoul rows only, in chronological order, indexed by date.
data_seoul = (
    data[data.Location.isin(['서울'])]
    .sort_values(['Date'], ascending=[True])
    .drop('Location', axis=1)
    .set_index(['Date'])
)

# 30-row moving average. `pd.rolling_mean` was deprecated in pandas 0.18 and
# later removed; `.rolling(30).mean()` is its replacement. The first 29 rows
# have no complete window (NaN) and are dropped.
data_seoul = data_seoul.rolling(30).mean()
data_seoul = data_seoul[29:]

# test & train set
dataset = data_seoul.values
dataset = np.round(dataset.astype('float32'), 3)

# Chronological 80/20 split (no shuffling).
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
print(len(train), len(test))

# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back, include_last=False):
    """Turn column 0 of a 2-D array into (window, next value) training pairs.

    Parameters
    ----------
    dataset : 2-D array-like supporting ``dataset[i:j, 0]`` indexing.
        Only column 0 is read.
    look_back : int
        Number of consecutive values in every input window.
    include_last : bool, optional
        The historical behaviour (default, ``False``) stops one sample early,
        so the final value of ``dataset`` is never used as a target. Pass
        ``True`` to include that last window as well. The default is kept
        because existing plotting code relies on the shorter length.

    Returns
    -------
    (dataX, dataY) : tuple of numpy arrays
        ``dataX[i]`` is ``dataset[i:i + look_back, 0]`` and ``dataY[i]`` is
        ``dataset[i + look_back, 0]``. Both are empty when ``dataset`` is too
        short to produce a single pair.
    """
    n_samples = len(dataset) - look_back
    if not include_last:
        n_samples -= 1

    dataX, dataY = [], []
    # range() of a non-positive count is empty, so short inputs yield no pairs.
    for start in range(n_samples):
        stop = start + look_back
        dataX.append(dataset[start:stop, 0])
        dataY.append(dataset[stop, 0])
    return np.array(dataX), np.array(dataY)

# Number of past observations handed to the model for each prediction.
look_back = 14

# Build the (window, next value) pairs for the train and test partitions with
# the same window length.
(trainX, trainY), (testX, testY) = (
    create_dataset(partition, look_back) for partition in (train, test)
)

print(trainX)
print(trainY)

# callback loss history set
class LossHistory(keras.callbacks.Callback):
    """Keras callback that records the reported loss after every batch.

    After training, ``losses`` holds one entry per processed batch, in order.
    """

    def on_train_begin(self, logs=None):
        """Start every training run with an empty loss list."""
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        """Append the 'loss' entry of ``logs`` (``None`` when it is absent)."""
        # `None` replaces the former mutable `{}` default argument.
        logs = logs or {}
        self.losses.append(logs.get('loss'))

batch_size = 7
history = LossHistory()

# Baseline network: a single dense unit with ReLU over the `look_back` inputs.
model = Sequential([Dense(1, input_dim=look_back, activation='relu')])
model.compile(optimizer='rmsprop', loss='mean_squared_error')

# shuffle=False keeps the batches in chronological order; the test windows are
# passed as validation data so their loss is reported after every epoch.
# NOTE(review): `nb_epoch` is the Keras 1 spelling of the epoch-count argument.
model.fit(
    trainX,
    trainY,
    batch_size=batch_size,
    nb_epoch=50,
    shuffle=False,
    validation_data=(testX, testY),
    callbacks=[history],
)

# The model is compiled without extra metrics, so evaluate() yields the MSE.
trainScore = model.evaluate(trainX, trainY, verbose=0)
testScore = model.evaluate(testX, testY, verbose=0)
print('Train Score: %.2f MSE (%.2f RMSE)' % (trainScore, math.sqrt(trainScore)))
print('Test Score: %.2f MSE (%.2f RMSE)' % (testScore, math.sqrt(testScore)))

trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
print(len(trainPredict))
print(len(testPredict))

# The windows and targets are taken from column 0 of `dataset` only, so that
# column is the baseline to compare against. Plotting the whole array drew
# every other column in the same colour and duplicated each prediction line
# once per column.
target_series = dataset[:, 0]

# shift train predictions for plotting: prediction i estimates the value at
# position i + look_back.
trainPredictPlot = np.full(len(dataset), np.nan)
trainPredictPlot[look_back:look_back + len(trainPredict)] = trainPredict.ravel()

# shift test predictions for plotting: the test partition starts at
# len(train), and its first target is `look_back` steps after that.
testPredictPlot = np.full(len(dataset), np.nan)
test_start = len(train) + look_back
testPredictPlot[test_start:test_start + len(testPredict)] = testPredict.ravel()

# plot baseline and predictions
fig = plt.figure(1, figsize=(16, 10))
plt.plot(target_series, color='blue', label='dataset')
plt.plot(trainPredictPlot, color='green', label='train prediction')
plt.plot(testPredictPlot, color='red', label='test prediction')
# The legend is created after all three lines exist so each one is labelled.
plt.legend()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas.io import sql
from sqlalchemy import create_engine
from matplotlib.pyplot import savefig

import keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM

import math

%matplotlib inline

# Data Collect
import os

# Security fix: the connection URL, including user name and password, used to
# be hard-coded here. It is now read from the environment, e.g.
#   AIR_QUALITY_DB_URL="mysql+pymysql://USER:PASSWORD@HOST/DB?charset=utf8"
# The previously committed credentials should be treated as leaked and rotated.
db_url = os.environ.get("AIR_QUALITY_DB_URL")
if not db_url:
    raise RuntimeError(
        "Set the AIR_QUALITY_DB_URL environment variable to the SQLAlchemy "
        "connection URL of the air-quality database."
    )
engine = create_engine(db_url)

# Run the query inside a `with` block so the connection is released as soon as
# the rows are loaded instead of staying open for the lifetime of the kernel.
with engine.connect() as cnn:
    data = sql.read_sql("select * from 20161122_day_air_quality", cnn)
data.columns = ['idx','Location','Date','SO2','CO','O3','NO2','PM10','PM25']
data = data[['Location', 'Date', 'PM10', 'O3', 'NO2', 'CO', 'SO2']]

# Seoul rows only, in chronological order, indexed by date.
data_seoul = (
    data[data.Location.isin(['서울'])]
    .sort_values(['Date'], ascending=[True])
    .drop('Location', axis=1)
    .set_index(['Date'])
)

# 30-row moving average. `pd.rolling_mean` was deprecated in pandas 0.18 and
# later removed; `.rolling(30).mean()` is its replacement. The first 29 rows
# have no complete window (NaN) and are dropped.
data_seoul = data_seoul.rolling(30).mean()
data_seoul = data_seoul[29:]

# Chronological split. The test slice used to start at 3264, which silently
# dropped row 3263 from both partitions; both slices now share one boundary.
TRAIN_ROWS = 3263
data_seoul_train = data_seoul[:TRAIN_ROWS]
data_seoul_test = data_seoul[TRAIN_ROWS:]

# RNN Example
PM10_train = np.round(data_seoul_train.PM10.tolist(), 3)
PM10_test = np.round(data_seoul_test.PM10.tolist(), 3)

from scipy.linalg import toeplitz


def _window_matrix(series):
    """Return a matrix whose row ``i`` is ``series[i:]`` right-padded with zeros.

    The matrix is a left-right flipped Toeplitz matrix with ``len(series) - 1``
    rows and ``len(series)`` columns, so ``matrix[i, :k]`` is the length-``k``
    window starting at position ``i`` and ``matrix[i, k]`` is the value that
    immediately follows that window.
    """
    first_column = np.r_[series[-1], np.zeros(series.shape[0] - 2)]
    return np.fliplr(toeplitz(first_column, series[::-1]))


# Sliding-window matrices (expanded to 3-D tensors below): one feature,
# 30 time steps per sample.
devision_train = _window_matrix(PM10_train)
# Fix: the test matrix used to be built from the reversed *training* series,
# so X_test / Y_test contained training values instead of held-out data.
devision_test = _window_matrix(PM10_test)

# The moving average spans 30 rows, so 30-step input windows are used as well.
# The last row of each matrix is dropped; a trailing feature axis is added.
# NOTE(review): rows close to the end of each matrix still contain the zero
# padding inside the window and/or the target; confirm whether those samples
# should be trimmed as well.
X_train = devision_train[:-1, :30][:, :, np.newaxis]
X_test = devision_test[:-1, :30][:, :, np.newaxis]

# Targets: the value that directly follows each 30-step window.
Y_train = devision_train[:-1, 30]
Y_test = devision_test[:-1, 30]

# Each X sample holds 30 consecutive values and Y holds the next value.
print(X_train.shape, Y_train.shape)

In [None]:
# Sample counts of the LSTM tensors: test first, then train.
n_test_samples, n_train_samples = len(X_test), len(X_train)
print(n_test_samples, len(Y_test))
print(n_train_samples, len(Y_train))

# Combined number of windows, followed by the row count of the smoothed frame
# for comparison.
print(n_test_samples + n_train_samples)
print(len(data_seoul))

In [None]:
# There is a single feature, because only the PM10 (fine dust) series is used
# as the time-series feature.
# NOTE(review): len() of one sample returns its first dimension, i.e. the
# number of time steps sliced into X_train, not the feature count; the feature
# axis is the trailing one added with np.newaxis.
len(X_train[3260])

In [None]:
class LossHistory(keras.callbacks.Callback):
    """Keras callback that records the reported loss after every batch.

    After training, ``losses`` holds one entry per processed batch, in order.
    """

    def on_train_begin(self, logs=None):
        """Start every training run with an empty loss list."""
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        """Append the 'loss' entry of ``logs`` (``None`` when it is absent)."""
        # `None` replaces the former mutable `{}` default argument.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        
# Network dimensions: one input/output feature, 500 LSTM units, 30-step windows.
in_out_neurons, hidden_neurons, length_of_sequences = 1, 500, 30
history = LossHistory()

# LSTM that returns only its final output, followed by a dense read-out with
# ReLU. The batch dimension is left open (None).
model = Sequential([
    LSTM(
        hidden_neurons,
        batch_input_shape=(None, length_of_sequences, in_out_neurons),
        return_sequences=False,
    ),
    Dense(in_out_neurons),
    Activation("relu"),
])
model.compile(optimizer="rmsprop", loss="mse")

# 3% of the training samples are held out for validation; the per-batch loss
# is collected by `history`.
# NOTE(review): `nb_epoch` is the Keras 1 spelling of the epoch-count argument.
model.fit(
    X_train,
    Y_train,
    nb_epoch=50,
    batch_size=30,
    validation_split=0.03,
    callbacks=[history],
)

# The model is compiled without extra metrics, so evaluate() yields the MSE.
trainScore = model.evaluate(X_train, Y_train, verbose=0)
testScore = model.evaluate(X_test, Y_test, verbose=0)
print('Train Score: %.2f MSE (%.2f RMSE)' % (trainScore, math.sqrt(trainScore)))
print('Test Score: %.2f MSE (%.2f RMSE)' % (testScore, math.sqrt(testScore)))

In [None]:
import matplotlib

# Use font size 15 for the x/y tick labels and the legend text.
for rc_group, rc_option in (
    ('xtick', 'labelsize'),
    ('ytick', 'labelsize'),
    ('legend', 'fontsize'),
):
    matplotlib.rc(rc_group, **{rc_option: 15})

In [None]:
# Print the configuration returned by get_config() for the model's first layer.
print(model.layers[0].get_config())

In [None]:
# Print the string returned by the model's to_yaml() method.
print(model.to_yaml())

In [None]:
# Show the model summary.
model.summary()

In [None]:
# Per-batch loss collected by the `history` callback; only the first 400
# recorded batches are drawn.
plt.figure(figsize=(16, 10))
plt.plot(history.losses[:400])
plt.legend(["LSTM_Loss"])

In [None]:
# Fix: this cell fed `trainX` / `testX` (2-D, 14-step windows built for the
# dense model) to the LSTM `model`, whose input shape is (None, 30, 1), and
# aligned the result with `dataset` / `look_back` from that earlier model.
# It now uses the LSTM tensors and the PM10 series they were built from.
trainPredict = model.predict(X_train)
testPredict = model.predict(X_test)
print(len(trainPredict))
print(len(testPredict))

window = X_train.shape[1]            # time steps per input window
series = data_seoul.PM10.values      # smoothed PM10 baseline
test_offset = len(series) - len(PM10_test)  # position of the first test value

# Only windows that lie completely inside their series have a real target;
# trailing windows are zero padded and are left out of the plot.
n_train_valid = max(min(len(PM10_train) - window, len(trainPredict)), 0)
n_test_valid = max(min(len(PM10_test) - window, len(testPredict)), 0)

# shift train predictions for plotting: prediction i estimates the value at
# position i + window.
trainPredictPlot = np.full(len(series), np.nan)
trainPredictPlot[window:window + n_train_valid] = trainPredict[:n_train_valid].ravel()

# shift test predictions for plotting: same rule, offset by the start of the
# test partition.
testPredictPlot = np.full(len(series), np.nan)
test_start = test_offset + window
testPredictPlot[test_start:test_start + n_test_valid] = testPredict[:n_test_valid].ravel()

# plot baseline and predictions
fig = plt.figure(1, figsize=(16, 10))
plt.plot(series, color='blue', label='dataset')
plt.plot(trainPredictPlot, color='green', label='train prediction')
plt.plot(testPredictPlot, color='red', label='test prediction')
# The legend is created after all three lines exist so each one is labelled.
plt.legend()
plt.show()

In [None]:
print(len(X_train))
trainPredict = model.predict(X_train)
testPredict = model.predict(X_test)

# Fix: the plot offsets used to come from `dataset` and `look_back` of the
# dense-model cell (14-step windows, different train/test split). Those do not
# correspond to the LSTM windows, so the test slice could not hold
# `testPredict`. The alignment is now derived from the LSTM inputs themselves.
window = X_train.shape[1]            # time steps per input window
series = data_seoul.PM10.values      # smoothed PM10 baseline
test_offset = len(series) - len(PM10_test)  # position of the first test value

# Only windows that lie completely inside their series have a real target;
# trailing windows are zero padded and are left out of the plot.
n_train_valid = max(min(len(PM10_train) - window, len(trainPredict)), 0)
n_test_valid = max(min(len(PM10_test) - window, len(testPredict)), 0)

# shift train predictions for plotting: prediction i estimates the value at
# position i + window.
trainPredictPlot = np.full(len(series), np.nan)
trainPredictPlot[window:window + n_train_valid] = trainPredict[:n_train_valid].ravel()

# shift test predictions for plotting: same rule, offset by the start of the
# test partition.
testPredictPlot = np.full(len(series), np.nan)
test_start = test_offset + window
testPredictPlot[test_start:test_start + n_test_valid] = testPredict[:n_test_valid].ravel()

# plot baseline and predictions
fig = plt.figure(1, figsize=(16, 10))
plt.plot(series, color='blue', label='dataset')
plt.plot(trainPredictPlot, color='green', label='train prediction')
plt.plot(testPredictPlot, color='red', label='test prediction')
# The legend is created after all three lines exist so each one is labelled.
plt.legend()
plt.show()