In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import keras as kr
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import SimpleRNN
import matplotlib.pyplot as plt
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import io

In [3]:
# Read the spreadsheet using its second row (header=1) as column names, keep
# index labels 0..529 (the first 530 rows) and discard the 'date' column.
data = (
    pd.read_excel('data_akbilgic.xlsx', header=1)
    .loc[0:529]
    .drop(columns=['date'])
)

In [None]:
# Move the current row index into an 'index' column.
# Guarded so the cell is safe to re-run: an unguarded reset_index would add a
# 'level_0' column on the second run and raise on the third.
if 'index' not in data.columns:
    data = data.reset_index()

# Plot the raw ISE series, save it to disk, then display it.
plt.plot(data['ISE'])
plt.savefig('./ise_plot.png')
plt.show()

In [7]:
# Isolate the ISE series as a one-column frame and keep an unscaled copy.
ise_raw = data[['ISE']]
df_1 = ise_raw.copy()

# Min-max scale the series into [0, 1]; `scaler` is reused later to map
# predictions back to the original units.
# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split performed further down, so test-set min/max influence the scaling.
# Consider fitting on the training portion only.
scaler = MinMaxScaler(feature_range=(0, 1))
ise_scaled = scaler.fit_transform(ise_raw.values)
df = pd.DataFrame(ise_scaled, columns=['ISE'])

In [9]:
def timeseries_to_supervised(df, n_in, n_out):
   """Turn a time-indexed frame into a supervised-learning table.

   For every column ``c`` of ``df`` the result contains the lagged inputs
   ``c(t-n_in) ... c(t-1)`` followed by the targets ``c(t)``,
   ``c(t+1) ... c(t+n_out-1)``. Rows that contain a NaN (the first ``n_in``
   rows and the last ``n_out - 1`` rows, plus any row with missing input
   data) are dropped.

   Parameters
   ----------
   df : pandas.DataFrame
      Source series, one column per variable, rows in time order.
   n_in : int
      Number of past steps to expose as input columns.
   n_out : int
      Number of steps (starting at ``t``) to expose as output columns.

   Returns
   -------
   pandas.DataFrame
      New frame with the shifted columns; ``df`` itself is not modified.
      An empty frame is returned when both ``n_in`` and ``n_out`` are < 1.
   """
   shifted_frames = []

   # Inputs: oldest lag first, so columns read left-to-right in time order.
   for lag in range(n_in, 0, -1):
      shifted_frames.append(df.shift(lag).add_suffix('(t-%d)' % lag))

   # Outputs: the current step, then each following step.
   for lead in range(0, n_out):
      suffix = '(t)' if lead == 0 else '(t+%d)' % lead
      shifted_frames.append(df.shift(-lead).add_suffix(suffix))

   # pd.concat rejects an empty list; mirror the empty-frame result instead.
   if not shifted_frames:
      return pd.DataFrame()

   # Concatenate once rather than growing a frame inside the loops.
   return pd.concat(shifted_frames, axis=1).dropna()
n_in = 4   # number of lagged input steps per sample
n_out = 1  # number of predicted steps per sample
sdf = timeseries_to_supervised(df, n_in, n_out)

# Derive the input column names from n_in (instead of a literal 4) so that
# changing the window length keeps the selection in sync with `sdf`.
lag_columns = ['ISE(t-%d)' % lag for lag in range(n_in, 0, -1)]
X, y = sdf[lag_columns].values, sdf['ISE(t)'].values

In [None]:
# Split by position: the first half of the samples is used for training and
# the remainder for testing (no shuffling).
len_data = len(X)
train_size = int(len_data * .5)
print(len_data)
print("Train size: %d" % train_size)
print("Test size: %d" % (len_data - train_size))

xtr, xte = X[:train_size], X[train_size:]
ytr, yte = y[:train_size], y[train_size:]
print(xtr.shape, ytr.shape)
print(xte.shape, yte.shape)

**CONVERT TO 3D NUMPY ARRAYS**

In [None]:
# Reshape the 2-D feature/target arrays into the 3-D layout
# (samples, steps, features) that the recurrent layer is given below.
samples = train_size
steps = 1
print("Samples",samples)
features_in = X.shape[1]  # one feature per lagged input column
features_out = n_out

xtr = np.reshape(xtr, (samples, steps, features_in))   # (samples, steps, input_features)
ytr = np.reshape(ytr, (samples, steps, features_out))  # (samples, steps, output_features)
print("Training data input shape:",xtr.shape,"Training data output shape:", ytr.shape)

# Use the test set's own row count: it equals `samples` only when the split
# is exactly 50/50, and reshaping with the training count fails otherwise.
test_samples = xte.shape[0]
xte = np.reshape(xte, (test_samples, steps, features_in))
yte = np.reshape(yte, (test_samples, steps, features_out))
print("Testing data input shape:",xte.shape,"Testing data output shape:", yte.shape)

**CREATE A SIMPLE RNN MODEL**

In [None]:
# Batch size 1: the weights are updated once per training sample in each epoch.
batch_size = 1

# Recurrent layer -> hidden dense layer -> single-unit regression output.
# NOTE(review): the output unit uses ReLU, so predictions cannot be negative;
# that matches the [0, 1] scaled target, but a linear output is the more
# common choice for regression - confirm this is intended.
model = Sequential([
    SimpleRNN(
        units=50,
        input_shape=(xtr.shape[1], xtr.shape[2]),
        activation="relu",
        return_sequences=True,
    ),
    Dense(50, activation="relu"),  # a linear activation was tried here as well
    Dense(1, activation="relu"),   # one output neuron: regression problem
])

# Mean squared error is the training loss.
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

**TRAIN MODEL**

In [None]:
# Train for 50 epochs on the training windows; verbose=2 logs one line per epoch.
model.fit(x=xtr, y=ytr, batch_size=batch_size, epochs=50, verbose=2)

**Make predictions**

In [15]:
# Run the fitted network over both partitions, using the training batch size.
trainPredict, testPredict = (
    model.predict(inputs, batch_size=batch_size) for inputs in (xtr, xte)
)

In [16]:
# Collapse the 3-D prediction/target arrays to 2-D (rows, features_out) so
# they can be passed to the scaler and the metric functions.
# -1 lets NumPy infer each array's own row count, so the train and test
# partitions no longer have to contain the same number of samples.
trainPredict = np.reshape(trainPredict, (-1, features_out))
ytr2d = np.reshape(ytr, (-1, features_out))
testPredict = np.reshape(testPredict, (-1, features_out))
yte2d = np.reshape(yte, (-1, features_out))



**Invert the normalization of the data.**

In [17]:
# Map predictions and targets from the [0, 1] scaled range back to the
# original ISE units.
# Caution: trainPredict/testPredict are overwritten with their own inverse
# transform, so running this cell twice un-scales them twice.
trainPredict, trainY, testPredict, testY = (
    scaler.inverse_transform(scaled)
    for scaled in (trainPredict, ytr2d, testPredict, yte2d)
)

In [None]:
# Test-set metrics, computed in the original (un-scaled) units.
print("Test Mean Squared Error: ", mean_squared_error(testY, testPredict))
# By hand: sum(np.square(testY-testPredict))/testY.shape[0]

# np.mean(np.abs(...)) returns a plain scalar. The built-in sum() over a 2-D
# array returned a one-element array, which printed with brackets.
print("Test Mean Absolute Error: ", np.mean(np.abs(testY - testPredict)))

print("Test R2 (R-squared.): ", r2_score(testY, testPredict))
# By hand: 1-(sum(np.square(testY-testPredict))/sum(np.square(testY-testY.mean())))

In [19]:
# Stack the predictions row-wise: training rows first, then test rows.
predicted = np.concatenate([trainPredict, testPredict], axis=0)

**PLOT THE MODEL PREDICTIONS FOR THE TRAINING (LEFT) AND TESTING (RIGHT) DATA**

In [None]:
# Rebuild the actual and predicted series in train-then-test order.
original = np.concatenate([trainY, testY], axis=0)
predicted = np.concatenate([trainPredict, testPredict], axis=0)
index = range(len(original))

plt.plot(index, original, 'g')   # actual values
plt.plot(index, predicted, 'r')  # model predictions
# Blue vertical line at the train/test boundary: training data lies to its
# left, test data to its right.
plt.axvline(df.index[train_size], c="b")
plt.savefig('./rnn_plot.png')
plt.show()