# Навчання рекурентних нейронних мереж засобами TensorFlow
**Самошин Андрій КА-83**

In [None]:
import numpy as np
import pandas as pd
import gc
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, classification_report
from IPython.display import clear_output
import time

In [None]:
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import Sequential, Input, initializers, optimizers, callbacks, layers

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from sklearn.model_selection import train_test_split
from keras.layers import SimpleRNN
from keras.layers import GRU
from tensorflow.keras.layers import AveragePooling2D, MaxPooling2D, BatchNormalization, Conv2D, Dense, Dropout, Flatten, GlobalAveragePooling2D

import os
# Print the full path of every file found under /kaggle/input.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [None]:
# Load infy_stock.csv keeping only the 'Date' and 'Close' columns; 'Date' is
# parsed as a datetime and becomes the index.
df = pd.read_csv(
    "../input/national-stock-exchange-time-series/infy_stock.csv",
    index_col='Date',
    parse_dates=['Date'],
    usecols=['Date', 'Close'],
)
df.shape

In [None]:
# Show the earliest and latest dates present in the index.
print("Min:",df.index.min())
print("Max:",df.index.max())

In [None]:
# Plot the raw 'Close' column over the whole index.
plt.figure(figsize=(17,5))
plt.plot(df['Close'])
plt.title("Closing Price",fontsize=20)
plt.show()

15.06.2015 спостерігається різке падіння ціни — це наслідок дроблення (спліту) акцій. Якщо використати ці дані без змін, прогноз може виявитися не таким, як очікувалось, оскільки в ряді є штучний розрив.

Потрібно або відкинути період до дроблення, або скоригувати значення, що йому передують. Оскільки дроблення відбулося у співвідношенні 2 до 1, дані до дроблення можна привести до сучасного масштабу, поділивши їх на 2. (Після дроблення одна стара акція відповідає двом новим, тож ціна однієї акції зменшилася вдвічі.)

In [None]:
# Halve every closing price up to and including 2015-06-12 and append the
# unchanged prices from 2015-06-15 onwards, then plot the adjusted series.
prices = pd.concat([
    df['Close'].loc[:'2015-06-12'] / 2,
    df['Close'].loc['2015-06-15':],
])
plt.figure(figsize=(17, 5))
plt.plot(prices)
plt.title("New closing Price", fontsize=20)
plt.show()

In [None]:
# Split chronologically first (shuffle=False keeps the time order), then fit
# the scaler on the training part only: fitting it on the full series would
# leak test-period mean/variance into the training data.
raw_prices = prices.values.reshape(-1, 1)
train_raw, test_raw = train_test_split(raw_prices, test_size=0.3, shuffle=False)
scaler = StandardScaler()
train = scaler.fit_transform(train_raw).flatten()
test = scaler.transform(test_raw).flatten()
# Keep `prices` as the flat scaled series (train followed by test).
prices = np.concatenate([train, test])

In [None]:
# Draw both segments on one x-axis; the test segment is shifted by
# len(train) so it starts right after the last training point.
plt.figure(figsize=(17, 5))
plt.plot(train, label='train')
plt.plot(len(train) + np.arange(len(test)), test, label='test')
plt.title("Train and Test periods", fontsize=20)
plt.legend()

In [None]:
def get_chunks(train, n_input, n_out=7):
    """Cut a 1-D series into sliding (input window, target window) pairs.

    Window ``i`` uses ``train[i:i + n_input]`` as input and the ``n_out``
    values that follow it as target; consecutive windows are shifted by one
    step.

    Args:
        train: 1-D sequence of values (NumPy array or any array-like).
        n_input: Number of values in each input window.
        n_out: Number of values in each target window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. With at least one complete window,
        ``X`` has shape ``(n_windows, n_input, 1)`` and ``y`` has shape
        ``(n_windows, n_out)``. If the series is too short for a single
        window, both arrays are empty.
    """
    series = np.asarray(train)
    # Number of start positions for which input and target both fit.
    n_windows = max(len(series) - n_input - n_out + 1, 0)
    starts = range(n_windows)
    X = [series[s:s + n_input].reshape((n_input, 1)) for s in starts]
    y = [series[s + n_input:s + n_input + n_out] for s in starts]
    return np.array(X), np.array(y)

In [None]:
# Each sample uses n_input consecutive values as input and the single next
# value (n_out=1) as target; train and test are windowed separately.
n_input = 7
X_train, y_train = get_chunks(train, n_input=n_input, n_out=1)
X_test, y_test = get_chunks(test, n_input=n_input, n_out=1)

In [None]:
# Reshape to (samples, 1, former second axis) so each sample is a single
# step whose features are the window values.
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

In [None]:
# Keyword arguments unpacked into model.fit(); the test windows double as
# validation data.
fit_params = dict(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    verbose=1,
    epochs=40,
    batch_size=16,
)

In [None]:
def results_plot(history):
    """Plot training and validation loss per epoch.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            ``'loss'`` and ``'val_loss'`` sequences.
    """
    plt.figure(figsize=(17, 5))
    plt.plot(history.history['loss'], color='b', label="train loss")
    plt.plot(history.history['val_loss'], color='r', label="val loss")
    # Both curves carry labels, so draw the legend and render the figure.
    plt.legend()
    plt.show()
    
def plot_preds(y_test, preds):
    """Overlay the predictions and the true test values on one figure."""
    plt.figure(figsize=(17, 5))
    for series, name in ((preds, 'preds'), (y_test, 'test')):
        plt.plot(series, label=name)
    plt.title('Real test and predicted')
    plt.legend()
    plt.show()

In [None]:
def mae(y_true, y_pred):
    """Return the mean absolute error between targets and predictions.

    Args:
        y_true: Array-like of true values.
        y_pred: Array-like of predicted values, same shape as ``y_true``.

    Returns:
        Absolute errors averaged over the first axis and then over the
        remaining outputs.
    """
    # Convert up front so plain Python lists are accepted as well.
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    output_errors = np.average(np.abs(y_pred - y_true), axis=0)
    return np.average(output_errors)

def mape(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Each error is divided by the corresponding true value, so true values
    equal to zero produce infinite or NaN results.
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)
    relative_error = (actual - forecast) / actual
    return np.mean(np.abs(relative_error)) * 100

def rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    Args:
        y_true: Array-like of true values.
        y_pred: Array-like of predicted values, same shape as ``y_true``.
    """
    # Convert up front so plain Python lists are accepted as well.
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    return np.sqrt(((y_pred - y_true) ** 2).mean())

def brief_stats(y_true, y_pred):
    """Print MAPE, MAE and RMSE for the given targets and predictions."""
    metric_functions = (('mape', mape), ('mae', mae), ('rmse', rmse))
    scores = {name: metric(y_true, y_pred) for name, metric in metric_functions}
    print(pd.Series(scores))

In [None]:
def base_rnn():
    """Build and compile the baseline model: one SimpleRNN layer with 1 unit.

    The input shape is ``(1, n_input)``, where ``n_input`` is read from the
    module-level variable. The model is compiled with MSE loss and Adam.
    """
    recurrent = SimpleRNN(1, return_sequences=False, input_shape=(1, n_input))
    model = Sequential([recurrent])
    model.compile(optimizer='adam', loss='mse')
    return model

In [None]:
# Train the baseline SimpleRNN with the shared fit parameters and plot its
# training/validation loss.
rnn_model = base_rnn()
history_rnn =  rnn_model.fit(**fit_params)
results_plot(history_rnn)

# Predict on the test windows, then plot and score against the true values.
# NOTE(review): both arrays are in the scaler's standardized units, so MAPE
# divides by values that may be close to zero — confirm it is meaningful here.
preds = rnn_model.predict(X_test).flatten()
plot_preds(y_test[:, 0], preds)
brief_stats(y_test[:, 0], preds)

In [None]:
def lstm1():
    """Build and compile an LSTM(12, relu) model with a per-step Dense(1) head.

    The input shape is ``(1, n_input)``, where ``n_input`` is read from the
    module-level variable. The model is compiled with MSE loss and Adam.
    """
    model = Sequential([
        LSTM(12, activation='relu', return_sequences=True,
             input_shape=(1, n_input)),
        TimeDistributed(Dense(1)),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

In [None]:
# Train the LSTM model with the shared fit parameters and plot its loss.
lstm_model = lstm1()
history_lstm =  lstm_model.fit(**fit_params)
results_plot(history_lstm)

# Predict on the test windows, then plot and score against the true values.
preds = lstm_model.predict(X_test).flatten()
plot_preds(y_test[:, 0], preds)
brief_stats(y_test[:, 0], preds)

In [None]:
def gru1():
    """Build and compile a GRU(12, relu) model with a per-step Dense(1) head.

    The input shape is ``(1, n_input)``, where ``n_input`` is read from the
    module-level variable. The model is compiled with MSE loss and Adam.
    """
    model = Sequential([
        GRU(12, activation='relu', return_sequences=True,
            input_shape=(1, n_input)),
        TimeDistributed(Dense(1)),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

In [None]:
# Train the single-layer GRU model with the shared fit parameters and plot
# its loss.
gru_model = gru1()
history_gru = gru_model.fit(**fit_params)
results_plot(history_gru)

# Predict on the test windows, then plot and score against the true values.
preds = gru_model.predict(X_test).flatten()
plot_preds(y_test[:, 0], preds)
brief_stats(y_test[:, 0], preds)

In [None]:
def gru2():
    """Build and compile a two-layer GRU model (48 then 12 units, relu).

    Both GRU layers use ``recurrent_dropout=0.1`` and return sequences; they
    are followed by per-step Dense(9) and Dense(1) layers. ``n_input`` is
    read from the module-level variable. Compiled with MSE loss and Adam.
    """
    # NOTE(review): input_shape on the second GRU is presumably redundant,
    # since only the first layer defines the model input — confirm.
    stack = [
        GRU(48, activation='relu', return_sequences=True,
            recurrent_dropout=0.1, input_shape=(1, n_input)),
        GRU(12, activation='relu', return_sequences=True,
            recurrent_dropout=0.1, input_shape=(1, n_input)),
        TimeDistributed(Dense(9)),
        TimeDistributed(Dense(1)),
    ]
    model = Sequential(stack)
    model.compile(optimizer='adam', loss='mse')
    return model

In [None]:
# Train the two-layer GRU model (gru2) with the shared fit parameters and
# plot its loss; this rebinds gru_model and history_gru.
gru_model = gru2()
history_gru = gru_model.fit(**fit_params)
results_plot(history_gru)

# Predict on the test windows, then plot and score against the true values.
preds = gru_model.predict(X_test).flatten()
plot_preds(y_test[:, 0], preds)
brief_stats(y_test[:, 0], preds)

In [None]:
def gru3():
    """Build and compile a two-layer GRU model (48 then 32 units).

    Both GRU layers keep their default activation and return sequences; the
    second one applies ``dropout=0.3``. They are followed by per-step
    Dense(32) and Dense(1) layers. ``n_input`` is read from the module-level
    variable. Compiled with MSE loss and Adam.
    """
    stack = [
        GRU(48, return_sequences=True, input_shape=(1, n_input)),
        GRU(32, return_sequences=True, dropout=0.3,
            input_shape=(1, n_input)),
        TimeDistributed(Dense(32)),
        TimeDistributed(Dense(1)),
    ]
    model = Sequential(stack)
    model.compile(optimizer='adam', loss='mse')
    return model

In [None]:
# Train the gru3 model with the shared fit parameters and plot its loss;
# this rebinds gru_model and history_gru.
gru_model = gru3()
history_gru = gru_model.fit(**fit_params)
results_plot(history_gru)

# Predict on the test windows, then plot and score against the true values.
preds = gru_model.predict(X_test).flatten()
plot_preds(y_test[:, 0], preds)
brief_stats(y_test[:, 0], preds)

In [None]:
from keras.layers import Conv1D, MaxPooling1D, Conv2D, Flatten

def conv_gru(input_shape=None):
    """Build and compile a Conv1D -> GRU -> Dense regression model.

    Layers: Conv1D(32 filters, kernel size 2, relu), GRU(48, relu, returning
    sequences), Flatten, Dense(32), Dropout(0.3), Dense(1). Compiled with
    MSE loss and Adam.

    Args:
        input_shape: Shape of one input sample, without the batch axis. When
            omitted, it is taken from axes 1 and 2 of the module-level
            ``X_test_1d`` array, which must exist by the time this function
            is called.

    Returns:
        The compiled model.
    """
    if input_shape is None:
        # Fall back to the notebook-level array, resolved at call time.
        input_shape = (X_test_1d.shape[1], X_test_1d.shape[2])

    model = Sequential()
    model.add(Conv1D(filters=32, kernel_size=2, activation='relu',
                     input_shape=input_shape))
    model.add(GRU(48, activation='relu', return_sequences=True))
    model.add(Flatten())
    model.add(Dense(32))
    model.add(Dropout(0.3))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')
    return model

In [None]:
# Exchange the sizes of the last two axes: (n, a, b) becomes (n, b, a).
X_train_1d = np.reshape(
    X_train, (X_train.shape[0], X_train.shape[2], X_train.shape[1]))
X_test_1d = np.reshape(
    X_test, (X_test.shape[0], X_test.shape[2], X_test.shape[1]))

In [None]:
# Train the Conv1D+GRU model on the reshaped inputs and plot its loss.
# NOTE(review): batch_size is 32 here while fit_params uses 16 for the other
# models — confirm the difference is intentional.
conv_gru_model = conv_gru()
history = conv_gru_model.fit(X_train_1d, y_train,
                                   validation_data = (X_test_1d, y_test),
                                   batch_size =32,
                                   epochs = 40)
results_plot(history)

# Predict on the test windows, then plot and score against the true values.
preds = conv_gru_model.predict(X_test_1d).flatten()
plot_preds(y_test[:, 0], preds)
brief_stats(y_test[:, 0], preds)

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
# Autocorrelation and partial autocorrelation plots of the scaled price series.
plot_acf(prices)
plot_pacf(prices)