In [None]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import SimpleRNN
from keras.layers import Dropout

In [None]:
import os

# Recursively list every file shipped with the Kaggle input datasets so the
# exact CSV path can be copied into the loading cell.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))


In [None]:
#https://www.kaggle.com/ozkanozturk/stock-price-prediction-by-simple-rnn-and-lstm

Since no model can predict stock prices perfectly, a good model is one that shows how the price will probably behave over the coming weeks.

Importing the data

In [None]:
# Daily AAPL quotes from the Kaggle dataset; the Date column becomes the index.
csv_path = '/kaggle/input/yahoo-finance-apple-inc-aapl/AAPL_daily_update.csv'
df = pd.read_csv(
    csv_path,
    sep=",",
    index_col='Date',
)

# Preview the first five rows.
df.head(5)

Creating the variable Average, between the low and the high price

In [None]:
# Mid-point of each day's High/Low range; from here on it is the only column kept.
# Note: this cell rebinds `df`, so it cannot be re-run without reloading the CSV.
df = df.assign(Average=df['High'].add(df['Low']).div(2))
df = df[['Average']]

df.head(5)

Plotting the average stock price

In [None]:
# Line chart of the averaged daily price over the whole period.
ax = df.plot(legend=True)
ax.set_title('Apple stock price')
plt.show()

Establishing the training and validation datasets

In [None]:
# Chronological split: the first 70% of rows train the model, the rest validate it.
split_ratio = 0.7
length_data = df.shape[0]
length_train = round(split_ratio * length_data)
length_test = length_data - length_train

In [None]:
# Report the size of the full series and of each partition.
for label, count in (
    ("Data length :", length_data),
    ("Train data length :", length_train),
    ("Validation data lenth :", length_test),
):
    print(label, count)

In [None]:
# Positional split of the single-column frame: the first `length_train` rows
# are the training set, the remainder is the validation set.
train_df = df.iloc[:length_train, :1]
test_df = df.iloc[length_train:, :1]

# Only the last expression of a cell is displayed, so show the validation frame.
test_df

Creating an array

In [None]:
# Training prices as a 2-D (n_samples, 1) array, the layout the scaler expects.
df_train = train_df.to_numpy().reshape(-1, 1)
df_train.shape

Transforming the data to the same scale, using Min-Max scaling

In [None]:
# Learn the min/max of the training prices and map them onto [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
df_train_scaled = scaler.fit(df_train).transform(df_train)

df_train_scaled.shape

Creating the x and y arrays

In [None]:
# Number of consecutive past days used to predict the following day.
time_step = 7

# Sliding windows over the scaled training series: each sample holds the
# `time_step` values preceding position `end`, and its target is the value at `end`.
X_train = np.array(
    [df_train_scaled[end - time_step:end, 0] for end in range(time_step, length_train)]
)
y_train = np.array(
    [df_train_scaled[end, 0] for end in range(time_step, length_train)]
)

Reshape the array

In [None]:
# Recurrent layers expect (samples, time steps, features); there is one feature.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
# Targets become a column vector so they can go through the scaler later.
y_train = y_train.reshape(y_train.shape[0], 1)

print("Shape of X_train after reshape :",X_train.shape)
print("Shape of y_train after reshape :",y_train.shape)

Initializing the RNN model

In [None]:
# Stacked SimpleRNN regressor: four recurrent layers of 50 units, each followed
# by 20% dropout, feeding one linear output unit (the next scaled price).
regressor = Sequential()

# First recurrent layer declares the input window: (time_step, 1 feature).
regressor.add(
    SimpleRNN(
        units=50,
        activation="tanh",
        return_sequences=True,
        input_shape=(X_train.shape[1], 1),
    )
)
regressor.add(Dropout(0.2))

# Intermediate layers return the full sequence so the next RNN layer
# receives one vector per time step.
regressor.add(SimpleRNN(units=50, activation="tanh", return_sequences=True))
regressor.add(Dropout(0.2))

regressor.add(SimpleRNN(units=50, activation="tanh", return_sequences=True))
regressor.add(Dropout(0.2))

# Last recurrent layer keeps the library defaults, so only its final state
# is passed on to the dense head.
regressor.add(SimpleRNN(units=50))
regressor.add(Dropout(0.2))

# Single output: the predicted (scaled) price for the next day.
regressor.add(Dense(units=1))

regressor.compile(
    optimizer="adam",
    loss="mean_squared_error",
    # The target is a continuous price, so classification "accuracy" is
    # meaningless here; track mean absolute error alongside the MSE loss.
    metrics=["mean_absolute_error"],
)

# `history.history["loss"]` holds the per-epoch training loss.
history = regressor.fit(X_train, y_train, epochs=50, batch_size=32)

Plotting Loss vs Epochs results

In [None]:
# Training loss per epoch, to check that the optimisation converged.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history.history["loss"])
ax.set_xlabel("Epochs")
ax.set_ylabel("Losses")
ax.set_title("Simple RNN model, Loss vs Epoch")
plt.show()

Running the predictions and scaling back from 0-1 to the original scale

In [None]:
# Predict on the training windows, then map the scaled outputs back to prices.
y_pred_scaled = regressor.predict(X_train)
y_pred = scaler.inverse_transform(y_pred_scaled)

y_pred.shape

In [None]:
# Map the scaled training targets back to the original price scale so they can
# be plotted against y_pred.
# NOTE: this overwrites y_train with its own inverse transform, so the cell is
# not idempotent -- running it a second time applies the inverse transform again.
y_train = scaler.inverse_transform(y_train)
y_train.shape

Visualising the results

In [None]:
# Predicted vs. actual prices on the training windows.
plt.figure(figsize = (30,10))
plt.plot(y_pred, color = "b", label = "y_pred" )
plt.plot(y_train, color = "g", label = "y_train")
plt.xlabel("Days")
# The modelled series is the High/Low average, not the opening price.
plt.ylabel("Average price")
plt.title("Simple RNN model, Predictions with input X_train vs y_train")
plt.legend()
plt.show()

Converting the test set to an array and scaling the average price values with the same Min-Max scaler

In [None]:
# Convert the held-out "Average" column to a 2-D (n_samples, 1) array.
# np.asarray accepts both a DataFrame and an ndarray, so re-running this cell
# after test_df has already been converted does not fail.
test_df = np.reshape(np.asarray(test_df), (-1, 1))

# Scale with the scaler that was fitted on the training data. Re-fitting it here
# (fit_transform) would leak test-set statistics, feed the network inputs on a
# different scale than the one it was trained on, and change what
# scaler.inverse_transform returns afterwards. Scaled test values may fall
# outside [0, 1] when test prices exceed the training range.
scaled_test_df = scaler.transform(test_df)
print("Shape of scaled validation dataset :",scaled_test_df.shape)

Creating X test and y test

In [None]:
# Same sliding-window construction as for training, applied to the scaled
# validation series; both results stay plain lists at this point.
X_test = [
    scaled_test_df[end - time_step:end, 0] for end in range(time_step, length_test)
]
y_test = [
    scaled_test_df[end, 0] for end in range(time_step, length_test)
]

Converting to array

In [None]:
# Turn the window and target lists into NumPy arrays.
X_test = np.array(X_test)
y_test = np.array(y_test)

Reshape the array

In [None]:
# (samples, time steps, 1 feature) for the recurrent layers; targets as a column.
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
y_test = y_test.reshape(-1, 1)

print("Shape of X_test after reshape :",X_test.shape)
print("Shape of y_test after reshape :",y_test.shape)

Running the test predictions and scaling back from 0-1 to the original scale

In [None]:
# Predict on the validation windows and map the outputs back to price units.
y_pred_of_test_scaled = regressor.predict(X_test)
y_pred_of_test = scaler.inverse_transform(y_pred_of_test_scaled)

print("Shape of y_pred_of_test :",y_pred_of_test.shape)

Visualising the test results

In [None]:

# Predicted vs. actual prices on the validation windows, both in price units.
plt.figure(figsize = (30,10))
plt.plot(y_pred_of_test, label = "y_pred_of_test", c = "orange")
plt.plot(scaler.inverse_transform(y_test), label = "y_test", c = "g")
plt.xlabel("Days")
# The modelled series is the High/Low average, not the opening price.
plt.ylabel("Average price")
plt.title("Simple RNN model, Prediction with input X_test vs y_test")
plt.legend()
plt.show()