# Prediciting Stock Prices Using Recurrent Neural Networks (RNNs)

**Objective**: The purpose of this project is to build a Recurrent Neural Network using Keras and TensorFlow to predict next day closing Amazon stock prices. The data is obtained from [Yahoo finance](https://finance.yahoo.com/quote/AMZN/history?period1=1381017600&period2=1696550400&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true) and spans a period of 10 years (07/10/23 - 05/10/23)

## 1. Importing the required libraries 

In [1]:
!pip install pandas_ta
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_ta as ta



## 2. Uploading Amazon stock data 

In [2]:
# Specify the file path
file_path = "/Users/mazin/Desktop/data/AMZN.csv"

# Load the CSV file into a Pandas DataFrame
data = pd.read_csv(file_path)

# Display the first and last few rows of the DataFrame
data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2013-10-07,15.762000,15.767000,15.487000,15.501500,15.501500,41664000
1,2013-10-08,15.575000,15.577500,15.013500,15.161500,15.161500,63324000
2,2013-10-09,15.171000,15.175000,14.825000,14.911500,14.911500,64948000
3,2013-10-10,15.231500,15.335000,15.129500,15.258500,15.258500,51102000
4,2013-10-11,15.238500,15.546500,15.192000,15.544500,15.544500,43262000
...,...,...,...,...,...,...,...
2512,2023-09-29,128.199997,129.149994,126.320000,127.120003,127.120003,62377600
2513,2023-10-02,127.279999,130.470001,126.540001,129.460007,129.460007,48029700
2514,2023-10-03,128.059998,128.520004,124.250000,124.720001,124.720001,51565000
2515,2023-10-04,126.059998,127.360001,125.680000,127.000000,127.000000,44203900


## 3. Adding indicators 

In [3]:
# Adding indicators
data['RSI']=ta.rsi(data.Close, length=15)
data['EMAF']=ta.ema(data.Close, length=20)
data['EMAM']=ta.ema(data.Close, length=100)
data['EMAS']=ta.ema(data.Close, length=150)

data['Target'] = data['Adj Close']-data.Open
data['Target'] = data['Target'].shift(-1)

data['TargetClass'] = [1 if data.Target[i]>0 else 0 for i in range(len(data))]

data['TargetNextClose'] = data['Adj Close'].shift(-1)

data.dropna(inplace=True)
data.reset_index(inplace = True)
data.drop(['Volume', 'Close', 'Date', 'index'], axis=1, inplace=True)

In [4]:
data_set = data.iloc[:, 0:11]#.values
pd.set_option('display.max_columns', None)

data_set.head(20)
#print(data_set.shape)
#print(data.shape)
#print(type(data_set))

Unnamed: 0,Open,High,Low,Adj Close,RSI,EMAF,EMAM,EMAS,Target,TargetClass,TargetNextClose
0,14.715,15.167,14.708,15.143,42.670897,15.45676,17.13421,17.84464,0.102,1,15.232
1,15.13,15.28,15.0375,15.232,43.803754,15.435354,17.096542,17.810036,-0.244,0,14.881
2,15.125,15.232,14.833,14.881,40.428059,15.382559,17.05267,17.77124,-0.1415,0,14.7595
3,14.901,14.96,14.519,14.7595,39.304674,15.32322,17.007261,17.73135,0.245,1,14.885
4,14.64,14.944,14.5775,14.885,41.115502,15.281485,16.965236,17.69365,0.05,1,14.838
5,14.788,14.95,14.6905,14.838,40.629121,15.239248,16.923112,17.655827,0.2045,1,15.0595
6,14.855,15.223,14.8375,15.0595,43.975595,15.222129,16.886209,17.621438,0.14,1,15.2505
7,15.1105,15.298,15.076,15.2505,46.748705,15.224831,16.853819,17.590035,-0.007,0,15.2455
8,15.2525,15.427,15.1505,15.2455,46.683893,15.2268,16.821971,17.558982,0.339,1,15.612
9,15.273,15.6175,15.245,15.612,51.919024,15.263485,16.798011,17.533194,-0.1795,0,15.541


## 4. Scaling the data and arranging array

In [5]:
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0,1))
data_set_scaled = sc.fit_transform(data_set)
print(data_set_scaled)

[[0.0028616  0.00370981 0.00297704 ... 0.41476568 1.         0.00513578]
 [0.00526072 0.00435874 0.00490801 ... 0.3970321  0.         0.00309773]
 [0.00523182 0.00408309 0.00370958 ... 0.40228554 0.         0.00239225]
 ...
 [0.65360158 0.66586458 0.65834702 ... 0.23835255 0.         0.64086971]
 [0.65811076 0.65466626 0.6449269  ... 0.45771591 1.         0.65410835]
 [0.64654873 0.64800467 0.65330715 ... 0.37109801 0.         0.64806966]]


## 5. Data transformation and preproccessing 

In [6]:
# multiple feature from data provided to the model
X = []
#print(data_set_scaled[0].size)
#data_set_scaled=data_set.values
backcandles = 30
print(data_set_scaled.shape[0])
for j in range(8):#data_set_scaled[0].size):#2 columns are target not X
    X.append([])
    for i in range(backcandles, data_set_scaled.shape[0]):#backcandles+2
        X[j].append(data_set_scaled[i-backcandles:i, j])

#move axis from 0 to position 2
X=np.moveaxis(X, [0], [2])

#Erase first elements of y because of backcandles to match X length
#del(yi[0:backcandles])
#X, yi = np.array(X), np.array(yi)
# Choose -1 for last column, classification else -2...
X, yi =np.array(X), np.array(data_set_scaled[backcandles:,-1])
y=np.reshape(yi,(len(yi),1))
#y=sc.fit_transform(yi)
#X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
print(X)
print(X.shape)
print(y)
print(y.shape)

2367
[[[0.0028616  0.00370981 0.00297704 ... 0.00324608 0.00993868 0.01287781]
  [0.00526072 0.00435874 0.00490801 ... 0.00311636 0.00969694 0.01265296]
  [0.00523182 0.00408309 0.00370958 ... 0.00279644 0.00941539 0.01240087]
  ...
  [0.01470112 0.01392039 0.01230079 ... 0.0070414  0.0064441  0.00886162]
  [0.01253324 0.01085377 0.01067162 ... 0.00711465 0.00639585 0.00877413]
  [0.01125275 0.0106614  0.0109617  ... 0.00726864 0.00636787 0.00870088]]

 [[0.00526072 0.00435874 0.00490801 ... 0.00311636 0.00969694 0.01265296]
  [0.00523182 0.00408309 0.00370958 ... 0.00279644 0.00941539 0.01240087]
  [0.00393687 0.00252106 0.00186944 ... 0.00243686 0.00912397 0.01214166]
  ...
  [0.01253324 0.01085377 0.01067162 ... 0.00711465 0.00639585 0.00877413]
  [0.01125275 0.0106614  0.0109617  ... 0.00726864 0.00636787 0.00870088]
  [0.01246966 0.01128735 0.01135141 ... 0.00731909 0.00632088 0.00861534]]

 [[0.00523182 0.00408309 0.00370958 ... 0.00279644 0.00941539 0.01240087]
  [0.00393687 0.0

In [9]:
# split data into train test sets
splitlimit = int(len(X)*0.8)
print(splitlimit)
X_train, X_test = X[:splitlimit], X[splitlimit:]
y_train, y_test = y[:splitlimit], y[splitlimit:]
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
print(y_train)

1869
(1869, 30, 8)
(468, 30, 8)
(1869, 1)
(468, 1)
[[0.01175511]
 [0.01124705]
 [0.01092189]
 ...
 [0.98407884]
 [0.95388536]
 [0.95605409]]


## 6. Creating and training LSTM model

In [None]:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Dense
from keras.layers import TimeDistributed

import tensorflow as tf
import keras
from keras import optimizers
from keras.callbacks import History
from keras.models import Model
from keras.layers import Dense, Dropout, LSTM, Input, Activation, concatenate
import numpy as np
#tf.random.set_seed(20)
np.random.seed(10)

lstm_input = Input(shape=(backcandles, 8), name='lstm_input')
inputs = LSTM(150, name='first_layer')(lstm_input)
inputs = Dense(1, name='dense_layer')(inputs)
output = Activation('linear', name='output')(inputs)
model = Model(inputs=lstm_input, outputs=output)
adam = optimizers.Adam()
model.compile(optimizer=adam, loss='mse')
model.fit(x=X_train, y=y_train, batch_size=15, epochs=30, shuffle=True, validation_split = 0.1)

## 7. Making predicitions and visualising  

In [None]:
y_pred = model.predict(X_test)
#y_pred=np.where(y_pred > 0.43, 1,0)
for i in range(10):
    print(y_pred[i], y_test[i])

In [None]:
plt.figure(figsize=(16,8))
plt.plot(y_test, color = 'black', label = 'Test')
plt.plot(y_pred, color = 'green', label = 'pred')
plt.legend()
plt.show()