In [1]:
import json 
import pandas as pd 
import requests 
import yfinance as yf 
import os
import plotly.express as px 
from datetime import datetime
import plotly.graph_objects as go
import numpy as np
import matplotlib.pyplot as plt
from datetime import date
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import * 


In [2]:
# Download daily price history for the ticker(s) below, from a fixed start
# date up to today's date, and flatten the date index into a regular column.
ticker = ["AAPL"]

start_date = "2021-05-28"
# The end of the window is recomputed from the system clock on every run,
# so the downloaded range grows as the calendar date advances.
current_date = date.today()
end_date = current_date.strftime("%Y-%m-%d")

data = yf.download(ticker, start=start_date, end=end_date)
# reset_index() turns the index returned by the download into an ordinary
# column and gives the frame a fresh 0..n-1 integer index.
df = data.reset_index()


df

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-05-28,125.570000,125.800003,124.550003,124.610001,122.515999,71311100
1,2021-06-01,125.080002,125.349998,123.940002,124.279999,122.191551,67637100
2,2021-06-02,124.279999,125.239998,124.050003,125.059998,122.958450,59278900
3,2021-06-03,124.680000,124.849998,123.129997,123.540001,121.463982,76229200
4,2021-06-04,124.070000,126.160004,123.849998,125.889999,123.774498,75169300
...,...,...,...,...,...,...,...
755,2024-05-30,190.759995,192.179993,190.630005,191.289993,191.289993,49947900
756,2024-05-31,191.440002,192.570007,189.910004,192.250000,192.250000,75158300
757,2024-06-03,192.899994,194.990005,192.520004,194.029999,194.029999,50080500
758,2024-06-04,194.639999,195.320007,193.029999,194.350006,194.350006,47471400


In [3]:
# Round every price column to four decimal places; Volume is left untouched.
columns_to_round = ['Open', "High", "Low", "Close", "Adj Close"]
df[columns_to_round] = df[columns_to_round].round(4)
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-05-28,125.57,125.80,124.55,124.61,122.5160,71311100
1,2021-06-01,125.08,125.35,123.94,124.28,122.1916,67637100
2,2021-06-02,124.28,125.24,124.05,125.06,122.9585,59278900
3,2021-06-03,124.68,124.85,123.13,123.54,121.4640,76229200
4,2021-06-04,124.07,126.16,123.85,125.89,123.7745,75169300
...,...,...,...,...,...,...,...
755,2024-05-30,190.76,192.18,190.63,191.29,191.2900,49947900
756,2024-05-31,191.44,192.57,189.91,192.25,192.2500,75158300
757,2024-06-03,192.90,194.99,192.52,194.03,194.0300,50080500
758,2024-06-04,194.64,195.32,193.03,194.35,194.3500,47471400


In [4]:
# Remove the 'Adj Close' column; the rest of the notebook works from 'Close'.
# Rebinding `df` (instead of inplace=True) together with errors='ignore' keeps
# this cell idempotent: re-running it, or running it on a download that has no
# 'Adj Close' column, no longer raises KeyError.
df = df.drop(columns=['Adj Close'], errors='ignore')
df

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2021-05-28,125.57,125.80,124.55,124.61,71311100
1,2021-06-01,125.08,125.35,123.94,124.28,67637100
2,2021-06-02,124.28,125.24,124.05,125.06,59278900
3,2021-06-03,124.68,124.85,123.13,123.54,76229200
4,2021-06-04,124.07,126.16,123.85,125.89,75169300
...,...,...,...,...,...,...
755,2024-05-30,190.76,192.18,190.63,191.29,49947900
756,2024-05-31,191.44,192.57,189.91,192.25,75158300
757,2024-06-03,192.90,194.99,192.52,194.03,50080500
758,2024-06-04,194.64,195.32,193.03,194.35,47471400


In [5]:
# Add a "Polarity" column holding each row's High minus Low.
df["Polarity"] = df["High"].sub(df["Low"])
df

Unnamed: 0,Date,Open,High,Low,Close,Volume,Polarity
0,2021-05-28,125.57,125.80,124.55,124.61,71311100,1.25
1,2021-06-01,125.08,125.35,123.94,124.28,67637100,1.41
2,2021-06-02,124.28,125.24,124.05,125.06,59278900,1.19
3,2021-06-03,124.68,124.85,123.13,123.54,76229200,1.72
4,2021-06-04,124.07,126.16,123.85,125.89,75169300,2.31
...,...,...,...,...,...,...,...
755,2024-05-30,190.76,192.18,190.63,191.29,49947900,1.55
756,2024-05-31,191.44,192.57,189.91,192.25,75158300,2.66
757,2024-06-03,192.90,194.99,192.52,194.03,50080500,2.47
758,2024-06-04,194.64,195.32,193.03,194.35,47471400,2.29


In [6]:
# Pull the closing prices out as a single-column 2-D array and work out how
# many leading rows form the training portion of a 90/10 train/test split.
df_close = df[["Close"]]
df_close_data = df_close.to_numpy()
df_size = len(df_close_data)
df_training_size = int(df_size * 0.9)
# Number of consecutive closes fed to the model as one input window.
step = 70

In [7]:
# Scale the closing prices and split them into training and testing segments.
#
# The scaler is fitted on the training rows ONLY and then applied to the whole
# series. Fitting on the full series (as before) lets the min/max of the
# hold-out rows leak into the training data. A consequence of fitting on the
# training rows is that scaled test values may fall slightly outside [0, 1].
df_scaler = MinMaxScaler(feature_range=(0, 1))
df_scaler.fit(df_close_data[:df_training_size])
df_scaled_data = df_scaler.transform(df_close_data)

df_training_data = df_scaled_data[0:df_training_size, :]
# The testing segment starts `step` rows before the split point so the first
# test window has a full `step` rows of history available.
df_testing_data = df_scaled_data[df_training_size-step: , :]

In [8]:
# Turn the scaled series into supervised-learning samples: every input is a
# window of `step` consecutive scaled closes, and (for training) the target is
# the scaled close that immediately follows the window.

# Unscaled closes for the test period.
df_y_testing = df_close_data[df_training_size:, :]

df_x_training = np.array(
    [df_training_data[stop - step:stop, 0] for stop in range(step, df_training_size)]
)
df_y_training = np.array(
    [df_training_data[stop, 0] for stop in range(step, df_training_size)]
)
df_x_testing = np.array(
    [df_testing_data[stop - step:stop, 0] for stop in range(step, len(df_testing_data))]
)

# Append a trailing axis of length 1 so each array is
# (samples, window length, 1), matching the input shape given to the model.
df_x_training = df_x_training.reshape(df_x_training.shape[0], df_x_training.shape[1], 1)
df_x_testing = df_x_testing.reshape(df_x_testing.shape[0], df_x_testing.shape[1], 1)

In [None]:
# Build, compile, and train a stacked two-layer LSTM that maps one window of
# scaled closes to the next scaled close.
n = 128  # units in the first LSTM layer
n2 = 64  # units in the second LSTM layer

# Seed the Python, NumPy, and TensorFlow RNGs so weight initialisation and
# batch shuffling repeat across Restart & Run All.
tf.keras.utils.set_random_seed(42)

df_model = Sequential()
# Declare the input shape with an explicit Input layer; passing `input_shape`
# to the first LSTM layer is deprecated and emits a UserWarning.
df_model.add(tf.keras.Input(shape=(df_x_training.shape[1], 1)))
# return_sequences=True hands the full sequence to the next LSTM layer.
df_model.add(LSTM(n, return_sequences=True))
df_model.add(LSTM(n2, return_sequences=False))
df_model.add(Dense(25))
df_model.add(Dense(1))

df_model.compile(optimizer='adam', loss='mean_squared_error')

# One pass over the training windows, updating weights after every sample.
df_model.fit(df_x_training, df_y_training, batch_size=1, epochs=1)

  super().__init__(**kwargs)


In [None]:
# Run the trained model on the test and training windows, then map its scaled
# outputs back to price units with the same scaler used on the inputs.
df_testing_predictions = df_scaler.inverse_transform(
    df_model.predict(df_x_testing)
)

df_training_predictions = df_scaler.inverse_transform(
    df_model.predict(df_x_training)
)

In [None]:
# Plot the training closes, the held-out actual closes, and the model's
# predictions for the held-out period on one set of axes.
df_training_closes = df_close[:df_training_size]
# Copy the test slice so the new column is not written into a view of df_close.
df_actual_closes = df_close[df_training_size:].copy()
df_actual_closes['predictions'] = df_testing_predictions

fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df_training_closes['Close'])
ax.plot(df_actual_closes['Close'], color='g')
ax.plot(df_actual_closes['predictions'], color='orange')
ax.set_title('Training, Actual, and Predicted Values of AAPL Closes')
ax.set_xlabel('Day')
ax.set_ylabel('Closing Price ($)')
ax.legend(('Training Data', 'Actual Close', 'Predicted Close'))
plt.show()

In [None]:
# Plot only the held-out period: actual closes (green) against the predicted
# closes (red).
ax = df_actual_closes.plot(color=['green', 'red'])
ax.set_title('Actual and Predicted Closing Prices')
ax.set_xlabel('Day')
ax.set_ylabel('Closing Price ($)')
ax.legend(('Actual Close', 'Predicted Close'))
plt.show()