<a href="https://colab.research.google.com/github/rogerarpagaus/Stock-Market/blob/main/AdvStockPattern.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Description: This code is to predict the Stock Pattern using LSTM with the Attention Mechanism in TensorFlow. The code is based on an article of Dr. Ernesto Lee
#             published in Medium on April 8th, 2024.

In [1]:
#Setting up the environment first
!pip install tensorflow -qqq
!pip install keras -qqq
!pip install yfinance -qqq

In [54]:
import tensorflow as tf
import keras
import yfinance as yf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime

In [None]:
#Check TensorFlow version
print("TensorFlow Version: ", tf.__version__)

In [6]:
# Prompt the user to enter a stock ticker symbol
ticker_symbol = input("Enter the stock ticker symbol (e.g., AAPL): ")

# Print the entered ticker symbol (optional)
print(f"You entered: {ticker_symbol}")

# Now you can use the `ticker_symbol` variable in your code, for example:
# data = yf.download(ticker_symbol)

Enter the stock ticker symbol (e.g., AAPL): IBM
You entered: IBM


In [8]:
#Get today's date
today = datetime.date.today()

In [None]:
#Fetch Stock Ticker data
ticker_data = yf.download(ticker_symbol,start='2012-01-01', end=today)

#Display teh first few rows of the dataframe
ticker_data.head()

In [None]:
#Check for missing values
ticker_data.isnull().sum()

#Filling missing values, if any
ticker_data.fillna(method='ffill', inplace=True)

In [12]:
#Normalize values in dataset to common scale
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))
ticker_data_scaled = scaler.fit_transform(ticker_data['Close'].values.reshape(-1,1))

In [14]:
#Create sequence format for training the LSTM model
X = []
Y = []

for i in range(60, len(ticker_data_scaled)):
  X.append(ticker_data_scaled[i-60:i,0])
  Y.append(ticker_data_scaled[i,0])

In [15]:
#Train Test Split
train_size = int(len(X) *0.8)
test_size = len(X) - train_size

X_train, X_test = X[:train_size], X[train_size:]
Y_train, Y_test = Y[:train_size], Y[train_size:]

In [16]:
#Reshape data into a 3D format (samples, time steps, features)
X_train, Y_train = np.array(X_train), np.array(Y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

In [29]:
#Creating the LSTM layers
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Dropout, AdditiveAttention, Permute, Reshape, Multiply, Flatten, BatchNormalization

#Define Input
inputs = Input(shape=(X_train.shape[1], 1))

#LTSM layers
lstm_out = LSTM(50, return_sequences=True)(inputs)
lstm_out = LSTM(50, return_sequences=True)(lstm_out)

In [32]:
# Adding self-attention mechanism
# The attention mechanism
attention = AdditiveAttention(name='attention_weight') # Define attention layer
attention_result = attention([lstm_out, lstm_out])

# Multiply attention weights with LSTM output
multiply_layer = Multiply()([lstm_out, attention_result])

# Reshape for compatibility
reshaped = Reshape((-1, 50))(multiply_layer)

# Flatten before Dense layer
flattened = Flatten()(reshaped)

# Dense layer
dense_out = Dense(1)(flattened)

# Add Dropout and Batch Normalization
dropout_out = Dropout(0.2)(dense_out)
batchnorm_out = BatchNormalization()(dropout_out)

# Define the model
model = Model(inputs=inputs, outputs=batchnorm_out)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

model.summary()

In [34]:
#Assuming X_train and Y_train are already defined and preprocessed
history = model.fit(X_train, Y_train, epochs=100, batch_size=32, verbose=1)

Epoch 1/100
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 170ms/step - loss: 0.0062
Epoch 2/100
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 160ms/step - loss: 0.0069
Epoch 3/100
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 157ms/step - loss: 0.0065
Epoch 4/100
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 152ms/step - loss: 0.0062
Epoch 5/100
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 160ms/step - loss: 0.0066
Epoch 6/100
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 162ms/step - loss: 0.0071
Epoch 7/100
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 152ms/step - loss: 0.0064
Epoch 8/100
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 201ms/step - loss: 0.0069
Epoch 9/100
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 145ms/step - loss: 0.0064
Epoch 10/100
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s

In [35]:
#Overfitting and how to avoid it
from keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=10)
history = model.fit(X_train, Y_train, epochs=100, batch_size=25, validation_split=0.2, callbacks=[early_stopping])

Epoch 1/100
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 132ms/step - loss: 0.0067 - val_loss: 0.0032
Epoch 2/100
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 133ms/step - loss: 0.0056 - val_loss: 0.0014
Epoch 3/100
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 129ms/step - loss: 0.0067 - val_loss: 0.0013
Epoch 4/100
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 109ms/step - loss: 0.0067 - val_loss: 6.7220e-04
Epoch 5/100
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 124ms/step - loss: 0.0070 - val_loss: 0.0012
Epoch 6/100
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 143ms/step - loss: 0.0057 - val_loss: 0.0013
Epoch 7/100
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 115ms/step - loss: 0.0068 - val_loss: 0.0025
Epoch 8/100
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 130ms/step - loss: 0.0070 - val_loss: 0.0015
Epoch 9/100
[1m83/8

In [None]:
# Convert X_test and y_test to Numpy arrays if they are not already
X_test = np.array(X_test)
Y_test = np.array(Y_test)

# Ensure X_test is reshaped similarly to how X_train was reshaped
# This depends on how you preprocessed the training data
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Now evaluate the model on the test data
test_loss = model.evaluate(X_test, Y_test)
print("Test Loss: ", test_loss)

In [61]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Making predictions
# Reshape X_test to include a batch dimension if it's not already present
if X_test.ndim == 3:  # Check if it has 3 dimensions (samples, timesteps, features)
    X_test = X_test.reshape(1, X_test.shape[0], X_test.shape[1], X_test.shape[2])

Y_pred = model.predict(X_test)

# If Y_pred has an extra dimension due to the batch dimension in X_test,
# remove it for comparison with Y_test
if Y_pred.ndim > Y_test.ndim:
    Y_pred = Y_pred.squeeze()  # Removes dimensions of size 1

# Calculating MAE and RMSE
mae = mean_absolute_error(Y_test, Y_pred)
rmse = mean_squared_error(Y_test, Y_pred, squared=False)

print("Mean Absolute Error: ", mae)
print("Root Mean Square Error: ", rmse)

ValueError: as_list() is not defined on an unknown TensorShape.

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Making predictions
Y_pred = model.predict(X_test)

# Calculating MAE and RMSE
mae = mean_absolute_error(Y_test, Y_pred)
rmse = mean_squared_error(Y_test, Y_pred, squared=False)

print("Mean Absolute Error: ", mae)
print("Root Mean Square Error: ", rmse)

In [62]:
import yfinance as yf
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Fetching the latest 60 days of AAPL stock data
data = yf.download('AAPL', period='60d', interval='1d')

# Selecting the 'Close' price and converting to numpy array
closing_prices = data['Close'].values

# Scaling the data
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(closing_prices.reshape(-1,1))

# Since we need the last 60 days to predict the next day, we reshape the data accordingly
X_latest = np.array([scaled_data[-60:].reshape(60)])

# Reshaping the data for the model (adding batch dimension)
X_latest = np.reshape(X_latest, (X_latest.shape[0], X_latest.shape[1], 1))

# Making predictions for the next 4 candles
predicted_stock_price = model.predict(X_latest)
predicted_stock_price = scaler.inverse_transform(predicted_stock_price)

print("Predicted Stock Prices for the next 4 days: ", predicted_stock_price)

[*********************100%***********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['AAPL']: YFInvalidPeriodError("%ticker%: Period '60d' is invalid, must be one of ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max']")


ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by MinMaxScaler.

In [None]:
import yfinance as yf
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Fetch the latest 60 days of AAPL stock data
data = yf.download('AAPL', period='5d', interval='1d')

# Select 'Close' price and scale it
closing_prices = data['Close'].values.reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(closing_prices)

# Predict the next 4 days iteratively
predicted_prices = []
current_batch = scaled_data[-60:].reshape(1, 5, 1)  # Most recent 60 days

for i in range(4):  # Predicting 4 days
    # Get the prediction (next day)
    next_prediction = model.predict(current_batch)

    # Reshape the prediction to fit the batch dimension
    next_prediction_reshaped = next_prediction.reshape(1, 1, 1)

    # Append the prediction to the batch used for predicting
    current_batch = np.append(current_batch[:, 1:, :], next_prediction_reshaped, axis=1)

    # Inverse transform the prediction to the original price scale
    predicted_prices.append(scaler.inverse_transform(next_prediction)[0, 0])

print("Predicted Stock Prices for the next 4 days: ", predicted_prices)

In [72]:
!pip install mplfinance -qqq
import pandas as pd
import mplfinance as mpf
import matplotlib.dates as mpl_dates
import matplotlib.pyplot as plt

# Assuming 'data' is your DataFrame with the fetched AAPL stock data
# Make sure it contains Open, High, Low, Close, and Volume columns

# Creating a list of dates for the predictions
last_date = data.index[0]
next_day = last_date + pd.Timedelta(days=1)
prediction_dates = pd.date_range(start=next_day, periods=4)

# Assuming 'predicted_prices' is your list of predicted prices for the next 4 days
predictions_df = pd.DataFrame(index=prediction_dates, data=predicted_prices, columns=['Close'])

# Plotting the actual data with mplfinance
mpf.plot(data, type='candle', style='charles', volume=True)

# Overlaying the predicted data
plt.figure(figsize=(10,6))
plt.plot(predictions_df.index, predictions_df['Close'], linestyle='dashed', marker='o', color='red')

plt.title("AAPL Stock Price with Predicted Next 4 Days")
plt.show()

ValueError: Data for column "Open" must be ALL float or int.

In [50]:
import pandas as pd
import mplfinance as mpf
import matplotlib.dates as mpl_dates
import matplotlib.pyplot as plt

# Fetch the latest 60 days of AAPL stock data
data = yf.download('AAPL', period='64d', interval='1d') # Fetch 64 days to display last 60 days in the chart

# Select 'Close' price and scale it
closing_prices = data['Close'].values.reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(closing_prices)

# Predict the next 4 days iteratively
predicted_prices = []
current_batch = scaled_data[-60:].reshape(1, 60, 1)  # Most recent 60 days

for i in range(4):  # Predicting 4 days
    next_prediction = model.predict(current_batch)
    next_prediction_reshaped = next_prediction.reshape(1, 1, 1)
    current_batch = np.append(current_batch[:, 1:, :], next_prediction_reshaped, axis=1)
    predicted_prices.append(scaler.inverse_transform(next_prediction)[0, 0])

# Creating a list of dates for the predictions
last_date = data.index[-1]
next_day = last_date + pd.Timedelta(days=1)
prediction_dates = pd.date_range(start=next_day, periods=4)

# Adding predictions to the DataFrame
predicted_data = pd.DataFrame(index=prediction_dates, data=predicted_prices, columns=['Close'])

# Combining both actual and predicted data
combined_data = pd.concat([data['Close'], predicted_data['Close']])
combined_data = combined_data[-64:] # Last 60 days of actual data + 4 days of predictions

# Plotting the actual data
plt.figure(figsize=(10,6))
plt.plot(data.index[-60:], data['Close'][-60:], linestyle='-', marker='o', color='blue', label='Actual Data')

# Plotting the predicted data
plt.plot(prediction_dates, predicted_prices, linestyle='-', marker='o', color='red', label='Predicted Data')

plt.title("AAPL Stock Price: Last 60 Days and Next 4 Days Predicted")
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

[*********************100%***********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['AAPL']: YFInvalidPeriodError("%ticker%: Period '64d' is invalid, must be one of ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max']")


ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by MinMaxScaler.