<a href="https://colab.research.google.com/github/orevs-com/Stock-Market-Price-Prediction/blob/main/Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Enhanced Stock Price Prediction Using LSTM with Technical Indicators: A Comparative Study with XGBoost and ARIMA

# by Orevaoghene Otiede

Importing the required libraries for the project


In [8]:
# Install pandas_ta, the package imported below as `ta` and used to compute the technical indicators
!pip install pandas_ta



In [16]:
# Correcting the numpy import for pandas_ta
# pandas_ta's momentum/squeeze_pro.py imports `NaN` from numpy; that alias was
# removed in NumPy 2.0, so the import line is rewritten to use `nan` instead.
import importlib.util
from pathlib import Path


def patch_numpy_nan_import(path):
    """Rewrite the `NaN` import in the file at `path` to use `nan`.

    Args:
        path: Location of the Python source file to patch.

    Returns:
        True when the file was rewritten, False when the old import line was
        not present (for example because the file was already patched).
    """
    target = Path(path)
    content = target.read_text()
    patched = content.replace("from numpy import NaN as npNaN", "from numpy import nan as npNaN")
    if patched == content:
        return False
    target.write_text(patched)
    return True


# Locate the installed package instead of hard-coding a Python-version-specific
# site-packages path. find_spec does not execute the package, so this works even
# while `import pandas_ta` itself would still fail.
spec = importlib.util.find_spec("pandas_ta")

if spec is None or not spec.submodule_search_locations:
    print("pandas_ta is not installed; run the install cell first.")
else:
    path = str(Path(list(spec.submodule_search_locations)[0]) / "momentum" / "squeeze_pro.py")
    if patch_numpy_nan_import(path):
        print("File updated successfully.")
    else:
        print("File already up to date; no changes made.")

File updated successfully.


In [10]:
#Importing the Libraries
import numpy as np
import pandas as pd
import pandas_ta as ta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import linear_model
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
import keras.backend as K
import yfinance as yf
import os

In [11]:
# Import the drive module from the google.colab package.
from google.colab import drive

# Mount Google Drive at /content/drive; the data folders used by the cells below
# all live under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


Lists of ticker symbols for the training and testing companies

In [12]:
# Ticker symbols: one list for the training companies, one for the testing companies
train_tickers = [
    'AAPL', 'MSFT', 'GOOGL',
    'AMZN', 'NVDA', 'META',
]
test_tickers = [
    'TSLA', 'AVGO', 'COST',
    'NFLX', 'ADBE', 'INTC',
]

Downloading and Saving the Stock Market Price Data

In [13]:
# Define the date range requested from Yahoo Finance (passed to yf.download as start/end)
start_date = "2015-01-01"
# NOTE(review): yfinance documents `end` as exclusive, so 2024-12-31 itself would not be downloaded - confirm this is intended
end_date = "2024-12-31"
# Define the Google Drive folder that receives one <ticker>.csv file per symbol
drive_path = '/content/drive/MyDrive/Colab Notebooks/Stock Market Data/'

# Function to download and save data for a list of tickers
def download_and_save_tickers(tickers, start, end, base_path):
    """Download daily price history for each ticker and save it as `<ticker>.csv`.

    Args:
        tickers: Iterable of ticker symbols to download.
        start: Start date handed to `yf.download`.
        end: End date handed to `yf.download`.
        base_path: Folder that receives the CSV files; created if it is missing.

    A problem with one ticker is printed and does not stop the remaining
    downloads. Nothing is returned.
    """
    # Create the target folder up front. Without it every `to_csv` call fails
    # and the failure would be reported as a download error.
    os.makedirs(base_path, exist_ok=True)

    for ticker in tickers:
        try:
            # Download data from Yahoo Finance with auto-adjusted prices
            df = yf.download(ticker, start=start, end=end, interval="1d",
                             auto_adjust=True)

            if df.empty:
                print(f"No data found for {ticker} in the specified date range.")
                continue

            file_path = os.path.join(base_path, f"{ticker}.csv")
            df.to_csv(file_path)
            print(f"Successfully downloaded and saved data for {ticker}")

        except Exception as e:
            # Best effort: report the failure and move on to the next ticker.
            print(f"Error downloading data for {ticker}: {e}")

# Fetch the training tickers first, then the testing tickers, into the same folder
for ticker_group in (train_tickers, test_tickers):
    download_and_save_tickers(ticker_group, start_date, end_date, drive_path)

print("\nAll data download attempts complete.")

[*********************100%***********************]  1 of 1 completed


Successfully downloaded and saved data for AAPL


[*********************100%***********************]  1 of 1 completed


Successfully downloaded and saved data for MSFT


[*********************100%***********************]  1 of 1 completed


Successfully downloaded and saved data for GOOGL


[*********************100%***********************]  1 of 1 completed


Successfully downloaded and saved data for AMZN


[*********************100%***********************]  1 of 1 completed


Successfully downloaded and saved data for NVDA


[*********************100%***********************]  1 of 1 completed


Successfully downloaded and saved data for META


[*********************100%***********************]  1 of 1 completed


Successfully downloaded and saved data for TSLA


[*********************100%***********************]  1 of 1 completed


Successfully downloaded and saved data for AVGO


[*********************100%***********************]  1 of 1 completed


Successfully downloaded and saved data for COST


[*********************100%***********************]  1 of 1 completed


Successfully downloaded and saved data for NFLX


[*********************100%***********************]  1 of 1 completed


Successfully downloaded and saved data for ADBE


[*********************100%***********************]  1 of 1 completed


Successfully downloaded and saved data for INTC

All data download attempts complete.


Calculating the Technical Indicators

In [15]:
# Defining the directories: raw yfinance CSV files in, indicator-enriched CSV files out
data_dir = '/content/drive/MyDrive/Colab Notebooks/Stock Market Data/'
output_dir = '/content/drive/MyDrive/Colab Notebooks/Data with Indicators/'

# Price fields every file must provide before the indicators can be computed
required_cols = ['Open', 'High', 'Low', 'Close', 'Volume']

# Columns that receive a one-row lagged copy named Lag_<column>
lag_cols = [
    # Columns from SMA, EMA (Trend Indicators)
    'SMA_200', 'SMA_50', 'EMA_26',
    # Columns from MACD (Trend Indicators)
    'MACD_12_26_9', 'MACDh_12_26_9', 'MACDs_12_26_9',
    # Columns from RSI, Williams %R (Momentum Indicators)
    'RSI_14', 'WILLR_14',
    # Columns from Bollinger Bands (Volatility Indicators)
    'BBL_5_2.0', 'BBM_5_2.0', 'BBU_5_2.0', 'BBB_5_2.0', 'BBP_5_2.0',
    # Columns from OBV, CMF (Volume Indicators)
    'OBV', 'CMF_20',
    # Columns from Daily Returns and Log Returns
    'Daily_Return', 'Log_Return',
]


def read_price_csv(file_path):
    """Load one saved price CSV into a DataFrame indexed by date.

    The files written by the download cell start with three header lines
    (field names, ticker, index name). The field names are taken from the
    first line instead of being assumed, and the data is read with
    `header=None` so that the first data row is kept rather than being
    consumed as a header.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        DataFrame with a `Date` DatetimeIndex and one column per price field.
    """
    # First header line looks like "Price,Close,High,Low,Open,Volume".
    field_names = list(pd.read_csv(file_path, nrows=0).columns[1:])
    prices = pd.read_csv(
        file_path,
        skiprows=3,          # Considering Multi Index Rows
        header=None,         # the fourth line is data, not a header
        names=['Date'] + field_names,
    )
    prices['Date'] = pd.to_datetime(prices['Date'])
    return prices.set_index('Date')


def add_indicator_features(df, ticker):
    """Append indicators, returns and lagged copies to `df`, then drop NaN rows.

    Args:
        df: Price DataFrame with Open/High/Low/Close/Volume columns; it is
            modified in place through the pandas_ta `df.ta` accessor.
        ticker: Symbol used only in the printed messages.

    Returns:
        The same DataFrame after rows containing NaN values were removed.
    """
    # Trend Indicators
    df.ta.sma(length=200, append=True)
    df.ta.sma(length=50, append=True)
    df.ta.ema(length=26, append=True)
    df.ta.macd(append=True)

    # Momentum Indicators
    df.ta.rsi(length=14, append=True)
    df.ta.willr(append=True)

    # Volatility Indicator
    df.ta.bbands(append=True)

    # Volume Indicators
    df.ta.obv(append=True)
    df.ta.cmf(append=True)

    # Adding Daily Returns and Log Returns
    df['Daily_Return'] = df['Close'].pct_change()
    df['Log_Return'] = np.log(df['Close'] / df['Close'].shift(1))
    df.replace([np.inf, -np.inf], np.nan, inplace=True)

    # Add a one-row lag of every indicator / return column that was produced
    for col in lag_cols:
        if col in df.columns:
            df[f'Lag_{col}'] = df[col].shift(1)
        else:
            print(f"  Column '{col}' not found for {ticker} .")

    # Data Cleaning after adding Technical Indicators and Lagged Features
    df.dropna(inplace=True)
    return df


if os.path.isdir(data_dir):
    # Create an Output Directory
    os.makedirs(output_dir, exist_ok=True)
    # Get a list of all CSV files (sorted so the processing order is repeatable)
    csv_files = sorted(f for f in os.listdir(data_dir) if f.endswith('.csv'))
else:
    csv_files = []
    print(f"Data directory not found: {data_dir}. Run the download cell first.")

for file_name in csv_files:
    ticker = os.path.splitext(file_name)[0]  # Extract ticker from file name
    file_path = os.path.join(data_dir, file_name)

    try:
        df = read_price_csv(file_path)
        print(f"Calculating indicators for {ticker}...")

        if not all(col in df.columns for col in required_cols):
            print(f"Skipping {ticker}: Missing OHLCV columns after parsing.")
            continue

        # Calculating Technical Indicators using pandas_ta
        rows = len(df)
        df = add_indicator_features(df, ticker)
        print(f"  Dropped {rows - len(df)} rows with NaN values for {ticker}.")

        # Save the processed DataFrame to a new CSV
        output_file_path = os.path.join(output_dir, f"{ticker}_indicators.csv")
        df.to_csv(output_file_path)
        print(f"Saved Technical Indicators for {ticker} to {output_file_path}")

    except Exception as e:
        # Best effort: report the failure and continue with the next file.
        print(f"Error processing {ticker}: {e}")

Calculating indicators for GOOGL...
  Dropped 200 rows with NaN values for GOOGL.
Saved Technical Indicators for GOOGL to /content/drive/MyDrive/Colab Notebooks/Data with Indicators/GOOGL_indicators.csv
Calculating indicators for AMZN...
  Dropped 200 rows with NaN values for AMZN.
Saved Technical Indicators for AMZN to /content/drive/MyDrive/Colab Notebooks/Data with Indicators/AMZN_indicators.csv
Calculating indicators for AAPL...
  Dropped 200 rows with NaN values for AAPL.
Saved Technical Indicators for AAPL to /content/drive/MyDrive/Colab Notebooks/Data with Indicators/AAPL_indicators.csv
Calculating indicators for META...
  Dropped 200 rows with NaN values for META.
Saved Technical Indicators for META to /content/drive/MyDrive/Colab Notebooks/Data with Indicators/META_indicators.csv
Calculating indicators for MSFT...
  Dropped 200 rows with NaN values for MSFT.
Saved Technical Indicators for MSFT to /content/drive/MyDrive/Colab Notebooks/Data with Indicators/MSFT_indicators.csv
C