<a href="https://colab.research.google.com/github/mohamedhakkim26/Stock_Price_Prediction/blob/main/Stock_Price_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **STOCK_PRICE_PREDICTION BY LSTM**

## **Data Collection**

**Using yfinance**

In [None]:
pip install yfinance



### **Download Packages**

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from scipy.stats import zscore
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input

### **Download Stock Data**

In [None]:
import yfinance as yf

# Download data for a stock
df = yf.download("AAPL", start="2015-01-01", end="2024-01-01")
df = df[['Open', 'High', 'Low', 'Close', 'Volume']]

# Display the first few rows
print(df.head())


[*********************100%***********************]  1 of 1 completed

Price            Open       High        Low      Close     Volume
Ticker           AAPL       AAPL       AAPL       AAPL       AAPL
Date                                                             
2015-01-02  24.805924  24.817059  23.906238  24.347174  212818400
2015-01-05  24.115573  24.195743  23.474213  23.661276  257142000
2015-01-06  23.725850  23.924048  23.300503  23.663496  263188400
2015-01-07  23.872827  24.095522  23.761480  23.995310  160423600
2015-01-08  24.324903  24.975170  24.206873  24.917269  237458000





## **Data Preprocessing**

### **Load Stock Data**

In [None]:
import pandas as pd
import yfinance as yf

# Download stock data
df = yf.download("AAPL", start="2015-01-01", end="2024-01-01")

# Keep only relevant columns
df = df[['Open', 'High', 'Low', 'Close', 'Volume']]

# Display first few rows
print(df.head())


[*********************100%***********************]  1 of 1 completed

Price            Open       High        Low      Close     Volume
Ticker           AAPL       AAPL       AAPL       AAPL       AAPL
Date                                                             
2015-01-02  24.805924  24.817059  23.906238  24.347174  212818400
2015-01-05  24.115573  24.195743  23.474213  23.661276  257142000
2015-01-06  23.725850  23.924048  23.300503  23.663496  263188400
2015-01-07  23.872827  24.095522  23.761480  23.995310  160423600
2015-01-08  24.324903  24.975170  24.206873  24.917269  237458000





### **Handle MIssing Values**

In [None]:
df = df.ffill()

### **Feature Engineering**

In [None]:
def compute_rsi(data, window=14):
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

df['SMA_50'] = df['Close'].rolling(window=50).mean()
df['SMA_200'] = df['Close'].rolling(window=200).mean()
df['RSI'] = compute_rsi(df['Close'])
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['MACD_signal'] = df['MACD'].ewm(span=9, adjust=False).mean()
df['Rolling_STD_20'] = df['Close'].rolling(window=20).std()
df['Pct_Change'] = df['Close'].pct_change()
df['Lag_1'] = df['Close'].shift(1)
df = df.dropna()

### **Remove Outliers**

In [None]:
from scipy.stats import zscore

# Compute Z-score for 'Close' prices
df['Z_Score'] = zscore(df['Close'])

# Remove outliers (Z-score threshold of 3)
df = df[df['Z_Score'].abs() < 3]

# Reset index before dropping
df = df.reset_index()

# Drop the 'Z_Score' column safely
df.drop(columns=['Z_Score'], inplace=True)

# Set the original index back (if needed)
df.set_index('Date', inplace=True)

print(df.head())


Price            Open       High        Low      Close     Volume     SMA_50  \
Ticker           AAPL       AAPL       AAPL       AAPL       AAPL              
Date                                                                           
2015-10-16  25.208995  25.258611  24.927091  25.042109  156930400  25.337004   
2015-10-19  24.987992  25.202239  24.832381  25.197729  119036800  25.319910   
2015-10-20  25.109764  25.747995  24.992492  25.657785  195871200  25.293073   
2015-10-21  25.709664  26.065991  25.642006  25.655539  167180800  25.294291   
2015-10-22  25.784081  26.047943  25.732210  26.047943  166616400  25.295463   

Price         SMA_200        RSI     EMA_12     EMA_26      MACD MACD_signal  \
Ticker                                                                         
Date                                                                           
2015-10-16  27.207879  45.695022  25.102757  25.240235 -0.137479   -0.160620   
2015-10-19  27.212132  59.837822  25.11

  df.drop(columns=['Z_Score'], inplace=True)


### **Normalize Data for ML Models**

In [None]:
scaler = MinMaxScaler(feature_range=(0, 1))
df['Scaled_Close'] = scaler.fit_transform(df[['Close']])

### **Create Time-Series Sequences**

In [None]:
def create_sequences(data, time_step=60):
    X, Y = [], []
    for i in range(len(data) - time_step - 1):
        X.append(data[i:(i + time_step), 0])
        Y.append(data[i + time_step, 0])
    return np.array(X), np.array(Y)

time_step = 60  # 60-day sequence
X, Y = create_sequences(df[['Scaled_Close']].values)
X = X.reshape(X.shape[0], X.shape[1], 1)


### **Train-Test Split**

In [None]:
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
Y_train, Y_test = Y[:split], Y[split:]



### **Statistical Features**

#### **Percentage Change (Daily Returns)**

In [None]:
df['Pct_Change_AAPL'] = df['Close'].pct_change()

#### **Moving Standard Deviation**

In [None]:
df['Std_Dev_50_AAPL'] = df['Close'].rolling(window=50).std()

### **Lag Features**

#### **Lagging Close Price**

In [None]:
df['Lag_1_AAPL'] = df['Close'].shift(1)
df['Lag_2_AAPL'] = df['Close'].shift(2)

#### **Rolling Window Features**

In [None]:
df['Rolling_Mean_10_AAPL'] = df['Close'].rolling(window=10).mean()
df['Rolling_Max_10_AAPL'] = df['Close'].rolling(window=10).max()

#### **Display the last few rows to check the results**

In [None]:
print(df[['Close', 'Lag_1_AAPL', 'Lag_2_AAPL', 'Rolling_Mean_10_AAPL', 'Rolling_Max_10_AAPL']].tail())

Price            Close  Lag_1_AAPL  Lag_2_AAPL Rolling_Mean_10_AAPL  \
Ticker            AAPL                                                
Date                                                                  
2023-12-22  192.656174  193.730896  193.880188           194.792711   
2023-12-26  192.108856  192.656174  193.730896           194.779774   
2023-12-27  192.208374  192.108856  192.656174           194.624533   
2023-12-28  192.636292  192.208374  192.108856           194.188669   
2023-12-29  191.591370  192.636292  192.208374           193.633388   

Price      Rolling_Max_10_AAPL  
Ticker                          
Date                            
2023-12-22          197.144180  
2023-12-26          197.144180  
2023-12-27          197.144180  
2023-12-28          197.144180  
2023-12-29          196.606827  


## **Model Selection**

### **LSTM for Stock Price Prediction**


#### **Data Preprocessing**

In [None]:
X = X.reshape(X.shape[0], X.shape[1], 1)  # Reshaping for LSTM (samples, time steps, features)

#### **Split Data into Training and Testing Sets**

In [None]:
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
Y_train, Y_test = Y[:split], Y[split:]


print(f"Training Samples: {len(X_train)}, Testing Samples: {len(X_test)}")


Training Samples: 1603, Testing Samples: 401


#### **Building the LSTM Model**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input

# Build the LSTM model

model = Sequential([
    Input(shape=(X_train.shape[1], 1)),
    LSTM(50, return_sequences=True),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(1)
])
# Summary of the model architecture
model.summary()


#### **Training the Model**

In [None]:
model.compile(optimizer='adam', loss='mean_squared_error')
history = model.fit(X_train, Y_train, epochs=50, batch_size=32, validation_data=(X_test, Y_test))


Epoch 1/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 97ms/step - loss: 0.0353 - val_loss: 0.0018
Epoch 2/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 61ms/step - loss: 0.0026 - val_loss: 0.0027
Epoch 3/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 62ms/step - loss: 0.0023 - val_loss: 0.0015
Epoch 4/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 80ms/step - loss: 0.0017 - val_loss: 0.0014
Epoch 5/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 60ms/step - loss: 0.0015 - val_loss: 0.0012
Epoch 6/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 62ms/step - loss: 0.0021 - val_loss: 0.0015
Epoch 7/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 60ms/step - loss: 0.0016 - val_loss: 0.0024
Epoch 8/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 61ms/step - loss: 0.0017 - val_loss: 0.0075
Epoch 9/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━

## **Model Evaluation**

### **Evaluate model**

In [None]:
Y_pred = model.predict(X_test)
Y_pred_inv = scaler.inverse_transform(Y_pred)
Y_test_inv = scaler.inverse_transform(Y_test.reshape(-1, 1))

### **Plot Results**

In [None]:
plt.plot(Y_test_inv, label='Actual Price')
plt.plot(Y_pred_inv, label='Predicted Price')
plt.title('Stock Price Prediction - LSTM')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()

### **Performance Evaluation**

In [None]:
from sklearn.metrics import mean_absolute_percentage_error

rmse = np.sqrt(mean_squared_error(Y_test_inv, Y_pred_inv))
mape = mean_absolute_percentage_error(Y_test_inv, Y_pred_inv)
print(f'RMSE: {rmse}, MAPE: {mape}')

## **Alternative Models for Comparison**

### **GRU (Gated Recurrent Unit)**

In [None]:
from tensorflow.keras.layers import GRU, Dense, Dropout, Input
from tensorflow.keras.models import Sequential

# Initialize Sequential model
model = Sequential()

# Input layer - this defines the input shape at the beginning
model.add(Input(shape=(X_train.shape[1], 1)))  # X_train.shape[1] is the time step length

# GRU layer with 50 units
model.add(GRU(units=50, return_sequences=True))  # Note: No need to pass input_shape here anymore
model.add(Dropout(0.2))  # Dropout for regularization

# Add another GRU layer
model.add(GRU(units=50, return_sequences=False))  # No sequences returned here
model.add(Dropout(0.2))

# Fully connected output layer
model.add(Dense(units=1))  # Predict the next value (closing stock price)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Model summary
model.summary()


## **Deployment**

In [None]:
from tensorflow.keras.models import load_model

# Save the LSTM model
model.save('stock_price_lstm.h5')

# Load the LSTM model
loaded_model = load_model('stock_price_lstm.h5')