In [161]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.neighbors import KNeighborsRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,LSTM

In [179]:
# Detect whether the notebook is running on Google Colab by attempting the
# Colab-only import. Only ImportError is caught so that unrelated failures
# (e.g. KeyboardInterrupt) are not silently swallowed.
try:
    import google.colab  # noqa: F401  (imported only to probe the environment)
    In_colab = True
except ImportError:
    In_colab = False

if In_colab:
    print("Running in google colab")
    from google.colab import drive
    drive.mount('/content/drive')
    df = pd.read_csv('drive/My Drive/Colab Notebooks/Stock Price Prediction RNN/SBIN.csv')
else:
    print("Running in local system")
    # NOTE(review): absolute, machine-specific path; the notebook will not run
    # elsewhere until this is replaced by a path relative to the repository.
    path = r'C:\Users\SHRISTI\OneDrive\Desktop\GitHub\Stock-Price-Prediction\Data\SBIN.csv'
    df = pd.read_csv(path)


Running in local system


In [26]:
# Preview the first rows of the frame loaded by the previous cell.
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,01-01-1996,18.691147,18.978922,18.540184,18.82324,12.409931,43733533.0
1,02-01-1996,18.894005,18.964767,17.738192,18.224106,12.014931,56167280.0
2,03-01-1996,18.327892,18.568489,17.643839,17.738192,11.694577,68296318.0
3,04-01-1996,17.502312,17.832542,17.223972,17.676863,11.654142,86073880.0
4,05-01-1996,17.738192,17.785366,17.459852,17.577793,11.588827,76613039.0


In [28]:
# Drop the 'Date' and 'Adj Close' columns.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after the columns are already gone is a no-op
# rather than a KeyError.
df = df.drop(columns=['Date', 'Adj Close'], errors='ignore')

In [30]:
# Confirm that 'Date' and 'Adj Close' are no longer present.
df.head()

Unnamed: 0,Open,High,Low,Close,Volume
0,18.691147,18.978922,18.540184,18.82324,43733533.0
1,18.894005,18.964767,17.738192,18.224106,56167280.0
2,18.327892,18.568489,17.643839,17.738192,68296318.0
3,17.502312,17.832542,17.223972,17.676863,86073880.0
4,17.738192,17.785366,17.459852,17.577793,76613039.0


In [32]:
# Replace missing entries in every column with that column's mean.
# NOTE(review): the imputer is fitted on the full frame before the train/test
# split below, so the column means include rows that later land in the test set.
imputer = SimpleImputer(strategy='mean')
imputed_values = imputer.fit_transform(df)
df = pd.DataFrame(imputed_values, columns=df.columns)

In [34]:
# Predictors are the daily open/high/low prices and traded volume;
# the value to predict is the closing price.
X = df.loc[:, ['Open', 'High', 'Low', 'Volume']]
y = df.loc[:, 'Close']

In [36]:
# Hold out 20% of the rows for testing; random_state=42 makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default, while the preview of
# the raw frame shows rows in date order. A shuffled split lets the models see
# days that come after some test days - confirm this is intended for a
# price-forecasting task (shuffle=False would give a chronological split).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [38]:
# Min-max scale the features. The scaler learns its ranges from the training
# split only and then applies the same mapping to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [40]:
# (rows, columns) of the training feature matrix.
X_train.shape

(5659, 4)

In [42]:
# (rows, columns) of the test feature matrix.
X_test.shape

(1415, 4)

In [44]:
# Number of training targets; should match the row count of X_train.
y_train.shape

(5659,)

In [46]:
# Number of test targets; should match the row count of X_test.
y_test.shape

(1415,)

In [48]:
def evaluate_model(model, X_test, y_test):
    """Score a fitted model on held-out data and print the results.

    Args:
        model: Any object exposing ``predict(X)``.
        X_test: Inputs passed unchanged to ``model.predict``.
        y_test: True target values to compare the predictions against.

    Returns:
        Tuple ``(rmse, mae, mape)``: root mean squared error, mean absolute
        error and mean absolute percentage error of the predictions.
    """
    predictions = model.predict(X_test)

    # Insertion order (RMSE, MAE, MAPE) drives both the printout and the return value.
    scores = {
        "RMSE": np.sqrt(mean_squared_error(y_test, predictions)),
        "MAE": mean_absolute_error(y_test, predictions),
        "MAPE": mean_absolute_percentage_error(y_test, predictions),
    }

    for label, value in scores.items():
        print(f"{label}: {value}")
    print()  # blank separator line after the MAPE row

    return tuple(scores.values())


In [50]:
# Comparison table filled in model by model: one list per column, and every
# evaluation cell below appends one entry to each list.
metrics = {column: [] for column in ("Model", "RMSE", "MAE", "MAPE")}

## 1. LINEAR REGRESSION

In [53]:
# Ordinary least-squares baseline with scikit-learn's default settings.
model1 = LinearRegression()

In [55]:
# Peek at the training targets; the non-sequential index shows the rows were shuffled by the split.
y_train.head()

5286    257.350006
3408    129.464996
5477    279.350006
6906    588.500000
530      21.644367
Name: Close, dtype: float64

In [163]:
# Fit the linear regression on the scaled training features.
model1.fit(X=X_train_scaled, y=y_train)

In [63]:
# Score the linear regression on the test split and add a row to the comparison table.
# Re-running this cell appends a second row for the same model.
rmse, mae, mape = evaluate_model(model1, X_test_scaled, y_test)
for metric_name, metric_value in (("Model", "Linear Regressor"), ("RMSE", rmse), ("MAE", mae), ("MAPE", mape)):
    metrics[metric_name].append(metric_value)

RMSE: 1.6881364651923558
MAE: 0.9433353486266928
MAPE: 0.006085435968276741



## 2. SVR

In [65]:
# Support vector regressor with scikit-learn's default hyperparameters.
model2 = SVR()

In [165]:
# Fit the SVR on the scaled training features.
model2.fit(X=X_train_scaled, y=y_train)

In [68]:
# Score the SVR on the test split and add a row to the comparison table.
# Re-running this cell appends a second row for the same model.
rmse, mae, mape = evaluate_model(model2, X_test_scaled, y_test)
for metric_name, metric_value in (("Model", "SVR"), ("RMSE", rmse), ("MAE", mae), ("MAPE", mape)):
    metrics[metric_name].append(metric_value)

RMSE: 17.574809673127547
MAE: 6.278157692070486
MAPE: 0.09040265035344064



## 3. Random Forest

In [72]:
# Random forest with default hyperparameters. The forest draws bootstrap samples
# and feature subsets at random, so it is seeded to keep the reported metrics
# reproducible across kernel restarts (42 matches the seed used for the split).
model3 = RandomForestRegressor(random_state=42)

In [167]:
# Fit the random forest on the scaled training features.
model3.fit(X=X_train_scaled, y=y_train)

In [80]:
# Score the random forest on the test split and add a row to the comparison table.
# Re-running this cell appends a second row for the same model.
rmse, mae, mape = evaluate_model(model3, X_test_scaled, y_test)
for metric_name, metric_value in (("Model", "Random Forest"), ("RMSE", rmse), ("MAE", mae), ("MAPE", mape)):
    metrics[metric_name].append(metric_value)

RMSE: 2.2053909891328036
MAE: 1.2608162799481166
MAPE: 0.008015308194076972



## 4. Gradient Boosting Models (GBM)

In [83]:
# Gradient boosting with default hyperparameters, seeded so that repeated runs
# build the same trees (42 matches the seed used for the split).
model4 = GradientBoostingRegressor(random_state=42)

In [169]:
# Fit the gradient boosting model on the scaled training features.
model4.fit(X=X_train_scaled, y=y_train)

In [87]:
# Score the gradient boosting model on the test split and add a row to the comparison table.
# Re-running this cell appends a second row for the same model.
rmse, mae, mape = evaluate_model(model4, X_test_scaled, y_test)
for metric_name, metric_value in (("Model", "GBM"), ("RMSE", rmse), ("MAE", mae), ("MAPE", mape)):
    metrics[metric_name].append(metric_value)

RMSE: 2.6985863368468084
MAE: 1.692542658558929
MAPE: 0.011883244132236716



## 5. Extreme Gradient Boosting (XGBoost)

In [90]:
# NOTE(review): this import sits mid-notebook; consider moving it to the imports cell at the top.
import xgboost as xgb
# XGBoost regressor with the library's default hyperparameters.
model5 = xgb.XGBRegressor()

In [171]:
# Fit the XGBoost regressor on the scaled training features.
model5.fit(X=X_train_scaled, y=y_train)

In [94]:
# Score the XGBoost regressor on the test split and add a row to the comparison table.
# Re-running this cell appends a second row for the same model.
rmse, mae, mape = evaluate_model(model5, X_test_scaled, y_test)
for metric_name, metric_value in (("Model", "XGBoost"), ("RMSE", rmse), ("MAE", mae), ("MAPE", mape)):
    metrics[metric_name].append(metric_value)

RMSE: 2.733930065274145
MAE: 1.502457380471909
MAPE: 0.010026410639661481



## 6. AdaBoostRegressor

In [97]:
# AdaBoost with default hyperparameters. Each boosting round resamples the
# training set at random, so the model is seeded to keep the reported metrics
# reproducible (42 matches the seed used for the split).
model6 = AdaBoostRegressor(random_state=42)

In [173]:
# Fit the AdaBoost regressor on the scaled training features.
model6.fit(X=X_train_scaled, y=y_train)

In [105]:
# Score the AdaBoost regressor on the test split and add a row to the comparison table.
# Re-running this cell appends a second row for the same model.
rmse, mae, mape = evaluate_model(model6, X_test_scaled, y_test)
for metric_name, metric_value in (("Model", "AdaBoost Regressor"), ("RMSE", rmse), ("MAE", mae), ("MAPE", mape)):
    metrics[metric_name].append(metric_value)

RMSE: 9.175482477551942
MAE: 7.527617905792734
MAPE: 0.1858930099598583



## 7. Decision Tree

In [108]:
# Single decision tree with default hyperparameters. scikit-learn permutes the
# candidate features at every split, so ties can resolve differently between
# runs; the seed pins the result (42 matches the seed used for the split).
model7 = DecisionTreeRegressor(random_state=42)

In [175]:
# Fit the decision tree on the scaled training features.
model7.fit(X=X_train_scaled, y=y_train)

In [112]:
# Score the decision tree on the test split and add a row to the comparison table.
# Re-running this cell appends a second row for the same model.
rmse, mae, mape = evaluate_model(model7, X_test_scaled, y_test)
for metric_name, metric_value in (("Model", "Decision Tree"), ("RMSE", rmse), ("MAE", mae), ("MAPE", mape)):
    metrics[metric_name].append(metric_value)

RMSE: 3.12966540689625
MAE: 1.6497286032971983
MAPE: 0.010286427942970355



## 8. KNeighborsRegressor (KNN)

In [115]:
# k-nearest-neighbours regressor with scikit-learn's default settings.
model8 = KNeighborsRegressor()

In [177]:
# Fit the KNN regressor on the scaled training features.
model8.fit(X=X_train_scaled, y=y_train)

In [119]:
# Score the KNN regressor on the test split and add a row to the comparison table.
# Re-running this cell appends a second row for the same model.
rmse, mae, mape = evaluate_model(model8, X_test_scaled, y_test)
for metric_name, metric_value in (("Model", "KNN"), ("RMSE", rmse), ("MAE", mae), ("MAPE", mape)):
    metrics[metric_name].append(metric_value)

RMSE: 3.0274590148039873
MAE: 1.7525904376439672
MAPE: 0.013668115353592272



## 9. Artificial Neural Networks (ANN)

In [None]:
# Feed-forward network: two ReLU hidden layers (32 and 16 units) followed by a
# single linear output unit for the predicted closing price.
model9 = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1, activation='linear'),
])

In [124]:
# Configure training: Adam optimizer minimizing mean squared error.
model9.compile(optimizer='adam', loss='mean_squared_error')

In [126]:
# Train the network for 100 epochs in mini-batches of 32, with progress output silenced.
model9.fit(x=X_train_scaled, y=y_train, epochs=100, batch_size=32, verbose=0)

<keras.src.callbacks.history.History at 0x1a508bc2c90>

In [128]:
# Score the neural network on the test split and add a row to the comparison table.
# Re-running this cell appends a second row for the same model.
rmse, mae, mape = evaluate_model(model9, X_test_scaled, y_test)
for metric_name, metric_value in (("Model", "ANN"), ("RMSE", rmse), ("MAE", mae), ("MAPE", mape)):
    metrics[metric_name].append(metric_value)

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
RMSE: 2.801001091255311
MAE: 1.7605365826618848
MAPE: 0.0126215060590655



## 10. LSTM (Long Short-Term Memory)

In [131]:
# Turn the 2-D scaled feature matrices into overlapping windows of n_steps
# consecutive rows, giving arrays of shape (n_samples, n_steps, n_features).
# NOTE(review): the rows of X_train_scaled / X_test_scaled come from a shuffled
# train_test_split, so "consecutive rows" here are not consecutive trading days -
# confirm whether a chronological split was intended for the LSTM.
n_steps = 10
n_features = X_train_scaled.shape[1]
n_samples_train = X_train_scaled.shape[0] - n_steps + 1
n_samples_test = X_test_scaled.shape[0] - n_steps + 1

# Window k covers rows k .. k + n_steps - 1.
train_windows = [X_train_scaled[start:start + n_steps, :] for start in range(n_samples_train)]
test_windows = [X_test_scaled[start:start + n_steps, :] for start in range(n_samples_test)]
X_train_reshaped = np.array(train_windows)
X_test_reshaped = np.array(test_windows)


In [None]:
# LSTM regressor: one 64-unit recurrent layer reading (n_steps, n_features)
# windows, followed by a single output unit.
model = Sequential([
    LSTM(64, activation='relu', input_shape=(n_steps, n_features)),
    Dense(1),
])


In [135]:
# Configure training: Adam optimizer minimizing mean squared error.
model.compile(optimizer='adam', loss='mean_squared_error')


In [137]:
# Train the LSTM for 100 epochs. Window k ends at row k + n_steps - 1, so the
# first n_steps - 1 targets have no complete window and are dropped to keep
# inputs and targets the same length.
y_train_windowed = y_train[n_steps-1:]
model.fit(x=X_train_reshaped, y=y_train_windowed, epochs=100, batch_size=32, verbose=0)

<keras.src.callbacks.history.History at 0x1a508dda390>

In [None]:
# Score the LSTM on the test split and add a row to the comparison table.
# The network was built under the name `model` (no `model10` exists) and
# expects 3-D windowed input, so it is evaluated on X_test_reshaped. Window k
# ends at row k + n_steps - 1, hence the first n_steps - 1 targets are dropped
# to align y_test with the windows, mirroring how the model was trained.
rmse, mae, mape = evaluate_model(model, X_test_reshaped, y_test[n_steps-1:])
metrics["Model"].append("LSTM")
metrics["RMSE"].append(rmse)
metrics["MAE"].append(mae)
metrics["MAPE"].append(mape)

In [None]:
# Collect the per-model scores into one table for plotting.
metrics_df = pd.DataFrame(metrics)

# RMSE comparison. Each metric gets its own figure because the plots live in
# separate cells: a shared 1x3 subplot grid cannot span cells once plt.show()
# has closed the figure.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(metrics_df['Model'], metrics_df['RMSE'], color='lightblue')
ax.set_xlabel('Model')
ax.set_ylabel('RMSE')  # the metric is on the vertical axis; the x axis holds model names
ax.set_title('RMSE for Different Models')
ax.tick_params(axis='x', labelrotation=45)  # keep long model names from overlapping
fig.tight_layout()
plt.show()

In [None]:
# MAE comparison, drawn on its own figure: calling plt.subplot(1, 3, 2) in a
# fresh cell would start a new default-sized figure holding only one narrow panel.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(metrics_df['Model'], metrics_df['MAE'], color='lightgreen')
ax.set_xlabel('Model')
ax.set_ylabel('MAE')  # the metric is on the vertical axis; the x axis holds model names
ax.set_title('MAE for Different Models')
ax.tick_params(axis='x', labelrotation=45)  # keep long model names from overlapping
fig.tight_layout()
plt.show()

In [None]:
# MAPE comparison, drawn on its own figure: calling plt.subplot(1, 3, 3) in a
# fresh cell would start a new default-sized figure holding only one narrow panel.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(metrics_df['Model'], metrics_df['MAPE'], color='salmon')
ax.set_xlabel('Model')
ax.set_ylabel('MAPE')  # the metric is on the vertical axis; the x axis holds model names
ax.set_title('MAPE for Different Models')
ax.tick_params(axis='x', labelrotation=45)  # keep long model names from overlapping
fig.tight_layout()
plt.show()

# Using LightGBM and CatBoost to Optimize Model Accuracy and Time Complexity

In [None]:
# Import necessary libraries
import lightgbm as lgb
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, accuracy_score, precision_score, confusion_matrix, recall_score, f1_score

def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training data and score it on the test data.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``; the
            prediction must support ``.round()``.
        X_train, y_train: Data passed to ``model.fit``.
        X_test, y_test: Data used for scoring.

    Returns:
        Tuple ``(rmse, mae, mape, accuracy, precision, confusion, recall, f1)``.

    NOTE(review): the last five entries compare ``y_test > pred`` with
    ``y_test > pred.round()``. They therefore measure how often rounding the
    prediction flips that comparison, not how well the model predicts -
    confirm this is what was intended for a regression task.
    """
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    # Regression error measures.
    squared_error = mean_squared_error(y_test, pred)
    rmse = np.sqrt(squared_error)
    mae = mean_absolute_error(y_test, pred)
    mape = mean_absolute_percentage_error(y_test, pred)

    # Boolean masks shared by all five classification-style scores.
    above_raw = y_test > pred
    above_rounded = y_test > pred.round()

    accuracy = accuracy_score(above_raw, above_rounded)
    precision = precision_score(above_raw, above_rounded)
    confusion = confusion_matrix(above_raw, above_rounded)
    recall = recall_score(above_raw, above_rounded)
    f1 = f1_score(above_raw, above_rounded)

    return rmse, mae, mape, accuracy, precision, confusion, recall, f1

# Train LightGBM with default settings and print every metric returned by the helper.
# Unlike the earlier models, these two are fitted on the unscaled X_train / X_test.
model_lightgbm = lgb.LGBMRegressor()
metrics_lightgbm = train_and_evaluate_model(
    model_lightgbm,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)
print("LightGBM Metrics:", metrics_lightgbm)

# Same procedure for CatBoost; verbose=0 silences its per-iteration training log.
model_catboost = CatBoostRegressor(verbose=0)
metrics_catboost = train_and_evaluate_model(
    model_catboost,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)
print("CatBoost Metrics:", metrics_catboost)