## Importing Necessary Libraries

In [2]:
import pandas as pd
import numpy as np
import sqlalchemy.types
from sqlalchemy import create_engine
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, mean_squared_error
print ("Libraries are Imported")

Libraries are Imported


In [4]:
import warnings
warnings.filterwarnings('ignore')

## Data fetching

In [6]:
my_connection = create_engine('mysql+pymysql://root:password@localhost/capstone1')

In [4]:
query = "select * from stock_cleaned"
df = pd.read_sql_query(query, my_connection)
print ("Data fetching has been sucessful")

Data fetching has been sucessful


In [5]:
df.sample(10)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Stock
107,2024-02-05,143.876271,146.503287,143.746429,144.765259,29254400,Google
176,2024-05-14,171.394963,172.583613,170.226295,171.734573,18729500,Google
45,2023-11-03,128.943258,130.581393,128.863347,130.221802,19517900,Google
472,2024-07-18,443.540556,443.849996,433.618438,439.577698,20794800,Microsft
787,2023-10-26,294.395023,294.395023,278.827009,287.758667,66684100,Meta
118,2024-02-21,142.477865,143.816339,141.748699,143.676498,16499600,Google
71,2023-12-12,133.118524,134.387069,132.679021,133.488098,26584000,Google
65,2023-12-04,131.144768,131.300582,129.252909,130.481522,24117100,Google
246,2024-08-23,166.550003,167.949997,165.660004,167.429993,14281600,Google
1157,2024-05-06,181.892483,183.737832,179.967317,181.254089,78569700,Apple


## Data splitting

In [6]:
X = df[['Open', 'High', 'Low', 'Volume']]
y = df['Close']

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 1234)

In [8]:
X_train.shape

(984, 4)

In [9]:
X_test.shape

(247, 4)

### Model 1: Lasso Regression

In [10]:
lasso_reg = Lasso()

In [11]:
lasso_reg.fit(X_train, y_train)

In [12]:
lasso_pred = lasso_reg.predict(X_test)

In [13]:
lasso_r2 = r2_score(y_test, lasso_pred)
lasso_mse = mean_squared_error(y_test, lasso_pred)
print ("R-square value of Lasso Regression Model: ", lasso_r2)
print ("MSE of Lasso Regression: ", lasso_mse)
print ("RMSE of Lasso Regression: ", np.sqrt (lasso_mse))

R-square value of Lasso Regression Model:  0.999291186135348
MSE of Lasso Regression:  11.002582282012787
RMSE of Lasso Regression:  3.3170140611720034


### Model 2: Ridge Regression

In [14]:
ridge_reg = Ridge()

In [15]:
ridge_reg.fit(X_train, y_train)

In [16]:
ridge_pred = ridge_reg.predict (X_test)

In [17]:
ridge_r2 = r2_score(y_test, ridge_pred)
ridge_mse = mean_squared_error (y_test, ridge_pred)
print ("R-square value of Ridge Regression Model: ", ridge_r2)
print ("MSE of Ridge Regression: ", ridge_mse)
print ("RMSE of Ridge Regression: ", np.sqrt (ridge_mse))

R-square value of Ridge Regression Model:  0.9998021685561475
MSE of Ridge Regression:  3.0708439091075737
RMSE of Ridge Regression:  1.7523823524298496


### Model 3: KNN Model

In [18]:
knn_reg = KNeighborsRegressor(n_neighbors = 19)

In [19]:
knn_reg.fit(X_train, y_train)

In [20]:
knn_pred = knn_reg.predict(X_test)

In [21]:
knn_r2 = r2_score(y_test, knn_pred)
knn_mse = mean_squared_error(y_test, knn_pred)
print ("R-square value of KNN Regression Model: ", knn_r2)
print ("MSE of KNN Regression: ", knn_mse)
print ("RMSE of KNN Regression: ", np.sqrt (knn_mse))

R-square value of KNN Regression Model:  0.3463382254927828
MSE of KNN Regression:  10146.482479082248
RMSE of KNN Regression:  100.72974972212651


### Model 4: Random Forest Regression

In [22]:
rf_reg = RandomForestRegressor(n_estimators=300)

In [23]:
rf_reg.fit(X_train, y_train)

In [24]:
rf_pred = rf_reg.predict(X_test)

In [25]:
rf_r2 = r2_score(y_test, rf_pred)
rf_mse = mean_squared_error(y_test, rf_pred)
print ("R-square value of Random Forest Regression Model: ", rf_r2)
print ("MSE of Random Forest Regression: ", rf_mse)
print ("RMSE of Random Forest Regression: ", np.sqrt (rf_mse))

R-square value of Random Forest Regression Model:  0.9996480746044925
MSE of Random Forest Regression:  5.462771418985178
RMSE of Random Forest Regression:  2.337257242792324


### Model 5: XGBoost Regressor

In [26]:
xgb_reg = GradientBoostingRegressor(loss='squared_error', learning_rate=0.05, n_estimators=300)

In [27]:
xgb_reg.fit(X_train, y_train)

In [28]:
xgb_pred = xgb_reg.predict(X_test)

In [29]:
xgb_r2 = r2_score(y_test, xgb_pred)
xgb_mse = mean_squared_error(y_test, xgb_pred)
print ("R-square value of XGBoost Regression Model: ", xgb_r2)
print ("MSE of XGBoost Regression: ", xgb_mse)
print ("RMSE of XGBoost Regression: ", np.sqrt (xgb_mse))

R-square value of XGBoost Regression Model:  0.9996516330402984
MSE of XGBoost Regression:  5.407535503461803
RMSE of XGBoost Regression:  2.325410824663419


### Comparision

In [34]:
# Create lists of your variables
r_squared_values = [lasso_r2, ridge_r2, knn_r2, rf_r2, xgb_r2]
mse_values = [lasso_mse, ridge_mse, knn_mse, rf_mse, xgb_mse]

model_names = ['Lasso Regression', 'Ridge Regression', 'KNN Regression', 'Random Forest Regression', 'XGBoost Regression']

comparison_df = pd.DataFrame({
    'Model': model_names,
    'R² Score': r_squared_values,
    'RMSE': mse_values
})
comparison_df = comparison_df.sort_values(by=['R² Score', 'RMSE'], ascending=[False, True])

In [35]:
comparison_df

Unnamed: 0,Model,R² Score,RMSE
1,Ridge Regression,0.999802,3.070844
4,XGBoost Regression,0.999652,5.407536
3,Random Forest Regression,0.999648,5.462771
0,Lasso Regression,0.999291,11.002582
2,KNN Regression,0.346338,10146.482479


## Storing Model Performance in the Database
In this section, we will save the results of model performance metrics into the database.

In [36]:
comparison_df.to_sql(con = my_connection, name = 'model_performance', if_exists = 'append', index = False)

5

## Saving the Best-Fitted Model as an API
In this section, we will:

Export the Model: Save the best-performing model in a format suitable for deployment, such as a serialized file.
Create API Endpoint: Develop an API to serve the model, allowing for real-time predictions.

Integration: Ensure the API is integrated and accessible for making predictions based on new input data.
This process facilitates the deployment of the model for practical use and ensures it can be easily accessed for making predictions.

In [37]:
import joblib

In [38]:
joblib.dump(ridge_reg, "Ridge_Regression.joblib")

['Ridge_Regression.joblib']