In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Location of the price-history CSV. The default is the original
# machine-specific path; set the FACEBOOK_CSV_PATH environment variable to
# point the notebook at the file on another machine without editing this cell.
DATA_PATH = os.environ.get(
    "FACEBOOK_CSV_PATH",
    r'C:\Users\KISHORE\Desktop\FAANG\facebook.csv',
)
df = pd.read_csv(DATA_PATH)

In [3]:
# Split the frame into the predictor matrix and the regression target.
# NOTE(review): Open/High/Low/Volume presumably describe the same row (trading
# session) as the Close being predicted -- confirm that is the intended setup.
feature_columns = ['Open', 'High', 'Low', 'Volume']
target_column = 'Close'

X = df[feature_columns]
y = df[target_column]

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out the last 20% of rows as the test set. shuffle=False keeps the
# frame's existing row order, so the first 80% of rows train the models.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

# Learn the standardization statistics from the training rows only, then
# apply that same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

#model Instances
# A fixed seed makes the randomized learners (tree split tie-breaking, forest
# bootstrapping, boosting sub-sampling) produce the same fit -- and therefore
# the same metrics -- every time the notebook is re-run from a fresh kernel.
RANDOM_STATE = 42

lr = LinearRegression()  # closed-form least squares; has no seed to set
dt = DecisionTreeRegressor(random_state=RANDOM_STATE)
rf = RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE)
xgb = XGBRegressor(n_estimators=100, random_state=RANDOM_STATE)
lgbm = lgb.LGBMRegressor(n_estimators=100, random_state=RANDOM_STATE)

#Training the models
# Linear regression is the only model given the standardized features; the
# remaining four are fitted on the raw feature values, in the order listed.
lr.fit(X_train_scaled, y_train)
for raw_feature_model in (dt, rf, xgb, lgbm):
    raw_feature_model.fit(X_train, y_train)

#predictions
# Each model predicts from the same feature representation it was trained on:
# scaled inputs for linear regression, raw inputs for everything else.
y_pred_lr = lr.predict(X_test_scaled)
y_pred_dt, y_pred_rf, y_pred_xgb, y_pred_lgbm = [
    fitted.predict(X_test) for fitted in (dt, rf, xgb, lgbm)
]

#Evaluations
def evaluate_model(y_test, y_pred):
    """Score a set of predictions against the true targets.

    Args:
        y_test: True target values.
        y_pred: Predicted values, aligned with ``y_test``.

    Returns:
        A ``(mae, rmse, r2)`` tuple: mean absolute error, the square root of
        the mean squared error, and the R² score, in that order.
    """
    abs_error = mean_absolute_error(y_test, y_pred)
    # RMSE is derived from MSE so it is expressed in the target's own units.
    root_sq_error = np.sqrt(mean_squared_error(y_test, y_pred))
    return abs_error, root_sq_error, r2_score(y_test, y_pred)

# Score every model's test-set predictions; each result is a
# (MAE, RMSE, R²) tuple as returned by evaluate_model.
lr_metrics, dt_metrics, rf_metrics, xgb_metrics, lgbm_metrics = [
    evaluate_model(y_test, predictions)
    for predictions in (y_pred_lr, y_pred_dt, y_pred_rf, y_pred_xgb, y_pred_lgbm)
]

#Results
# One line per model, in the same order the models were trained.
for label, (mae, rmse, r2) in (
    ("Linear Regression", lr_metrics),
    ("Decision Tree", dt_metrics),
    ("Random Forest", rf_metrics),
    ("XGBoost", xgb_metrics),
    ("LightGBM", lgbm_metrics),
):
    print(f"{label}: MAE={mae}, RMSE={rmse}, R²={r2}")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000138 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1020
[LightGBM] [Info] Number of data points in the train set: 2500, number of used features: 4
[LightGBM] [Info] Start training from score 147.701320
Linear Regression: MAE=1.2931352243033216, RMSE=1.7517237590262722, R²=0.9998016055769003
Decision Tree: MAE=23.470130044951986, RMSE=40.44687889130271, R²=0.8942286010892512
Random Forest: MAE=23.11584274769049, RMSE=40.333168192180096, R²=0.8948224878593231
XGBoost: MAE=23.876050805189717, RMSE=41.37339251709561, R²=0.8893273049295837
LightGBM: MAE=24.923070089031, RMSE=43.44089722283001, R²=0.877989897469094


In [6]:
import mlflow
import mlflow.sklearn
# Tracking store location. An explicit set_tracking_uri() call takes
# precedence over MLflow's own MLFLOW_TRACKING_URI lookup, so read the
# variable here and fall back to the original local file store only when it
# is unset. This lets the notebook run on other machines unchanged.
mlflow.set_tracking_uri(
    os.environ.get(
        "MLFLOW_TRACKING_URI",
        "file:///C:/Users/KISHORE/Desktop/FAANG/mlruns",
    )
)

mlflow.set_experiment("FACEBOOK - STOCK PREDICTION")
with mlflow.start_run():
    # A single run carries every model's scores, keyed "<model>_<metric>".
    for prefix, (mae, rmse, r2) in (
        ("lr", lr_metrics),
        ("dt", dt_metrics),
        ("rf", rf_metrics),
        ("xgb", xgb_metrics),
        ("lgbm", lgbm_metrics),
    ):
        mlflow.log_metric(f"{prefix}_mae", mae)
        mlflow.log_metric(f"{prefix}_rmse", rmse)
        mlflow.log_metric(f"{prefix}_r2", r2)

    # Log models
    # NOTE(review): lr was fitted on X_train_scaled, and the fitted scaler is
    # not logged in this run -- whoever loads Linear_Regression_Model must
    # apply the same standardization to its inputs.
    for fitted_model, artifact_name in (
        (lr, "Linear_Regression_Model"),
        (dt, "Decision_Tree_Model"),
        (rf, "Random_Forest_Model"),
        (xgb, "XGBoost_Model"),
        (lgbm, "LightGBM_Model"),
    ):
        mlflow.sklearn.log_model(fitted_model, artifact_name)

2025/01/30 01:03:10 INFO mlflow.tracking.fluent: Experiment with name 'FACEBOOK - STOCK PREDICTION' does not exist. Creating a new experiment.
The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh(<full-path-to-git-executable>)

All git commands will error until this is rectified.

This initial message can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|silent|none|n|0: for no message or exception
    - error|e|exception|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



In [7]:
import pickle

# Linear regression is persisted as the best model: it has the lowest MAE and
# RMSE and the highest R² in the evaluation printed by the training cell.
best_model = lr
with open("facebook_best_model.pkl", "wb") as file:
    pickle.dump(best_model, file)

# lr was trained on StandardScaler output, so its predictions are only
# meaningful when new inputs receive the identical transform. Persist the
# fitted scaler next to the model so the pair can be reloaded together; the
# model file itself is unchanged.
with open("facebook_scaler.pkl", "wb") as file:
    pickle.dump(scaler, file)

print("Best model saved as 'facebook_best_model.pkl'.")
print("Fitted scaler saved as 'facebook_scaler.pkl'.")

Best model saved as 'facebook_best_model.pkl'.
