# Sales Forecasting

## BY: Tahsin Jahin Khalid

## Part III: Model Building

#### Data Loading

In [18]:
import pandas as pd
import numpy as np

In [19]:
from sklearn.model_selection import train_test_split

In [20]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [21]:
# Colab-only step: mount Google Drive at /content/drive so files stored there
# can be accessed through the regular filesystem.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [22]:
# Both pickled inputs live side by side in the mounted Drive folder.
DRIVE_ROOT = "/content/drive/MyDrive"
x_path = f"{DRIVE_ROOT}/sales_forecasting_train_X.pkl"  # feature matrix
y_path = f"{DRIVE_ROOT}/sales_forecasting_train_y.pkl"  # target values

In [23]:
# Load the pickled features and target.
# NOTE: unpickling executes arbitrary code, so only load files you produced yourself.
X_data, y_data = pd.read_pickle(x_path), pd.read_pickle(y_path)

#### Data Partitioning

In [24]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.20,
    random_state=3007,
)

In [25]:
# Report the shape of each partition; the empty first/last entries reproduce
# the blank line before and after the report.
shape_report = "\n".join([
    "",
    f"Train Data size: {X_train.shape}",
    f"Validation Data size: {X_test.shape}",
    f"Train Target size: {y_train.shape}",
    f"Validation Target size: {y_test.shape}",
    "",
])
print(shape_report)


Train Data size: (270776, 36)  
Validation Data size: (67695, 36)  
Train Target size: (270776,) 
Validation Target size: (67695,)    



#### Model Building

#### 1. Linear Regression

In [26]:
# Baseline model: ordinary least squares fitted on the training split and
# scored on the hold-out split. Kept in a single cell, like the other models,
# so the whole fit -> predict -> score step re-runs together.
linreg = LinearRegression()
linreg.fit(X_train, y_train)
y_pred_linreg = linreg.predict(X_test)

# RMSE = sqrt(MSE). The `squared=False` shortcut of mean_squared_error was
# deprecated in scikit-learn 1.4 and removed in 1.6, so take the root explicitly.
rmse_linreg = np.sqrt(mean_squared_error(y_test, y_pred_linreg))
print(f"RMSE of Linear Regression Model: {np.round(rmse_linreg, 3)}")

RMSE of Linear Regression Model: 1.238


#### 2. Lasso Regression

In [30]:
# L1-regularised linear model (alpha controls the penalty strength).
lasso = Lasso(alpha=0.05)
lasso.fit(X_train, y_train)
y_pred_lasso = lasso.predict(X_test)

# RMSE = sqrt(MSE). The `squared=False` shortcut of mean_squared_error was
# deprecated in scikit-learn 1.4 and removed in 1.6, so take the root explicitly.
rmse_lasso = np.sqrt(mean_squared_error(y_test, y_pred_lasso))
print(f"RMSE of Lasso Regression Model: {np.round(rmse_lasso, 3)}")

RMSE of Lasso Regression Model: 1.218


#### 3. Ridge Regression

In [31]:
# L2-regularised linear model (alpha controls the penalty strength).
ridge = Ridge(alpha=0.05)
ridge.fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)

# RMSE = sqrt(MSE). The `squared=False` shortcut of mean_squared_error was
# deprecated in scikit-learn 1.4 and removed in 1.6, so take the root explicitly.
rmse_ridge = np.sqrt(mean_squared_error(y_test, y_pred_ridge))
print(f"RMSE of Ridge Regression Model: {np.round(rmse_ridge, 3)}")

RMSE of Ridge Regression Model: 1.238


#### 4. Random Forest Regressor

In [17]:
# Random forest of 100 trees; the fixed seed makes the fitted ensemble reproducible.
rf_reg = RandomForestRegressor(
    n_estimators = 100,
    random_state = 3007
)
rf_reg.fit(X_train, y_train)
y_pred_rf = rf_reg.predict(X_test)

# RMSE = sqrt(MSE). The `squared=False` shortcut of mean_squared_error was
# deprecated in scikit-learn 1.4 and removed in 1.6, so take the root explicitly.
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))
print(f"RMSE of Random Forest Model: {np.round(rmse_rf, 3)}")

RMSE of Random Forest Model: 1.157


#### 5. XGBoost Regressor

In [32]:
!pip install xgboost --quiet

In [34]:
from xgboost import XGBRegressor

In [36]:
# Hyperparameters for the gradient-boosted model, gathered in one place so
# they are easy to read and tweak. `enable_categorical` is left at its default.
xgb_params = {
    "objective": "reg:squarederror",
    "colsample_bytree": 0.80,
    "learning_rate": 0.1,
    "max_depth": 8,
    "alpha": 10,
    "n_estimators": 100,
}
xgb_reg = XGBRegressor(**xgb_params)

In [37]:
# Fit the boosted model on the training split and score it on the hold-out split.
xgb_reg.fit(X_train, y_train)
y_pred_xgb = xgb_reg.predict(X_test)

# RMSE = sqrt(MSE). The `squared=False` shortcut of mean_squared_error was
# deprecated in scikit-learn 1.4 and removed in 1.6, so take the root explicitly.
rmse_xgb = np.sqrt(mean_squared_error(y_test, y_pred_xgb))
print(f"RMSE of XGBoost Regressor: {np.round(rmse_xgb, 3)}")

RMSE of XGBoost Regressor: 0.958


#### Model Packaging

In [38]:
import pickle

In [39]:
# Serialise the fitted XGBoost regressor to Drive.
# NOTE: a pickle is tied to the library versions that wrote it; load it with the same versions.
with open(
    "/content/drive/MyDrive/Sales_Forecasting_model_XGB.pkl", mode="wb"
) as package:
    pickle.dump(xgb_reg, file=package)