# Import thư viện cần thiết

In [20]:
import pandas as pd 
import numpy as np 
import sklearn
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load data 

In [21]:
# Read the preprocessed train and test tables from disk (train first, then test).
data_train, data_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/processed/train_preprocessed.csv',
        'data/processed/test_preprocessed.csv',
    )
)

# Split each table into its feature columns (everything except 'revenue')
# and the 'revenue' column used as the regression target.
x_train, y_train = data_train.drop(columns=['revenue']), data_train['revenue']
x_test, y_test = data_test.drop(columns=['revenue']), data_test['revenue']

# Model 1: XGBoost

In [22]:
from xgboost import XGBRegressor

# Hyperparameters for the gradient-boosted tree regressor, gathered in one
# place so they are easy to read and tune.
xgb_params = {
    "n_estimators": 500,
    "max_depth": 6,
    "learning_rate": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "objective": "reg:squarederror",
    "random_state": 42,  # fixed seed
}
xgb_model = XGBRegressor(**xgb_params)
xgb_model.fit(x_train, y_train)

# Score the fitted model on the test features and report RMSE / MAE.
xgb_pred = xgb_model.predict(x_test)
xgb_rmse = np.sqrt(mean_squared_error(y_test, xgb_pred))
xgb_mae = mean_absolute_error(y_test, xgb_pred)
print("XGBoost RMSE: ", xgb_rmse)
print("XGBoost MAE: ", xgb_mae)

XGBoost RMSE:  2.106807858685727
XGBoost MAE:  1.6416012267789457


# Model 2: Huber Regression

In [23]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import HuberRegressor

# Huber regressor with explicit settings: epsilon=1.35 and alpha=0.0001,
# with the iteration cap set to 1000.
huber_regressor = HuberRegressor(epsilon=1.35, max_iter=1000, alpha=0.0001)

# Standardize the features, then fit the Huber regressor on the scaled values.
huber_model = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('huber', huber_regressor),
    ]
)
huber_model.fit(x_train, y_train)

# Score the fitted pipeline on the test features and report RMSE / MAE.
huber_pred = huber_model.predict(x_test)
huber_rmse = np.sqrt(mean_squared_error(y_test, huber_pred))
huber_mae = mean_absolute_error(y_test, huber_pred)
print("Huber RMSE: ", huber_rmse)
print("Huber MAE: ", huber_mae)

Huber RMSE:  2.0562685517707946
Huber MAE:  1.571883988105231


# Model 3: Quantile Regression

In [24]:
from sklearn.linear_model import QuantileRegressor

# Quantile regressor targeting the 0.5 quantile (the conditional median),
# with alpha=0.0001.
median_regressor = QuantileRegressor(quantile=0.5, alpha=0.0001)

# Standardize the features, then fit the quantile regressor on the scaled values.
quantile_model = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('quantile', median_regressor),
    ]
)
quantile_model.fit(x_train, y_train)

# Score the fitted pipeline on the test features and report RMSE / MAE.
quantile_pred = quantile_model.predict(x_test)
quantile_rmse = np.sqrt(mean_squared_error(y_test, quantile_pred))
quantile_mae = mean_absolute_error(y_test, quantile_pred)
print("Quantile RMSE: ", quantile_rmse)
print("Quantile MAE: ", quantile_mae)

Quantile RMSE:  2.0464184029620247
Quantile MAE:  1.545656936174374
