# **Thư viện**

In [6]:
import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# **Dữ liệu**

In [7]:
# Load the raw EUR/VND exchange-rate CSV and preview the first rows.
# NOTE(review): the preview shows an "Unnamed: 0" column, presumably an index
# that was written out with the file -- confirm whether it should be read
# with index_col=0 instead.
raw_csv_path = "../data/raw/EUR_VND_Exchange_Rate.csv"
df = pd.read_csv(raw_csv_path)
df.head()

Unnamed: 0,Date,Buy,Transfer,Sell
0,2020-04-01,25297.47,25553.0,26506.55
1,2020-04-02,25297.47,25553.0,26506.55
2,2020-04-03,24742.47,24992.39,25936.23
3,2020-04-04,24742.47,24992.39,25936.23
4,2020-04-05,24742.47,24992.39,25936.23


# **Mô hình Random Forest**

In [8]:
def train_rf(X_train, y_train, model_path, param_grid=None, cv=5):
    """
    Tune a Random Forest regressor with a grid search and save the best model.

    Parameters
    ----------
    X_train, y_train
        Training features and targets, passed unchanged to
        ``GridSearchCV.fit``.
    model_path
        Destination handed to ``joblib.dump``. When it is a filesystem path
        (``str`` or ``os.PathLike``) its parent directory is created if it
        does not exist yet; any other value is passed through untouched.
    param_grid : dict, optional
        Hyperparameter grid to search. When omitted, the default grid defined
        in the function body is used.
    cv : int or cross-validation splitter, default 5
        Forwarded to ``GridSearchCV``.
        NOTE(review): if the rows of ``X_train`` are time-ordered, an
        order-preserving splitter (e.g. ``TimeSeriesSplit``) is probably more
        appropriate than the integer default -- confirm against the caller.

    Returns
    -------
    The best estimator found by the search (``best_estimator_``), which is
    also the object written to ``model_path``.
    """
    if param_grid is None:
        param_grid = {
            'n_estimators': [100, 200, 300, 400, 500],
            'max_depth': [5, 10, 20, None],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        }

    # Prepare the output location before the (potentially long) search so an
    # unusable path fails fast instead of after every candidate was fitted.
    if isinstance(model_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(model_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    model = RandomForestRegressor(random_state=42)
    grid_search = GridSearchCV(
        model,
        param_grid,
        cv=cv,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
        verbose=1,
    )
    grid_search.fit(X_train, y_train)

    best_model = grid_search.best_estimator_

    joblib.dump(best_model, model_path)

    return best_model

def build_rf_model(best_hp):
    """
    Build an unfitted Random Forest regressor from tuned hyperparameters.

    Parameters
    ----------
    best_hp
        Either a dict keyed by hyperparameter name (the form produced by
        ``GridSearchCV.best_params_``) or an object exposing the same names
        as attributes. It must provide ``n_estimators``, ``max_depth``,
        ``min_samples_split`` and ``min_samples_leaf``; extra entries are
        ignored.

    Returns
    -------
    RandomForestRegressor
        A new estimator configured with the given values and
        ``random_state=42``.

    Raises
    ------
    KeyError
        If ``best_hp`` is a dict and a required name is missing.
    AttributeError
        If ``best_hp`` is an object and a required attribute is missing.
    """
    hp_names = (
        'n_estimators',
        'max_depth',
        'min_samples_split',
        'min_samples_leaf',
    )

    # Plain dicts do not support attribute access, so read them by key;
    # everything else keeps the original attribute-based lookup.
    if isinstance(best_hp, dict):
        hyperparams = {name: best_hp[name] for name in hp_names}
    else:
        hyperparams = {name: getattr(best_hp, name) for name in hp_names}

    model = RandomForestRegressor(random_state=42, **hyperparams)
    return model