In [None]:
!pip install tensorflow



In [None]:
import pandas as pd
import numpy as np
import requests
import json
import ast
import datetime
import os
import random
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Conv1D, MaxPooling1D, Flatten, Dropout, GRU, Bidirectional
from tensorflow.keras.optimizers import Adam
from collections import defaultdict, Counter
from copy import deepcopy

In [None]:
# ✅ Fix the global seeds so results stay consistent between runs

def set_seeds(seed=42):
    """Seed the Python, NumPy and TensorFlow RNGs and set determinism env flags.

    Args:
        seed: Integer used for every random number generator.

    Side effects:
        Writes ``PYTHONHASHSEED`` and ``TF_DETERMINISTIC_OPS`` into
        ``os.environ`` and reseeds ``random``, ``numpy.random`` and
        ``tf.random``.

    NOTE(review): ``PYTHONHASHSEED`` is presumably only read at interpreter
    start-up, so setting it here may not affect the running kernel — confirm.
    """
    # Environment flags; TF_DETERMINISTIC_OPS asks TensorFlow to avoid
    # non-deterministic algorithms.
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_DETERMINISTIC_OPS': '1',
    })

    # Same seed for every library-level generator.
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)

set_seeds(42)


# Crawling & Preprocessing Data

In [None]:
# Crawl historical price data from CafeF
def crawl_data(symbol, page_index, start_date="01/01/2022", end_date="02/28/2025"):
    """Download one page of price history for ``symbol``.

    Args:
        symbol: Ticker sent as the ``Symbol`` query parameter.
        page_index: Page number sent as ``PageIndex`` (page size is fixed at 20).
        start_date: Value sent as ``StartDate``.
        end_date: Value sent as ``EndDate``.

    Returns:
        A DataFrame built from the ``"Data"`` entry of the JSON response
        (an empty list is used when the key is missing), or ``None`` when the
        request, the JSON decoding or the DataFrame construction raises.
    """
    endpoint = "https://cafef.vn/du-lieu/Ajax/PageNew/DataHistory/PriceHistory.ashx"
    query = dict(
        Symbol=symbol,
        StartDate=start_date,
        EndDate=end_date,
        PageIndex=page_index,
        PageSize=20,
    )
    try:
        reply = requests.get(endpoint, params=query, timeout=10)
        reply.raise_for_status()
        payload = reply.json()
        return pd.DataFrame(payload.get("Data", []))
    except Exception as e:
        # Best effort: report the failed page and let the caller skip it.
        print(f"Failed to fetch page {page_index} for {symbol}: {e}")
        return None

def merged_df(symbol, start_page, end_page):
    """Fetch pages ``start_page`` .. ``end_page - 1`` and stack them.

    Pages for which ``crawl_data`` returns ``None`` are skipped.

    Returns:
        One DataFrame with a fresh 0..n-1 index, or ``None`` when no page
        could be collected (including an empty page range).
    """
    fetched = (crawl_data(symbol, page) for page in range(start_page, end_page))
    frames = [page_df for page_df in fetched if page_df is not None]
    if not frames:
        return None
    return pd.concat(frames, ignore_index=True)

def parse_data(data_string):
    """Normalise one raw ``Data`` cell into a dict.

    Args:
        data_string: Either a dict (returned unchanged), a string holding a
            Python literal optionally wrapped in parentheses, or anything else.

    Returns:
        The dict itself, the dict parsed from the string, or ``{}`` when the
        input is not a dict/string, cannot be parsed, or parses to something
        that is not a dict.
    """
    if isinstance(data_string, dict):
        return data_string
    if not isinstance(data_string, str):
        return {}

    try:
        parsed = ast.literal_eval(data_string.strip("()"))
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Only the failures literal_eval is documented to raise on bad input;
        # a bare `except:` would also swallow KeyboardInterrupt/SystemExit.
        return {}

    # Keep the "always a dict" contract even for literals like "123" or "[1, 2]".
    return parsed if isinstance(parsed, dict) else {}

def parse_df(symbol, start_page, end_page):
    """Crawl ``symbol`` and flatten the nested ``Data`` records into columns.

    Steps: parse every ``Data`` cell with ``parse_data``, expand the resulting
    dicts into columns, split ``ThayDoi`` into ``Price_change`` and
    ``%Price_change``, drop helper columns, and multiply the price columns
    by 1000.

    Returns:
        The flattened DataFrame, or an empty DataFrame when nothing was crawled.
    """
    raw = merged_df(symbol, start_page, end_page)
    if raw is None or raw.empty:
        return pd.DataFrame()

    raw['Data'] = raw['Data'].apply(parse_data)
    expanded = pd.DataFrame(raw['Data'].tolist())
    df = pd.concat([raw, expanded], axis=1)

    def _split_change(val):
        # Split the text before the first '(' from the text after it; the
        # trailing '%' / ')' characters are stripped from the second part.
        if '(' not in val:
            return pd.Series([0, 0])
        pieces = val.split('(')
        return pd.Series([pieces[0], pieces[1].rstrip('%)')])

    # Split the price-change field into absolute and percentage parts.
    change_cols = ['Price_change', '%Price_change']
    df[change_cols] = df['ThayDoi'].apply(_split_change)

    df = df.drop(columns=['Data', 'index', 'TotalCount', 'ThayDoi'], errors='ignore')
    df[change_cols] = df[change_cols].astype(float)

    # Convert prices from thousands of dong to dong.
    price_cols = ('GiaDieuChinh', 'GiaDongCua', 'GiaMoCua', 'GiaCaoNhat', 'GiaThapNhat', 'Price_change')
    for col in (name for name in price_cols if name in df.columns):
        df[col] = df[col].astype(float) * 1000

    return df

def preprocess_stock_dataframe(symbol, start_page, end_page):
    """Return the parsed price table sorted by date with a ``Return`` column.

    ``Ngay`` is parsed as day/month/year; rows whose date cannot be parsed are
    dropped. ``Return`` is the percentage change of ``GiaDongCua`` between
    consecutive rows. Any row that still contains a NaN is removed at the end.

    Returns:
        The cleaned DataFrame, or the empty frame from ``parse_df`` unchanged.
    """
    parsed = parse_df(symbol, start_page, end_page)
    if parsed.empty:
        return parsed

    parsed['Ngay'] = pd.to_datetime(parsed['Ngay'], format="%d/%m/%Y", errors='coerce')
    ordered = (
        parsed
        .dropna(subset=['Ngay'])
        .sort_values('Ngay')
        .reset_index(drop=True)
    )
    ordered['Return'] = ordered['GiaDongCua'].pct_change()
    return ordered.dropna().reset_index(drop=True)

In [None]:
#Adding new features
def add_technical_features(symbol, start_page, end_page):
    """Load the preprocessed table for ``symbol`` and append indicator columns.

    Added columns:
        MA5 / MA10 / MA20: rolling means of ``GiaDongCua``.
        Volatility_5: 5-row rolling standard deviation of ``Return``.
        OpenCloseGap: ``GiaDongCua - GiaMoCua``.
        HighLowRange: ``GiaCaoNhat - GiaThapNhat``.
        VolumeChange: percentage change of ``KhoiLuongKhopLenh``.

    Returns:
        The DataFrame with rows containing NaN removed and the index reset.
    """
    df = preprocess_stock_dataframe(symbol, start_page, end_page)
    close = df['GiaDongCua']

    for span in (5, 10, 20):
        df[f'MA{span}'] = close.rolling(window=span).mean()

    df['Volatility_5'] = df['Return'].rolling(window=5).std()
    df['OpenCloseGap'] = close - df['GiaMoCua']
    df['HighLowRange'] = df['GiaCaoNhat'] - df['GiaThapNhat']
    df['VolumeChange'] = df['KhoiLuongKhopLenh'].pct_change()

    # The rolling windows and pct_change leave NaN in the leading rows.
    return df.dropna().reset_index(drop=True)

In [None]:
def add_lag_features(symbol, start_page, end_page, lags=(1,), target_col=None):
    """Build the technical-feature table and add lagged copies of a column.

    Args:
        symbol, start_page, end_page: Forwarded to ``add_technical_features``.
        lags: Iterable of row offsets; one ``<target_col>_lag<k>`` column is
            added per offset.
        target_col: Column to lag. When omitted, the notebook-level
            ``target_col`` variable is used so existing calls keep working.

    Returns:
        The DataFrame with the lag columns appended (leading rows of each lag
        column are NaN; they are not dropped here).

    Raises:
        NameError: ``target_col`` was not passed and no notebook-level
            ``target_col`` exists.
    """
    if target_col is None:
        # Backward compatibility: this function used to read the global
        # directly, which only worked after the settings cell had been run.
        target_col = globals().get('target_col')
        if target_col is None:
            raise NameError("name 'target_col' is not defined; pass target_col= explicitly")

    df = add_technical_features(symbol, start_page, end_page)
    for lag in lags:
        df[f'{target_col}_lag{lag}'] = df[target_col].shift(lag)
    return df

# Training Models

In [None]:
def prepare_dl_tuning_data(symbols, start_page, end_page, target_col, feature_cols,
                           window_size=10, test_size=0.2, stride=1, scaler_type='minmax'):
    """Prepare windowed train/test arrays for every symbol.

    Each symbol is processed with ``create_log_return_data_scaled``. A symbol
    whose preparation raises is reported on stdout and left out of the result.

    Returns:
        dict mapping symbol -> ``(X_train, y_train, X_test, y_test)``.
    """
    shared_kwargs = dict(
        start_page=start_page,
        end_page=end_page,
        target_col=target_col,
        feature_cols=feature_cols,
        window_size=window_size,
        test_size=test_size,
        stride=stride,
        scaler_type=scaler_type,
    )
    prepared = {}

    for symbol in symbols:
        print(f"📦 Chuẩn bị dữ liệu cho: {symbol}")
        try:
            # Only the first four of the seven returned items are kept.
            X_train, y_train, X_test, y_test, _y_true, _scaler, _cols = (
                create_log_return_data_scaled(symbol=symbol, **shared_kwargs)
            )
            prepared[symbol] = (X_train, y_train, X_test, y_test)
        except Exception as e:
            print(f"⚠️ Không thể chuẩn bị dữ liệu cho {symbol}: {e}")

    return prepared

In [None]:
# Hyperparameter grid used later in this notebook to tune the LSTM and BiLSTM
# trainers. Each dict is unpacked as keyword arguments into the training
# function by auto_tune_model_and_update_config.
configs = [
    {"units": 32, "dropout": 0.1, "learning_rate": 0.001, "batch_size": 32, "epochs": 50},
    {"units": 64, "dropout": 0.2, "learning_rate": 0.001, "batch_size": 64, "epochs": 100},
    {"units": 128, "dropout": 0.3, "learning_rate": 0.0005, "batch_size": 32, "epochs": 80},
]

In [None]:
# Per-model "best config" dicts. The training functions read their defaults
# from these, and auto_tune_model_and_update_config overwrites them in place.
# The values below are placeholders until tuning has run.
GRU_BEST_CONFIG = {
    "units": 1,
    "dropout": 0.0,
    "learning_rate": 0.001,
    "batch_size": 1,
    "epochs": 1
}

LSTM_BEST_CONFIG = deepcopy(GRU_BEST_CONFIG)
BILSTM_BEST_CONFIG = deepcopy(GRU_BEST_CONFIG)

# The CNN and hybrid trainers look up "filters"/"kernel_size" (and
# "lstm_units" for the hybrid). A plain copy of the GRU config lacks those
# keys, so calling the trainers before tuning raised KeyError. Seed them with
# the defaults used by the earlier CNN/hybrid training functions.
CNN_BEST_CONFIG = {
    **deepcopy(GRU_BEST_CONFIG),
    "filters": 64,
    "kernel_size": 2,
}
LSTM_CNN_HYBRID_BEST_CONFIG = {
    **deepcopy(GRU_BEST_CONFIG),
    "filters": 64,
    "kernel_size": 2,
    "lstm_units": 64,
}

In [None]:
def auto_tune_model_and_update_config(dl_data, configs, train_model_fn, config_var):
    """
    Evaluate every config for one deep-learning trainer and store the best one.

    Args:
        dl_data: dict[symbol] -> (X_train, y_train, X_test, y_test).
        configs: iterable of hyperparameter dicts; each is unpacked as keyword
            arguments into ``train_model_fn``.
        train_model_fn: training function returning ``(model, history)``.
        config_var: dict updated in place with the winning hyperparameters.

    Returns:
        DataFrame with one row per config (hyperparameters plus ``MAPE_avg``,
        ``RMSE_avg``, ``R2_avg`` averaged over symbols), sorted by ``MAPE_avg``.
        When no config could be evaluated the unsorted (possibly empty) table
        is returned and ``config_var`` is left untouched.

    Only the hyperparameters of the winning config are written to
    ``config_var`` — the metric columns are not, and values keep their
    original Python types.

    NOTE(review): y_test presumably holds scaled targets that can be close to
    zero, which makes MAPE explode (the recorded run shows MAPE_avg ~ 2.5e11);
    consider ranking on RMSE instead — confirm with the data-preparation step.
    """
    evaluated = []  # evaluated[i] is the config behind row i of the result table
    results = []

    for cfg in configs:
        mape_list, rmse_list, r2_list = [], [], []
        print(f"\n🔧 Testing config: {cfg}")

        for symbol, (X_train, y_train, X_test, y_test) in dl_data.items():
            try:
                model, _ = train_model_fn(
                    X_train, y_train, X_test, y_test,
                    **cfg
                )
                # verbose=0 keeps Keras from printing one progress bar per fit.
                y_pred = model.predict(X_test, verbose=0).flatten()
                mape = mean_absolute_percentage_error(y_test, y_pred)
                rmse = np.sqrt(mean_squared_error(y_test, y_pred))
                r2 = r2_score(y_test, y_pred)

                # Append only after all three metrics succeeded so the lists stay aligned.
                mape_list.append(mape)
                rmse_list.append(rmse)
                r2_list.append(r2)

            except Exception as e:
                print(f"❌ {symbol} - lỗi: {e}")

        # A config that failed on every symbol gets NaN scores (np.mean([])
        # would give the same value but with a RuntimeWarning).
        scored = bool(mape_list)
        results.append({
            **cfg,
            "MAPE_avg": np.mean(mape_list) if scored else np.nan,
            "RMSE_avg": np.mean(rmse_list) if scored else np.nan,
            "R2_avg": np.mean(r2_list) if scored else np.nan,
        })
        evaluated.append(dict(cfg))

    df = pd.DataFrame(results)

    if df.empty or df["MAPE_avg"].isna().all():
        # Nothing trustworthy to pick from: do not overwrite the current config.
        print("\n⚠️ Không có cấu hình nào được đánh giá thành công; giữ nguyên config hiện tại.")
        return df

    ranked = df.sort_values("MAPE_avg")  # NaN rows sort last
    best = evaluated[ranked.index[0]]
    best_metrics = ranked.iloc[0][["MAPE_avg", "RMSE_avg", "R2_avg"]].to_dict()

    print(f"\n✅ Cập nhật config tốt nhất: { {**best, **best_metrics} }")
    config_var.update(best)

    return ranked

In [None]:
# Experiment settings shared by the cells below.
symbols = ["AGR", "FPT", "VNG", "HPG", "VNM", "VIC", "NVL", "SSI", "VND", "MWG", "PNJ", "TCB"]
# Page range handed to the crawler; merged_df iterates range(start_page, end_page).
start_page = 1
end_page = 41
target_col = 'GiaDongCua'

# Column names derived from the target: its log transform and 1-step lags.
target_col_log = f'log_{target_col}'
target_col_lag = f'{target_col}_lag1'
target_col_log_lag = f'log_{target_col}_lag1'
evaluation_metric = "MAPE"

# Model inputs: the technical indicators plus the lagged log target.
feature_cols = [
    'Return', 'MA5', 'MA10', 'MA20', 'Volatility_5',
    'OpenCloseGap', 'HighLowRange', 'VolumeChange',
    target_col_log_lag
]

# Seed passed to the scikit-learn estimators that accept one.
random_state=42

In [None]:
# Default fit settings; bound as default arguments by the training functions
# defined in this cell.
batch_size=32
epochs=30

def train_lstm_model(X_train, y_train, X_test, y_test, units=64, epochs=epochs, batch_size=batch_size):
    """Fit an LSTM -> Dense(1) regressor (Adam optimizer, MSE loss).

    ``(X_test, y_test)`` is passed to ``fit`` as validation data.

    Returns:
        (model, history) — the fitted model and the object returned by ``fit``.

    NOTE: ``train_lstm_model`` is defined again in the next cell of this
    notebook; on a top-to-bottom run that later definition replaces this one.
    """
    timesteps, n_features = X_train.shape[1], X_train.shape[2]

    net = Sequential()
    net.add(LSTM(units, input_shape=(timesteps, n_features)))
    net.add(Dense(1))
    net.compile(optimizer='adam', loss='mse')

    fit_log = net.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
    )
    return net, fit_log

def train_cnn_model(X_train, y_train, X_test, y_test,
                    filters=64, kernel_size=2, dropout_rate=0.2, epochs=epochs, batch_size=batch_size):
    """Fit a Conv1D -> MaxPooling1D -> Dropout -> Flatten -> Dense(50) -> Dense(1) regressor.

    Compiled with the Adam optimizer and MSE loss; ``(X_test, y_test)`` is
    passed to ``fit`` as validation data.

    Returns:
        (model, history) — the fitted model and the object returned by ``fit``.

    NOTE: ``train_cnn_model`` is defined again in the next cell of this
    notebook; on a top-to-bottom run that later definition replaces this one.
    """
    timesteps, n_features = X_train.shape[1], X_train.shape[2]

    net = Sequential()
    net.add(Conv1D(
        filters=filters,
        kernel_size=kernel_size,
        activation='relu',
        input_shape=(timesteps, n_features),
    ))
    net.add(MaxPooling1D(pool_size=2))
    net.add(Dropout(dropout_rate))
    net.add(Flatten())
    net.add(Dense(50, activation='relu'))
    net.add(Dense(1))
    net.compile(optimizer='adam', loss='mse')

    fit_log = net.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
    )
    return net, fit_log

def train_lstm_cnn_hybrid(X_train, y_train, X_test, y_test,
                          filters=64, kernel_size=2, dropout_rate=0.2, lstm_units=64,
                          epochs=epochs, batch_size=batch_size):
    """Fit a Conv1D -> MaxPooling1D -> Dropout -> LSTM -> Dense(1) regressor.

    Compiled with the Adam optimizer and MSE loss; ``(X_test, y_test)`` is
    passed to ``fit`` as validation data.

    Returns:
        (model, history) — the fitted model and the object returned by ``fit``.

    NOTE: ``train_lstm_cnn_hybrid`` is defined again in the next cell of this
    notebook; on a top-to-bottom run that later definition replaces this one.
    """
    timesteps, n_features = X_train.shape[1], X_train.shape[2]

    net = Sequential()
    net.add(Conv1D(
        filters=filters,
        kernel_size=kernel_size,
        activation='relu',
        input_shape=(timesteps, n_features),
    ))
    net.add(MaxPooling1D(pool_size=2))
    net.add(Dropout(dropout_rate))
    # Only the last LSTM output is forwarded to the dense head.
    net.add(LSTM(lstm_units, return_sequences=False))
    net.add(Dense(1))
    net.compile(optimizer='adam', loss='mse')

    fit_log = net.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
    )
    return net, fit_log

def train_gru_model(X_train, y_train, X_test, y_test,
                    units=None, dropout=None, learning_rate=None,
                    batch_size=None, epochs=None):
    """Fit a GRU -> Dropout -> Dense(1) regressor (Adam optimizer, MSE loss).

    Any hyperparameter left as ``None`` is read from ``GRU_BEST_CONFIG``.
    Explicit values are always honoured, including falsy ones such as
    ``dropout=0.0``.

    ``(X_test, y_test)`` is passed to ``fit`` as validation data.

    Returns:
        (model, history) — the fitted model and the object returned by ``fit``.

    NOTE: an identical ``train_gru_model`` is defined again in the next cell
    of this notebook; on a top-to-bottom run that later definition replaces
    this one.
    """
    config = GRU_BEST_CONFIG

    # `is None` instead of `value or default`: `or` silently replaced an
    # explicit dropout=0.0 with the configured dropout. Cast to the types
    # Keras expects.
    units = int(config["units"] if units is None else units)
    dropout = float(config["dropout"] if dropout is None else dropout)
    learning_rate = float(config["learning_rate"] if learning_rate is None else learning_rate)
    batch_size = int(config["batch_size"] if batch_size is None else batch_size)
    epochs = int(config["epochs"] if epochs is None else epochs)

    model = Sequential()
    model.add(GRU(units, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(dropout))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                        epochs=epochs, batch_size=batch_size, verbose=0)
    return model, history

def train_bilstm_model(X_train, y_train, X_test, y_test, units=64, epochs=epochs, batch_size=batch_size):
    """Fit a Bidirectional(LSTM) -> Dense(1) regressor (Adam optimizer, MSE loss).

    ``(X_test, y_test)`` is passed to ``fit`` as validation data.

    Returns:
        (model, history) — the fitted model and the object returned by ``fit``.

    NOTE: ``train_bilstm_model`` is defined again in the next cell of this
    notebook; on a top-to-bottom run that later definition replaces this one.
    """
    timesteps, n_features = X_train.shape[1], X_train.shape[2]

    net = Sequential()
    net.add(Bidirectional(LSTM(units), input_shape=(timesteps, n_features)))
    net.add(Dense(1))
    net.compile(optimizer='adam', loss='mse')

    fit_log = net.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
    )
    return net, fit_log

In [None]:
def train_lstm_model(X_train, y_train, X_test, y_test,
                     units=None, dropout=None, learning_rate=None,
                     batch_size=None, epochs=None):
    """Fit an LSTM -> Dropout -> Dense(1) regressor (Adam optimizer, MSE loss).

    Any hyperparameter left as ``None`` is read from ``LSTM_BEST_CONFIG``.
    Explicit values are always honoured, including falsy ones such as
    ``dropout=0.0``.

    Args:
        X_train, X_test: 3-D arrays; axes 1 and 2 of ``X_train`` define the
            model input shape.
        y_train, y_test: targets. ``(X_test, y_test)`` is passed to ``fit``
            as validation data only.
        units: LSTM width.
        dropout: rate of the Dropout layer after the LSTM.
        learning_rate: Adam learning rate.
        batch_size, epochs: forwarded to ``fit``.

    Returns:
        (model, history) — the fitted model and the object returned by ``fit``.
    """
    cfg = LSTM_BEST_CONFIG

    # `is None` instead of `value or default`: `or` silently replaced an
    # explicit dropout=0.0 with the configured dropout.
    units = int(cfg["units"] if units is None else units)
    dropout = float(cfg["dropout"] if dropout is None else dropout)
    learning_rate = float(cfg["learning_rate"] if learning_rate is None else learning_rate)
    batch_size = int(cfg["batch_size"] if batch_size is None else batch_size)
    epochs = int(cfg["epochs"] if epochs is None else epochs)

    model = Sequential()
    # Declare the input with an explicit Input object rather than passing
    # input_shape to the first layer, which newer Keras versions warn about.
    model.add(tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(units))
    model.add(Dropout(dropout))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                        epochs=epochs, batch_size=batch_size, verbose=0)
    return model, history


def train_bilstm_model(X_train, y_train, X_test, y_test,
                       units=None, dropout=None, learning_rate=None,
                       batch_size=None, epochs=None):
    """Fit a Bidirectional(LSTM) -> Dropout -> Dense(1) regressor (Adam, MSE loss).

    Any hyperparameter left as ``None`` is read from ``BILSTM_BEST_CONFIG``.
    Explicit values are always honoured, including falsy ones such as
    ``dropout=0.0``.

    Args:
        X_train, X_test: 3-D arrays; axes 1 and 2 of ``X_train`` define the
            model input shape.
        y_train, y_test: targets. ``(X_test, y_test)`` is passed to ``fit``
            as validation data only.
        units: width of the wrapped LSTM.
        dropout: rate of the Dropout layer after the recurrent layer.
        learning_rate: Adam learning rate.
        batch_size, epochs: forwarded to ``fit``.

    Returns:
        (model, history) — the fitted model and the object returned by ``fit``.
    """
    cfg = BILSTM_BEST_CONFIG

    # `is None` instead of `value or default`: `or` silently replaced an
    # explicit dropout=0.0 with the configured dropout.
    units = int(cfg["units"] if units is None else units)
    dropout = float(cfg["dropout"] if dropout is None else dropout)
    learning_rate = float(cfg["learning_rate"] if learning_rate is None else learning_rate)
    batch_size = int(cfg["batch_size"] if batch_size is None else batch_size)
    epochs = int(cfg["epochs"] if epochs is None else epochs)

    model = Sequential()
    # Declare the input with an explicit Input object rather than passing
    # input_shape to the first layer, which newer Keras versions warn about.
    model.add(tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Bidirectional(LSTM(units)))
    model.add(Dropout(dropout))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                        epochs=epochs, batch_size=batch_size, verbose=0)
    return model, history


def train_cnn_model(X_train, y_train, X_test, y_test,
                    filters=None, kernel_size=None, dropout=None, learning_rate=None,
                    batch_size=None, epochs=None):
    """Fit a Conv1D -> MaxPooling1D -> Dropout -> Flatten -> Dense(50) -> Dense(1) regressor.

    Compiled with Adam and MSE loss. Any hyperparameter left as ``None`` is
    read from ``CNN_BEST_CONFIG``; when that config has no ``filters`` /
    ``kernel_size`` entry, 64 and 2 are used. Explicit values are always
    honoured, including falsy ones such as ``dropout=0.0``.

    Args:
        X_train, X_test: 3-D arrays; axes 1 and 2 of ``X_train`` define the
            model input shape.
        y_train, y_test: targets. ``(X_test, y_test)`` is passed to ``fit``
            as validation data only.
        filters, kernel_size: Conv1D settings.
        dropout: rate of the Dropout layer after pooling.
        learning_rate: Adam learning rate.
        batch_size, epochs: forwarded to ``fit``.

    Returns:
        (model, history) — the fitted model and the object returned by ``fit``.
    """
    cfg = CNN_BEST_CONFIG

    # `is None` instead of `value or default`, and `.get` with a fallback for
    # the CNN-only keys: the config may start as a copy of the GRU config,
    # which has no "filters"/"kernel_size" and used to raise KeyError here.
    filters = int(cfg.get("filters", 64) if filters is None else filters)
    kernel_size = int(cfg.get("kernel_size", 2) if kernel_size is None else kernel_size)
    dropout = float(cfg["dropout"] if dropout is None else dropout)
    learning_rate = float(cfg["learning_rate"] if learning_rate is None else learning_rate)
    batch_size = int(cfg["batch_size"] if batch_size is None else batch_size)
    epochs = int(cfg["epochs"] if epochs is None else epochs)

    model = Sequential()
    # Declare the input with an explicit Input object rather than passing
    # input_shape to the first layer, which newer Keras versions warn about.
    model.add(tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(dropout))
    model.add(Flatten())
    model.add(Dense(50, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                        epochs=epochs, batch_size=batch_size, verbose=0)
    return model, history


def train_lstm_cnn_hybrid(X_train, y_train, X_test, y_test,
                          filters=None, kernel_size=None, dropout=None, lstm_units=None,
                          learning_rate=None, batch_size=None, epochs=None):
    """Fit a Conv1D -> MaxPooling1D -> Dropout -> LSTM -> Dense(1) regressor.

    Compiled with Adam and MSE loss. Any hyperparameter left as ``None`` is
    read from ``LSTM_CNN_HYBRID_BEST_CONFIG``; when that config has no
    ``filters`` / ``kernel_size`` / ``lstm_units`` entry, 64, 2 and 64 are
    used. Explicit values are always honoured, including falsy ones such as
    ``dropout=0.0``.

    Args:
        X_train, X_test: 3-D arrays; axes 1 and 2 of ``X_train`` define the
            model input shape.
        y_train, y_test: targets. ``(X_test, y_test)`` is passed to ``fit``
            as validation data only.
        filters, kernel_size: Conv1D settings.
        dropout: rate of the Dropout layer after pooling.
        lstm_units: width of the LSTM that follows the convolutional stage.
        learning_rate: Adam learning rate.
        batch_size, epochs: forwarded to ``fit``.

    Returns:
        (model, history) — the fitted model and the object returned by ``fit``.
    """
    cfg = LSTM_CNN_HYBRID_BEST_CONFIG

    # `is None` instead of `value or default`, and `.get` with a fallback for
    # the hybrid-only keys: the config may start as a copy of the GRU config,
    # which lacks them and used to raise KeyError here.
    filters = int(cfg.get("filters", 64) if filters is None else filters)
    kernel_size = int(cfg.get("kernel_size", 2) if kernel_size is None else kernel_size)
    dropout = float(cfg["dropout"] if dropout is None else dropout)
    lstm_units = int(cfg.get("lstm_units", 64) if lstm_units is None else lstm_units)
    learning_rate = float(cfg["learning_rate"] if learning_rate is None else learning_rate)
    batch_size = int(cfg["batch_size"] if batch_size is None else batch_size)
    epochs = int(cfg["epochs"] if epochs is None else epochs)

    model = Sequential()
    # Declare the input with an explicit Input object rather than passing
    # input_shape to the first layer, which newer Keras versions warn about.
    model.add(tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(dropout))
    model.add(LSTM(lstm_units))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                        epochs=epochs, batch_size=batch_size, verbose=0)
    return model, history

def train_gru_model(X_train, y_train, X_test, y_test,
                    units=None, dropout=None, learning_rate=None,
                    batch_size=None, epochs=None):
    """Fit a GRU -> Dropout -> Dense(1) regressor (Adam optimizer, MSE loss).

    Any hyperparameter left as ``None`` is read from ``GRU_BEST_CONFIG``.
    Explicit values are always honoured, including falsy ones such as
    ``dropout=0.0``.

    Args:
        X_train, X_test: 3-D arrays; axes 1 and 2 of ``X_train`` define the
            model input shape.
        y_train, y_test: targets. ``(X_test, y_test)`` is passed to ``fit``
            as validation data only.
        units: GRU width.
        dropout: rate of the Dropout layer after the GRU.
        learning_rate: Adam learning rate.
        batch_size, epochs: forwarded to ``fit``.

    Returns:
        (model, history) — the fitted model and the object returned by ``fit``.
    """
    config = GRU_BEST_CONFIG

    # `is None` instead of `value or default`: `or` silently replaced an
    # explicit dropout=0.0 with the configured dropout. Cast to the types
    # Keras expects.
    units = int(config["units"] if units is None else units)
    dropout = float(config["dropout"] if dropout is None else dropout)
    learning_rate = float(config["learning_rate"] if learning_rate is None else learning_rate)
    batch_size = int(config["batch_size"] if batch_size is None else batch_size)
    epochs = int(config["epochs"] if epochs is None else epochs)

    model = Sequential()
    # Declare the input with an explicit Input object rather than passing
    # input_shape to the first layer, which newer Keras versions warn about.
    model.add(tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])))
    model.add(GRU(units))
    model.add(Dropout(dropout))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                        epochs=epochs, batch_size=batch_size, verbose=0)
    return model, history

In [None]:
# 🔧 Re-initialise the classical ML models (name -> unfitted estimator).
# The tree-based estimators receive random_state for reproducible fits.
ml_models = {
    'LinearRegression': LinearRegression(),
    'KNN': KNeighborsRegressor(n_neighbors=5),
    'DecisionTree': DecisionTreeRegressor(random_state=random_state),
    'RandomForest': RandomForestRegressor(n_estimators=100, random_state=random_state),
    'SVR_RBF': SVR(kernel='rbf')
}

# 🔧 Re-initialise the deep-learning models (name -> training function).
# Each function returns (model, history).
dl_models = {
    "LSTM": train_lstm_model,
    "GRU": train_gru_model,
    "BiLSTM": train_bilstm_model,
    "CNN": train_cnn_model,
    "Hybrid": train_lstm_cnn_hybrid
}

In [None]:
# Notebook-level defaults. create_log_return_data_scaled below takes its own
# window_size/test_size arguments and builds its own local scaler, so it does
# not read these three names.
window_size=10
test_size=0.2
scaler = MinMaxScaler()

def prepare_log_lag_df(symbol, start_page, end_page, target_col, lags=[1]):
    """
    Build the feature table, add a log-transformed target and its lags.

    The log target is ``log1p(target_col)`` stored as ``log_<target_col>``;
    each lag ``k`` adds ``log_<target_col>_lag<k>``. Infinite values are
    turned into NaN and every row containing NaN is dropped.

    Returns:
        (df, log_column_name)
    """
    frame = add_technical_features(symbol, start_page, end_page)

    log_col = f'log_{target_col}'
    frame[log_col] = np.log1p(frame[target_col])

    log_values = frame[log_col]
    for lag in lags:
        frame[f'{log_col}_lag{lag}'] = log_values.shift(lag)

    cleaned = (
        frame
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
        .reset_index(drop=True)
    )
    return cleaned, log_col

def create_log_return_data_scaled(symbol, start_page, end_page, target_col, feature_cols,
                                     window_size=10, test_size=0.2, stride=1, scaler_type='minmax'):
    """
    Build scaled sliding-window train/test arrays for one symbol.

    The series is split chronologically first; the scaler is then fitted on
    the training rows only and applied to both parts, so statistics of the
    test period do not leak into training.

    Args:
        symbol, start_page, end_page, target_col: forwarded to
            ``prepare_log_lag_df``.
        feature_cols: list of input column names.
        window_size: number of consecutive rows per input window.
        test_size: fraction of rows (taken from the end) used for testing.
        stride: step between the starts of consecutive windows.
        scaler_type: ``'minmax'`` for MinMaxScaler, anything else for
            StandardScaler.

    Returns:
        X_train, y_train, X_test, y_test: windowed arrays; X has shape
            (n_windows, window_size, len(feature_cols)) and y holds the scaled
            log target of the row right after each window.
        y_true_unscaled: unscaled log target values aligned one-to-one with
            ``y_test`` (the same ``stride`` is applied).
        scaler: the scaler fitted on the training rows of
            ``feature_cols + [log target]``.
        all_cols: column order used by the scaler.

    Raises:
        ValueError: raised by the scaler when the training part is empty.
    """
    # Step 1: features plus log target and its lag.
    df, target_col_log = prepare_log_lag_df(symbol, start_page, end_page, target_col)
    all_cols = feature_cols + [target_col_log]

    # Step 2: choose the scaler.
    scaler = MinMaxScaler() if scaler_type == 'minmax' else StandardScaler()

    # Step 3: clean, then split chronologically BEFORE any scaling.
    df = df.replace([np.inf, -np.inf], np.nan).dropna(subset=all_cols).reset_index(drop=True)
    split_idx = int(len(df) * (1 - test_size))
    train_raw = df.iloc[:split_idx][all_cols]
    test_raw = df.iloc[split_idx:][all_cols]

    # Step 4: fit on the training rows only, then transform both parts.
    train_df = pd.DataFrame(scaler.fit_transform(train_raw), columns=all_cols)
    if len(test_raw):
        test_values = scaler.transform(test_raw)
    else:
        # An empty test part is allowed (e.g. test_size=0); keep the column layout.
        test_values = np.empty((0, len(all_cols)))
    test_df = pd.DataFrame(test_values, columns=all_cols)

    # Step 5: sliding windows with stride.
    n_features = len(feature_cols)

    def sliding(frame):
        # Pull the values out once instead of slicing the DataFrame per window.
        feats = frame[feature_cols].to_numpy()
        target = frame[target_col_log].to_numpy()
        starts = range(0, len(frame) - window_size, stride)
        if len(starts) == 0:
            # Too few rows for a single window: return correctly shaped empties.
            return np.empty((0, window_size, n_features)), np.empty((0,))
        X = np.stack([feats[i:i + window_size] for i in starts])
        y = np.array([target[i + window_size] for i in starts])
        return X, y

    X_train, y_train = sliding(train_df)
    X_test, y_test = sliding(test_df)

    # Step 6: unscaled targets for evaluation. Test window k predicts the row
    # at position split_idx + window_size + k * stride, so apply the same stride.
    y_true_unscaled = df.iloc[split_idx + window_size::stride][target_col_log].reset_index(drop=True)

    return X_train, y_train, X_test, y_test, y_true_unscaled, scaler, all_cols

In [None]:
# Build the per-symbol windowed datasets once; every tuning run below reuses them.
dl_data = prepare_dl_tuning_data(
    symbols=symbols,
    start_page=start_page,
    end_page=end_page,
    target_col=target_col,
    feature_cols=feature_cols
)

# Tune LSTM (updates LSTM_BEST_CONFIG in place and returns the ranked results table)
df_lstm = auto_tune_model_and_update_config(
    dl_data, configs, train_lstm_model, LSTM_BEST_CONFIG
)

# Tune BiLSTM
df_bilstm = auto_tune_model_and_update_config(
    dl_data, configs, train_bilstm_model, BILSTM_BEST_CONFIG
)

# Tune CNN (different structure, so it needs its own configs)
cnn_configs = [
    {"filters": 32, "kernel_size": 2, "dropout": 0.1, "learning_rate": 0.001, "batch_size": 32, "epochs": 50},
    {"filters": 64, "kernel_size": 3, "dropout": 0.2, "learning_rate": 0.001, "batch_size": 64, "epochs": 100},
]
df_cnn = auto_tune_model_and_update_config(
    dl_data, cnn_configs, train_cnn_model, CNN_BEST_CONFIG
)

# Tune Hybrid (different configuration, so it uses its own configs)
hybrid_configs = [
    {"filters": 32, "kernel_size": 2, "dropout": 0.1, "lstm_units": 64, "learning_rate": 0.001, "batch_size": 32, "epochs": 50},
    {"filters": 64, "kernel_size": 3, "dropout": 0.2, "lstm_units": 128, "learning_rate": 0.001, "batch_size": 64, "epochs": 100},
]
df_lstm_cnn_hybrid = auto_tune_model_and_update_config(
    dl_data, hybrid_configs, train_lstm_cnn_hybrid, LSTM_CNN_HYBRID_BEST_CONFIG
)
# NOTE(review): train_gru_model is not tuned in this cell, so GRU_BEST_CONFIG
# keeps its placeholder values unless it is tuned elsewhere — confirm.

📦 Chuẩn bị dữ liệu cho: AGR
📦 Chuẩn bị dữ liệu cho: FPT
📦 Chuẩn bị dữ liệu cho: VNG
📦 Chuẩn bị dữ liệu cho: HPG
📦 Chuẩn bị dữ liệu cho: VNM
📦 Chuẩn bị dữ liệu cho: VIC
📦 Chuẩn bị dữ liệu cho: NVL
📦 Chuẩn bị dữ liệu cho: SSI
📦 Chuẩn bị dữ liệu cho: VND
📦 Chuẩn bị dữ liệu cho: MWG
📦 Chuẩn bị dữ liệu cho: PNJ
📦 Chuẩn bị dữ liệu cho: TCB

🔧 Testing config: {'units': 32, 'dropout': 0.1, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 50}


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


  super().__init__(**kwargs)


[1m1/5[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 197ms/step



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 72ms/step

🔧 Testing config: {'units': 64, 'dropout': 0.2, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 100}


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 66ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 68ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step

🔧 Testing config: {'units': 128, 'dropout': 0.3, 'learning_rate': 0.0005, 'batch_size': 32, 'epochs': 80}


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step

✅ Cập nhật config tốt nhất: {'units': 64, 'dropout': 0.2, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 100, 'MAPE_avg': 254326907472.12317, 'RMSE_avg': 0.02428603149286876, 'R2_avg': 0.7494186881968873}

🔧 Testing config: {'units': 32, 'dropout': 0.1, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 50}


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 94ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 98ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 98ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 134ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 90ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 135ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 89ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 95ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 92ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 87ms/step

🔧 Testing config: {'units': 64, 'dropout': 0.2, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 100}


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 92ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 103ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 94ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 94ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 106ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 91ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 95ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 87ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 98ms/step

🔧 Testing config: {'units': 128, 'dropout': 0.3, 'learning_rate': 0.0005, 'batch_size': 32, 'epochs': 80}


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 139ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 98ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 104ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 101ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 98ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 99ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 98ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 143ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 99ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/step


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 146ms/step

✅ Cập nhật config tốt nhất: {'units': 128, 'dropout': 0.3, 'learning_rate': 0.0005, 'batch_size': 32, 'epochs': 80, 'MAPE_avg': 205569712223.6211, 'RMSE_avg': 0.02425015027572662, 'R2_avg': 0.7329553978393136}

🔧 Testing config: {'filters': 32, 'kernel_size': 2, 'dropout': 0.1, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 50}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step

🔧 Testing config: {'filters': 64, 'kernel_size': 3, 'dropout': 0.2, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 100}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step

✅ Cập nhật config tốt nhất: {'filters': 64, 'kernel_size': 3, 'dropout': 0.2, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 100, 'MAPE_avg': 452332241316.4138, 'RMSE_avg': 0.03939288407877926, 'R2_avg': -0.022547645665480398}

🔧 Testing config: {'filters': 32, 'kernel_size': 2, 'dropout': 0.1, 'lstm_units': 64, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 50}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 86ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step

🔧 Testing config: {'filters': 64, 'kernel_size': 3, 'dropout': 0.2, 'lstm_units': 128, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 100}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step

✅ Cập nhật config tốt nhất: {'filters': 64, 'kernel_size': 3, 'dropout': 0.2, 'lstm_units': 128, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 100, 'MAPE_avg': 364295477100.2633, 'RMSE_avg': 0.033588277228336016, 'R2_avg': 0.4592899889293087}


In [None]:
def inverse_transform_predictions_safe(
    y_pred_scaled_dict,
    X_test,
    scaler,
    target_col_log,
    all_cols,
    return_log=False
):
    """Undo the scaler on model predictions and return them per model.

    The scaler is applied to full rows with ``len(all_cols)`` columns, so each
    prediction is placed in the target column of a temporary matrix whose
    remaining columns are filled from the last step of the matching ``X_test``
    window. Only the target column of the inverse-transformed matrix is kept.

    Args:
        y_pred_scaled_dict: Mapping ``model name -> scaled predictions``.
            Predictions may be 1-D or column-shaped; they are flattened.
        X_test: 3-D array indexed as ``X_test[sample, step, feature]``
            (presumably (samples, window, features) - confirm against the
            data-preparation cell). Its feature axis must hold either
            ``len(all_cols) - 1`` columns (target excluded) or
            ``len(all_cols)`` columns (target included, same order).
        scaler: Object exposing ``inverse_transform`` for arrays of shape
            ``(n_samples, len(all_cols))``.
        target_col_log: Name of the target column inside ``all_cols``.
        all_cols: Column names in the order the scaler was fitted on.
        return_log: If True return the inverse-scaled values as they are;
            otherwise apply ``np.expm1`` to them.

    Returns:
        dict mapping each model name to a 1-D array of restored predictions.
        An empty input mapping yields an empty dict.

    Raises:
        ValueError: If ``target_col_log`` is not in ``all_cols`` or the shapes
            of ``X_test`` and the predictions cannot be aligned.
    """
    results = {}
    if not y_pred_scaled_dict:
        # Nothing to restore; avoids probing the first entry of an empty mapping.
        return results

    target_index = all_cols.index(target_col_log)
    n_cols = len(all_cols)

    for model_name, y_pred_scaled in y_pred_scaled_dict.items():
        # Accept (n,) as well as (n, 1) outputs.
        y_pred_scaled = np.asarray(y_pred_scaled, dtype=float).ravel()
        n_samples = y_pred_scaled.shape[0]

        if n_samples == 0:
            # X_test[-0:] would select every row, so handle "no predictions" explicitly.
            results[model_name] = np.empty(0, dtype=float)
            continue

        # Align with the trailing windows for *this* model, not just the first one.
        X_last = X_test[-n_samples:, -1, :]

        # Rebuild a full-width matrix around the predicted target column.
        temp = np.zeros((n_samples, n_cols))
        temp[:, target_index] = y_pred_scaled
        temp[:, :target_index] = X_last[:, :target_index]
        if X_last.shape[1] == n_cols:
            # Features already include the target column: copy by matching index.
            temp[:, target_index + 1:] = X_last[:, target_index + 1:]
        else:
            # Features exclude the target: columns after it are shifted left by one.
            temp[:, target_index + 1:] = X_last[:, target_index:]

        # Inverse transform and keep only the target column.
        full_inverse = scaler.inverse_transform(temp)
        log_price_pred = full_inverse[:, target_index]

        results[model_name] = log_price_pred if return_log else np.expm1(log_price_pred)

    return results

In [None]:
#evaluate model
def evaluate_model(y_true, y_pred):
    """Compute regression error metrics for one set of predictions.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, one per entry of ``y_true``.

    Returns:
        dict with keys ``'RMSE'``, ``'MAE'``, ``'R2'``, ``'MAPE'`` (percent)
        and ``'CVRMSE'`` (RMSE as a percentage of the mean of ``y_true``).
    """
    # Computed once and reused for both RMSE and CVRMSE.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return {
        'RMSE': rmse,
        'MAE': mean_absolute_error(y_true, y_pred),
        'R2': r2_score(y_true, y_pred),
        # sklearn's implementation guards the denominator against zeros and
        # validates that both inputs have matching lengths, instead of silently
        # broadcasting (n,) against (n, 1) the way a raw NumPy division does.
        'MAPE': mean_absolute_percentage_error(y_true, y_pred) * 100,
        'CVRMSE': rmse / np.mean(y_true) * 100
    }

#train ML models
def train_ml_models(X_train, y_train, X_test, y_test, X_test_seq, y_true_log, target_col_log, scaler, all_cols):
    """Fit every estimator in the module-level ``ml_models`` and score it.

    Each estimator is trained on the flattened windows, its scaled predictions
    are restored with ``inverse_transform_predictions_safe`` and compared with
    ``np.expm1(y_true_log)`` through ``evaluate_model``.

    Args:
        X_train, X_test: Windowed inputs; reshaped to one row per sample.
        y_train: Scaled training targets.
        y_test: Not used by this function.
        X_test_seq: Un-flattened test windows handed to the inverse transform.
        y_true_log: Reference targets before ``np.expm1`` is applied.
        target_col_log, scaler, all_cols: Forwarded to the inverse transform.

    Returns:
        dict mapping estimator name to its metrics dict.
    """
    actual_values = np.expm1(y_true_log)

    # One row per sample: everything after the first axis is collapsed.
    train_rows = X_train.reshape(X_train.shape[0], -1)
    test_rows = X_test.reshape(X_test.shape[0], -1)

    scores = {}
    for name, estimator in ml_models.items():
        estimator.fit(train_rows, y_train)
        scaled_pred = estimator.predict(test_rows)

        # Restore this single estimator's output to the real scale.
        restored = inverse_transform_predictions_safe(
            y_pred_scaled_dict={name: scaled_pred},
            X_test=X_test_seq,
            scaler=scaler,
            target_col_log=target_col_log,
            all_cols=all_cols,
            return_log=False
        )

        scores[name] = evaluate_model(actual_values, restored[name])

    return scores

#train DL models
def train_dl_models(X_train, y_train, X_test, y_test, y_true_log, target_col_log, scaler, all_cols):
    """Train every builder in the module-level ``dl_models`` and score it.

    Each entry of ``dl_models`` is called as
    ``train_func(X_train, y_train, X_test, y_test)`` and must return a
    2-tuple whose first item exposes ``predict``. A model that fails to train,
    returns something else, or fails to predict is reported and skipped so the
    remaining models are still evaluated.

    Args:
        X_train, y_train, X_test, y_test: Forwarded to each training function.
        y_true_log: Reference targets before ``np.expm1`` is applied.
        target_col_log, scaler, all_cols: Forwarded to
            ``inverse_transform_predictions_safe``.

    Returns:
        dict mapping model name to its metrics dict (only successful models).
    """
    dl_results = {}
    y_true_real = np.expm1(y_true_log)

    y_pred_scaled_dict = {}

    print("📦 Kiểm tra các mô hình DL trong dl_models:", dl_models)

    for name, train_func in dl_models.items():
        print(f"🔍 Gọi mô hình: {name}")

        # Isolate training failures per model, the same way prediction failures are.
        try:
            result = train_func(X_train, y_train, X_test, y_test)
        except Exception as e:
            print(f"❌ Lỗi khi huấn luyện {name}: {e}")
            continue

        if not isinstance(result, tuple) or len(result) != 2:
            print(f"❌ Mô hình {name} không trả về (model, history) như mong đợi → result = {result}")
            continue

        model, _ = result

        try:
            # Only the flattened predictions are kept; the model itself is not
            # retained, so it can be released before the next one is trained.
            y_pred_scaled_dict[name] = model.predict(X_test).flatten()
        except Exception as e:
            print(f"❌ Lỗi khi predict với {name}: {e}")
            continue

    # Inverse-transform all collected predictions in one call.
    if y_pred_scaled_dict:
        y_pred_real_dict = inverse_transform_predictions_safe(
            y_pred_scaled_dict=y_pred_scaled_dict,
            X_test=X_test,
            scaler=scaler,
            target_col_log=target_col_log,
            all_cols=all_cols,
            return_log=False
        )

        for name, y_pred_real in y_pred_real_dict.items():
            dl_results[name] = evaluate_model(y_true_real, y_pred_real)

    return dl_results

In [None]:
def get_all_symbol_data(symbols, start_page, end_page, target_col, feature_cols,
                        window_size=10, test_size=0.2, stride=1, scaler_type='minmax',
                        evaluation_metric=evaluation_metric, top_n=10):
    """Train and rank all ML and DL models for each symbol.

    For every symbol the data is prepared with
    ``create_log_return_data_scaled``, the models are scored with
    ``train_ml_models`` and ``train_dl_models``, and the merged metrics table
    is sorted by ``evaluation_metric``. A symbol that raises at any stage is
    reported and left out of the result.

    Args:
        symbols: Iterable of ticker symbols.
        start_page, end_page, target_col, feature_cols, window_size,
        test_size, stride, scaler_type: Forwarded to the data-preparation step.
        evaluation_metric: Metrics column to sort by; ``"R2"`` sorts
            descending, any other value ascending.
        top_n: Number of best rows kept per symbol.

    Returns:
        dict mapping symbol to a DataFrame of its ``top_n`` ranked models.
    """
    ranked_by_symbol = {}
    # Only R2 is "higher is better"; every other metric is sorted smallest first.
    smallest_first = evaluation_metric != "R2"

    for symbol in symbols:
        print(f"✅ Đang xử lý: {symbol}")
        try:
            # Build the scaled, windowed train/test data for this symbol.
            (X_train, y_train, X_test, y_test,
             y_true_log, scaler, all_cols) = create_log_return_data_scaled(
                symbol=symbol,
                start_page=start_page,
                end_page=end_page,
                target_col=target_col,
                feature_cols=feature_cols,
                window_size=window_size,
                test_size=test_size,
                stride=stride,
                scaler_type=scaler_type
            )

            # Arguments both training helpers need for restoring and scoring.
            scoring_args = dict(
                y_true_log=y_true_log,
                target_col_log=f"log_{target_col}",
                scaler=scaler,
                all_cols=all_cols
            )

            ml_results = train_ml_models(
                X_train, y_train, X_test, y_test,
                X_test_seq=X_test,
                **scoring_args
            )
            dl_results = train_dl_models(
                X_train, y_train, X_test, y_test,
                **scoring_args
            )

            # One row per model, ordered from best to worst.
            merged = pd.DataFrame({**ml_results, **dl_results}).T
            ranked = merged.sort_values(evaluation_metric, ascending=smallest_first)

            ranked_by_symbol[symbol] = ranked.head(top_n)

        except Exception as e:
            print(f"⚠️ Không thể xử lý {symbol}: {e}")

    return ranked_by_symbol

In [None]:
# Rank every model for each configured symbol, keeping the three best per symbol.
results = get_all_symbol_data(
    symbols=symbols,
    start_page=start_page,
    end_page=end_page,
    target_col=target_col,
    feature_cols=feature_cols,
    window_size=window_size,
    test_size=0.2,
    stride=1,
    scaler_type="minmax",
    evaluation_metric=evaluation_metric,
    top_n=3
)

✅ Đang xử lý: AGR
📦 Kiểm tra các mô hình DL trong dl_models: {'LSTM': <function train_lstm_model at 0x7a22aae536a0>, 'GRU': <function train_gru_model at 0x7a22aae52de0>, 'BiLSTM': <function train_bilstm_model at 0x7a22aae52b60>, 'CNN': <function train_cnn_model at 0x7a22aae52fc0>, 'Hybrid': <function train_lstm_cnn_hybrid at 0x7a22aae52ac0>}
🔍 Gọi mô hình: LSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
🔍 Gọi mô hình: GRU
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
🔍 Gọi mô hình: BiLSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 105ms/step
🔍 Gọi mô hình: CNN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
🔍 Gọi mô hình: Hybrid
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
✅ Đang xử lý: FPT
📦 Kiểm tra các mô hình DL trong dl_models: {'LSTM': <function train_lstm_model at 0x7a22aae536a0>, 'GRU': <function train_gru_model at 0x7a22aae52de0>, 'BiLSTM': <function train_bilstm_model at 0x7a22aae52b60>, 'CNN': <function train_cnn_model at 0x7a22aae52fc0>, 'Hybrid': <function train_lstm_cnn_hybrid at 0x7a22aae52ac0>}
🔍 Gọi mô hình: LSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
🔍 Gọi mô hình: GRU
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
🔍 Gọi mô hình: BiLSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 98ms/step
🔍 Gọi mô hình: CNN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
🔍 Gọi mô hình: Hybrid
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
✅ Đang xử lý: VNG
📦 Kiểm tra các mô hình DL trong dl_models: {'LSTM': <function train_lstm_model at 0x7a22aae536a0>, 'GRU': <function train_gru_model at 0x7a22aae52de0>, 'BiLSTM': <function train_bilstm_model at 0x7a22aae52b60>, 'CNN': <function train_cnn_model at 0x7a22aae52fc0>, 'Hybrid': <function train_lstm_cnn_hybrid at 0x7a22aae52ac0>}
🔍 Gọi mô hình: LSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
🔍 Gọi mô hình: GRU
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
🔍 Gọi mô hình: BiLSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 95ms/step
🔍 Gọi mô hình: CNN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
🔍 Gọi mô hình: Hybrid
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
✅ Đang xử lý: HPG
📦 Kiểm tra các mô hình DL trong dl_models: {'LSTM': <function train_lstm_model at 0x7a22aae536a0>, 'GRU': <function train_gru_model at 0x7a22aae52de0>, 'BiLSTM': <function train_bilstm_model at 0x7a22aae52b60>, 'CNN': <function train_cnn_model at 0x7a22aae52fc0>, 'Hybrid': <function train_lstm_cnn_hybrid at 0x7a22aae52ac0>}
🔍 Gọi mô hình: LSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
🔍 Gọi mô hình: GRU
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 79ms/step
🔍 Gọi mô hình: BiLSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 93ms/step
🔍 Gọi mô hình: CNN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
🔍 Gọi mô hình: Hybrid
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
✅ Đang xử lý: VNM
📦 Kiểm tra các mô hình DL trong dl_models: {'LSTM': <function train_lstm_model at 0x7a22aae536a0>, 'GRU': <function train_gru_model at 0x7a22aae52de0>, 'BiLSTM': <function train_bilstm_model at 0x7a22aae52b60>, 'CNN': <function train_cnn_model at 0x7a22aae52fc0>, 'Hybrid': <function train_lstm_cnn_hybrid at 0x7a22aae52ac0>}
🔍 Gọi mô hình: LSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
🔍 Gọi mô hình: GRU
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
🔍 Gọi mô hình: BiLSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/step
🔍 Gọi mô hình: CNN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
🔍 Gọi mô hình: Hybrid
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
✅ Đang xử lý: VIC
📦 Kiểm tra các mô hình DL trong dl_models: {'LSTM': <function train_lstm_model at 0x7a22aae536a0>, 'GRU': <function train_gru_model at 0x7a22aae52de0>, 'BiLSTM': <function train_bilstm_model at 0x7a22aae52b60>, 'CNN': <function train_cnn_model at 0x7a22aae52fc0>, 'Hybrid': <function train_lstm_cnn_hybrid at 0x7a22aae52ac0>}
🔍 Gọi mô hình: LSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
🔍 Gọi mô hình: GRU
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
🔍 Gọi mô hình: BiLSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 99ms/step
🔍 Gọi mô hình: CNN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
🔍 Gọi mô hình: Hybrid
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
✅ Đang xử lý: NVL
📦 Kiểm tra các mô hình DL trong dl_models: {'LSTM': <function train_lstm_model at 0x7a22aae536a0>, 'GRU': <function train_gru_model at 0x7a22aae52de0>, 'BiLSTM': <function train_bilstm_model at 0x7a22aae52b60>, 'CNN': <function train_cnn_model at 0x7a22aae52fc0>, 'Hybrid': <function train_lstm_cnn_hybrid at 0x7a22aae52ac0>}
🔍 Gọi mô hình: LSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
🔍 Gọi mô hình: GRU
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
🔍 Gọi mô hình: BiLSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/step
🔍 Gọi mô hình: CNN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
🔍 Gọi mô hình: Hybrid
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
✅ Đang xử lý: SSI
📦 Kiểm tra các mô hình DL trong dl_models: {'LSTM': <function train_lstm_model at 0x7a22aae536a0>, 'GRU': <function train_gru_model at 0x7a22aae52de0>, 'BiLSTM': <function train_bilstm_model at 0x7a22aae52b60>, 'CNN': <function train_cnn_model at 0x7a22aae52fc0>, 'Hybrid': <function train_lstm_cnn_hybrid at 0x7a22aae52ac0>}
🔍 Gọi mô hình: LSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
🔍 Gọi mô hình: GRU
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
🔍 Gọi mô hình: BiLSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/step
🔍 Gọi mô hình: CNN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
🔍 Gọi mô hình: Hybrid
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
✅ Đang xử lý: VND
📦 Kiểm tra các mô hình DL trong dl_models: {'LSTM': <function train_lstm_model at 0x7a22aae536a0>, 'GRU': <function train_gru_model at 0x7a22aae52de0>, 'BiLSTM': <function train_bilstm_model at 0x7a22aae52b60>, 'CNN': <function train_cnn_model at 0x7a22aae52fc0>, 'Hybrid': <function train_lstm_cnn_hybrid at 0x7a22aae52ac0>}
🔍 Gọi mô hình: LSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
🔍 Gọi mô hình: GRU
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
🔍 Gọi mô hình: BiLSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 92ms/step
🔍 Gọi mô hình: CNN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
🔍 Gọi mô hình: Hybrid
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
✅ Đang xử lý: MWG
📦 Kiểm tra các mô hình DL trong dl_models: {'LSTM': <function train_lstm_model at 0x7a22aae536a0>, 'GRU': <function train_gru_model at 0x7a22aae52de0>, 'BiLSTM': <function train_bilstm_model at 0x7a22aae52b60>, 'CNN': <function train_cnn_model at 0x7a22aae52fc0>, 'Hybrid': <function train_lstm_cnn_hybrid at 0x7a22aae52ac0>}
🔍 Gọi mô hình: LSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
🔍 Gọi mô hình: GRU
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
🔍 Gọi mô hình: BiLSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 91ms/step
🔍 Gọi mô hình: CNN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
🔍 Gọi mô hình: Hybrid
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
✅ Đang xử lý: PNJ
📦 Kiểm tra các mô hình DL trong dl_models: {'LSTM': <function train_lstm_model at 0x7a22aae536a0>, 'GRU': <function train_gru_model at 0x7a22aae52de0>, 'BiLSTM': <function train_bilstm_model at 0x7a22aae52b60>, 'CNN': <function train_cnn_model at 0x7a22aae52fc0>, 'Hybrid': <function train_lstm_cnn_hybrid at 0x7a22aae52ac0>}
🔍 Gọi mô hình: LSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
🔍 Gọi mô hình: GRU
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
🔍 Gọi mô hình: BiLSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 95ms/step
🔍 Gọi mô hình: CNN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
🔍 Gọi mô hình: Hybrid
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 82ms/step
✅ Đang xử lý: TCB
📦 Kiểm tra các mô hình DL trong dl_models: {'LSTM': <function train_lstm_model at 0x7a22aae536a0>, 'GRU': <function train_gru_model at 0x7a22aae52de0>, 'BiLSTM': <function train_bilstm_model at 0x7a22aae52b60>, 'CNN': <function train_cnn_model at 0x7a22aae52fc0>, 'Hybrid': <function train_lstm_cnn_hybrid at 0x7a22aae52ac0>}
🔍 Gọi mô hình: LSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
🔍 Gọi mô hình: GRU
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
🔍 Gọi mô hình: BiLSTM


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 97ms/step
🔍 Gọi mô hình: CNN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
🔍 Gọi mô hình: Hybrid
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step


In [None]:
# Show the ranked metrics table of each symbol.
for symbol, ranking in results.items():
    print(f"\n📊 Kết quả cho {symbol}:\n", ranking)


📊 Kết quả cho AGR:
                         RMSE         MAE        R2      MAPE    CVRMSE
LSTM              322.519001  223.742577  0.829971  1.279217  1.845288
LinearRegression  336.793793  240.400846  0.814587  1.383600  1.926962
Hybrid            492.255066  387.523736  0.603912  2.229041  2.816431

📊 Kết quả cho FPT:
                          RMSE          MAE        R2      MAPE    CVRMSE
LinearRegression  2209.713011  1657.114210  0.920957  1.189996  1.590938
BiLSTM            2325.804935  1785.967290  0.912434  1.281857  1.674521
LSTM              2442.152506  1871.368542  0.903454  1.343593  1.758289

📊 Kết quả cho VNG:
               RMSE         MAE        R2      MAPE    CVRMSE
LSTM    221.030600  162.763061  0.645480  1.959382  2.662861
BiLSTM  249.226804  185.143072  0.549261  2.230359  3.002554
Hybrid  254.307189  186.738305  0.530697  2.248625  3.063760

📊 Kết quả cho HPG:
                         RMSE         MAE        R2      MAPE    CVRMSE
LinearRegression  408.128

In [None]:
# 1. Collect the metrics per model.
# NOTE(review): summarize_model_metrics (defined right after) builds its own
# accumulators and never reads these two module-level objects - they look like
# leftovers; confirm no later cell relies on them before removing.
model_metrics = defaultdict(lambda: defaultdict(list))
model_counter = Counter()

def summarize_model_metrics(results, evaluation_metric=evaluation_metric, sort=True, top_n=None):
    """Average each model's metrics over all per-symbol tables.

    Args:
        results: Mapping ``symbol -> DataFrame`` with model names as index and
            metric names as columns.
        evaluation_metric: Column used for sorting; ``"R2"`` sorts descending,
            any other value ascending.
        sort: Whether to sort the summary by ``evaluation_metric``.
        top_n: If truthy, keep only the first ``top_n`` rows.

    Returns:
        DataFrame with one row per model: a leading ``'Count'`` column (number
        of tables the model appears in) followed by the averaged metrics.
        An empty DataFrame is returned when ``results`` holds no rows.
    """
    model_metrics = defaultdict(lambda: defaultdict(list))
    model_counter = Counter()

    for df in results.values():
        for raw_label in df.index:
            # Group under the whitespace-trimmed name, but read the row through
            # the label exactly as stored: looking up the trimmed name would
            # raise KeyError whenever trimming changed it.
            model = raw_label.strip() if isinstance(raw_label, str) else raw_label
            model_counter[model] += 1
            for metric in df.columns:
                model_metrics[model][metric].append(df.loc[raw_label, metric])

    if not model_metrics:
        # No rows at all: selecting the 'Count' column below would fail.
        return pd.DataFrame()

    avg_metrics = {
        model: {metric: sum(values) / len(values) for metric, values in metrics.items()}
        for model, metrics in model_metrics.items()
    }

    for model in avg_metrics:
        avg_metrics[model]['Count'] = model_counter[model]

    combined_df = pd.DataFrame(avg_metrics).T
    # Move 'Count' to the front, keeping the metric columns in their order.
    cols = ['Count'] + [col for col in combined_df.columns if col != 'Count']
    combined_df = combined_df[cols]

    if sort:
        ascending = evaluation_metric != "R2"
        combined_df = combined_df.sort_values(evaluation_metric, ascending=ascending)

    if top_n:
        combined_df = combined_df.head(top_n)

    return combined_df

In [None]:
# Average each model's metrics across symbols and rank by MAPE (ascending).
# `results` only holds each symbol's top-N rows, so 'Count' is the number of
# symbols in which a model reached that top-N, and every average is taken over
# just those symbols.
top_models_df = summarize_model_metrics(results, evaluation_metric="MAPE")
display(top_models_df)

Unnamed: 0,Count,RMSE,MAE,R2,MAPE,CVRMSE
LinearRegression,11.0,792.686505,557.539422,0.87051,1.29816,1.867068
LSTM,8.0,925.19056,705.429366,0.78228,1.790117,2.258789
BiLSTM,10.0,962.059336,737.308472,0.710758,1.895154,2.402233
RandomForest,3.0,1087.386258,781.189024,0.712537,1.985369,2.838524
Hybrid,3.0,397.902503,311.43235,0.658968,2.36966,3.030792
SVR_RBF,1.0,1068.108143,684.062725,0.878449,2.389112,3.898901


In [None]:
# Sector name -> ticker symbols compared together in the per-sector analysis.
industry_groups = {
    "Ngân hàng": ["TCB", "AGR"],
    # NOTE(review): the training log in this notebook processes "VNG", not
    # "VNZ", so "VNZ" appears to never match a key of `results` and this sector
    # is effectively scored on FPT alone - confirm which ticker is intended and
    # align it with the `symbols` list.
    "Công nghệ": ["FPT", "VNZ"],
    "Sản xuất": ["HPG", "VNM"],
    "Bất động sản": ["VIC", "NVL"],
    "Chứng khoán": ["SSI", "VND"],
    "Tiêu dùng/bán lẻ": ["MWG", "PNJ"]
}

In [None]:
def get_best_models_by_industry(results, industry_groups, metric="MAPE"):
    """Pick the best model of each sector by its average ``metric``.

    For every sector the per-symbol tables found in ``results`` are pooled,
    ``metric`` is averaged per model over the tables that contain the model,
    and the model with the lowest average (highest for ``"R2"``) wins.

    Args:
        results: Mapping ``symbol -> DataFrame`` with model names as index and
            metric names as columns.
        industry_groups: Mapping ``sector -> list of symbols``.
        metric: Column to average and compare.

    Returns:
        dict mapping sector to ``{"best_model", "avg_metric", "metric_used"}``.
        Sectors with no symbol present in ``results`` are omitted.

    Warns:
        UserWarning: For each sector listing symbols that are absent from
            ``results``, since its winner is then based on fewer symbols than
            the group declares.
    """
    import warnings  # local import keeps this cell self-contained

    industry_best_models = {}
    choose = max if metric == "R2" else min

    for sector, symbols in industry_groups.items():
        missing = [symbol for symbol in symbols if symbol not in results]
        if missing:
            # Make dropped symbols visible instead of silently shrinking the group.
            warnings.warn(
                f"Nhóm ngành '{sector}': không tìm thấy kết quả cho các mã {missing}; "
                f"chỉ so sánh trên các mã còn lại.",
                stacklevel=2,
            )

        all_sector_results = [results[symbol] for symbol in symbols if symbol in results]
        if not all_sector_results:
            continue

        # Pool the metric values per model across the sector's symbols.
        model_metrics = defaultdict(list)
        for df in all_sector_results:
            for model in df.index:
                model_metrics[model].append(df.loc[model, metric])

        # Average the metric of each model within the sector.
        avg_metrics = {model: sum(vals) / len(vals) for model, vals in model_metrics.items()}

        # Select the winning model.
        best_model = choose(avg_metrics, key=avg_metrics.get)
        industry_best_models[sector] = {
            "best_model": best_model,
            "avg_metric": avg_metrics[best_model],
            "metric_used": metric
        }

    return industry_best_models

# 📊 Run the per-sector comparison and print each sector's winning model.
industry_results = get_best_models_by_industry(results, industry_groups, metric="MAPE")

for sector, info in industry_results.items():
    # Label the average with the metric actually used, so the text cannot
    # drift from the `metric` argument passed above.
    print(f"🏭 {sector} → ✅ Mô hình tốt nhất: {info['best_model']} ({info['metric_used']} trung bình: {info['avg_metric']:.4f})")

🏭 Ngân hàng → ✅ Mô hình tốt nhất: LinearRegression (MAPE trung bình: 1.3226)
🏭 Công nghệ → ✅ Mô hình tốt nhất: LinearRegression (MAPE trung bình: 1.1900)
🏭 Sản xuất → ✅ Mô hình tốt nhất: LinearRegression (MAPE trung bình: 1.0209)
🏭 Bất động sản → ✅ Mô hình tốt nhất: LinearRegression (MAPE trung bình: 1.3398)
🏭 Chứng khoán → ✅ Mô hình tốt nhất: LinearRegression (MAPE trung bình: 1.7455)
🏭 Tiêu dùng/bán lẻ → ✅ Mô hình tốt nhất: LinearRegression (MAPE trung bình: 1.1161)
