In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import optuna
from tqdm import tqdm
import gc

# Pick the compute device once for the whole notebook: CUDA when PyTorch reports
# it as available, otherwise the CPU. Models and tensors below are moved to it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def clear_memory():
    """
    Free memory between training runs.

    Runs Python's garbage collector and, when CUDA is available, asks PyTorch
    to release its cached GPU memory.
    """
    gc.collect()
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()

In [2]:
# =============================================================================
# MODEL AND TRAINING FUNCTIONS
# =============================================================================

class LSTMModel(nn.Module):
    """
    LSTM Model for time-series forecasting.

    Attributes:
        lstm (nn.LSTM): LSTM layer(s), batch-first.
        fc (nn.Linear): Fully connected output layer mapping the hidden state to one value.
    """
    def __init__(self, input_size, hidden_size=128, num_layers=2, dropout=0.2):
        """
        Initialize the LSTM model.

        Args:
            input_size (int): Number of input features.
            hidden_size (int): Number of hidden units.
            num_layers (int): Number of LSTM layers.
            dropout (float): Dropout rate applied between stacked LSTM layers.
                Ignored when ``num_layers`` is 1.
        """
        super().__init__()
        # nn.LSTM only applies dropout *between* stacked layers. With a single
        # layer a non-zero value has no effect and makes PyTorch emit a
        # UserWarning, so pass 0 in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, dropout=inter_layer_dropout)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """
        Forward pass through the model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch, seq_len, input_size).

        Returns:
            torch.Tensor: Tensor of shape (batch, 1) computed from the last time step.
        """
        lstm_out, _ = self.lstm(x)
        return self.fc(lstm_out[:, -1, :])  # Use output from the last time step


def create_sequences(data, dates, seq_len=30):
    """
    Slice a 2-D array into overlapping windows for LSTM input.

    Window ``s`` covers rows ``s .. s+seq_len-1``; its target is column 0 of
    row ``s+seq_len`` and its date is ``dates[s+seq_len]``.

    Args:
        data (np.array): Scaled data as a NumPy array (rows = time steps).
        dates (array-like): Dates aligned with the rows of ``data``.
        seq_len (int): Sequence length.

    Returns:
        tuple: Arrays for features (X), targets (y), and corresponding dates.
    """
    window_starts = range(len(data) - seq_len)
    windows = [data[start:start + seq_len] for start in window_starts]
    targets = [data[start + seq_len, 0] for start in window_starts]
    target_dates = [dates[start + seq_len] for start in window_starts]
    return np.array(windows), np.array(targets), np.array(target_dates)


def inverse_scale_predictions(predictions, scaler):
    """
    Map scaled predictions for the first feature back to original units.

    The scaler operates on full feature rows, so the predictions are placed in
    column 0 of a zero-filled matrix with as many columns as the scaler has
    features; only column 0 of the inverse-transformed result is returned.

    Args:
        predictions (np.array): Scaled predictions.
        scaler (MinMaxScaler): Fitted scaler.

    Returns:
        np.array: Inverse-transformed predictions (1-D).
    """
    row_count = predictions.shape[0]
    feature_count = scaler.min_.shape[0]
    padded = np.zeros((row_count, feature_count))
    padded[:, 0] = predictions.flatten()
    restored = scaler.inverse_transform(padded)
    return restored[:, 0]


def train_model(params, X_train_tensor, y_train_tensor, epochs=50, early_stop_patience=10, trial=None):
    """
    Train the LSTM model on the training data with early stopping.

    Early stopping monitors the average *training* loss per epoch; the weights
    from the epoch with the lowest training loss are restored before returning.

    Args:
        params (dict): Hyperparameters for training. Reads the keys
            'hidden_size', 'num_layers', 'dropout', 'learning_rate', 'batch_size'.
        X_train_tensor (torch.Tensor): Training features, shape (N, seq_len, n_features).
        y_train_tensor (torch.Tensor): Training targets.
        epochs (int): Maximum number of training epochs.
        early_stop_patience (int): Number of epochs with no improvement to trigger early stopping.
        trial (optuna.trial.Trial, optional): If provided, report intermediate results for pruning.

    Returns:
        tuple: (model, train_loss_list)
               model: The trained LSTM model (with the best observed weights).
               train_loss_list: List of training losses per epoch.

    Raises:
        optuna.exceptions.TrialPruned: If ``trial`` is given and Optuna decides to prune.
    """
    model = LSTMModel(
        input_size=X_train_tensor.shape[2],
        hidden_size=params['hidden_size'],
        num_layers=params['num_layers'],
        dropout=params['dropout']
    ).to(device)

    optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'], weight_decay=1e-5)
    criterion = nn.MSELoss()

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=params['batch_size'], shuffle=True)

    train_loss_list = []
    best_loss = np.inf
    patience_counter = 0
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        epoch_train_loss = 0
        for batch_X, batch_y in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{epochs}", leave=False):
            optimizer.zero_grad()
            loss = criterion(model(batch_X), batch_y)
            loss.backward()
            optimizer.step()
            epoch_train_loss += loss.item()
        avg_train_loss = epoch_train_loss / len(train_loader)
        train_loss_list.append(avg_train_loss)

        # Report intermediate loss to Optuna (if trial is provided)
        if trial is not None:
            trial.report(avg_train_loss, epoch)
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()

        # Early stopping check
        if avg_train_loss < best_loss:
            best_loss = avg_train_loss
            patience_counter = 0
            # state_dict() returns references to the live parameter tensors, so
            # the snapshot must be cloned; otherwise later optimizer steps would
            # overwrite the "best" weights in place.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            patience_counter += 1

        if patience_counter >= early_stop_patience:
            print(f"Early stopping triggered at epoch {epoch+1}")
            break

    # Restore the weights from the best epoch (a no-op only if no epoch ran)
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return model, train_loss_list


def evaluate_model(model, X_eval_tensor, y_eval_tensor, batch_size):
    """
    Evaluate the LSTM model on an evaluation (validation or test) set.

    Predictions and loss are gathered in a single forward pass per batch. The
    loss is the mean squared error over all evaluation elements, so a shorter
    final batch does not get extra weight.

    Args:
        model (nn.Module): Trained LSTM model.
        X_eval_tensor (torch.Tensor): Evaluation features.
        y_eval_tensor (torch.Tensor): Evaluation targets.
        batch_size (int): Batch size for evaluation.

    Returns:
        tuple: (avg_eval_loss, y_pred_tensor)
               avg_eval_loss: Mean squared error over the evaluation set (float).
               y_pred_tensor: Model predictions on the evaluation set, as a NumPy array.

    Raises:
        ValueError: If the evaluation set is empty.
    """
    if len(X_eval_tensor) == 0:
        raise ValueError("Evaluation set is empty; cannot compute loss or predictions.")

    # Sum (not mean) per batch so the final division yields a true per-element mean.
    criterion = nn.MSELoss(reduction='sum')
    eval_dataset = TensorDataset(X_eval_tensor, y_eval_tensor)
    eval_loader = DataLoader(eval_dataset, batch_size=batch_size, shuffle=False)

    model.eval()
    squared_error_total = 0.0
    element_count = 0
    y_pred_list = []
    with torch.no_grad():
        for batch_X, batch_y in eval_loader:
            batch_pred = model(batch_X)
            squared_error_total += criterion(batch_pred, batch_y).item()
            element_count += batch_y.numel()
            y_pred_list.append(batch_pred)

    avg_eval_loss = squared_error_total / element_count
    y_pred_tensor = torch.cat(y_pred_list, dim=0).cpu().numpy()

    return avg_eval_loss, y_pred_tensor


def objective(trial, X_train_tensor, y_train_tensor):
    """
    Optuna objective: sample one hyperparameter set, train, and score it.

    The model is trained on the training set only (20 epochs max, patience 10,
    with pruning reports sent to ``trial``). The per-epoch loss history is
    stored on the trial under the user attribute "train_loss_list".

    Args:
        trial (optuna.trial.Trial): Optuna trial object.
        X_train_tensor (torch.Tensor): Training features.
        y_train_tensor (torch.Tensor): Training targets.

    Returns:
        float: Training loss of the last epoch that ran for this hyperparameter set.
    """
    clear_memory()

    # Sample the search space (same order as the keys are consumed downstream).
    hidden_size = trial.suggest_categorical('hidden_size', [64, 128, 256])
    num_layers = trial.suggest_int('num_layers', 1, 3)
    dropout = trial.suggest_float('dropout', 0.1, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64])
    params = dict(
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=dropout,
        learning_rate=learning_rate,
        batch_size=batch_size,
    )

    # The trained model itself is discarded; only its loss history is kept.
    training_result = train_model(
        params,
        X_train_tensor,
        y_train_tensor,
        epochs=20,
        early_stop_patience=10,
        trial=trial,
    )
    loss_history = training_result[1]
    trial.set_user_attr("train_loss_list", loss_history)

    clear_memory()

    final_loss = loss_history[-1]
    return final_loss


def lstm_model_pipeline(data, seq_len=30, tuning=True, best_params=None, early_stop_patience=10):
    """
    Pipeline for training, tuning, evaluation, and prediction using the LSTM model.

    Steps:
      1. Fit a MinMaxScaler on the first 90% of rows and scale all rows.
      2. Build sliding-window sequences; the target is the FIRST column of ``data``.
      3. Split so that every training target lies inside the first 90% of rows
         (the rows the scaler was fitted on); the remaining targets form the test set.
      4. Optionally tune hyperparameters with Optuna, then train the final model.
      5. Evaluate on the test set, print RMSE, MAE, R2 and Directional Accuracy, and plot:
           - Training loss curve
           - Predicted vs. actual VN-INDEX
           - Residual histogram

    Args:
        data (pd.DataFrame or pd.Series): Original dataset, indexed by date.
            The prediction target must be the first column.
        seq_len (int): Length of input sequences.
        tuning (bool): Whether to perform hyperparameter tuning. When True, the
            tuned parameters replace ``best_params``.
        best_params (dict): Hyperparameters used when tuning is not performed.
        early_stop_patience (int): Number of epochs with no improvement to trigger early stopping.

    Returns:
        tuple: (model, X_test_tensor, scaler, y_pred)
               model: Trained LSTM model.
               X_test_tensor: Test-set input sequences (on ``device``).
               scaler: MinMaxScaler fitted on the training rows.
               y_pred: Test-set predictions in original units (1-D NumPy array).

    Raises:
        ValueError: If the training portion is too short to build one sequence.
    """
    if best_params is None:
        best_params = {'hidden_size': 128, 'num_layers': 2, 'dropout': 0.3,
                       'learning_rate': 1e-4, 'batch_size': 32}

    # Convert Series to DataFrame if needed
    if isinstance(data, pd.Series):
        data = data.to_frame()

    train_size = int(0.9 * len(data))
    if train_size <= seq_len:
        raise ValueError(
            f"Not enough data: the training portion has {train_size} rows but "
            f"more than seq_len={seq_len} are required to build a sequence."
        )

    # Normalize data: fit scaler on first 90% of rows only, then transform all
    scaler = MinMaxScaler()
    scaler.fit(data.iloc[:train_size])
    data_scaled = scaler.transform(data)

    # Sequence i has its target at row i + seq_len, so the first sequence whose
    # target falls outside the training rows is i = train_size - seq_len.
    X, y, y_dates = create_sequences(data_scaled, data.index, seq_len)
    split_idx = train_size - seq_len
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]
    y_dates_test = y_dates[split_idx:]

    # Convert to PyTorch tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1).to(device)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1).to(device)

    if tuning:
        print("Starting hyperparameter tuning with Optuna...")
        # Use a pruner to cut unpromising trials early
        pruner = optuna.pruners.MedianPruner(n_warmup_steps=5)
        study = optuna.create_study(direction='minimize', pruner=pruner)
        # Small number of trials to ease computational load
        study.optimize(
            lambda trial: objective(trial, X_train_tensor, y_train_tensor),
            n_trials=10
        )
        best_trial = study.best_trial
        best_params = best_trial.params
        print("Best Hyperparameters:", best_params)
        print("Final Training Loss during tuning:", best_trial.user_attrs["train_loss_list"][-1])

        # After tuning, retrain on the training set using the best hyperparameters
        model, train_loss_list = train_model(best_params, X_train_tensor, y_train_tensor,
                                             epochs=20, early_stop_patience=early_stop_patience)
    else:
        # Train using the provided hyperparameters
        model, train_loss_list = train_model(best_params, X_train_tensor, y_train_tensor,
                                             epochs=50, early_stop_patience=early_stop_patience)
        print("Using provided Hyperparameters:", best_params)
        print("Final Training Loss:", train_loss_list[-1])

    # Evaluate on the test set (shared by both branches)
    val_loss, y_pred_tensor = evaluate_model(model, X_test_tensor, y_test_tensor, best_params['batch_size'])
    y_pred = inverse_scale_predictions(y_pred_tensor, scaler)
    print("Final Evaluation Loss on Test Set:", val_loss)

    # --------------------
    # Compute Evaluation Metrics
    # --------------------
    y_true = inverse_scale_predictions(y_test_tensor.cpu().numpy(), scaler)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    # Directional accuracy: share of steps where the sign of the change is predicted correctly.
    if len(y_true) > 1:
        directional_accuracy = np.mean(np.sign(np.diff(y_true)) == np.sign(np.diff(y_pred)))
    else:
        directional_accuracy = np.nan

    print("RMSE:", rmse)
    print("MAE:", mae)
    print("R2 Score:", r2)
    print("Directional Accuracy:", directional_accuracy)

    # --------------------
    # Plot Training Loss Curve
    # --------------------
    plt.figure(figsize=(10, 5))
    plt.plot(range(1, len(train_loss_list) + 1), train_loss_list, marker='o')
    plt.xlabel("Epoch")
    plt.ylabel("Training Loss (MSE)")
    plt.title("Training Loss Curve")
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.show()

    # --------------------
    # Plot Predicted vs. Actual VN-INDEX
    # --------------------
    plt.figure(figsize=(12, 6))
    plt.plot(y_dates_test, y_true, label="Actual VN-INDEX", marker='o', color="blue")
    plt.plot(y_dates_test, y_pred, label="Predicted VN-INDEX", marker='s', linestyle="dashed", color="red")
    plt.xlabel("Date")
    plt.ylabel("VN-INDEX")
    plt.title("Predicted vs. Actual VN-INDEX")
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.show()

    # --------------------
    # Plot Residual Histogram
    # --------------------
    residuals = y_true - y_pred
    plt.figure(figsize=(10,5))
    plt.hist(residuals, bins=30, edgecolor='k', alpha=0.7)
    plt.title("Residual Histogram")
    plt.xlabel("Residual")
    plt.ylabel("Frequency")
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.show()

    return model, X_test_tensor, scaler, y_pred


def future_prediction(X_test, y_pred, data, scaler, model, num_days=30):
    """
    Generate and plot recursive multi-step forecasts beyond the end of ``data``.

    The forecast is seeded with the most recent ``seq_len`` rows of ``data``
    (scaled with ``scaler``), so the first prediction is for the first trading
    day AFTER the last observed date. Each prediction is fed back as the target
    feature (column 0) of the next input row; all other features are carried
    forward from the last known row because their future values are unknown.

    Args:
        X_test (torch.Tensor): Test set features tensor; only its sequence
            length (``shape[1]``) is used.
        y_pred (array): Predicted VN-INDEX values on the test set.
        data (pd.DataFrame or pd.Series): Original dataset, with the same
            columns the scaler was fitted on and a datetime index.
        scaler (MinMaxScaler): Fitted scaler.
        model (nn.Module): Trained LSTM model.
        num_days (int): Number of future trading days to predict.

    Returns:
        None. Shows two figures and prints a table of the forecasts.
    """
    if isinstance(data, pd.Series):
        data = data.to_frame()

    model.eval()
    seq_len = X_test.shape[1]
    # X_test[-1] ends one row before the last observation (its target IS the
    # last observation), so seed from the true tail of the data instead.
    input_seq = scaler.transform(data)[-seq_len:]
    future_preds = []
    for _ in range(num_days):
        input_tensor = torch.tensor(input_seq, dtype=torch.float32).unsqueeze(0).to(device)
        with torch.no_grad():
            pred = model(input_tensor).cpu().numpy()[0, 0]
        future_preds.append(pred)
        # Build the next row from the latest row (not the oldest, which a
        # circular roll would wrap into this slot) and overwrite the target.
        next_row = input_seq[-1].copy()
        next_row[0] = pred
        input_seq = np.vstack([input_seq[1:], next_row])

    future_preds = inverse_scale_predictions(np.array(future_preds).reshape(-1,1), scaler)

    # Generate future trading dates (skipping weekends)
    last_date = data.index[-1]
    future_dates = []
    while len(future_dates) < num_days:
        last_date += pd.Timedelta(days=1)
        if last_date.weekday() < 5:
            future_dates.append(last_date)

    plt.figure(figsize=(12, 6))
    plt.plot(future_dates, future_preds, marker='o', linestyle="dashed", color="red", label="Predicted VN-INDEX")
    plt.xlabel("Date")
    plt.ylabel("VN-INDEX")
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.title(f"Predicted VN-INDEX for Next {num_days} Trading Days")
    plt.show()

    # Show up to the last 100 observations; clamp so short inputs still plot.
    hist_count = min(100, len(data))
    pred_count = min(hist_count, len(y_pred))
    historical_dates = data.index[-hist_count:]
    historical_values = data.iloc[-hist_count:, 0].values
    plt.figure(figsize=(12, 6))
    plt.plot(historical_dates, historical_values, label="Historical VN-INDEX", color="blue")
    if pred_count > 0:
        # Test predictions end on the last observed date, so align them to the tail.
        plt.plot(data.index[-pred_count:], y_pred[-pred_count:], label="Test Predictions", color="red")
    plt.plot(future_dates, future_preds, color="green", label="Future Predicted VN-INDEX")
    plt.xlabel("Date")
    plt.ylabel("VN-INDEX")
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.title("Historical VN-INDEX with Future Predictions")
    plt.show()

    future_df = pd.DataFrame({"Date": future_dates, "Predicted VN-INDEX": future_preds})
    print("Future Predictions:")
    print(future_df)


In [3]:
# =============================================================================
# FEATURE ENGINEERING FUNCTIONS
# =============================================================================

def compute_RSI(series, window=14):
    """
    Compute the Relative Strength Index (RSI) using simple rolling means.

    Gains and losses are the positive and negative parts of the one-step
    difference; each is averaged over ``window`` observations and combined as
    ``100 - 100 / (1 + avg_gain / avg_loss)``.

    Args:
        series (pd.Series): Series of prices.
        window (int): Window size.

    Returns:
        pd.Series: RSI values (NaN until a full window is available).
    """
    change = series.diff(1)
    upward_moves = change.where(change > 0, 0)
    downward_moves = -change.where(change < 0, 0)
    avg_gain = upward_moves.rolling(window).mean()
    avg_loss = downward_moves.rolling(window).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))


def lag_features_indicators(df, numerical_columns):
    """
    Generate lag features, moving averages, RSI, MACD, volatility, seasonality,
    and interaction features.

    For every column in ``numerical_columns`` the following are added:
    lags (1, 2, 3, 5, 10), SMA/EMA (10, 20), RSI (14), EMA (12, 26) and MACD,
    10-period rolling standard deviation, and the EMA_10 / EMA_20 ratio.
    'DayOfWeek' and 'Month' are derived from the index. Rows containing NaN or
    infinite values (from shifting, rolling, or division by zero) are dropped.

    All new columns are assembled first and attached in a single concat, which
    avoids the DataFrame fragmentation that repeated column insertion causes on
    wide inputs. Column order matches column-by-column insertion.

    Args:
        df (pd.DataFrame): Input data with a datetime index. Not modified.
        numerical_columns (list): List of numerical column names.

    Returns:
        pd.DataFrame: DataFrame with additional features.

    Raises:
        KeyError: If any name in ``numerical_columns`` is not a column of ``df``.
    """
    numerical_columns = list(numerical_columns)
    missing = [col for col in numerical_columns if col not in df.columns]
    if missing:
        raise KeyError(f"Columns not found in DataFrame: {missing}")

    base = df.copy()
    new_cols = {}  # insertion-ordered: name -> Series/Index of feature values

    def current(name):
        # Prefer a freshly generated feature over a same-named input column,
        # mirroring what sequential in-place assignment would read.
        return new_cols[name] if name in new_cols else base[name]

    # Lagged Features
    lag_days = [1, 2, 3, 5, 10]
    for col in numerical_columns:
        for lag in lag_days:
            new_cols[f'{col}_Lag{lag}'] = current(col).shift(lag)

    # Moving Averages and Exponential Moving Averages
    for col in numerical_columns:
        series = current(col)
        new_cols[f'{col}_SMA_10'] = series.rolling(window=10).mean()
        new_cols[f'{col}_SMA_20'] = series.rolling(window=20).mean()
        new_cols[f'{col}_EMA_10'] = series.ewm(span=10, adjust=False).mean()
        new_cols[f'{col}_EMA_20'] = series.ewm(span=20, adjust=False).mean()

    # Relative Strength Index (RSI)
    for col in numerical_columns:
        new_cols[f'{col}_RSI_14'] = compute_RSI(current(col))

    # Moving Average Convergence Divergence (MACD)
    for col in numerical_columns:
        series = current(col)
        new_cols[f'{col}_EMA_12'] = series.ewm(span=12, adjust=False).mean()
        new_cols[f'{col}_EMA_26'] = series.ewm(span=26, adjust=False).mean()
        new_cols[f'{col}_MACD'] = new_cols[f'{col}_EMA_12'] - new_cols[f'{col}_EMA_26']

    # Additional Feature: Rolling Standard Deviation for Volatility
    for col in numerical_columns:
        new_cols[f'{col}_RollingStd_10'] = current(col).rolling(window=10).std()

    # Seasonality Features: Day of Week and Month
    new_cols['DayOfWeek'] = base.index.dayofweek
    new_cols['Month'] = base.index.month

    # Interaction Feature: Ratio of EMA_10 to EMA_20
    for col in numerical_columns:
        new_cols[f'{col}_EMA_Ratio'] = new_cols[f'{col}_EMA_10'] / new_cols[f'{col}_EMA_20']

    # Names that already exist in the input are overwritten in place (keeping
    # their position); everything else is appended in one operation.
    for name in [n for n in new_cols if n in base.columns]:
        base[name] = np.asarray(new_cols.pop(name))
    if new_cols:
        # Values are attached positionally; they all share base's row order.
        appended = pd.DataFrame(
            {name: np.asarray(values) for name, values in new_cols.items()},
            index=base.index,
        )
        base = pd.concat([base, appended], axis=1)

    # Treat +/-inf (e.g. division by a zero EMA) as missing, then drop
    # incomplete rows caused by shifting and rolling.
    return base.replace([np.inf, -np.inf], np.nan).dropna()


def quicky_data(df):
    """
    Preprocess the data by converting the 'Date' column to datetime,
    setting it as index, and dropping the 'Index' column if present.

    The input frame is left untouched, so the function can be re-run on the
    same raw DataFrame (e.g. when a notebook cell is executed twice).

    Args:
        df (pd.DataFrame): Raw data containing a 'Date' column.

    Returns:
        pd.DataFrame: New preprocessed DataFrame indexed by 'Date'.
    """
    prepared = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')

    if 'Index' in prepared.columns:
        prepared = prepared.drop(columns=['Index'])

    return prepared

def select_features_by_correlation(df, target_col="VN_Index_Close", train_ratio=0.9, corr_threshold=0.05):
    """
    Keep only the features whose correlation with the target is strong enough.

    The frame is sorted by index and the first ``train_ratio`` share of rows is
    treated as the training portion. Correlations with ``target_col`` are
    computed on that portion only, and features whose absolute correlation is
    at least ``corr_threshold`` are retained. The selected names are printed.

    Args:
        df (pd.DataFrame): Full dataset (includes the target column).
        target_col (str): Target column name, default = "VN_Index_Close".
        train_ratio (float): Proportion of data used for 'training'.
        corr_threshold (float): Minimum absolute correlation needed to keep a feature.

    Returns:
        pd.DataFrame: The sorted frame restricted to 'target_col' + selected
        features (all rows, target first).
    """
    ordered = df.sort_index()
    train_rows = ordered.iloc[:int(len(ordered) * train_ratio)]

    # Every column except the target is a candidate feature.
    candidates = [name for name in ordered.columns if name != target_col]

    # Absolute correlation with the target, measured on training rows only.
    abs_corr = train_rows[candidates].corrwith(train_rows[target_col]).abs()

    # NaN correlations fail the comparison and are therefore excluded.
    selected_features = [name for name, strength in abs_corr.items()
                         if strength >= corr_threshold]

    print(f"Features with abs(corr) >= {corr_threshold}:")
    print(selected_features)

    kept_columns = [target_col] + selected_features
    return ordered[kept_columns]

In [4]:
# 📂 Load dataset 1 (cleaned HOSE historical data) from CSV, then preprocess it
# with quicky_data: parse 'Date', use it as the index, drop 'Index' if present.
file_path_1 = "../ready_data/cleaned_hose_historical_data.csv"
df_1 = pd.read_csv(file_path_1)
df_1 = quicky_data(df_1)

In [5]:
# 📊 Univariate baseline: use only the VN_Index_Close series from dataset 1
data = df_1["VN_Index_Close"]

# 🚀 Tune + train the LSTM (tuning is on by default) and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

[I 2025-03-27 13:47:58,800] A new study created in memory with name: no-name-7fcba30c-e329-4a39-b429-991c7659d428


Starting hyperparameter tuning with Optuna...


[I 2025-03-27 13:49:16,597] Trial 0 finished with value: 0.0006023622341899681 and parameters: {'hidden_size': 128, 'num_layers': 3, 'dropout': 0.4789544148764239, 'learning_rate': 0.00035551470653832874, 'batch_size': 32}. Best is trial 0 with value: 0.0006023622341899681.
[I 2025-03-27 13:49:38,021] Trial 1 finished with value: 0.00012096099342784328 and parameters: {'hidden_size': 128, 'num_layers': 1, 'dropout': 0.23686695254840717, 'learning_rate': 0.0021636018321328264, 'batch_size': 32}. Best is trial 1 with value: 0.00012096099342784328.
[I 2025-03-27 13:50:09,279] Trial 2 finished with value: 0.00037826484094694024 and parameters: {'hidden_size': 64, 'num_layers': 3, 'dropout': 0.13098637177270886, 'learning_rate': 0.0072071827349487845, 'batch_size': 64}. Best is trial 1 with value: 0.00012096099342784328.
[I 2025-03-27 13:52:32,995] Trial 3 finished with value: 0.00043846268101788155 and parameters: {'hidden_size': 256, 'num_layers': 3, 'dropout': 0.4040687561350398, 'learni

Best Hyperparameters: {'hidden_size': 128, 'num_layers': 1, 'dropout': 0.23686695254840717, 'learning_rate': 0.0021636018321328264, 'batch_size': 32}
Final Training Loss during tuning: 0.00012096099342784328


Training Epoch 18/20:  27%|██▋       | 34/128 [00:00<00:00, 105.35it/s]

In [None]:
# 📊 Multivariate run: use every column of dataset 1 as model input.
# NOTE(review): create_sequences takes column 0 as the prediction target —
# confirm VN_Index_Close is the first column of df_1.
data = df_1

# 🚀 Tune + train the LSTM and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

In [None]:
# 📊 VN_Index_Close plus lag/technical-indicator features derived from it alone
# (no other dataset-1 columns are included).
data = lag_features_indicators(df_1[['VN_Index_Close']], ['VN_Index_Close'])

# 🚀 Tune + train the LSTM and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

In [None]:
# 📊 All dataset-1 columns, plus lag/technical-indicator features for VN_Index_Close only.
# NOTE(review): the pipeline predicts column 0 — confirm VN_Index_Close is first in df_1.
data = lag_features_indicators(df_1, ['VN_Index_Close'])

# 🚀 Tune + train the LSTM and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

In [None]:
# 📊 All dataset-1 columns, plus lag/technical-indicator features for EVERY column.
# NOTE(review): assumes all df_1 columns are numeric — confirm.
data = lag_features_indicators(df_1, df_1.columns)

# 🚀 Tune + train the LSTM and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

In [None]:
# 📂 Load dataset 2 (VN-INDEX with external data) from CSV, then preprocess it
# with quicky_data: parse 'Date', use it as the index, drop 'Index' if present.
file_path_2 = "../ready_data/vn_index_external_data.csv"
df_2 = pd.read_csv(file_path_2)
df_2 = quicky_data(df_2)

In [None]:
# 📊 Multivariate run: use every column of dataset 2 as model input.
# NOTE(review): the pipeline predicts column 0 — confirm VN_Index_Close is first in df_2.
data = df_2

# 🚀 Tune + train the LSTM and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

In [None]:
# 📊 All dataset-2 columns, plus lag/technical-indicator features for VN_Index_Close only.
data = lag_features_indicators(df_2, ['VN_Index_Close'])

# 🚀 Tune + train the LSTM and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

In [None]:
# 📊 All dataset-2 columns, plus lag/technical-indicator features for EVERY column.
# NOTE(review): assumes all df_2 columns are numeric — confirm.
data = lag_features_indicators(df_2, df_2.columns)

# 🚀 Tune + train the LSTM and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

In [None]:
# 📂 Load dataset 3 (merged data) from CSV, then preprocess it with quicky_data:
# parse 'Date', use it as the index, drop 'Index' if present.
# NOTE(review): presumably the merge of datasets 1 and 2 — confirm.
file_path_3 = "../ready_data/merged_data.csv"
df_3 = pd.read_csv(file_path_3)
df_3 = quicky_data(df_3)

In [None]:
# 📊 Multivariate run: use every column of dataset 3 as model input.
# NOTE(review): the pipeline predicts column 0 — confirm VN_Index_Close is first in df_3.
data = df_3

# 🚀 Tune + train the LSTM and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

In [None]:
# 📊 All dataset-3 columns, plus lag/technical-indicator features for VN_Index_Close only.
data = lag_features_indicators(df_3, ['VN_Index_Close'])

# 🚀 Tune + train the LSTM and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

In [None]:
# 📊 All dataset-3 columns, plus lag/technical-indicator features for EVERY column.
# NOTE(review): assumes all df_3 columns are numeric — confirm.
data = lag_features_indicators(df_3, df_3.columns)

# 🚀 Tune + train the LSTM and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

In [None]:
# 📊 All dataset-3 columns, plus lag/technical-indicator features only for the
# columns named in dataset 1.
# NOTE(review): requires every df_1 column to exist in df_3 — confirm.
data = lag_features_indicators(df_3, df_1.columns)

# 🚀 Tune + train the LSTM and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

In [None]:
# 📊 All dataset-3 columns, plus lag/technical-indicator features only for the
# columns named in dataset 2.
# NOTE(review): requires every df_2 column to exist in df_3 — confirm.
data = lag_features_indicators(df_3, df_2.columns)

# 🚀 Tune + train the LSTM and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)

In [None]:
# 📊 Build lag/technical-indicator features for every dataset-3 column ...
data = lag_features_indicators(df_3, df_3.columns)

# ... then keep VN_Index_Close plus the features whose absolute correlation with
# it (measured on the first 90% of rows) is at least 0.05 — the function defaults.
data = select_features_by_correlation(data)

# 🚀 Tune + train the LSTM (tuning requested explicitly) and evaluate on the test split
model, X_test_tensor, scaler, y_pred = lstm_model_pipeline(data, tuning=True)

# 🔮 Forecast and plot the next 30 trading days
future_prediction(X_test_tensor, y_pred, data, scaler, model, num_days=30)