In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling1D, LSTM
import ta
from xgboost import XGBRegressor
from sklearn.ensemble import VotingRegressor
from scikeras.wrappers import KerasRegressor

# Read the data from the CSV files
# Ticker symbol -> location of its CSV file.
file_paths = {
    'MSN': './mnt/data/MSN.csv',
    'PNJ': './mnt/data/PNJ.csv',
    'VIC': './mnt/data/VIC.csv',
    'FPT': './mnt/data/FPT.csv'
}

# One DataFrame per ticker, loaded in the order the paths are declared.
data = {ticker: pd.read_csv(csv_path) for ticker, csv_path in file_paths.items()}

# Add technical indicators
def add_technical_indicators(df):
    """Return a copy of ``df`` extended with technical-indicator columns.

    The indicators are computed with the ``ta`` library from the ``Close``,
    ``High`` and ``Low`` columns and stored as ``SMA``, ``EMA``, ``RSI``,
    ``MACD``, ``Bollinger_High``, ``Bollinger_Low``, ``ATR``, ``ADX`` and
    ``Stochastic``.  Indicators that take an explicit window use 14 rows;
    the MACD difference and the Bollinger bands use the library defaults.

    Args:
        df: DataFrame with at least ``Close``, ``High`` and ``Low`` columns.
            It is not modified.

    Returns:
        A new DataFrame with the indicator columns added and every row that
        contains a NaN in any column removed.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    df = df.copy()
    close, high, low = df['Close'], df['High'], df['Low']

    # NOTE(review): indicators are computed in the row order of ``df``; the
    # input is presumably already in chronological order -- confirm.
    df['SMA'] = ta.trend.sma_indicator(close, window=14)
    df['EMA'] = ta.trend.ema_indicator(close, window=14)
    df['RSI'] = ta.momentum.rsi(close, window=14)
    df['MACD'] = ta.trend.macd_diff(close)
    df['Bollinger_High'] = ta.volatility.bollinger_hband(close)
    df['Bollinger_Low'] = ta.volatility.bollinger_lband(close)
    df['ATR'] = ta.volatility.average_true_range(high, low, close, window=14)
    df['ADX'] = ta.trend.adx(high, low, close, window=14)
    df['Stochastic'] = ta.momentum.stoch(high, low, close, window=14)

    # dropna() has no subset, so a NaN in any column removes the row.
    return df.dropna()

# Replace every loaded frame with its indicator-enriched version.
for key, raw_frame in data.items():
    data[key] = add_technical_indicators(raw_frame)

# Preprocess the data
def preprocess_data(df):
    """Select, sort and scale the model columns and derive the target.

    Keeps ``Date/Time`` plus the eleven feature columns, parses ``Date/Time``
    to datetimes, sorts by it, min-max scales every feature column to
    ``[0, 1]`` and adds ``Price_Change``: the next row's scaled ``Close``
    minus the current row's scaled ``Close``.  Rows containing NaN (such as
    the last row, which has no next close) are dropped.

    Args:
        df: DataFrame containing ``Date/Time`` and the feature columns.

    Returns:
        A new, chronologically sorted DataFrame; ``df`` is left untouched.

    NOTE(review): the scaler is fitted on the whole frame, i.e. before any
    train/test split, and is not returned, so predictions cannot be mapped
    back to price units from here -- confirm this is intended.
    """
    feature_cols = ['Close', 'Volume', 'SMA', 'EMA', 'RSI', 'MACD', 'Bollinger_High',
                    'Bollinger_Low', 'ATR', 'ADX', 'Stochastic']

    frame = df[['Date/Time'] + feature_cols].copy()
    frame['Date/Time'] = pd.to_datetime(frame['Date/Time'])
    frame = frame.sort_values('Date/Time')

    scaler = MinMaxScaler(feature_range=(0, 1))
    frame[feature_cols] = scaler.fit_transform(frame[feature_cols])

    # Target: change in (scaled) close from this row to the next one.
    next_close = frame['Close'].shift(-1)
    frame['Price_Change'] = next_close - frame['Close']
    return frame.dropna()

# Run the preprocessing step on every ticker's frame.
for key, indicator_frame in data.items():
    data[key] = preprocess_data(indicator_frame)

# Split the data into training and test sets
def split_data(df, test_size=0.2, shuffle=False):
    """Split a preprocessed frame into train/test features and targets.

    The features are the eleven scaled indicator columns and the target is
    ``Price_Change``.  By default the split keeps the row order of ``df``:
    the leading rows form the training set and the trailing ``test_size``
    fraction forms the test set.  A shuffled split would place rows that come
    later in the series into the training set while earlier rows are used for
    testing.

    Args:
        df: DataFrame holding the feature columns and ``Price_Change``.
        test_size: Fraction (or absolute number) of rows for the test set,
            forwarded to ``train_test_split``.
        shuffle: When True, rows are shuffled with a fixed seed (42) before
            splitting.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as returned by
        ``train_test_split``.
    """
    feature_cols = ['Close', 'Volume', 'SMA', 'EMA', 'RSI', 'MACD', 'Bollinger_High',
                    'Bollinger_Low', 'ATR', 'ADX', 'Stochastic']
    X = df[feature_cols]
    y = df['Price_Change']
    return train_test_split(
        X,
        y,
        test_size=test_size,
        shuffle=shuffle,
        # The seed only matters when shuffling is requested.
        random_state=42 if shuffle else None,
    )

# Train/test split for every ticker, keyed like ``data``.
splits = {key: split_data(frame) for key, frame in data.items()}

# Prepare the data for the LSTM model
def reshape_data(X):
    """Return ``X``'s values as an array of shape ``(rows, columns, 1)``.

    Args:
        X: Two-dimensional object exposing ``.values`` and ``.shape``
            (the file passes pandas DataFrames).

    Returns:
        The underlying array with a trailing axis of length 1 appended.
    """
    values = X.values
    n_rows, n_cols = X.shape[0], X.shape[1]
    return values.reshape((n_rows, n_cols, 1))

# Per ticker: 3-D inputs for the LSTM, the targets, and the original 2-D frames.
reshaped_data = {}
for key, (X_train, X_test, y_train, y_test) in splits.items():
    reshaped_data[key] = dict(
        X_train=reshape_data(X_train),
        X_test=reshape_data(X_test),
        y_train=y_train,
        y_test=y_test,
        X_train_2d=X_train,  # 2-D version of the training features
        X_test_2d=X_test,  # 2-D version of the test features
    )

# Function to build LSTM model
def build_lstm_model(input_shape):
    """Build and compile the LSTM regression network.

    Architecture, in order: two stacked LSTM layers (128 then 64 units, both
    returning full sequences), a 64-unit ReLU Dense layer, 20% dropout,
    global average pooling over the sequence axis, and a single-unit linear
    output.  The model is compiled with Adam (learning rate 0.001) and a
    mean-squared-error loss.

    Args:
        input_shape: Shape of one sample, without the batch dimension.

    Returns:
        The compiled Keras ``Model``.
    """
    sequence_in = Input(shape=input_shape)

    hidden = sequence_in
    for units in (128, 64):
        hidden = LSTM(units, return_sequences=True)(hidden)

    hidden = Dense(64, activation='relu')(hidden)
    hidden = Dropout(0.2)(hidden)
    pooled = GlobalAveragePooling1D()(hidden)
    prediction = Dense(1)(pooled)

    network = Model(sequence_in, prediction)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    network.compile(optimizer=optimizer, loss='mean_squared_error')
    return network

# Function to train LSTM model
def train_lstm_model(X_train, y_train, X_test, y_test, key):
    """Fit a fresh LSTM model and return it.

    Training runs for at most 100 epochs with batch size 64.  Validation loss
    on ``(X_test, y_test)`` drives two callbacks: early stopping (patience 10,
    best weights restored) and a checkpoint that writes the best model to
    ``lstm_model_<key>.h5``.

    Args:
        X_train: 3-D training inputs; axes 1 and 2 define the model's input shape.
        y_train: Training targets.
        X_test: Inputs used as validation data during fitting.
        y_test: Targets used as validation data during fitting.
        key: Identifier inserted into the checkpoint file name.

    Returns:
        The fitted Keras model.

    NOTE(review): the split passed as ``X_test``/``y_test`` steers early
    stopping and checkpoint selection, so metrics later computed on that same
    split are optimistic -- consider a separate validation split.
    """
    steps, width = X_train.shape[1], X_train.shape[2]
    model = build_lstm_model((steps, width))

    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
        tf.keras.callbacks.ModelCheckpoint(f'lstm_model_{key}.h5', save_best_only=True, monitor='val_loss'),
    ]
    model.fit(
        X_train,
        y_train,
        epochs=100,
        batch_size=64,
        validation_data=(X_test, y_test),
        callbacks=callbacks,
    )
    return model

# Function to train XGBoost model
def train_xgb_model(X_train, y_train):
    """Fit an ``XGBRegressor`` with the file's fixed hyper-parameters.

    Args:
        X_train: Training features.
        y_train: Training targets.

    Returns:
        The fitted ``XGBRegressor``.
    """
    hyperparams = {
        'n_estimators': 100,
        'learning_rate': 0.1,
        'max_depth': 5,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
    }
    regressor = XGBRegressor(**hyperparams)
    regressor.fit(X_train, y_train)
    return regressor

# Stack the two base models' predictions column-wise: (LSTM, XGBoost).
def _stack_base_predictions(lstm_model, xgb_model, X):
    lstm_pred = lstm_model.predict(X).flatten()
    # XGBoost is fed 2-D input, so the trailing singleton axis is dropped.
    xgb_pred = xgb_model.predict(X[:, :, 0])
    return np.column_stack((lstm_pred, xgb_pred))


# Bundles the base models with the meta-model so that ``predict`` accepts the
# same 3-D feature arrays the base models were trained on.
class _StackedEnsemble:
    def __init__(self, lstm_model, xgb_model, meta_model):
        self.lstm_model = lstm_model
        self.xgb_model = xgb_model
        self.meta_model = meta_model

    def predict(self, X):
        """Predict from raw 3-D features via base models, then the meta-model."""
        stacked = _stack_base_predictions(self.lstm_model, self.xgb_model, X)
        return self.meta_model.predict(stacked)


# Function to train and combine models
def train_combined_model(X_train, y_train, X_test, y_test, key):
    """Train the LSTM and XGBoost base models plus a voting meta-model.

    The base models are fitted on ``X_train``.  Their training-set predictions
    are stacked into a two-column matrix on which a ``VotingRegressor`` (a
    Keras LSTM regressor and an XGBoost regressor) is fitted against
    ``y_train``.

    Previously only the ``VotingRegressor`` was returned.  It expects the two
    stacked prediction columns as input, so it could not be used with the raw
    feature arrays that callers hold.  The returned object now keeps the base
    models and performs the stacking inside ``predict``.

    Args:
        X_train: 3-D training inputs with a trailing axis of length 1.
        y_train: Training targets.
        X_test: Inputs forwarded to ``train_lstm_model`` as validation data.
        y_test: Targets forwarded to ``train_lstm_model`` as validation data.
        key: Identifier forwarded to ``train_lstm_model`` for its checkpoint name.

    Returns:
        An object exposing ``predict(X)`` for inputs shaped like ``X_train``,
        with ``lstm_model``, ``xgb_model`` and ``meta_model`` attributes.

    NOTE(review): the meta-model is fitted on in-sample base predictions;
    out-of-fold predictions would give a less optimistic stack.
    """
    # Train the base models.
    lstm_model = train_lstm_model(X_train, y_train, X_test, y_test, key)
    xgb_model = train_xgb_model(X_train[:, :, 0], y_train)

    # Base-model predictions become the meta-model's features.
    X_train_combined = _stack_base_predictions(lstm_model, xgb_model, X_train)

    # Train the voting regressor on the combined predictions.
    meta_model = VotingRegressor([
        ('lstm', KerasRegressor(model=build_lstm_model,
                                model__input_shape=(X_train_combined.shape[1], 1),
                                epochs=100, batch_size=64, verbose=0)),
        ('xgb', XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5,
                             subsample=0.8, colsample_bytree=0.8)),
    ])
    meta_model.fit(X_train_combined, y_train)

    return _StackedEnsemble(lstm_model, xgb_model, meta_model)

# Prepare data and train models
models = {}
for key, bundle in reshaped_data.items():
    # Unpack this ticker's arrays; the 2-D variants are kept for inspection only.
    X_train, X_train_2d = bundle['X_train'], bundle['X_train_2d']
    y_train = bundle['y_train']
    X_test, X_test_2d = bundle['X_test'], bundle['X_test_2d']
    y_test = bundle['y_test']
    models[key] = train_combined_model(X_train, y_train, X_test, y_test, key)

# Evaluate model
def evaluate_model(model, X_test, y_test):
    """Score ``model`` on a held-out set.

    Args:
        model: Object exposing ``predict(X)``.
        X_test: Inputs passed to ``model.predict``.
        y_test: True targets compared against the predictions.

    Returns:
        Tuple ``(mse, mae, rmse, r2)``, where ``rmse`` is the square root of
        ``mse``.
    """
    predictions = model.predict(X_test)
    squared_error = mean_squared_error(y_test, predictions)
    return (
        squared_error,
        mean_absolute_error(y_test, predictions),
        np.sqrt(squared_error),
        r2_score(y_test, predictions),
    )

# Evaluate all models
results = {}
for key, model in models.items():
    held_out = reshaped_data[key]
    X_test, y_test = held_out['X_test'], held_out['y_test']
    results[key] = evaluate_model(model, X_test, y_test)

# Bare expression so the notebook displays the metrics.
results


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
