In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit


In [None]:
# Split the data into a training set and a test set
def train_data(name):
    """Return the training portion of the closing prices for one ticker.

    Reads the notebook-level ``all_stock_data`` object, keeps the rows whose
    ``'ticker'`` entry equals ``name`` and takes their ``'close'`` column.
    (Presumably ``all_stock_data`` is a pandas DataFrame - confirm against the
    cell that loads it.)

    Parameters
    ----------
    name
        Value compared against the ``'ticker'`` column.

    Returns
    -------
    The ``'close'`` values sliced as ``[:int(0.8 * n)]``, where ``n`` is the
    number of matching rows.
    """
    is_ticker = all_stock_data['ticker'] == name
    closes = all_stock_data[is_ticker]['close']
    # The cut-off is truncated towards zero, so the remainder goes to the test part.
    cutoff = int(0.8 * (len(closes)))
    return closes[:cutoff]

def test_data(name):
    """Return the test portion of the closing prices for one ticker.

    Reads the notebook-level ``all_stock_data`` object, keeps the rows whose
    ``'ticker'`` entry equals ``name`` and takes their ``'close'`` column.
    (Presumably ``all_stock_data`` is a pandas DataFrame - confirm against the
    cell that loads it.)

    Parameters
    ----------
    name
        Value compared against the ``'ticker'`` column.

    Returns
    -------
    The ``'close'`` values sliced as ``[int(0.8 * n):]``, where ``n`` is the
    number of matching rows.
    """
    is_ticker = all_stock_data['ticker'] == name
    closes = all_stock_data[is_ticker]['close']
    # Same cut-off expression as the training split, so the two parts do not overlap.
    cutoff = int(0.8 * (len(closes)))
    return closes[cutoff:]

# 1. Recurrent Neural Networks (RNN)

# 2. Long Short-Term Memory (LSTM)

In [None]:
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, GRU
from keras.layers import Dropout
from numpy.random import seed

In [None]:
def model_LSTM(name, number_days_predict, timesteps=12, epochs=200, batch_size=32):
    """Train a stacked LSTM on one ticker's closing prices and forecast ahead.

    The series returned by ``train_data(name)`` is min-max scaled and cut into
    sliding windows of ``timesteps`` consecutive values, each labelled with the
    value that follows it. A four-layer LSTM (50 units per layer, with dropout)
    is fitted on those windows and scored on the series returned by
    ``test_data(name)``. The model is then rolled forward
    ``number_days_predict`` steps, feeding every prediction back in as the
    newest input value.

    Parameters
    ----------
    name
        Ticker identifier forwarded to ``train_data`` and ``test_data``.
    number_days_predict : int
        Number of future steps to forecast; must be at least 1.
    timesteps : int, default 12
        Length of the input window used for training, evaluation and the
        forward forecast alike.
    epochs : int, default 200
        Number of training epochs passed to ``model.fit``.
    batch_size : int, default 32
        Batch size passed to ``model.fit``.

    Returns
    -------
    tuple
        ``(future_price, mape, y_pred)``:

        * ``future_price`` - length-1 array holding the forecast for step
          ``number_days_predict``, in the original price units;
        * ``mape`` - mean absolute percentage error between the test series
          and the model's predictions for it;
        * ``y_pred`` - array of shape ``(len(test), 1)`` with the test-set
          predictions in the original price units.

    Raises
    ------
    ValueError
        If ``number_days_predict`` or ``timesteps`` is smaller than 1, or if
        the training series does not contain more than ``timesteps`` values.
    """
    # Fail fast on unusable arguments instead of after a full training run.
    if number_days_predict < 1:
        raise ValueError("number_days_predict must be at least 1")
    if timesteps < 1:
        raise ValueError("timesteps must be at least 1")

    # Fetch each split exactly once: the helpers re-filter the full frame on
    # every call. Column vectors are what the scaler expects.
    train_values = np.asarray(train_data(name)).reshape(-1, 1)
    test_values = np.asarray(test_data(name)).reshape(-1, 1)
    if len(train_values) <= timesteps:
        raise ValueError(
            "training series for %r has %d values; more than timesteps=%d are required"
            % (name, len(train_values), timesteps)
        )

    # The scaler is fitted on the training split only and reused everywhere else.
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_values)

    # Sliding windows: `timesteps` consecutive values -> the value right after them.
    X_train = np.array([
        train_scaled[i - timesteps:i, 0]
        for i in range(timesteps, len(train_scaled))
    ])
    y_train = train_scaled[timesteps:, 0]
    # Keras recurrent layers take (samples, timesteps, features).
    X_train = X_train.reshape(X_train.shape[0], timesteps, 1)

    # NOTE(review): `seed` is numpy.random.seed, so this fixes NumPy's global RNG
    # only - confirm whether the Keras backend needs its own seed for
    # reproducible weight initialisation and dropout.
    seed(2019)

    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(timesteps, 1)))
    model.add(Dropout(0.20))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.25))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50))
    model.add(Dropout(0.25))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Fit the network on the training windows.
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)

    # Test inputs: the last `timesteps` training values followed by the whole
    # test split, so that every test point has a complete look-back window.
    test_inputs = scaler.transform(
        np.concatenate([train_values[-timesteps:], test_values])
    )
    X_test = np.array([
        test_inputs[i - timesteps:i, 0]
        for i in range(timesteps, len(test_inputs))
    ])
    X_test = X_test.reshape(X_test.shape[0], timesteps, 1)

    # Predictions are made on scaled inputs, so map them back to price units.
    y_pred_LSTM = scaler.inverse_transform(model.predict(X_test))

    # Error metric on the held-out split.
    mape_LSTM = mean_absolute_percentage_error(test_values, y_pred_LSTM)

    # Seed the forecast with the most recent `timesteps` scaled values. The
    # window must have the same length the network was trained on. Copy it so
    # the in-place updates below do not write into `test_inputs`.
    X_latest = test_inputs[-timesteps:].reshape(1, timesteps, 1).copy()
    predicted_next_values = []

    # Forecast step by step, feeding each prediction back in.
    for _ in range(number_days_predict):
        next_value = model.predict(X_latest)[0, 0]
        predicted_next_values.append(next_value)

        # Shift the window one step towards the past along the time axis (the
        # oldest value wraps to the end) and overwrite that last slot with the
        # new prediction.
        X_latest = np.roll(X_latest, -1, axis=1)
        X_latest[0, -1, 0] = next_value

    # Convert the forecasts back to the original price units.
    Future_Price = scaler.inverse_transform(
        np.array(predicted_next_values).reshape(-1, 1)
    )

    return Future_Price[number_days_predict - 1], mape_LSTM, y_pred_LSTM

In [None]:
### TESTING
# Smoke run: fit the LSTM for ticker 'PHR' and forecast 7 steps ahead, then
# print the final forecast, the test-set MAPE and the test-set predictions,
# one after another.
y_pred_LSTM = []  # placeholder; replaced by the call below
Future_Price, mape_LSTM, y_pred_LSTM = model_LSTM(name='PHR', number_days_predict=7)
print(Future_Price, mape_LSTM, y_pred_LSTM, sep='\n')

# 3. Gated Recurrent Unit (GRU)

# 4. Transformer Models (e.g., Temporal Fusion Transformer)

# 5. Convolutional Neural Networks (CNN)