In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit


In [None]:
# Load the data used to test the models.
# NOTE(review): presumably Collect_Data_Function.ipynb defines data_collect, train_data and
# test_data, which are used in this notebook but not defined here — confirm.
%run Collect_Data_Function.ipynb
# Ticker symbols to collect (the original note describes them as taken from the VN30 list).
stock_symbols = ["PHR","MBB"]  # ticker symbols passed to data_collect
all_stock_data = data_collect(stock_symbols)
all_stock_data.head()

# 1. ARIMA (AutoRegressive Integrated Moving Average)

In [None]:
!pip install pmdarima
!pip install statsmodels

In [None]:
from pmdarima import auto_arima
import pmdarima as pm
from pmdarima.arima.utils import ndiffs
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
def model_ARIMA(name, number_days_predict):
    """Fit an ARIMA model for one ticker, score it on the test split and forecast ahead.

    The (p, d, q) order is selected by ``pm.auto_arima`` on the training split.
    Accuracy is measured with a walk-forward evaluation: before each test
    observation the model is refitted on everything seen so far and a one-step
    forecast is made. Finally the model is refitted on train + test and used to
    forecast ``number_days_predict`` steps ahead.

    Args:
        name: Ticker symbol passed to ``train_data`` and ``test_data``.
        number_days_predict: Number of steps to forecast past the end of the
            combined train + test series. Must be at least 1.

    Returns:
        tuple: ``(future_price, mape_ARIMA, y_pred_ARIMA)`` where ``future_price``
        is the forecast for the last requested step, ``mape_ARIMA`` is the mean
        absolute percentage error of the walk-forward predictions and
        ``y_pred_ARIMA`` is the list of those one-step predictions.

    Raises:
        ValueError: If ``number_days_predict`` is smaller than 1.
    """
    if number_days_predict < 1:
        raise ValueError("number_days_predict must be at least 1")

    # Load each split once instead of re-evaluating the helpers at every use.
    train = train_data(name)
    test = test_data(name)

    # Search for the best ARIMA order.
    # NOTE(review): start_P, m and D are seasonal settings; presumably they have no
    # effect while seasonal=False — confirm before removing them.
    order_search = pm.auto_arima(train, test = 'adf',
                                 start_p = 1, start_q = 1,
                                 max_p = 3, max_q = 3,
                                 d = None, seasonal = False,
                                 start_P = 0, m = 3,
                                 trace = True, error_action = 'ignore',
                                 suppress_warnings = True, stepwise = True,
                                 D = 1, information_criterion = 'aic')
    best_order = order_search.order

    # Walk-forward evaluation: fit on the history, predict one step, then reveal
    # the true observation. Fitting at the top of the loop avoids a wasted refit
    # after the last test observation.
    y_pred_ARIMA = []
    history = list(train)
    y = pd.Series(test)
    for obs in y:
        ARIMA_fit = ARIMA(history, order = best_order).fit()
        y_pred_ARIMA.append(ARIMA_fit.forecast()[0])
        history.append(obs)

    mape_ARIMA = mean_absolute_percentage_error(y, y_pred_ARIMA)

    # Forecast the next n steps from a model fitted on all available data.
    full_history = pd.concat([train, test], axis=0)
    ARIMA_fit = ARIMA(full_history, order = best_order).fit()
    Future_Price = ARIMA_fit.forecast(steps = number_days_predict)

    # Take the last step by position so the result does not depend on how the
    # forecast happens to be indexed (integer labels vs. dates).
    return np.asarray(Future_Price)[-1], mape_ARIMA, y_pred_ARIMA


In [None]:
# #EXAMPLE
# y_pred_ARIMA = []
# Future_Price, mape_ARIMA, y_pred_ARIMA = model_ARIMA('ACB',7)
# print(Future_Price)
# print(mape_ARIMA)
# print(y_pred_ARIMA)

# 2. SARIMA, SARIMAX

In [None]:
def model_SARIMA(name, number_days_predict):
    """Fit a seasonal ARIMA model for one ticker, score it and forecast ahead.

    ``pm.auto_arima`` (with ``seasonal=True`` and ``m=3``) selects the
    non-seasonal and seasonal orders on the training split. A ``SARIMAX`` model
    with those orders is fitted on the training split and forecast over the
    length of the test split to compute the MAPE. The model is then refitted on
    train + test and used to forecast ``number_days_predict`` steps ahead.

    Args:
        name: Ticker symbol passed to ``train_data`` and ``test_data``.
        number_days_predict: Number of steps to forecast past the end of the
            combined train + test series. Must be at least 1.

    Returns:
        tuple: ``(future_price, mape_SARIMA, y_pred_SARIMA)`` where
        ``future_price`` is the forecast for the last requested step,
        ``mape_SARIMA`` is the mean absolute percentage error on the test split
        and ``y_pred_SARIMA`` holds the forecasts for the test window.

    Raises:
        ValueError: If ``number_days_predict`` is smaller than 1.
    """
    if number_days_predict < 1:
        raise ValueError("number_days_predict must be at least 1")

    # Load each split once instead of re-evaluating the helpers at every use.
    train = train_data(name)
    test = test_data(name)

    # Search for the best SARIMA orders.
    order_search = pm.auto_arima(train, test = 'adf',
                                 start_p = 1, start_q = 1,
                                 max_p = 3, max_q = 3,
                                 d = None, seasonal = True,
                                 start_P = 0, m = 3,
                                 trace = True, error_action = 'ignore',
                                 suppress_warnings = True, stepwise = True,
                                 D = 1, information_criterion = 'aic')
    best_order = order_search.order                    # non-seasonal (p, d, q)
    best_seasonal_order = order_search.seasonal_order  # seasonal (P, D, Q, m)

    # Fit on the training split and forecast across the test window.
    SARIMA_fit = SARIMAX(train, order=best_order, seasonal_order=best_seasonal_order,
                         enforce_stationarity=False, enforce_invertibility=False).fit()
    y_pred_SARIMA = SARIMA_fit.forecast(steps=len(test))
    mape_SARIMA = mean_absolute_percentage_error(test, y_pred_SARIMA)

    # Forecast the next n steps from a model fitted on all available data.
    history = pd.concat([train, test], axis=0)
    SARIMA_fit = SARIMAX(history, order=best_order, seasonal_order=best_seasonal_order,
                         enforce_stationarity=False, enforce_invertibility=False).fit()
    Future_Price = SARIMA_fit.forecast(steps = number_days_predict)

    # Take the last step by position so the result does not depend on how the
    # forecast happens to be indexed (integer labels vs. dates).
    return np.asarray(Future_Price)[-1], mape_SARIMA, y_pred_SARIMA

In [None]:
# ###EXAMPLE
# y_pred_SARIMA = []
# Future_Price, mape_SARIMA, y_pred_SARIMA = model_SARIMA('ACB',7)
# print(Future_Price)
# print(mape_SARIMA)
# print(y_pred_SARIMA)

# 3. Prophet

In [None]:
!pip install prophet

In [None]:
from prophet import Prophet

In [None]:
def model_Prophet(name, number_days_predict):
    """Fit a Prophet model for one ticker, score it on a hold-out and forecast ahead.

    The rows of ``all_stock_data`` whose ``ticker`` equals ``name`` are turned
    into Prophet's ``ds``/``y`` frame (from the ``time`` and ``close`` columns),
    ordered by time and split 80/20 into train and test. One model is fitted on
    the training part and evaluated at the timestamps of the test rows; a second
    model is fitted on all rows and used for the future forecast.

    Args:
        name: Ticker symbol to select from ``all_stock_data``.
        number_days_predict: Number of periods (Prophet's default daily
            frequency) to forecast past the last observed timestamp. Must be at
            least 1.

    Returns:
        tuple: ``(future_price, mape_Prophet, y_pred_Prophet)`` where
        ``future_price`` is the ``yhat`` of the last forecast period,
        ``mape_Prophet`` is the mean absolute percentage error on the test part
        and ``y_pred_Prophet`` is the array of ``yhat`` values for the test rows.

    Raises:
        ValueError: If ``number_days_predict`` is smaller than 1 or no rows
            exist for ``name``.
    """
    if number_days_predict < 1:
        raise ValueError("number_days_predict must be at least 1")

    # Prepare data
    data_prepare = all_stock_data[all_stock_data['ticker'] == name]
    if data_prepare.empty:
        raise ValueError(f"No rows found for ticker {name!r} in all_stock_data")

    dfl = pd.DataFrame({
        'ds': pd.to_datetime(data_prepare['time']),
        'y': data_prepare['close']
    })
    # A chronological split and the alignment of predictions with the test
    # values both require the rows to be in time order.
    dfl = dfl.sort_values('ds').reset_index(drop=True)

    # Train-test split (first 80% of the rows for training)
    split_at = int(len(dfl) * 0.8)
    train = dfl.iloc[:split_at]
    test = dfl.iloc[split_at:]

    # Evaluate on the hold-out. Predict at the test rows' own timestamps:
    # make_future_dataframe would generate consecutive calendar days, which need
    # not coincide with the dates actually present in the test rows.
    eval_model = Prophet()
    eval_model.fit(train)
    y_pred_Prophet = eval_model.predict(test[['ds']])['yhat'].values

    # Compute MAPE
    mape_Prophet = mean_absolute_percentage_error(test['y'].values, y_pred_Prophet)

    # Predict future prices from a model fitted on all rows, so the forecast
    # starts after the latest observation rather than after the training part.
    final_model = Prophet()
    final_model.fit(dfl)
    future_dataframe = final_model.make_future_dataframe(periods=number_days_predict,
                                                         include_history=False)
    Future_Price = final_model.predict(future_dataframe)['yhat'].values

    return Future_Price[-1], mape_Prophet, y_pred_Prophet

In [None]:
# ###EXAMPLE
# y_pred_Prophet = []
# Future_Price, mape_Prophet, y_pred_Prophet = model_Prophet('ACB', 7)
# print(Future_Price)
# print(mape_Prophet)
# print(y_pred_Prophet)

# 4. VAR (Vector Autoregression)

Lưu ý khi sử dụng VAR:
+ Kiểm tra tính dừng: sử dụng kiểm định ADF (Augmented Dickey-Fuller) để đảm bảo mỗi chuỗi dữ liệu là chuỗi dừng (stationary).
+ Chọn độ trễ tối ưu: dựa trên các tiêu chí thông tin AIC, BIC hoặc FPE.
+ Diễn giải kết quả: VAR mô tả mối quan hệ qua lại giữa các biến, không chỉ đơn thuần dùng để dự báo.

In [None]:
from statsmodels.tsa.api import VAR

In [None]:
def model_VAR(name, number_days_predict):
    """Fit a VAR model for one ticker, score it on the test split and forecast ahead.

    The lag order is selected by AIC on the training split. The fitted model is
    forecast over the length of the test split to compute the MAPE, then a model
    with the same lag order is refitted on train + test and used to forecast
    ``number_days_predict`` steps ahead.

    Args:
        name: Ticker symbol passed to ``train_data`` and ``test_data``. The data
            returned for it must contain at least two variables (columns),
            because a vector autoregression models several series jointly.
        number_days_predict: Number of steps to forecast past the end of the
            combined train + test data. Must be at least 1.

    Returns:
        tuple: ``(future_forecasts, mape_VAR, y_pred_VAR)`` where
        ``future_forecasts`` is a list with one row of forecast values per
        future step, ``mape_VAR`` is the mean absolute percentage error over the
        test window and ``y_pred_VAR`` is the list of forecast rows for that
        window.

    Raises:
        ValueError: If ``number_days_predict`` is smaller than 1 or the data has
            fewer than two variables.
    """
    if number_days_predict < 1:
        raise ValueError("number_days_predict must be at least 1")

    # Load each split once; wrapping in DataFrame gives a uniform 2-D view.
    train = pd.DataFrame(train_data(name))
    test = pd.DataFrame(test_data(name))
    if train.shape[1] < 2:
        raise ValueError(
            "VAR needs at least two variables, but train_data(name) provided %d"
            % train.shape[1]
        )

    # Train the VAR model, choosing the lag order by AIC.
    model = VAR(train)
    results = model.fit(ic='aic')
    if results.k_ar == 0:
        # A lag order of 0 leaves no lagged values to forecast from; fall back
        # to a single lag.
        results = model.fit(maxlags=1)
    lag_order = results.k_ar

    # Forecast across the test window, starting from the last lag_order training
    # rows. One multi-step call performs the same recursion as feeding one-step
    # forecasts back in manually.
    y_pred_VAR = results.forecast(train.values[-lag_order:], steps=len(test))
    mape_VAR = mean_absolute_percentage_error(test.values, y_pred_VAR)

    # Forecast the next n steps from a model fitted on all available data.
    history = pd.concat([train, test], axis=0)
    final_results = VAR(history).fit(maxlags=lag_order)
    future_forecasts = final_results.forecast(history.values[-lag_order:],
                                              steps=number_days_predict)

    # Label the forecast columns with the variable names of the input data.
    forecast_df = pd.DataFrame(future_forecasts, columns=train.columns)
    return forecast_df.values.tolist(), mape_VAR, y_pred_VAR.tolist()

# 5. ETS

In [None]:
from statsmodels.tsa.exponential_smoothing.ets import ETSModel

In [None]:
def model_ETS(name, number_days_predict, seasonal_periods=7):
    """Fit an additive ETS model for one ticker, score it and forecast ahead.

    An ``ETSModel`` with additive error, trend and seasonality is fitted on the
    training split and forecast over the length of the test split to compute the
    MAPE. The same specification is then refitted on train + test and used to
    forecast ``number_days_predict`` steps ahead.

    Args:
        name: Ticker symbol passed to ``train_data`` and ``test_data``.
        number_days_predict: Number of steps to forecast past the end of the
            combined train + test series. Must be at least 1.
        seasonal_periods: Length of the seasonal cycle given to ``ETSModel``.
            Defaults to 7, the value previously hard-coded.

    Returns:
        tuple: ``(future_price, mape_ETS, y_pred_ETS)`` where ``future_price`` is
        the forecast for the last requested step, ``mape_ETS`` is the mean
        absolute percentage error on the test split and ``y_pred_ETS`` holds the
        forecasts for the test window.

    Raises:
        ValueError: If ``number_days_predict`` is smaller than 1.
    """
    if number_days_predict < 1:
        raise ValueError("number_days_predict must be at least 1")

    # Load each split once instead of re-evaluating the helpers at every use.
    train = train_data(name)
    test = test_data(name)

    # Fit on the training split and forecast across the test window.
    train_fit = ETSModel(train, error='add', trend='add', seasonal='add',
                         seasonal_periods=seasonal_periods).fit()
    y_pred_ETS = train_fit.forecast(steps=len(test))
    mape_ETS = mean_absolute_percentage_error(test, y_pred_ETS)

    # Forecast the next n steps from a model fitted on all available data.
    history = pd.concat([train, test], axis=0)
    ETS_fit = ETSModel(history, error='add', trend='add', seasonal='add',
                       seasonal_periods=seasonal_periods).fit()
    Future_Price = ETS_fit.forecast(steps = number_days_predict)

    # Take the last step by position so the result does not depend on how the
    # forecast happens to be indexed (integer labels vs. dates).
    return np.asarray(Future_Price)[-1], mape_ETS, y_pred_ETS


In [None]:
# ###EXAMPLE
# y_pred_ETS = []
# Future_Price, mape_ETS, y_pred_ETS = model_ETS('ACB', 7)
# print("Future_Prices:", Future_Price)
# print("mape_ETS:", mape_ETS)
# print("y_pred_ETS:", y_pred_ETS)


# 6. TBATS

In [None]:
pip install tbats

In [None]:
from tbats import TBATS

In [None]:
def model_TBATS(name, number_days_predict):
    """Fit a TBATS model for one ticker, score it on the test split and forecast ahead.

    A TBATS estimator with seasonal periods ``(7, 30.5)`` is fitted on the
    training split and forecast over the length of the test split to compute the
    MAPE. A second fit on train + test produces the future forecast, so that it
    starts after the latest observation instead of after the training split.

    Args:
        name: Ticker symbol passed to ``train_data`` and ``test_data``.
        number_days_predict: Number of steps to forecast past the end of the
            combined train + test series. Must be at least 1.

    Returns:
        tuple: ``(future_price, mape_TBATS, y_pred_TBATS)`` where
        ``future_price`` is the forecast for the last requested step,
        ``mape_TBATS`` is the mean absolute percentage error on the test split
        and ``y_pred_TBATS`` holds the forecasts for the test window.

    Raises:
        ValueError: If ``number_days_predict`` is smaller than 1.
    """
    if number_days_predict < 1:
        raise ValueError("number_days_predict must be at least 1")

    # Load each split once instead of re-evaluating the helpers at every use.
    train = train_data(name)
    test = test_data(name)

    # Fit on the training split and forecast across the test window.
    TBATS_model = TBATS(seasonal_periods=(7, 30.5))
    train_fit = TBATS_model.fit(train)
    y_pred_TBATS = train_fit.forecast(steps=len(test))

    # Compute MAPE
    mape_TBATS = mean_absolute_percentage_error(test, y_pred_TBATS)

    # Predict future prices from a fit on all available data. Forecasting from
    # the train-only fit would return values inside the test window instead.
    # This second fit adds to the run time.
    history = pd.concat([train, test], axis=0)
    full_fit = TBATS(seasonal_periods=(7, 30.5)).fit(history)
    Future_Price = full_fit.forecast(steps=number_days_predict)

    return np.asarray(Future_Price)[-1], mape_TBATS, y_pred_TBATS


In [None]:
# ###EXAMPLE
# y_pred_TBATS = []
# Future_Price, mape_TBATS, y_pred_TBATS = model_TBATS('ACB', 7)
# print(Future_Price)
# print(mape_TBATS)
# print( y_pred_TBATS)