In [18]:
import numpy as np
import pandas as pd
import optuna
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt

In [19]:
# Load data
train_df = pd.read_csv(r"D:\Du An\Data FLow\v1\data\test.csv")
test_df = pd.read_csv(r"D:\Du An\Data FLow\v1\data\test.csv")

In [20]:
# 2. Xử lý dữ liệu
train_df.dropna(subset=['Revenue', 'Units'], inplace=True)
test_df.dropna(subset=['Revenue', 'Units'], inplace=True)
train_df['Date'] = pd.to_datetime(train_df['Date'])
test_df['Date'] = pd.to_datetime(test_df['Date'])
train_df.sort_values(by='Date', inplace=True)
test_df.sort_values(by='Date', inplace=True)

In [21]:
# 3. Kiểm tra tính dừng bằng Dickey-Fuller test
def check_stationarity(series):
    result = adfuller(series.dropna())
    return result[1] < 0.05  # True nếu dữ liệu dừng

if not check_stationarity(train_df['Revenue']):
    train_df['Revenue'] = train_df['Revenue'].diff().dropna()
if not check_stationarity(train_df['Units']):
    train_df['Units'] = train_df['Units'].diff().dropna()


In [22]:
# Normalize data
scaler = MinMaxScaler()
scaled_train = scaler.fit_transform(train_df[['Revenue', 'Units']])
scaled_test = scaler.transform(test_df[['Revenue', 'Units']])

In [23]:
# Convert to supervised learning
sequence_length = 30  # 30 ngày trước để dự đoán

def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i+sequence_length])
        y.append(data[i+sequence_length])
    return np.array(X), np.array(y)

X_train, y_train = create_sequences(scaled_train, sequence_length)
X_test, y_test = create_sequences(scaled_test, sequence_length)

In [24]:
# Define LSTM model
def build_lstm_model():
    model = Sequential([
        LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(2)  # 2 đầu ra: Revenue, Units
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

In [25]:
# Early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)


In [26]:
# Train model
model = build_lstm_model()
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32, callbacks=[early_stopping])


  super().__init__(**kwargs)


Epoch 1/100
[1m2332/2332[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 29ms/step - loss: 2.1867e-04 - val_loss: 2.1841e-04
Epoch 2/100
[1m2332/2332[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 36ms/step - loss: 2.0198e-04 - val_loss: 2.1817e-04
Epoch 3/100
[1m2332/2332[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 49ms/step - loss: 2.1979e-04 - val_loss: 2.1953e-04
Epoch 4/100
[1m2332/2332[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 58ms/step - loss: 2.0681e-04 - val_loss: 2.2068e-04
Epoch 5/100
[1m2332/2332[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 56ms/step - loss: 2.0181e-04 - val_loss: 2.1829e-04
Epoch 6/100
[1m2332/2332[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 41ms/step - loss: 2.4092e-04 - val_loss: 2.1799e-04
Epoch 7/100
[1m2332/2332[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 35ms/step - loss: 2.1055e-04 - val_loss: 2.2006e-04
Epoch 8/100
[1m2332/2332[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [27]:
# Predictions
y_pred = model.predict(X_test)
y_test_inv = scaler.inverse_transform(y_test)
y_pred_inv = scaler.inverse_transform(y_pred)


[1m2332/2332[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 6ms/step


In [28]:
# Evaluation
rmse = mean_squared_error(y_test_inv, y_pred_inv, squared=False)
mape = np.mean(np.abs((y_test_inv - y_pred_inv) / y_test_inv)) * 100
r2 = r2_score(y_test_inv, y_pred_inv)

print(f"RMSE: {rmse}")
print(f"MAPE: {mape}%")
print(f"R²: {r2}")

RMSE: 2897.2308427918633
MAPE: 62.4695512237941%
R²: 0.004061204764421178


