<a id="1"></a> <br>
## Thêm thư viện và dữ liệu

In [None]:
# Thêm thư viện
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, GRU
from keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.models import load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint # Added ModelCheckpoint
import itertools

In [None]:
# Thêm dữ liệu
data =  pd.read_csv('../content/Techcombank Stock Price History.csv', header=0)

<a id="2"></a> <br>
## Loại bỏ cột không cần thiết

In [None]:
data.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol,Change %
0,4/23/2025,26100,25750,26300,25150,18230000,3.78%
1,4/22/2025,25150,25250,25650,23950,29520000,-2.33%
2,4/21/2025,25750,26000,26300,25600,12190000,-0.96%
3,4/18/2025,26000,26500,26500,26000,16890000,0.00%
4,4/17/2025,26000,25600,26000,25550,14260000,0.78%


In [None]:
print(data.columns)
print(data.shape)

Index(['Date', 'Price', 'Open', 'High', 'Low', 'Vol', 'Change %'], dtype='object')
(124, 7)


In [None]:
data.iloc[:, 2:-1].describe()

Unnamed: 0,Open,High,Low,Vol
count,124.0,124.0,124.0,124.0
mean,25017.33871,25252.822581,24779.435484,13447580.0
std,1574.995733,1589.88549,1566.260267,9405251.0
min,22300.0,22550.0,22200.0,1240000.0
25%,23800.0,24000.0,23587.5,8040000.0
50%,24400.0,24700.0,24150.0,11055000.0
75%,26212.5,26462.5,26000.0,14642500.0
max,28100.0,28500.0,27850.0,58890000.0


In [None]:
data = data.drop(['Date', 'Change %'], axis=1)

In [None]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 124 entries, 0 to 123
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Price   124 non-null    int64
 1   Open    124 non-null    int64
 2   High    124 non-null    int64
 3   Low     124 non-null    int64
 4   Vol     124 non-null    int64
dtypes: int64(5)
memory usage: 5.0 KB


<a id="24"></a> <br>
## Chuẩn hoá dữ liệu

In [None]:
sc = MinMaxScaler(feature_range=(0, 1))
data_scaled = sc.fit_transform(data)

In [None]:
data_scaled.shape

(124, 5)

In [None]:
def create_data(df, n_future, n_past, train_test_split_percentage, validation_split_percentage):
    n_feature = df.shape[1]
    x_data, y_data = [], []

    for i in range(n_past, len(df) - n_future + 1):
        x_data.append(df[i - n_past:i, 0:n_feature])
        y_data.append(df[i + n_future - 1:i + n_future, 0])

    split_training_test_starting_point = int(round(train_test_split_percentage*len(x_data)))
    split_train_validation_starting_point = int(round(split_training_test_starting_point*(1-validation_split_percentage)))

    x_train = x_data[:split_train_validation_starting_point]
    y_train = y_data[:split_train_validation_starting_point]

    x_val = x_data[split_train_validation_starting_point:split_training_test_starting_point]
    y_val =  x_data[split_train_validation_starting_point:split_training_test_starting_point]

    x_test = x_data[split_training_test_starting_point:]
    y_test = y_data[split_training_test_starting_point:]

    return np.array(x_train), np.array(x_test), np.array(x_val), np.array(y_train), np.array(y_test), np.array(y_val)

In [None]:
X_train, X_test, X_val, y_train, y_test, y_val = create_data(data_scaled, n_future=1, n_past=25, train_test_split_percentage=0.8,
                                               validation_split_percentage = 0)

In [None]:
print(X_train.shape)
print(X_test.shape)

print(y_train.shape)
print(y_test.shape)

(79, 25, 5)
(20, 25, 5)
(79, 1)
(20, 1)


<a id="3"></a> <br>
## Huấn luyện mô hình LSTM


In [None]:
regressor = Sequential()
regressor.add(LSTM(units=16, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
regressor.add(Dropout(0.2))

regressor.add(LSTM(units=16, return_sequences=False))
regressor.add(Dropout(0.2))
regressor.add(Dense(units=1, activation='linear'))
regressor.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError()])

regressor.summary()

  super().__init__(**kwargs)


In [None]:
#Kiểm tra giá trị RMSE của mô hình LSTM thông thường trước khi điều chỉnh siêu tham số
results = regressor.evaluate(X_test, y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 755ms/step - loss: 0.0365 - root_mean_squared_error: 0.1910


<a id="4"></a> <br>
## Điều chỉnh siêu tham số LSTM

In [None]:
def LSTM_HyperParameter_Tuning(config, x_train, y_train, x_test, y_test):

    n_neurons, n_batch_size, dropout = config
    possible_combinations = list(itertools.product(n_neurons, n_batch_size, dropout))

    print(possible_combinations)
    print('\n')

    hist = []

    for i in range(0, len(possible_combinations)):

        print(f'{i+1}th combination: \n')
        print('--------------------------------------------------------------------')

        n_neurons, n_batch_size, dropout = possible_combinations[i]

        regressor = Sequential()
        regressor.add(LSTM(units=n_neurons, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])))
        regressor.add(Dropout(dropout))

        regressor.add(LSTM(units=n_neurons, return_sequences=False))
        regressor.add(Dropout(dropout))
        regressor.add(Dense(units=1, activation='linear'))
        regressor.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError()])

        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)

        file_path = 'best_model.h5'

        mc = ModelCheckpoint(file_path, monitor='val_loss', mode='min', verbose=1, save_best_only=True)

        regressor.fit(x_train, y_train, validation_split=0.3, epochs=40, batch_size=n_batch_size, callbacks=[es, mc], verbose=0)

        train_accuracy = regressor.evaluate(x_train, y_train, verbose=0)
        test_accuracy = regressor.evaluate(x_test, y_test, verbose=0)

        hist.append(list((n_neurons, n_batch_size, dropout,
                          train_accuracy, test_accuracy)))

        print(f'{str(i)}-th combination = {possible_combinations[i]} \n train accuracy: {train_accuracy} and test accuracy: {test_accuracy}')

        print('--------------------------------------------------------------------')
        print('--------------------------------------------------------------------')
        print('--------------------------------------------------------------------')
        print('--------------------------------------------------------------------')

    return hist

In [None]:
config = [[16, 32], [8, 16, 32], [0.2]]

hist = LSTM_HyperParameter_Tuning(config, X_train, y_train, X_test, y_test)

[(16, 8, 0.2), (16, 16, 0.2), (16, 32, 0.2), (32, 8, 0.2), (32, 16, 0.2), (32, 32, 0.2)]


1th combination: 

--------------------------------------------------------------------


  super().__init__(**kwargs)



Epoch 1: val_loss improved from inf to 0.01143, saving model to best_model.h5





Epoch 2: val_loss did not improve from 0.01143

Epoch 3: val_loss did not improve from 0.01143

Epoch 4: val_loss did not improve from 0.01143

Epoch 5: val_loss did not improve from 0.01143

Epoch 6: val_loss improved from 0.01143 to 0.00914, saving model to best_model.h5





Epoch 7: val_loss did not improve from 0.00914

Epoch 8: val_loss did not improve from 0.00914

Epoch 9: val_loss did not improve from 0.00914

Epoch 10: val_loss improved from 0.00914 to 0.00881, saving model to best_model.h5





Epoch 11: val_loss improved from 0.00881 to 0.00747, saving model to best_model.h5





Epoch 12: val_loss improved from 0.00747 to 0.00619, saving model to best_model.h5





Epoch 13: val_loss improved from 0.00619 to 0.00457, saving model to best_model.h5





Epoch 14: val_loss improved from 0.00457 to 0.00436, saving model to best_model.h5





Epoch 15: val_loss improved from 0.00436 to 0.00362, saving model to best_model.h5





Epoch 16: val_loss improved from 0.00362 to 0.00296, saving model to best_model.h5





Epoch 17: val_loss improved from 0.00296 to 0.00281, saving model to best_model.h5





Epoch 18: val_loss improved from 0.00281 to 0.00270, saving model to best_model.h5





Epoch 19: val_loss improved from 0.00270 to 0.00268, saving model to best_model.h5





Epoch 20: val_loss improved from 0.00268 to 0.00257, saving model to best_model.h5





Epoch 21: val_loss did not improve from 0.00257

Epoch 22: val_loss improved from 0.00257 to 0.00250, saving model to best_model.h5





Epoch 23: val_loss did not improve from 0.00250

Epoch 24: val_loss did not improve from 0.00250

Epoch 25: val_loss did not improve from 0.00250

Epoch 26: val_loss did not improve from 0.00250

Epoch 27: val_loss did not improve from 0.00250
Epoch 27: early stopping
0-th combination = (16, 8, 0.2) 
 train accuracy: [0.007247017230838537, 0.08512941747903824] and test accuracy: [0.007919518277049065, 0.0889916718006134]
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
2th combination: 

--------------------------------------------------------------------


  super().__init__(**kwargs)



Epoch 1: val_loss improved from inf to 0.00388, saving model to best_model.h5





Epoch 2: val_loss did not improve from 0.00388

Epoch 3: val_loss did not improve from 0.00388

Epoch 4: val_loss did not improve from 0.00388

Epoch 5: val_loss did not improve from 0.00388

Epoch 6: val_loss improved from 0.00388 to 0.00286, saving model to best_model.h5





Epoch 7: val_loss improved from 0.00286 to 0.00233, saving model to best_model.h5





Epoch 8: val_loss improved from 0.00233 to 0.00230, saving model to best_model.h5





Epoch 9: val_loss did not improve from 0.00230

Epoch 10: val_loss did not improve from 0.00230

Epoch 11: val_loss did not improve from 0.00230

Epoch 12: val_loss improved from 0.00230 to 0.00211, saving model to best_model.h5





Epoch 13: val_loss did not improve from 0.00211

Epoch 14: val_loss did not improve from 0.00211

Epoch 15: val_loss improved from 0.00211 to 0.00201, saving model to best_model.h5





Epoch 16: val_loss did not improve from 0.00201

Epoch 17: val_loss did not improve from 0.00201

Epoch 18: val_loss improved from 0.00201 to 0.00190, saving model to best_model.h5





Epoch 19: val_loss did not improve from 0.00190

Epoch 20: val_loss did not improve from 0.00190

Epoch 21: val_loss did not improve from 0.00190

Epoch 22: val_loss did not improve from 0.00190

Epoch 23: val_loss did not improve from 0.00190
Epoch 23: early stopping
1-th combination = (16, 16, 0.2) 
 train accuracy: [0.005346021149307489, 0.0731164887547493] and test accuracy: [0.011707378551363945, 0.10820063948631287]
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
3th combination: 

--------------------------------------------------------------------


  super().__init__(**kwargs)



Epoch 1: val_loss improved from inf to 0.09088, saving model to best_model.h5





Epoch 2: val_loss improved from 0.09088 to 0.06669, saving model to best_model.h5





Epoch 3: val_loss improved from 0.06669 to 0.04737, saving model to best_model.h5





Epoch 4: val_loss improved from 0.04737 to 0.03211, saving model to best_model.h5





Epoch 5: val_loss improved from 0.03211 to 0.02016, saving model to best_model.h5





Epoch 6: val_loss improved from 0.02016 to 0.01122, saving model to best_model.h5





Epoch 7: val_loss improved from 0.01122 to 0.00562, saving model to best_model.h5





Epoch 8: val_loss improved from 0.00562 to 0.00367, saving model to best_model.h5





Epoch 9: val_loss did not improve from 0.00367

Epoch 10: val_loss did not improve from 0.00367

Epoch 11: val_loss did not improve from 0.00367

Epoch 12: val_loss did not improve from 0.00367

Epoch 13: val_loss did not improve from 0.00367
Epoch 13: early stopping
2-th combination = (16, 32, 0.2) 
 train accuracy: [0.030113346874713898, 0.17353197932243347] and test accuracy: [0.03053266368806362, 0.17473597824573517]
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
4th combination: 

--------------------------------------------------------------------


  super().__init__(**kwargs)



Epoch 1: val_loss improved from inf to 0.00366, saving model to best_model.h5





Epoch 2: val_loss did not improve from 0.00366

Epoch 3: val_loss did not improve from 0.00366

Epoch 4: val_loss did not improve from 0.00366

Epoch 5: val_loss did not improve from 0.00366

Epoch 6: val_loss did not improve from 0.00366
Epoch 6: early stopping
3-th combination = (32, 8, 0.2) 
 train accuracy: [0.011263368651270866, 0.10612902045249939] and test accuracy: [0.00978812389075756, 0.09893494844436646]
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
5th combination: 

--------------------------------------------------------------------


  super().__init__(**kwargs)



Epoch 1: val_loss improved from inf to 0.00349, saving model to best_model.h5





Epoch 2: val_loss did not improve from 0.00349

Epoch 3: val_loss did not improve from 0.00349

Epoch 4: val_loss did not improve from 0.00349

Epoch 5: val_loss improved from 0.00349 to 0.00309, saving model to best_model.h5





Epoch 6: val_loss improved from 0.00309 to 0.00300, saving model to best_model.h5





Epoch 7: val_loss did not improve from 0.00300

Epoch 8: val_loss did not improve from 0.00300

Epoch 9: val_loss did not improve from 0.00300

Epoch 10: val_loss did not improve from 0.00300

Epoch 11: val_loss improved from 0.00300 to 0.00300, saving model to best_model.h5





Epoch 12: val_loss improved from 0.00300 to 0.00256, saving model to best_model.h5





Epoch 13: val_loss did not improve from 0.00256

Epoch 14: val_loss did not improve from 0.00256

Epoch 15: val_loss did not improve from 0.00256

Epoch 16: val_loss improved from 0.00256 to 0.00230, saving model to best_model.h5





Epoch 17: val_loss improved from 0.00230 to 0.00227, saving model to best_model.h5





Epoch 18: val_loss did not improve from 0.00227

Epoch 19: val_loss did not improve from 0.00227

Epoch 20: val_loss improved from 0.00227 to 0.00217, saving model to best_model.h5





Epoch 21: val_loss improved from 0.00217 to 0.00210, saving model to best_model.h5





Epoch 22: val_loss improved from 0.00210 to 0.00209, saving model to best_model.h5





Epoch 23: val_loss did not improve from 0.00209

Epoch 24: val_loss did not improve from 0.00209

Epoch 25: val_loss improved from 0.00209 to 0.00204, saving model to best_model.h5





Epoch 26: val_loss improved from 0.00204 to 0.00196, saving model to best_model.h5





Epoch 27: val_loss did not improve from 0.00196

Epoch 28: val_loss did not improve from 0.00196

Epoch 29: val_loss improved from 0.00196 to 0.00193, saving model to best_model.h5





Epoch 30: val_loss improved from 0.00193 to 0.00191, saving model to best_model.h5





Epoch 31: val_loss did not improve from 0.00191

Epoch 32: val_loss did not improve from 0.00191

Epoch 33: val_loss did not improve from 0.00191

Epoch 34: val_loss did not improve from 0.00191

Epoch 35: val_loss did not improve from 0.00191
Epoch 35: early stopping
4-th combination = (32, 16, 0.2) 
 train accuracy: [0.004876057617366314, 0.06982877105474472] and test accuracy: [0.008704757317900658, 0.09329929202795029]
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
6th combination: 

--------------------------------------------------------------------


  super().__init__(**kwargs)



Epoch 1: val_loss improved from inf to 0.02849, saving model to best_model.h5





Epoch 2: val_loss improved from 0.02849 to 0.00868, saving model to best_model.h5





Epoch 3: val_loss improved from 0.00868 to 0.00391, saving model to best_model.h5





Epoch 4: val_loss did not improve from 0.00391

Epoch 5: val_loss did not improve from 0.00391

Epoch 6: val_loss did not improve from 0.00391

Epoch 7: val_loss did not improve from 0.00391

Epoch 8: val_loss did not improve from 0.00391
Epoch 8: early stopping
5-th combination = (32, 32, 0.2) 
 train accuracy: [0.012830052524805069, 0.11326982080936432] and test accuracy: [0.011545626446604729, 0.10745057463645935]
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------
--------------------------------------------------------------------


<a id="44"></a> <br>
## Chọn mô hình tốt nhất

In [None]:
hist = pd.DataFrame(hist)
hist = hist.sort_values(by=[4], ascending=True)
hist



Unnamed: 0,0,1,2,3,4
0,16,8,0.2,"[0.007247017230838537, 0.08512941747903824]","[0.007919518277049065, 0.0889916718006134]"
4,32,16,0.2,"[0.004876057617366314, 0.06982877105474472]","[0.008704757317900658, 0.09329929202795029]"
3,32,8,0.2,"[0.011263368651270866, 0.10612902045249939]","[0.00978812389075756, 0.09893494844436646]"
5,32,32,0.2,"[0.012830052524805069, 0.11326982080936432]","[0.011545626446604729, 0.10745057463645935]"
1,16,16,0.2,"[0.005346021149307489, 0.0731164887547493]","[0.011707378551363945, 0.10820063948631287]"
2,16,32,0.2,"[0.030113346874713898, 0.17353197932243347]","[0.03053266368806362, 0.17473597824573517]"


In [None]:
print(f'Best Combination: \n n_neurons = {hist.iloc[0, 0]}\n n_batch_size = {hist.iloc[0, 1]}\n dropout = {hist.iloc[0, 2]}')
print('**************************')
print(f'Results Before Tunning:\n Test Set RMSE: {np.round(results, 4)[1]}\n')
print(f'Results After Tunning:\n Test Set RMSE: {np.round(hist.iloc[0, -1], 4)[1]}\n')
print(f'{np.round((results[1] - hist.iloc[0, -1][1])*100/np.round(results, 4)[1])}% Improvement')

Best Combination: 
 n_neurons = 16
 n_batch_size = 8
 dropout = 0.2
**************************
Results Before Tunning:
 Test Set RMSE: 0.191

Results After Tunning:
 Test Set RMSE: 0.089

53.0% Improvement
