In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, LSTM, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

In [None]:
# Mount Google Drive at /content/drive so that files stored on Drive can be read
# through that path (this import is only available inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the training set from the mounted Drive folder.
df = pd.read_excel('/content/drive/MyDrive/my_data_DS317/train.xlsx')
# The former `df.drop(['gioitinh'], axis=1)` call was removed: its return value was
# never assigned, so it did not modify df. df therefore keeps exactly the same
# columns (and column order) as before, which later cells rely on.
df.head()

Unnamed: 0,mssv,namsinh,gioitinh,drl,diem_tt,dtb_toankhoa,dtb_tichluy,sotc_tichluy,diemtbhk_1,diemtbhk_2,diemtbhk_3,diemtbhk_4,diemtbhk_5,diemtbhk_6,diemtbhk_7,diemtbhk_8
0,F6FD3E0FXPvAibaEXe+A+6mRsShfH4I/F30oKI05,1997.0,1.0,71.0,25.25,5.47,7.57,58.0,7.65,6.96,6.67,4.61,4.33,3.29,0.0,0.0
1,5C03BBC7XPvAibaEXe92Mw4ZEAxz0y7C/nhWwAH1,1998.0,0.0,74.0,21.75,8.12,8.12,152.0,8.25,7.81,7.77,8.76,7.58,8.06,7.89,8.48
2,E09A0934XPvAibaEXe8z3SRqW8+Nf8+oi46vCJ7g,1998.229202,0.871851,84.0,63.084912,7.54,7.54,60.0,7.68,7.55,7.34,6.782704,6.938936,6.884034,7.11743,6.755768
3,D26D2640XPvAibaEXe9mCnb3+Zz/Pc0K5w1JVWo1,1996.0,1.0,77.809338,23.5,0.0,0.0,0.0,0.0,6.533616,6.892076,6.782704,6.938936,6.884034,7.11743,6.755768
4,ADC5F05DXPvAibaEXe+3sZoWnetajz661V0/hMR1,1997.0,1.0,70.0,23.0,7.02,7.09,144.0,7.18,6.78,6.86,6.49,7.26,6.8,7.48,7.03


In [None]:
# Load the held-out test set from the mounted Drive folder.
df_test = pd.read_excel('/content/drive/MyDrive/my_data_DS317/test.xlsx')
# The former `df_test.drop(['gioitinh'], axis=1)` call was removed: its return value
# was never assigned, so it did not modify df_test. The frame keeps exactly the same
# columns (and column order) as before, which later cells rely on.
df_test.head()

Unnamed: 0,mssv,namsinh,gioitinh,drl,diem_tt,dtb_toankhoa,dtb_tichluy,sotc_tichluy,diemtbhk_1,diemtbhk_2,diemtbhk_3,diemtbhk_4,diemtbhk_5,diemtbhk_6,diemtbhk_7,diemtbhk_8
0,B742F005XPvAibaEXe+3hDjf+K3BYYo/DWnzPahg,1999.0,1.0,80.0,24.75,6.49,6.49,145.0,6.3,5.25,5.58,5.65,6.72,7.02,6.17,5.64
1,D9B53F3BXPvAibaEXe+pX9RZhc1jhnwSxwIGVlLq,2001.0,1.0,90.0,22.8,8.0,8.0,122.0,8.41,8.48,7.05,8.75,8.7,7.44,7.91,6.755768
2,C60AAA47XPvAibaEXe+k7kwcfMfIzD1Pr6EnLJFI,2001.0,1.0,95.0,890.0,6.64,6.64,134.0,5.39,3.93,5.08,6.09,7.93,6.46,7.0,6.755768
3,BB726254XPvAibaEXe9fSn/pJ5xyKhcI2wYbq0bU,1998.229202,0.871851,98.0,63.084912,6.916518,7.313278,94.115988,6.987525,6.533616,6.892076,6.782704,6.938936,6.884034,7.11743,6.755768
4,DB98CBA3XPvAibaEXe+9I16dunN50FHH/hVCTI1x,1998.229202,0.871851,0.0,63.084912,6.916518,7.313278,94.115988,6.987525,6.533616,6.892076,6.782704,6.938936,6.884034,7.11743,6.755768


In [None]:

# Builder for the LSTM regression model (called "Phased LSTM" in this notebook)
def build_phased_lstm(input_shape, units=50, learning_rate=0.001):
    """Build and compile a single-layer LSTM regressor.

    Note: despite the function name, the network assembled here is a standard
    Keras ``LSTM`` layer followed by a ``Dense(1)`` head; no phased/time-gate
    mechanism is implemented in this code.

    Args:
        input_shape: Shape of one sample, forwarded unchanged to ``Input``.
        units: Number of LSTM units. Defaults to 50, the previously
            hard-coded value.
        learning_rate: Learning rate for the Adam optimizer. Defaults to
            0.001, the previously hard-coded value.

    Returns:
        A compiled ``Sequential`` model using MSE as the loss and MAE as an
        additional metric.
    """
    model = Sequential([
        Input(shape=input_shape),
        LSTM(units, activation='tanh', recurrent_activation='sigmoid'),
        Dense(1)  # single scalar output per sample
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse', metrics=['mae'])
    return model

# Evaluation metrics
def calculate_metrics(y_true, y_pred):
    """Compute regression metrics for one set of predictions.

    Both inputs are converted to NumPy float arrays first. This keeps the
    percentage errors strictly positional (two pandas Series would otherwise
    be aligned on their index by ``y_true - y_pred``, unlike the scikit-learn
    metrics) and also allows plain Python lists.

    Args:
        y_true: Ground-truth target values (array-like).
        y_pred: Predicted values (array-like), in the same order as ``y_true``.

    Returns:
        dict with the keys "R2 Score", "MSE", "RMSE", "MAE", "MAPE" and "MPE".
        "MAPE" and "MPE" are percentages; both are ``np.nan`` when any value
        in ``y_true`` is zero, because the relative error is undefined there.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)

    if np.all(y_true != 0):
        # Relative error is shared by MAPE (absolute) and MPE (signed).
        relative_error = (y_true - y_pred) / y_true
        mape = np.mean(np.abs(relative_error)) * 100
        mpe = np.mean(relative_error) * 100
    else:
        mape = np.nan
        mpe = np.nan

    r2 = r2_score(y_true, y_pred)
    return {"R2 Score": r2, "MSE": mse, "RMSE": rmse, "MAE": mae, "MAPE": mape, "MPE": mpe}

# Initial feature columns for X
initial_columns = ['namsinh', 'dtb_toankhoa', 'dtb_tichluy', 'sotc_tichluy', 'diemtbhk_1']

# Targets are the remaining per-semester columns, taken in column order.
# They are selected by name instead of by positional offsets, so the loop does not
# depend on how many unrelated columns precede them in df.
target_columns = [
    col for col in df.columns
    if str(col).startswith('diemtbhk_') and col not in initial_columns
]

# Fail early with a readable message if the test frame lacks a needed column.
missing_in_test = [col for col in initial_columns + target_columns if col not in df_test.columns]
assert not missing_in_test, f"df_test is missing columns: {missing_in_test}"

# Seed the Python, NumPy and backend RNGs so weight initialisation and batch
# shuffling are repeatable across runs of this cell.
SEED = 42
set_random_seed(SEED)

# Train one model per target column
results = []
current_X_columns = initial_columns.copy()

for target_column in target_columns:
    # Select X and y for this target
    X_train = df[current_X_columns]
    y_train = df[target_column]

    X_test = df_test[current_X_columns]
    y_test = df_test[target_column]

    # Add a trailing axis so the arrays are (samples, n_columns, 1), matching the
    # 2-D per-sample input_shape handed to the model below.
    # NOTE(review): no feature scaling is applied here (the original comment said
    # "normalize", but only this reshape happens) - consider standardising inputs.
    X_train_lstm = np.expand_dims(X_train.to_numpy(), axis=-1)
    X_test_lstm = np.expand_dims(X_test.to_numpy(), axis=-1)

    # Build and train the model
    lstm_model = build_phased_lstm(input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2]))
    lstm_model.fit(X_train_lstm, y_train.to_numpy(), epochs=10, batch_size=32, verbose=1)

    # Predict on the test set and compute the metrics
    y_pred = lstm_model.predict(X_test_lstm).flatten()
    metrics = calculate_metrics(y_test.to_numpy(), y_pred)
    metrics["Target Column"] = target_column
    results.append(metrics)

    # The column just predicted becomes an input feature for the next target
    current_X_columns.append(target_column)

# Collect the per-target metrics into a DataFrame
results_df = pd.DataFrame(results)

# Show the results
print(results_df)

# Save the results to a CSV file
results_df.to_csv("phased_lstm_results.csv", index=False)


Epoch 1/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 7.7110 - mae: 1.9273
Epoch 2/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 1.1724 - mae: 0.7014
Epoch 3/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.9334 - mae: 0.5901
Epoch 4/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.8789 - mae: 0.5690
Epoch 5/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.7934 - mae: 0.5395
Epoch 6/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.8037 - mae: 0.5356
Epoch 7/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.8181 - mae: 0.5410
Epoch 8/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.7687 - mae: 0.5185
Epoch 9/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms

In [None]:
# Show the per-target metrics table as this cell's output.
results_df

Unnamed: 0,R2 Score,MSE,RMSE,MAE,MAPE,MPE,Target Column
0,0.676351,0.793789,0.890948,0.533659,,,diemtbhk_2
1,0.667771,0.566386,0.752586,0.42118,,,diemtbhk_3
2,0.525151,0.843481,0.918412,0.483507,,,diemtbhk_4
3,0.599793,0.567159,0.7531,0.405748,,,diemtbhk_5
4,0.44311,0.913689,0.955871,0.431936,,,diemtbhk_6
5,0.483732,0.688077,0.829504,0.447526,,,diemtbhk_7
6,0.186957,1.346249,1.16028,0.592345,,,diemtbhk_8


In [None]:
# Print the mean of each headline metric across all target columns,
# one value per line, in the order R2, MSE, RMSE, MAE.
for metric_name in ('R2 Score', 'MSE', 'RMSE', 'MAE'):
    print(results_df[metric_name].mean())

0.5118377634307568
0.8169759155438222
0.8943859700058961
0.4737000722812522
