In [59]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

file_path = 'PNJ.csv'
df = pd.read_csv(file_path)

df['Date/Time'] = pd.to_datetime(df['Date/Time'])

# Tạo cột biến động giá và các đặc trưng bổ sung
df['Price Change'] = df['Close'].shift(-1) - df['Close']
df['Price Change %'] = (df['Close'] - df['Open']) / df['Open']
df['Price Change day'] = df['Close'].diff() 
df['Close-to-Open Ratio'] = df['Close'] / df['Open']
df['High-Low Difference'] = df['High'] - df['Low']
df['VMA_10'] = df['Volume'].rolling(window=10).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()
df['SMA_10'] = df['Close'].rolling(window=10).mean()
df['SMA_20'] = df['Close'].rolling(window=20).mean()
df['STD_20'] = df['Close'].rolling(window=20).std()
df['Upper Band'] = df['SMA_20'] + 2 * df['STD_20']
df['Lower Band'] = df['SMA_20'] - 2 * df['STD_20']
df['Momentum'] = df['Close'].diff(4)  
df = df.dropna()

# Xác định lại đặc trưng (X) và nhãn (y)
X = df[['Open', 'High', 'Low', 'Close', 'Volume', 'Price Change %', 'SMA_10', 'SMA_20', 'STD_20', 'Upper Band', 'Lower Band', 'Momentum', 'Price Change', 'Price Change day', 'Close-to-Open Ratio', 'High-Low Difference', 'VMA_10', 'EMA_10']]
y = df['Price Change']

# Chuẩn hóa dữ liệu (Standardization)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Chia tập dữ liệu thành tập huấn luyện và kiểm tra
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)


In [60]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Mô hình Neuron Network 
model_nn = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

model_nn.compile(optimizer='adam', loss='mean_squared_error')
model_nn.fit(X_train, y_train, epochs=100, batch_size=32)

y_pred_nn = model_nn.predict(X_test)

# Đánh giá mô hình
mae_nn = mean_absolute_error(y_test, y_pred_nn)
rmse_nn = np.sqrt(mean_squared_error(y_test, y_pred_nn))
r2_nn = r2_score(y_test, y_pred_nn)

print(f"MAE (NN): {mae_nn}")
print(f"RMSE (NN): {rmse_nn}")
print(f"R² Score (NN): {r2_nn}")


Epoch 1/100
[1m3133/3133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 381us/step - loss: 0.0372
Epoch 2/100
[1m3133/3133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 368us/step - loss: 5.8334e-04
Epoch 3/100
[1m3133/3133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 367us/step - loss: 2.1858e-04
Epoch 4/100
[1m3133/3133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 393us/step - loss: 1.5655e-04
Epoch 5/100
[1m3133/3133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 410us/step - loss: 8.5271e-05
Epoch 6/100
[1m3133/3133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 384us/step - loss: 6.8101e-05
Epoch 7/100
[1m3133/3133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 374us/step - loss: 6.4596e-05
Epoch 8/100
[1m3133/3133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 386us/step - loss: 4.4755e-05
Epoch 9/100
[1m3133/3133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 386us/step - loss: 3.0395e-05
Epoch 10/100


In [62]:
new_data = X.tail(1)[['Open', 'High', 'Low', 'Close', 'Volume', 'Price Change %', 'SMA_10', 'SMA_20', 'STD_20', 'Upper Band', 'Lower Band', 'Momentum', 'Price Change', 'Price Change day', 'Close-to-Open Ratio', 'High-Low Difference', 'VMA_10', 'EMA_10']].values

new_prediction = model_nn.predict(new_data)

prediction_time = df.iloc[-1]['Date/Time'] + pd.Timedelta(days=1)

print(f"Prediction for {prediction_time}) by Neuron network: {new_prediction[0][0]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step
Prediction for 2020-12-23 14:25:00) by Neuron network: -4.473906517028809


In [63]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Mô hình Random Forest Regressor
model_rf = RandomForestRegressor(n_estimators=100, random_state=42)
model_rf.fit(X_train, y_train)

y_pred_rf = model_rf.predict(X_test)

mae_rf = mean_absolute_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))
r2_rf = r2_score(y_test, y_pred_rf)

print(f"MAE (Random Forest): {mae_rf}")
print(f"RMSE (Random Forest): {rmse_rf}")
print(f"R² Score (Random Forest): {r2_rf}")

MAE (Random Forest): 0.00018703807167568605
RMSE (Random Forest): 0.002888497476371259
R² Score (Random Forest): 0.9996516036136233


In [67]:
new_data = X.tail(1).values
new_data = X.tail(1)

# Chuẩn hóa giữ lại tên cột
new_data_scaled = scaler.transform(new_data)

new_prediction = model_rf.predict(new_data_scaled)

print(f"Prediction for {prediction_time} by Ramdom Forest : {new_prediction[0]}")

Prediction for 2020-12-23 14:25:00 by Ramdom Forest : 0.20000000000000026


In [71]:
from sklearn.ensemble import GradientBoostingRegressor

# Mô hình Gradient Boosting Regressor
model_gb = GradientBoostingRegressor(n_estimators=100, random_state=42)
model_gb.fit(X_train, y_train)

y_pred_gb = model_gb.predict(X_test)

mae_gb = mean_absolute_error(y_test, y_pred_gb)
rmse_gb = np.sqrt(mean_squared_error(y_test, y_pred_gb))
r2_gb = r2_score(y_test, y_pred_gb)

print(f"MAE (Gradient Boosting): {mae_gb}")
print(f"RMSE (Gradient Boosting): {rmse_gb}")
print(f"R² Score (Gradient Boosting): {r2_gb}")

MAE (Gradient Boosting): 0.001028493957500107
RMSE (Gradient Boosting): 0.005656475058924286
R² Score (Gradient Boosting): 0.9986639540035934


In [70]:
new_data = X.tail(1)  # Lấy dòng cuối cùng trong dữ liệu (DataFrame)
new_data_scaled = scaler.transform(new_data)  # Chuẩn hóa dữ liệu mới

# Dự đoán giá cổ phiếu cho dòng dữ liệu mới
new_prediction = model_gb.predict(new_data_scaled)

# In kết quả dự đoán cho dòng dữ liệu mới
print(f"Prediction for {prediction_time} by Gradient Boosting: {new_prediction[0]}")


Prediction for 2020-12-23 14:25:00 by Gradient Boosting: 0.19924321606786835
