In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, Dataset

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from preprocessing_data.utils import format_Dataframes, preprocessing_dataframe

# Preprocessing Data

In [2]:
df = format_Dataframes(data_path="D:\-DSP391m-Forecasting-Financial-Time-Series-With-Transformer\craw_data\FPT_stock.xlsx",
                                 type_file="xlsx")
df

Unnamed: 0,Tên,Ngày,Đóng cửa,Điều chỉnh,Thay đổi,Thay đổi 1,%,Khối lượng (Khớp lệnh),Giá trị (Khớp lệnh),Khối lượng (Thỏa thuận),Giá trị (Thỏa thuận),Mở cửa,Cao nhất,Thấp nhất
0,FPT,2024-05-21,136.0,--,3(2.26,3(2.26%),%),,467.06,734.400,99.45,134.0,136.0,133.5
1,FPT,2024-05-20,133.0,--,-1.5(-1.12,-1.5(-1.12%),%),,270.50,,152.86,135.2,136.1,133.0
2,FPT,2024-05-17,134.5,--,-0.9(-0.66,-0.9(-0.66%),%),,310.27,642.601,86.43,135.4,136.0,133.7
3,FPT,2024-05-16,135.4,--,1.3(0.97,1.3(0.97%),%),,423.78,924.900,119.98,135.6,137.3,134.2
4,FPT,2024-05-15,134.1,--,3.6(2.76,3.6(2.76%),%),,485.07,,139.80,131.5,134.5,131.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4338,FPT,2006-12-19,486.0,2024-02-16 00:00:00,23(4.97,23(4.97%),%),137.52,0.07,0.000,0.00,486.0,486.0,486.0
4339,FPT,2006-12-18,463.0,15.44,22(4.99,22(4.99%),%),215.79,0.10,10.000,0.00,463.0,463.0,463.0
4340,FPT,2006-12-15,441.0,2024-07-14 00:00:00,21(5.00,21(5.00%),%),265.30,0.12,30.000,0.01,441.0,441.0,441.0
4341,FPT,2006-12-14,420.0,14,20(5.00,20(5.00%),%),280.71,0.12,96.000,0.04,420.0,420.0,420.0


In [3]:
df_process = preprocessing_dataframe(df)
df_process

Unnamed: 0_level_0,Đóng cửa,Khối lượng (Khớp lệnh),Giá trị (Khớp lệnh),Khối lượng (Thỏa thuận),Giá trị (Thỏa thuận),Mở cửa,Cao nhất,Thấp nhất
Ngày,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2006-12-13,0.581160,0.083545,0.000035,0.000000,0.000000,0.581226,0.574601,0.581292
2006-12-14,0.612771,0.280761,0.000142,0.096823,0.000049,0.612832,0.605881,0.612893
2006-12-15,0.645962,0.265348,0.000142,0.030257,0.000012,0.646018,0.638724,0.646074
2006-12-18,0.680733,0.215829,0.000118,0.010086,0.000000,0.680784,0.673131,0.680834
2006-12-19,0.717086,0.137545,0.000083,0.000000,0.000000,0.717130,0.709102,0.717175
...,...,...,...,...,...,...,...,...
2024-05-15,0.160898,0.386191,0.572746,0.099514,0.170915,0.156922,0.159368,0.156739
2024-05-16,0.162952,0.386191,0.500378,0.932829,0.146684,0.163401,0.163747,0.161321
2024-05-17,0.161530,0.386191,0.366351,0.648110,0.105667,0.163085,0.161714,0.160531
2024-05-20,0.159159,0.386191,0.319393,0.099514,0.186882,0.162769,0.161871,0.159425


In [4]:
scaler = MinMaxScaler()
data = scaler.fit_transform(df_process[["Đóng cửa"]].values.reshape(-1,1))
data

array([[0.58116011],
       [0.61277067],
       [0.64596175],
       ...,
       [0.16152995],
       [0.15915916],
       [0.16390074]])

# Data Loader

In [5]:
#Create dataset with time windows (window size);
def create_dataset(data, window_size=60):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data[i:i + window_size, 0])
        y.append(data[i + window_size, 0])
    return np.array(X), np.array(y)

window_size = 60
X, y = create_dataset(data, window_size)

#Convert to PyTorch tensor;
X = torch.tensor(X, dtype=torch.float32).unsqueeze(2)
y = torch.tensor(y, dtype=torch.float32)

#split train and test set;
train_size = int(len(X) * 0.75)
train_X, test_X = X[:train_size], X[train_size:]
train_y, test_y = y[:train_size], y[train_size:]

train_dataset = TensorDataset(train_X, train_y)
test_dataset = TensorDataset(test_X, test_y)

#data loader;
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Model and Training

In [6]:
class StockLSTM(nn.Module):
    def __init__(self, input_size=1, hidden_size=50, num_layers=10, output_size=1):
        super(StockLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = StockLSTM().to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100

for epoch in range(num_epochs):
    model.train()
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [10/100], Loss: 0.0013
Epoch [20/100], Loss: 0.0427
Epoch [30/100], Loss: 0.0142
Epoch [40/100], Loss: 0.0009
Epoch [50/100], Loss: 0.0148
Epoch [60/100], Loss: 0.0012
Epoch [70/100], Loss: 0.0005
Epoch [80/100], Loss: 0.0045
Epoch [90/100], Loss: 0.0061
Epoch [100/100], Loss: 0.0007


# Evaluation

In [7]:
model.eval()
with torch.no_grad():
    predictions = []
    actuals = []
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        predictions.extend(outputs.cpu().numpy())
        actuals.extend(targets.cpu().numpy())

#Convert prediction and actual value to original value; 
predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))
actuals = scaler.inverse_transform(np.array(actuals).reshape(-1, 1))

#Evaluation;
mse = mean_squared_error(actuals, predictions)
mae = mean_absolute_error(actuals, predictions)
huber_loss = nn.SmoothL1Loss()(torch.tensor(predictions), torch.tensor(actuals)).item()

print(f'MSE: {mse:.4f}')
print(f'MAE: {mae:.4f}')
print(f'Huber Loss: {huber_loss:.4f}')

MSE: 0.0015
MAE: 0.0338
Huber Loss: 0.0007
