# 1 - Import requirements

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset

from tqdm import tqdm

# 2 - Prepare data

In [None]:
label_mapping = {
    'BUY': 0,
    'SELL': 1,
    'HOLD': 2
}

def map_label(x):
    return label_mapping[x] if x in label_mapping else x

In [None]:
def load_shape(shape_path):
    with open(shape_path, 'r') as f:
        shape = f.readlines()
        n_samples = int(shape[0])
        seq_len = int(shape[1])
        n_features = int(shape[2])
    return n_samples, seq_len, n_features

class TradingDataset(Dataset):
    def __init__(self, save_path, n_samples, sequence_length, n_features):
        self.save_path = save_path
        self.n_samples = n_samples
        self.sequences = np.memmap(f'{save_path}/sequences.dat', dtype=np.float32, mode='r', 
                                 shape=(n_samples, sequence_length, n_features))
        self.labels = np.memmap(f'{save_path}/labels.dat', dtype=np.int64, mode='r', 
                              shape=(n_samples,))
    
    def __len__(self):
        return self.n_samples
    
    def __getitem__(self, idx):
        seq = self.sequences[idx].copy()  # Tạo bản sao writable
        lbl = self.labels[idx].copy()
        return torch.from_numpy(seq).float(), torch.from_numpy(np.array([lbl])).long()[0]

def prepare_transformer_input(train_shape_path, val_shape_path, test_shape_path, data_path, batch_size=32):    
    n_train_samples, sequence_length, n_features = load_shape(train_shape_path)
    n_val_samples, _, _ = load_shape(val_shape_path)
    n_test_samples, _, _ = load_shape(test_shape_path)
    
    train_path = f'{data_path}/train'
    val_path = f'{data_path}/val'
    test_path = f'{data_path}/test'
    
    # Tạo datasets
    train_dataset = Subset(TradingDataset(train_path, n_train_samples, sequence_length, n_features), 
                           range(n_train_samples - 100000, n_train_samples))
    val_dataset = Subset(TradingDataset(val_path, n_val_samples, sequence_length, n_features), 
                           range(n_val_samples - 10000, n_val_samples))
    test_dataset = Subset(TradingDataset(test_path, n_test_samples, sequence_length, n_features), 
                           range(n_test_samples - 1000, n_test_samples))
    
    # Tạo dataloaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    return train_loader, val_loader, test_loader

In [None]:
train_shape_path = 'data/train/train_shape.txt'
val_shape_path = 'data/val/val_shape.txt'
test_shape_path = 'data/test/test_shape.txt'


train_loader, val_loader, test_loader = prepare_transformer_input(
    train_shape_path, val_shape_path, test_shape_path,
    data_path='data',
    batch_size=64
)

In [14]:
# Kiểm tra
sample_batch = next(iter(train_loader))
print("Batch input shape:", sample_batch[0].shape)
print("Batch labels shape:", sample_batch[1].shape)
print("\nExample input shape for Transformer:", sample_batch[0][0].shape)
print(sample_batch[0][0])
print("Number of batches:", len(train_loader))

Batch input shape: torch.Size([64, 60, 24])
Batch labels shape: torch.Size([64])

Example input shape for Transformer: torch.Size([60, 24])
tensor([[ 1.4084e+03,  1.4089e+03,  1.3930e+03,  ..., -9.6593e-01,
          7.8183e-01,  6.2349e-01],
        [ 1.3953e+03,  1.3971e+03,  1.3854e+03,  ..., -8.6603e-01,
          7.8183e-01,  6.2349e-01],
        [ 1.3876e+03,  1.3890e+03,  1.3816e+03,  ..., -7.0711e-01,
          7.8183e-01,  6.2349e-01],
        ...,
        [ 1.3674e+03,  1.3674e+03,  1.3651e+03,  ..., -2.5882e-01,
         -4.3388e-01, -9.0097e-01],
        [ 1.3654e+03,  1.3665e+03,  1.3605e+03,  ..., -5.0000e-01,
         -4.3388e-01, -9.0097e-01],
        [ 1.3629e+03,  1.3630e+03,  1.3566e+03,  ..., -7.0711e-01,
         -4.3388e-01, -9.0097e-01]])
Number of batches: 53851


  return (torch.from_numpy(self.sequences[idx]).float(),


In [15]:
from torch.nn import TransformerEncoder, TransformerEncoderLayer
class TradingTransformer(nn.Module):
    def __init__(self, 
                 num_features, 
                 num_classes=3, 
                 d_model=64,
                 nhead=8,
                 num_layers=3,
                 dropout=0.1):
        super().__init__()
        
        # 1. Feature Projection
        self.input_proj = nn.Linear(num_features, d_model)
        
        # 2. Positional Encoding (Learnable)
        self.pos_encoder = LearnablePositionalEncoding(d_model, dropout)
        
        # 3. Transformer Encoder
        encoder_layers = TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=d_model*4,
            dropout=dropout,
            batch_first=True 
        )
        self.transformer_encoder = TransformerEncoder(encoder_layers, num_layers)
        
        # 4. Classification Head
        self.classifier = nn.Sequential(
            nn.Linear(d_model, d_model//2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model//2, num_classes)
        )
        
        # 5. Initialize weights
        self.init_weights()

    def init_weights(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, src):
        """
        Args:
            src: Tensor shape [batch_size, seq_len, num_features]
        Returns:
            output: Tensor shape [batch_size, num_classes]
        """
        # Project input features
        x = self.input_proj(src)  # [B, S, D]
        
        # Add positional encoding
        x = self.pos_encoder(x)
        
        # Transformer processing
        memory = self.transformer_encoder(x)  # [B, S, D]
        
        # Get last time step output
        last_output = memory[:, -1, :]  # [B, D]
        
        # Classification
        return self.classifier(last_output)


class LearnablePositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.position_emb = nn.Parameter(torch.zeros(max_len, d_model))
        nn.init.normal_(self.position_emb, mean=0, std=0.02)

    def forward(self, x):
        """
        Args:
            x: Tensor shape [B, S, D]
        """
        positions = self.position_emb[:x.size(1), :]  # [S, D]
        x = x + positions.unsqueeze(0)  # [B, S, D]
        return self.dropout(x)

In [16]:
N_FEATURES = 24
model = TradingTransformer(N_FEATURES)

model(sample_batch[0]).shape, sample_batch[1].shape

(torch.Size([64, 3]), torch.Size([64]))

In [17]:
from torchinfo import summary
print(summary(model, (64, 60, 24)))

Layer (type:depth-idx)                        Output Shape              Param #
TradingTransformer                            [64, 3]                   --
├─Linear: 1-1                                 [64, 60, 64]              1,600
├─LearnablePositionalEncoding: 1-2            [64, 60, 64]              320,000
│    └─Dropout: 2-1                           [64, 60, 64]              --
├─TransformerEncoder: 1-3                     [64, 60, 64]              --
│    └─ModuleList: 2-2                        --                        --
│    │    └─TransformerEncoderLayer: 3-1      [64, 60, 64]              49,984
│    │    └─TransformerEncoderLayer: 3-2      [64, 60, 64]              49,984
│    │    └─TransformerEncoderLayer: 3-3      [64, 60, 64]              49,984
├─Sequential: 1-4                             [64, 3]                   --
│    └─Linear: 2-3                            [64, 32]                  2,080
│    └─ReLU: 2-4                              [64, 32]                  

  return torch._transformer_encoder_layer_fwd(


In [None]:
class EarlyStopping:
    def __init__(self, patience=3, min_delta=0.001):
        """
        patience: Số epoch chờ mà không cải thiện trước khi dừng
        min_delta: Độ cải thiện tối thiểu để coi là tốt hơn
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter}/{self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0
            
def eval_model(model, val_loader, criterion, device):
    model.to(device)
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in tqdm(val_loader, unit='batch', desc='\tEvaluating: '):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    epoch_loss = running_loss / len(val_loader)
    epoch_acc = 100 * correct / total
    
    print(f'\tValidation Loss: {epoch_loss:.4f}, Validation Accuracy: {epoch_acc:.2f}%')
    return epoch_loss, epoch_acc

def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, device, patience=3):
    model.to(device)
    
    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []
    
    early_stopping = EarlyStopping(patience=patience, min_delta=0.001)
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for images, labels in tqdm(train_loader, unit='batch', desc=f'Training [{epoch + 1}/{num_epochs}]: '):
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        epoch_loss = running_loss / len(train_loader)
        epoch_acc = 100 * correct / total
        train_losses.append(epoch_loss)
        train_accs.append(epoch_acc)
        print(f'\tTrain Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_acc:.2f}%')

        val_loss, val_acc = eval_model(model, val_loader, criterion, device)
        val_losses.append(val_loss)
        val_accs.append(val_acc)
        
        scheduler.step()
        
        # Kiểm tra Early Stopping
        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Early stopping triggered!")
            break
        print('===================================================')
    
    return train_losses, train_accs, val_losses, val_accs

In [19]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [20]:
NUM_EPOCHS = 20
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

torch.cuda.empty_cache()
train_losses, train_accs, val_losses, val_accs = train_model(model, 
                                                             train_loader, val_loader, 
                                                             criterion, optimizer, scheduler, 
                                                             NUM_EPOCHS, DEVICE)

Training [1/20]: 100%|██████████| 53851/53851 [15:17<00:00, 58.69batch/s]


	Train Loss: 1.0288, Train Accuracy: 49.22%


	Evaluating: 100%|██████████| 5483/5483 [00:30<00:00, 179.51batch/s]


	Validation Loss: 1.0565, Validation Accuracy: 45.96%


Training [2/20]: 100%|██████████| 53851/53851 [15:58<00:00, 56.15batch/s]


	Train Loss: 1.0268, Train Accuracy: 49.41%


	Evaluating: 100%|██████████| 5483/5483 [00:32<00:00, 169.60batch/s]


	Validation Loss: 1.0553, Validation Accuracy: 45.96%


Training [3/20]: 100%|██████████| 53851/53851 [15:57<00:00, 56.26batch/s]


	Train Loss: 1.0266, Train Accuracy: 49.42%


	Evaluating: 100%|██████████| 5483/5483 [00:32<00:00, 169.46batch/s]


	Validation Loss: 1.0560, Validation Accuracy: 45.96%
EarlyStopping counter: 1/7


Training [4/20]: 100%|██████████| 53851/53851 [16:12<00:00, 55.39batch/s]


	Train Loss: 1.0300, Train Accuracy: 49.07%


	Evaluating: 100%|██████████| 5483/5483 [00:36<00:00, 149.30batch/s]


	Validation Loss: 1.0606, Validation Accuracy: 45.96%
EarlyStopping counter: 2/7


Training [5/20]: 100%|██████████| 53851/53851 [16:23<00:00, 54.77batch/s]


	Train Loss: 1.0316, Train Accuracy: 48.72%


	Evaluating: 100%|██████████| 5483/5483 [00:36<00:00, 152.09batch/s]


	Validation Loss: 1.0632, Validation Accuracy: 45.96%
EarlyStopping counter: 3/7


Training [6/20]: 100%|██████████| 53851/53851 [16:21<00:00, 54.84batch/s]


	Train Loss: 1.0328, Train Accuracy: 48.40%


	Evaluating: 100%|██████████| 5483/5483 [00:36<00:00, 151.69batch/s]


	Validation Loss: 1.0632, Validation Accuracy: 45.96%
EarlyStopping counter: 4/7


Training [7/20]: 100%|██████████| 53851/53851 [16:24<00:00, 54.69batch/s]


	Train Loss: 1.0328, Train Accuracy: 48.40%


	Evaluating: 100%|██████████| 5483/5483 [00:36<00:00, 150.52batch/s]


	Validation Loss: 1.0632, Validation Accuracy: 45.96%
EarlyStopping counter: 5/7


Training [8/20]: 100%|██████████| 53851/53851 [16:21<00:00, 54.85batch/s]


	Train Loss: 1.0451, Train Accuracy: 46.94%


	Evaluating: 100%|██████████| 5483/5483 [00:37<00:00, 145.71batch/s]


	Validation Loss: 1.0539, Validation Accuracy: 45.96%


Training [9/20]: 100%|██████████| 53851/53851 [16:22<00:00, 54.82batch/s]


	Train Loss: 1.0453, Train Accuracy: 46.72%


	Evaluating: 100%|██████████| 5483/5483 [00:37<00:00, 145.10batch/s]


	Validation Loss: 1.0539, Validation Accuracy: 45.96%
EarlyStopping counter: 1/7


Training [10/20]: 100%|██████████| 53851/53851 [16:19<00:00, 54.98batch/s]


	Train Loss: 1.0453, Train Accuracy: 46.72%


	Evaluating: 100%|██████████| 5483/5483 [00:35<00:00, 154.14batch/s]


	Validation Loss: 1.0539, Validation Accuracy: 45.96%
EarlyStopping counter: 2/7


Training [11/20]: 100%|██████████| 53851/53851 [16:19<00:00, 54.99batch/s]


	Train Loss: 1.0453, Train Accuracy: 46.72%


	Evaluating: 100%|██████████| 5483/5483 [00:34<00:00, 156.91batch/s]


	Validation Loss: 1.0539, Validation Accuracy: 45.96%
EarlyStopping counter: 3/7


Training [12/20]:  73%|███████▎  | 39173/53851 [11:52<04:27, 54.95batch/s]


KeyboardInterrupt: 

In [21]:
import matplotlib.pyplot as plt
def plot_training_results(train_losses, train_accs, val_losses, val_accs):
    """
    Vẽ biểu đồ kết quả huấn luyện: loss và accuracy cho train và validation.
    
    Parameters:
    - train_losses: List các giá trị loss của train qua các epoch
    - train_accs: List các giá trị accuracy của train qua các epoch
    - val_losses: List các giá trị loss của validation qua các epoch
    - val_accs: List các giá trị accuracy của validation qua các epoch
    """
    epochs = range(1, len(train_losses) + 1)
    
    # Tạo figure với 2x2 subplot
    plt.figure(figsize=(12, 8))
    
    # Subplot 1: Train Loss
    plt.subplot(2, 2, 1)
    plt.plot(epochs, train_losses, 'b-', label='Train Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training Loss')
    plt.legend()
    plt.grid(True)
    
    # Subplot 2: Train Accuracy
    plt.subplot(2, 2, 2)
    plt.plot(epochs, train_accs, 'g-', label='Train Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Training Accuracy')
    plt.legend()
    plt.grid(True)
    
    # Subplot 3: Validation Loss
    plt.subplot(2, 2, 3)
    plt.plot(epochs, val_losses, 'r-', label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Validation Loss')
    plt.legend()
    plt.grid(True)
    
    # Subplot 4: Validation Accuracy
    plt.subplot(2, 2, 4)
    plt.plot(epochs, val_accs, 'm-', label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Validation Accuracy')
    plt.legend()
    plt.grid(True)
    
    # Điều chỉnh layout và hiển thị
    plt.tight_layout()
    plt.show()

plot_training_results(train_losses, train_accs, val_losses, val_accs)

NameError: name 'train_losses' is not defined

In [22]:
torch.save(model.state_dict(), 'model.pth')

In [None]:
def test_model(model, test_loader, device='cuda' if torch.cuda.is_available() else 'cpu'):
    model.to(device)
    model.eval()

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)  # [B, seq_len, n_features]
            batch_y = batch_y.to(device)

            outputs = model(batch_x)  # Expecting [B, num_classes]
            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(batch_y.cpu().numpy())

    target_names = ['BUY', 'SELL', 'HOLD']
    print("Classification Report:")
    print(classification_report(all_labels, all_preds, target_names=target_names, digits=4))

In [None]:
test_model(model, train_loader, DEVICE)

In [None]:
test_model(model, val_loader, DEVICE)

In [None]:
test_model(model, test_loader, DEVICE)