# 1 - Import requirements

In [None]:
# !pip install pytorch-optimizer

In [None]:
import os
import math
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

import torch
import torch.nn as nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import Dataset, DataLoader, Subset
import torch.nn.functional as F
# from pytorch_optimizer import SAM

from tqdm import tqdm

# 2 - Prepare data

In [None]:
# Integer class ids assigned to the trading-signal label names.
label_mapping = {'BUY': 0, 'SELL': 1, 'HOLD': 2}

def map_label(x):
    """Return the class id for a known label name; any other value is returned unchanged."""
    return label_mapping.get(x, x)

In [None]:
def load_shape(shape_path):
    """Read a dataset shape descriptor.

    The file at ``shape_path`` holds one integer per line; the first three
    lines are returned as ``(n_samples, seq_len, n_features)``.
    """
    with open(shape_path, 'r') as f:
        lines = f.readlines()
    return int(lines[0]), int(lines[1]), int(lines[2])

class TradingDataset(Dataset):
    """Map-style dataset backed by two read-only memory-mapped files.

    ``{save_path}/sequences.dat`` is opened as float32 with shape
    ``(n_samples, sequence_length, n_features)`` and ``{save_path}/labels.dat``
    as int64 with shape ``(n_samples,)``.
    """

    def __init__(self, save_path, n_samples, sequence_length, n_features):
        self.save_path = save_path
        self.n_samples = n_samples
        sequence_shape = (n_samples, sequence_length, n_features)
        self.sequences = np.memmap(f'{save_path}/sequences.dat', dtype=np.float32,
                                   mode='r', shape=sequence_shape)
        self.labels = np.memmap(f'{save_path}/labels.dat', dtype=np.int64,
                                mode='r', shape=(n_samples,))

    def __len__(self):
        return self.n_samples

    def __getitem__(self, idx):
        # np.array() copies the data out of the read-only memmap, so the
        # resulting tensors are backed by writable memory.
        features = np.array(self.sequences[idx])
        target = np.array([self.labels[idx]])
        return torch.from_numpy(features).float(), torch.from_numpy(target).long()[0]

def prepare_transformer_input(train_shape_path, val_shape_path, test_shape_path, data_path,
                              batch_size=32, shuffle_train=False):
    """Build train/val/test DataLoaders over the memory-mapped splits under ``data_path``.

    Each split lives in ``{data_path}/train``, ``{data_path}/val`` and
    ``{data_path}/test`` and is opened with the shape declared in its own
    shape file, so a split whose sequence length or feature count differs
    from the training split is not read with the wrong layout.

    Parameters:
    - train_shape_path, val_shape_path, test_shape_path: shape descriptor
      files read by ``load_shape``.
    - data_path: directory containing the ``train``/``val``/``test`` folders.
    - batch_size: batch size used for all three loaders.
    - shuffle_train: whether the training loader shuffles samples. Defaults
      to False, which keeps the on-disk order; validation and test loaders
      are never shuffled.

    Returns:
    ``(train_loader, val_loader, test_loader)``.
    """
    split_shape_paths = (
        ('train', train_shape_path),
        ('val', val_shape_path),
        ('test', test_shape_path),
    )

    # Create datasets, each with the dimensions from its own shape file.
    datasets = {}
    for split, shape_path in split_shape_paths:
        n_samples, sequence_length, n_features = load_shape(shape_path)
        datasets[split] = TradingDataset(f'{data_path}/{split}', n_samples,
                                         sequence_length, n_features)

    # Create dataloaders.
    train_loader = DataLoader(datasets['train'], batch_size=batch_size, shuffle=shuffle_train)
    val_loader = DataLoader(datasets['val'], batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(datasets['test'], batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader

In [None]:
# Shape descriptor files for each split (parsed by load_shape).
train_shape_path = '/kaggle/input/processed-xauusd/xau5m/train/shape.txt'
val_shape_path = '/kaggle/input/processed-xauusd/xau5m/val/shape.txt'
test_shape_path = '/kaggle/input/processed-xauusd/xau5m/test/shape.txt'

# Build one DataLoader per split from the files under data_path.
train_loader, val_loader, test_loader = prepare_transformer_input(
    train_shape_path, val_shape_path, test_shape_path,
    data_path='/kaggle/input/processed-xauusd/xau5m',
    batch_size=32
)

In [None]:
# Sanity check: pull the first training batch and print its shapes and first sample.
sample_batch = next(iter(train_loader))
print("Batch input shape:", sample_batch[0].shape)
print("Batch labels shape:", sample_batch[1].shape)
print("\nExample input shape for Transformer:", sample_batch[0][0].shape)
print(sample_batch[0][0])
print("Number of batches:", len(train_loader))

# 3 - Build model

In [None]:
class InceptionModule(nn.Module):
    """Four parallel 1-D branches whose outputs are concatenated on the channel axis.

    Three convolutions (kernel sizes 1, 3 and 5, 'same' padding) and one
    max-pool + 1x1 convolution branch each emit 32 channels, giving 128
    output channels; the length dimension is preserved.
    """

    def __init__(self, in_channels):
        super().__init__()
        # Registered in the order branch1, branch3, branch5, branch_pool.
        for width in (1, 3, 5):
            conv = nn.Conv1d(in_channels, 32, kernel_size=width, padding='same')
            setattr(self, f'branch{width}', conv)
        self.branch_pool = nn.Sequential(
            nn.MaxPool1d(kernel_size=3, stride=1, padding=1),
            nn.Conv1d(in_channels, 32, kernel_size=1),
        )

    def forward(self, x):
        branches = (self.branch1, self.branch3, self.branch5, self.branch_pool)
        return torch.cat([branch(x) for branch in branches], dim=1)

class Time2Vec(nn.Module):
    """Append a learned time embedding of width ``kernel_size`` to every time step.

    The embedding of position ``t`` is one linear component plus
    ``kernel_size - 1`` sinusoidal components, ``sin(periodic(t))``.

    Parameters:
    - kernel_size: number of embedding features concatenated to the input.
    - max_len: number of positions precomputed in the ``t`` buffer. Longer
      sequences are still supported; their positions are generated on the fly.
    """

    def __init__(self, kernel_size, max_len=512):
        super().__init__()
        self.linear = nn.Linear(1, 1)
        self.periodic = nn.Linear(1, kernel_size - 1)
        self.register_buffer('t', torch.arange(max_len).float().unsqueeze(1))  # [max_len, 1]

    def forward(self, x):
        """Return ``x`` ([batch, seq_len, n_features]) with the embedding concatenated on the last axis."""
        seq_len = x.size(1)
        if seq_len <= self.t.size(0):
            t = self.t[:seq_len]  # [seq_len, 1]
        else:
            # Slicing the buffer would silently yield only max_len positions and
            # make the final concatenation fail, so build the positions directly.
            t = torch.arange(seq_len, device=self.t.device, dtype=self.t.dtype).unsqueeze(1)
        lin = self.linear(t)  # [seq_len, 1]
        sin = torch.sin(self.periodic(t))  # [seq_len, k - 1]
        time_emb = torch.cat([lin, sin], dim=1)  # [seq_len, k]
        # expand() broadcasts over the batch without materialising copies;
        # torch.cat below produces the real output tensor.
        time_emb = time_emb.unsqueeze(0).expand(x.size(0), -1, -1)  # [batch_size, seq_len, k]
        return torch.cat([x, time_emb], dim=-1)

class CrossAttentionFusion(nn.Module):
    """Single-query scaled dot-product attention from CNN features over transformer features.

    The CNN vector is projected to one query per sample; keys and values are
    projections of the transformer sequence. The result is the
    attention-weighted sum of the values.
    """

    def __init__(self, cnn_dim, transformer_dim):
        super().__init__()
        self.query = nn.Linear(cnn_dim, transformer_dim)
        self.key = nn.Linear(transformer_dim, transformer_dim)
        self.value = nn.Linear(transformer_dim, transformer_dim)

    def forward(self, cnn_features, transformer_features):
        queries = self.query(cnn_features).unsqueeze(1)   # [batch, 1, transformer_dim]
        keys = self.key(transformer_features)             # [batch, seq_len, transformer_dim]
        values = self.value(transformer_features)         # [batch, seq_len, transformer_dim]

        scale = keys.size(-1) ** 0.5
        scores = torch.matmul(queries, keys.transpose(-2, -1)) / scale  # [batch, 1, seq_len]
        weights = scores.softmax(dim=-1)

        attended = torch.bmm(weights, values)             # [batch, 1, transformer_dim]
        return attended.squeeze(1)                        # [batch, transformer_dim]

class HighwayNetwork(nn.Module):
    """Gated blend of two same-sized feature vectors.

    A sigmoid gate computed from ``fused`` weights ``fused`` itself, and its
    complement weights ``transformer``.
    """

    def __init__(self, d_model):
        super().__init__()
        self.gate = nn.Sequential(nn.Linear(d_model, d_model), nn.Sigmoid())

    def forward(self, fused, transformer):
        gate = self.gate(fused)
        gated_fused = gate * fused
        carried = (1 - gate) * transformer
        return gated_fused + carried

class EnhancedHybridModel(nn.Module):
    """Hybrid classifier combining an Inception-style CNN branch with a transformer encoder.

    The CNN branch summarises the sequence into a 128-dim vector, which is
    used as the query of a cross-attention over the transformer outputs. A
    highway gate blends the attended vector with the mean transformer
    output before the classification head.

    Input ``x`` is [batch, seq_len, num_features]; the output is
    [batch, num_classes] logits.
    """

    def __init__(self, num_features, num_classes=3, d_model=512, nhead=16, dim_feedforward=1024, num_layers=6):
        super().__init__()
        cnn_channels = 128  # channel count produced by each InceptionModule

        # 1. InceptionTime branch
        self.inception = nn.Sequential(
            InceptionModule(num_features),
            nn.ReLU(),
            nn.MaxPool1d(2),
            InceptionModule(cnn_channels),
            nn.ReLU(),
        )

        # 2. Transformer branch: Time2Vec appends num_features embedding
        #    columns, hence the projection input width of num_features * 2.
        self.time2vec = Time2Vec(num_features)
        self.transformer_proj = nn.Linear(num_features * 2, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # 3. Fusion
        self.cross_attention = CrossAttentionFusion(cnn_channels, d_model)
        self.highway = HighwayNetwork(d_model)

        # 4. Classifier head
        self.classifier = nn.Sequential(
            nn.Linear(d_model, 128),
            nn.LayerNorm(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        # 1. Inception path: Conv1d expects channels first; average over time afterwards.
        channels_first = x.permute(0, 2, 1)
        cnn_summary = self.inception(channels_first).mean(dim=-1)   # [batch, 128]

        # 2. Transformer path
        tokens = self.transformer_proj(self.time2vec(x))            # [batch, seq_len, d_model]
        encoded = self.transformer(tokens)                          # [batch, seq_len, d_model]

        # 3. Fusion
        attended = self.cross_attention(cnn_summary, encoded)       # [batch, d_model]
        blended = self.highway(attended, encoded.mean(dim=1))       # [batch, d_model]

        return self.classifier(blended)

In [None]:
# Number of input features per time step the model is built for.
N_FEATURES = 15
# All other hyperparameters use the EnhancedHybridModel defaults.
model = EnhancedHybridModel(num_features=N_FEATURES) 

In [None]:
# class Time2Vec(nn.Module):
#     def __init__(self, k, max_len=512):
#         super().__init__()
#         self.linear = nn.Linear(1, 1)
#         self.periodic = nn.Linear(1, k - 1)
#         self.register_buffer('t', torch.arange(max_len).float().unsqueeze(1)) # [max_len, 1]

#     def forward(self, x):
#         # x = [batch_size, seq_len, n_features]
#         seq_len = x.size(1)
#         t = self.t[:seq_len] # [seq_len, 1]
#         lin = self.linear(t) # [seq_len, 1]
#         sin = torch.sin(self.periodic(t)) # [seq_len, k - 1]
#         time_emb = torch.cat([lin, sin], dim=1) # [seq_len, k]
#         time_emb = time_emb.unsqueeze(0).repeat(x.size(0), 1, 1) # [batch_size, seq_len, k]
#         return torch.cat([x, time_emb], dim=-1)

# class TradingTF(nn.Module):
#     def __init__(self, seq_len, num_classes, input_dim, time2vec_dim, d_model, nhead, num_layers, dropout=0.5):
#         super().__init__()
#         self.time2vec = Time2Vec(time2vec_dim)
#         self.input_projection = nn.Linear(input_dim + time2vec_dim, d_model)
#         encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
#         self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
#         self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
#         self.dropout = nn.Dropout(dropout)
#         self.fc1 = nn.Linear(d_model, 32)
#         self.fc2 = nn.Linear(32, num_classes)

#     def forward(self, x):
#         x = self.time2vec(x)
#         x = self.input_projection(x)
#         x = self.encoder(x)
#         x = x.transpose(1, 2)
#         x = self.global_avg_pool(x).squeeze(-1)
#         x = self.dropout(x)
#         x = F.relu((self.fc1(x)))
#         x = self.dropout(x)
#         return self.fc2(x)

# model = TradingTF(seq_len=128, num_classes=3, input_dim=21, time2vec_dim=16, d_model=128, nhead=4, num_layers=2, dropout=0.1)

In [None]:
# Smoke test: run the model on the sample batch and display the output and label shapes.
model(sample_batch[0]).shape, sample_batch[1].shape

In [None]:
from torchinfo import summary
# Print a summary of the model for an input of size (32, 60, 15)
# — presumably (batch, seq_len, n_features); TODO confirm against the data shape.
print(summary(model, (32, 60, 15)))

# 4 - Train and Evaluate model

In [None]:
class EarlyStopping:
    """Signal when the validation loss has stopped improving.

    Call the instance once per epoch with the validation loss; ``early_stop``
    becomes True after ``patience`` consecutive calls without improvement.
    """

    def __init__(self, patience=3, min_delta=0.001):
        """
        patience: number of epochs without improvement to wait before stopping
        min_delta: minimum decrease of the loss that counts as an improvement
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if math.isnan(val_loss):
            # Every comparison with NaN is False, so without this guard a NaN
            # loss would be taken as an improvement: it would reset the
            # counter and overwrite best_loss with NaN.
            self._count_stalled_epoch()
        elif self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self._count_stalled_epoch()
        else:
            self.best_loss = val_loss
            self.counter = 0

    def _count_stalled_epoch(self):
        """Record one epoch without improvement and raise the stop flag once patience is used up."""
        self.counter += 1
        print(f'EarlyStopping counter: {self.counter}/{self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True
            
def eval_model(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without gradient tracking.

    Returns ``(loss, accuracy)``, where ``loss`` is the mean of the per-batch
    criterion values and ``accuracy`` is the percentage of samples whose
    arg-max prediction equals the label.
    """
    model.to(device)
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    progress = tqdm(val_loader, unit='batch', desc='\tEvaluating: ')
    with torch.no_grad():
        for step, (inputs, targets) in enumerate(progress, start=1):
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)
            loss_sum += criterion(logits, targets).item()

            predicted = logits.data.argmax(dim=-1)
            n_seen += targets.size(0)
            n_correct += (predicted == targets).sum().item()
            # Running mean over the batches processed so far.
            progress.set_postfix(loss=(loss_sum / step))

    return loss_sum / len(val_loader), 100 * n_correct / n_seen

def train_model(model, train_loader, criterion, optimizer, device, scheduler=None):
    """Train ``model`` for one epoch over ``train_loader``.

    Gradients are clipped to a total norm of 1.0 before every optimizer step.
    If a ``scheduler`` is given it is stepped once, after the epoch.

    Returns ``(loss, accuracy)``, where ``loss`` is the mean of the per-batch
    criterion values and ``accuracy`` is the percentage of samples whose
    arg-max prediction equals the label.
    """
    model.to(device)
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    progress = tqdm(train_loader, unit='batch', desc='\tTraining: ')
    for step, (inputs, targets) in enumerate(progress, start=1):
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, targets)

        batch_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        loss_sum += batch_loss.item()
        predicted = logits.data.argmax(dim=-1)
        n_seen += targets.size(0)
        n_correct += (predicted == targets).sum().item()
        # Running mean over the batches processed so far.
        progress.set_postfix(loss=(loss_sum / step))

    mean_loss = loss_sum / len(train_loader)
    accuracy = 100 * n_correct / n_seen

    if scheduler is not None:
        scheduler.step()

    return mean_loss, accuracy

In [None]:
# Use the GPU when one is available.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE(review): these paths point at .../classification/train, whereas the
# DataLoaders are built from .../xau5m — confirm which dataset is intended.
# In the visible code, train_labels is only used by the commented-out
# class-weight computation below.
n_train_samples, _, _ = load_shape('/kaggle/input/processed-xauusd/classification/train/shape.txt')
train_labels = np.memmap(f'/kaggle/input/processed-xauusd/classification/train/labels.dat', dtype=np.int64, mode='r', 
                              shape=(n_train_samples,))
# class_weights = compute_class_weight(
#     'balanced', 
#     classes=np.unique(train_labels), 
#     y=train_labels
# )
# criterion = nn.CrossEntropyLoss(weight=torch.tensor(class_weights, dtype=torch.float32).to(DEVICE))
# ignore_index=2: targets equal to 2 ('HOLD' in label_mapping) do not contribute to the loss.
criterion = nn.CrossEntropyLoss(ignore_index=2)

optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
# Training budget and early-stopping settings.
NUM_EPOCHS = 50
PATIENCE = 10
MIN_DELTA = 0.0001
torch.cuda.empty_cache()

# model = nn.DataParallel(model)

# Per-epoch history, plotted after training.
train_losses = []
train_accs = []
val_losses = []
val_accs = []

early_stopping = EarlyStopping(patience=PATIENCE, min_delta=MIN_DELTA)

for epoch in range(NUM_EPOCHS):
    print(f'Epoch [{epoch + 1}/{NUM_EPOCHS}]')
    
    # One training pass followed by one validation pass.
    train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, DEVICE)
    val_loss, val_acc = eval_model(model, val_loader, criterion, DEVICE)
    
    print(f'\tTrain Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%')
    print(f'\tVal Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.2f}%')
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    val_losses.append(val_loss)
    val_accs.append(val_acc)
    
    # Check early stopping on the validation loss.
    # NOTE(review): no checkpoint is kept here, so after the loop `model`
    # holds the weights of the last epoch run, not of the best epoch.
    early_stopping(val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered!")
        break
    print('===================================================')

In [None]:
import matplotlib.pyplot as plt
def plot_training_results(train_losses, train_accs, val_losses, val_accs):
    """
    Plot the training results: loss and accuracy for train and validation.

    Parameters:
    - train_losses: list of training loss values, one per epoch
    - train_accs: list of training accuracy values, one per epoch
    - val_losses: list of validation loss values, one per epoch
    - val_accs: list of validation accuracy values, one per epoch
    """
    epochs = range(1, len(train_losses) + 1)

    # One entry per subplot, in grid order:
    # (series, line format, legend label, y-axis label, title)
    panels = [
        (train_losses, 'b-', 'Train Loss', 'Loss', 'Training Loss'),
        (train_accs, 'g-', 'Train Accuracy', 'Accuracy (%)', 'Training Accuracy'),
        (val_losses, 'r-', 'Validation Loss', 'Loss', 'Validation Loss'),
        (val_accs, 'm-', 'Validation Accuracy', 'Accuracy (%)', 'Validation Accuracy'),
    ]

    # Single figure holding a 2x2 grid of subplots.
    plt.figure(figsize=(12, 8))
    for position, (series, line_format, legend_label, y_label, title) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        plt.plot(epochs, series, line_format, label=legend_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.grid(True)

    # Adjust the layout and display the figure.
    plt.tight_layout()
    plt.show()

plot_training_results(train_losses, train_accs, val_losses, val_accs)

In [None]:
# Save the model's current weights (state_dict only) to model.pth.
torch.save(model.state_dict(), 'model.pth')

In [None]:
def test_model(model, test_loader, device='cuda' if torch.cuda.is_available() else 'cpu'):
    """Print a confusion matrix and classification report for ``model`` on ``test_loader``.

    Parameters:
    - model: classifier returning [B, num_classes] logits.
    - test_loader: iterable of ``(inputs, labels)`` batches.
    - device: device used for inference.

    Class ids 0, 1 and 2 are reported as BUY, SELL and HOLD. The label set is
    passed to scikit-learn explicitly so that both tables always cover all
    three classes, including one that is absent from this split's labels
    and predictions.
    """
    model.to(device)
    model.eval()

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            outputs = model(batch_x.to(device))  # Expecting [B, num_classes]
            preds = torch.argmax(outputs, dim=-1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(batch_y.cpu().numpy())

    target_names = ['BUY', 'SELL', 'HOLD']
    # Without an explicit label list scikit-learn infers the classes from the
    # data; a missing class then gives a smaller matrix and a ValueError when
    # it is paired with the three names.
    class_ids = list(range(len(target_names)))
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    print('Confusion matrix:')
    print(pd.DataFrame(cm, columns=target_names, index=target_names))
    print("Classification Report:")
    print(classification_report(all_labels, all_preds, labels=class_ids,
                                target_names=target_names, digits=4, zero_division=0))

In [None]:
# Report metrics on the training split.
test_model(model, train_loader, DEVICE)

In [None]:
# Report metrics on the validation split.
test_model(model, val_loader, DEVICE)

In [None]:
# Report metrics on the test split.
test_model(model, test_loader, DEVICE)

In [None]:
def get_prediction(model, inputs, device):
    """Return the arg-max class index for each row of ``inputs`` as a NumPy array.

    ``inputs`` is a [B, S, N] batch; the model is moved to ``device`` and put
    in eval mode, and inference runs without gradient tracking.
    """
    model.to(device)
    model.eval()
    with torch.no_grad():
        logits = model(inputs.to(device))
        class_ids = logits.argmax(dim=-1)
    return class_ids.cpu().numpy()

In [None]:
# Compare predictions with the labels on the first test batch.
batch = next(iter(test_loader))
inputs, labels = batch[0], batch[1]

print("Prediction: ", get_prediction(model, inputs, DEVICE))
print("Ground Truth: ", labels.cpu().numpy())