# 0. Import Libraries

In [7]:
import math
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# 1. Process Data

In [6]:
def transform_and_calculate(file_path, stop_distance=6, rr_ratio=3, lookahead=30):
    """Load an MT5 tab-separated bar export and label every bar with trade outcomes.

    For each bar a hypothetical long and short trade is opened at the bar's
    close (shifted by ``spread * 0.01`` against the trader).  A trade's
    condition is ``True`` when, within the next ``lookahead`` bars, the
    take-profit level is reached strictly before the stop-loss level.  If a
    single bar touches both levels the stop is assumed to be hit first.

    Args:
        file_path: Path to a tab-separated file with the columns ``<DATE>``,
            ``<TIME>``, ``<OPEN>``, ``<HIGH>``, ``<LOW>``, ``<CLOSE>``,
            ``<TICKVOL>`` and ``<SPREAD>`` (extra columns are ignored).
            Dates use ``%Y.%m.%d`` and times ``%H:%M:%S``.
        stop_distance: Distance from entry to stop-loss, in price units.
        rr_ratio: Reward-to-risk ratio; the target sits
            ``stop_distance * rr_ratio`` away from the entry.
        lookahead: Maximum number of following bars inspected per trade.

    Returns:
        DataFrame sorted by ``datetime`` with the columns ``datetime``,
        ``open``, ``high``, ``low``, ``close``, ``vol``, ``spread``,
        ``long_condition`` and ``short_condition``.  Bars near the end of the
        data are evaluated on a shortened window; the final bar has no future
        bars and is always ``False``.
    """

    def load_and_transform(file_path):
        """Read the raw export and return a tidy frame sorted by datetime."""
        # Read date/time as plain strings and combine them explicitly.  The
        # `date_parser=` / dict-valued `parse_dates=` arguments are deprecated
        # in pandas 2.x and unavailable in later releases.
        df = pd.read_csv(
            file_path,
            delimiter='\t',
            usecols=['<DATE>', '<TIME>', '<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>', '<TICKVOL>', '<SPREAD>'],
            dtype={'<DATE>': str, '<TIME>': str},
        )

        # '<OPEN>' -> 'open', etc.
        df.columns = df.columns.str.strip('<>').str.lower()

        df['datetime'] = pd.to_datetime(
            df['date'] + ' ' + df['time'], format='%Y.%m.%d %H:%M:%S'
        )

        df = df.rename(columns={'tickvol': 'vol'})
        df = df[['datetime', 'open', 'high', 'low', 'close', 'vol', 'spread']]

        return df.sort_values('datetime').reset_index(drop=True)

    def first_hit(mask):
        """Return the position of the first True in *mask*, or len(mask) if none."""
        return int(mask.argmax()) if mask.any() else len(mask)

    def calculate_condition_met(df, stop_distance, rr_ratio, lookahead):
        """Add boolean ``long_condition`` / ``short_condition`` columns to *df*."""
        n_rows = len(df)
        close = df['close'].to_numpy(dtype=float)
        high = df['high'].to_numpy(dtype=float)
        low = df['low'].to_numpy(dtype=float)
        spread = df['spread'].to_numpy(dtype=float)

        long_met = [False] * n_rows
        short_met = [False] * n_rows

        for i in range(n_rows):
            window = slice(i + 1, i + lookahead + 1)
            future_high = high[window]
            future_low = low[window]

            # No bars left to evaluate; every later row is in the same
            # situation, so stop here and leave the remaining labels False.
            if future_high.size == 0:
                break

            # Entry prices with spread adjustment
            long_entry = close[i] + spread[i] * 0.01
            short_entry = close[i] - spread[i] * 0.01

            # Stop / target levels for both directions
            long_stop = long_entry - stop_distance
            long_target = long_entry + (stop_distance * rr_ratio)
            short_stop = short_entry + stop_distance
            short_target = short_entry - (stop_distance * rr_ratio)

            # A trade succeeds only if its target is touched on an earlier bar
            # than its stop.  The strict '<' makes the stop win when both are
            # touched on the same bar, and yields False when neither is hit
            # (both positions equal the window length).
            long_met[i] = (
                first_hit(future_high >= long_target)
                < first_hit(future_low <= long_stop)
            )
            short_met[i] = (
                first_hit(future_low <= short_target)
                < first_hit(future_high >= short_stop)
            )

        df['long_condition'] = long_met
        df['short_condition'] = short_met
        return df

    # Execute processing pipeline
    df = load_and_transform(file_path)
    df = calculate_condition_met(df, stop_distance, rr_ratio, lookahead)
    return df

# Usage example: label the raw MT5 export and save the result as CSV.
csv_path = 'Data/MT5/XAUUSD_M15_202012070900_202502282345.csv'
processed_data = transform_and_calculate(file_path=csv_path)
processed_data.to_csv(path_or_buf='Processed_Data/15m_3r.csv', index=False)
print("Data processing complete.")

  df = pd.read_csv(
  df = pd.read_csv(


Data processing complete.


# 2. Neural Network

In [None]:
class PositionalEncoding(nn.Module):
    """Adds fixed sine/cosine positional encodings to a batch-first sequence.

    Even feature indices carry ``sin(position * freq)`` and odd indices carry
    ``cos(position * freq)``.  The table is precomputed for ``max_len``
    positions and stored as a non-trainable buffer named ``pe``.
    """

    def __init__(self, d_model, max_len=5000):
        """
        Args:
            d_model: Feature (embedding) dimension; may be even or odd.
            max_len: Number of positions to precompute.
        """
        super().__init__()
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)  # [max_len, 1]
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        angles = position * div_term  # [max_len, ceil(d_model / 2)]

        pe = torch.zeros(max_len, d_model)  # [max_len, d_model]
        pe[:, 0::2] = torch.sin(angles)  # sine on even indices
        # For odd d_model there is one fewer odd index than even index, so
        # drop the last frequency; for even d_model this slice is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])  # cosine on odd indices

        self.register_buffer('pe', pe.unsqueeze(0))  # [1, max_len, d_model]

    def forward(self, x):
        """
        Args:
            x: Tensor of shape [batch_size, seq_len, d_model]; seq_len must
               not exceed the max_len given at construction.
        Returns:
            x with positional encodings added.
        """
        seq_len = x.size(1)
        return x + self.pe[:, :seq_len]

class PriceTransformer(nn.Module):
    """
    Transformer-encoder template for datetime-ordered price sequences.

    Raw features are linearly projected to the model width, positional
    encodings are added, the sequence runs through a stack of encoder layers,
    and a linear head emits one value per timestep.
    """

    def __init__(self, input_dim, d_model, nhead, num_layers, dim_feedforward, dropout=0.1, max_seq_length=5000):
        """
        Args:
            input_dim: Number of features in the raw price data.
            d_model: Dimension of the model (embedding dimension).
            nhead: Number of attention heads.
            num_layers: Number of Transformer encoder layers.
            dim_feedforward: Dimension of the feedforward network inside each encoder layer.
            dropout: Dropout probability.
            max_seq_length: Maximum sequence length for positional encoding.
        """
        super().__init__()
        # Lift raw features to the model width.
        self.input_proj = nn.Linear(in_features=input_dim, out_features=d_model)
        self.pos_encoder = PositionalEncoding(d_model, max_len=max_seq_length)
        # Encoder stack; the layers use the sequence-first layout.
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
            ),
            num_layers=num_layers,
        )
        self.dropout = nn.Dropout(p=dropout)
        # Regression head: one scalar per timestep.
        self.fc_out = nn.Linear(in_features=d_model, out_features=1)

    def forward(self, src):
        """
        Args:
            src: Tensor of shape [batch_size, seq_len, input_dim] containing price data sorted by datetime.
        Returns:
            Tensor of shape [batch_size, seq_len, 1] representing model predictions.
        """
        embedded = self.pos_encoder(self.input_proj(src))   # [batch, seq_len, d_model]
        # The encoder expects [seq_len, batch, d_model]; swap in, then swap back.
        encoded = self.transformer_encoder(embedded.transpose(0, 1))
        batch_major = encoded.transpose(0, 1)               # [batch, seq_len, d_model]
        return self.fc_out(self.dropout(batch_major))

# -----------------------
# Training Loop Section
# -----------------------
# Smoke-test training run: fits the model to random tensors to check that the
# forward/backward pass works end to end. No real price data is used here.
if __name__ == "__main__":
    # Set seed for reproducibility (fixes the random data and weight init below)
    torch.manual_seed(42)

    # Hyperparameters
    batch_size = 32
    seq_len = 100
    input_dim = 4          # e.g., [open, high, low, close] price features
    num_epochs = 10
    learning_rate = 0.001

    # Create dummy dataset: a single fixed batch of random inputs
    dummy_input = torch.randn(batch_size, seq_len, input_dim)
    # Create dummy target values (regression targets), one per timestep
    dummy_target = torch.randn(batch_size, seq_len, 1)

    # Instantiate the model; max_seq_length (200) covers seq_len (100)
    model = PriceTransformer(input_dim=input_dim, d_model=64, nhead=8, 
                             num_layers=3, dim_feedforward=128, dropout=0.1, max_seq_length=200)
    model.train()  # set the model to training mode

    # Define loss function and optimizer
    criterion = nn.MSELoss()  # Mean Squared Error loss for regression
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop: each "epoch" is one optimizer step on the same batch
    for epoch in range(num_epochs):
        optimizer.zero_grad()          # clear previous gradients
        predictions = model(dummy_input)  # forward pass
        loss = criterion(predictions, dummy_target)  # compute loss
        loss.backward()                # backpropagate gradients
        optimizer.step()               # update model parameters

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")
