In [1]:
import torch 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


Using device: cuda


In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

# Required versions:
# torch>=2.0.0
# numpy>=1.21.0
# pandas>=1.3.0
# scikit-learn>=0.24.0
# matplotlib>=3.3.0

# Check CUDA availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Custom Dataset
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X).to(device)
        self.y = torch.FloatTensor(y).reshape(-1, 1).to(device)
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, sequence_length, hidden_size=128):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(input_size=1, hidden_size=hidden_size, 
                           num_layers=1, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=hidden_size, hidden_size=64, 
                           num_layers=1, batch_first=True)
        self.lstm3 = nn.LSTM(input_size=64, hidden_size=32, 
                           num_layers=1, batch_first=True)
        
        self.bn1 = nn.BatchNorm1d(sequence_length)
        self.bn2 = nn.BatchNorm1d(sequence_length)
        self.bn3 = nn.BatchNorm1d(sequence_length)
        
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(32, 1)
        
    def forward(self, x):
        # First LSTM layer
        x, _ = self.lstm1(x)
        x = self.bn1(x)
        x = self.dropout(x)
        
        # Second LSTM layer
        x, _ = self.lstm2(x)
        x = self.bn2(x)
        x = self.dropout(x)
        
        # Third LSTM layer
        x, _ = self.lstm3(x)
        x = self.bn3(x)
        x = self.dropout(x)
        
        # Take the last time step
        x = x[:, -1, :]
        x = self.linear(x)
        return x

def load_and_preprocess_data(file_path, split=0.8, sequence_length=10):
    # Load the dataset
    stock_data = pd.read_csv(file_path, delimiter=";")
    stock_data['Date'] = pd.to_datetime(stock_data['Date'], format='%Y.%m.%d %H:%M')
    stock_data = stock_data.sort_values(by='Date')
    
    # Extract close prices
    close_prices = stock_data['Close'].values.reshape(-1, 1)
    
    # Split data
    train_size = int(len(close_prices) * split)
    train = close_prices[:train_size]
    test = close_prices[train_size:]
    
    # Scale data
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_scaled = scaler.fit_transform(train)
    test_scaled = scaler.transform(test)
    
    # Create sequences
    X_train, y_train = create_sequences(train_scaled, sequence_length)
    X_test, y_test = create_sequences(test_scaled, sequence_length)
    
    return X_train, y_train, X_test, y_test, scaler

def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:(i + sequence_length)])
        y.append(data[i + sequence_length])
    return np.array(X), np.array(y)

def calculate_direction_accuracy(actual_values, predicted_values):
    actual_direction = (actual_values[1:] > actual_values[:-1]).astype(int)
    predicted_direction = (predicted_values[1:] > predicted_values[:-1]).astype(int)
    direction_accuracy = np.mean(actual_direction == predicted_direction)
    return direction_accuracy, actual_direction, predicted_direction

def train_model(model, train_loader, criterion, optimizer, epochs):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(train_loader):.4f}')

def evaluate(model, X_test, y_test, scaler):
    model.eval()
    with torch.no_grad():
        X_test_tensor = torch.FloatTensor(X_test).to(device)
        predictions = model(X_test_tensor).cpu().numpy()
        
    # Inverse transform predictions and actual values
    predictions_original = scaler.inverse_transform(predictions)
    y_test_original = scaler.inverse_transform(y_test.reshape(-1, 1))
    
    # Calculate metrics
    mae = mean_absolute_error(y_test_original, predictions_original)
    mape = mean_absolute_percentage_error(y_test_original, predictions_original)
    
    # Calculate direction accuracy
    dir_acc, actual_dir, pred_dir = calculate_direction_accuracy(y_test_original, predictions_original)
    
    # Plotting
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
    
    # Plot 1: Price predictions
    ax1.plot(y_test_original, label="Actual Values", color="blue", marker='o')
    ax1.plot(predictions_original, label="Predicted Values", color="red",
            linestyle='dashed', marker='x')
    ax1.set_title("Comparison of Predicted vs Actual Values")
    ax1.set_xlabel("Sample Index")
    ax1.set_ylabel("Price")
    ax1.legend()
    ax1.grid(True)

    # Plot 2: Direction predictions
    ax2.plot(actual_dir, label="Actual Direction", color="blue", marker='o')
    ax2.plot(pred_dir, label="Predicted Direction", color="red",
            linestyle='dashed', marker='x')
    ax2.set_title("Comparison of Predicted vs Actual Price Direction (1=Up, 0=Down)")
    ax2.set_xlabel("Sample Index")
    ax2.set_ylabel("Direction")
    ax2.legend()
    ax2.grid(True)

    plt.tight_layout()
    plt.show()
    
    return mae, mape, dir_acc, predictions_original

def run_model(X_train, y_train, X_test, y_test, scaler, n_iterations=1, sequence_length=60):
    total_mae = total_mape = total_acc = total_dir_acc = 0
    best_predictions = None
    best_metrics = float('inf')  # Using MAE as the metric to track

    for iteration in range(n_iterations):
        # Initialize model, criterion, and optimizer
        model = LSTMModel(sequence_length).to(device)
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        
        # Create dataset and dataloader
        train_dataset = TimeSeriesDataset(X_train, y_train)
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        
        # Train model
        print(f"\nIteration {iteration + 1}/{n_iterations}")
        train_model(model, train_loader, criterion, optimizer, epochs=50)
        
        # Evaluate model
        mae, mape, dir_acc, predictions = evaluate(model, X_test, y_test, scaler)
        
        # Update totals
        total_mae += mae
        total_mape += mape
        total_acc += (1 - mape)
        total_dir_acc += dir_acc
        
        # Track best model
        if mae < best_metrics:
            best_metrics = mae
            best_predictions = predictions

        print(f"Iteration {iteration + 1} Results:")
        print(f"MAE: {mae:.4f}")
        print(f"MAPE: {mape:.4f}")
        print(f"Price Accuracy: {(1 - mape):.4f}")
        print(f"Direction Accuracy: {dir_acc:.4f}")

    # Calculate averages
    avg_mae = total_mae / n_iterations
    avg_mape = total_mape / n_iterations
    avg_acc = total_acc / n_iterations
    avg_dir_acc = total_dir_acc / n_iterations

    return avg_mae, avg_mape, avg_acc, avg_dir_acc, best_predictions

def main():
    # Hyperparameters
    SEQUENCE_LENGTH = 60
    N_ITERATIONS = 1
    
    # Load and preprocess data
    X_train, y_train, X_test, y_test, scaler = load_and_preprocess_data(
        "XAU_15m_data.csv",
        split=0.85,
        sequence_length=SEQUENCE_LENGTH
    )
    
    print("Starting model training and evaluation...")
    mae, mape, acc, dir_acc, predictions = run_model(
        X_train, y_train, X_test, y_test, scaler,
        n_iterations=N_ITERATIONS,
        sequence_length=SEQUENCE_LENGTH
    )
    
    print("\nFinal Average Results:")
    print(f"Mean Absolute Error = {mae:.4f}")
    print(f"Mean Absolute Percentage Error = {mape:.4f}")
    print(f"Price Accuracy = {acc:.4f}")
    print(f"Direction Accuracy = {dir_acc:.4f}")

if __name__ == "__main__":
    main()

Using device: cuda
Starting model training and evaluation...

Iteration 1/1
Epoch [10/50], Loss: 0.0006
Epoch [20/50], Loss: 0.0005
Epoch [30/50], Loss: 0.0005
Epoch [40/50], Loss: 0.0005
Epoch [50/50], Loss: 0.0006


OutOfMemoryError: CUDA out of memory. Tried to allocate 42.85 GiB. GPU 0 has a total capacity of 3.80 GiB of which 574.31 MiB is free. Process 10728 has 154.00 MiB memory in use. Process 3866445 has 664.00 MiB memory in use. Including non-PyTorch memory, this process has 2.43 GiB memory in use. Of the allocated memory 2.30 GiB is allocated by PyTorch, and 27.46 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)