In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Define the Transformer model
class TransformerModel(nn.Module):
    """Transformer encoder followed by a linear head over time-averaged features.

    Inputs are laid out as ``(batch, time, feature)``.

    Args:
        input_size: Feature dimension of each time step. It is used as the
            encoder's ``d_model`` and must be divisible by ``num_heads``.
        output_size: Number of values produced per sample.
        num_layers: Number of stacked encoder layers.
        hidden_size: Width of the feed-forward sub-network in each encoder layer.
        num_heads: Number of attention heads.
        dropout: Dropout probability used inside the encoder layers.
    """

    def __init__(self, input_size, output_size, num_layers=2, hidden_size=32, num_heads=2, dropout=0.1):
        super(TransformerModel, self).__init__()
        # batch_first=True makes the encoder read inputs as (batch, time, feature),
        # matching the dim=1 time average in forward(). With PyTorch's default
        # (time, batch, feature) layout a batch of samples would be interpreted as a
        # single sequence and self-attention would mix unrelated samples together.
        self.transformer = nn.TransformerEncoderLayer(
            d_model=input_size,
            nhead=num_heads,
            dim_feedforward=hidden_size,
            dropout=dropout,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(self.transformer, num_layers=num_layers)
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, x):
        """Map a ``(batch, time, input_size)`` tensor to ``(batch, output_size)``."""
        x = self.encoder(x)
        x = x.mean(dim=1)  # Average across the time steps
        x = self.fc(x)
        return x

In [None]:
# Define the RL trading agent
class RLTrader:
    """Trading agent that regresses per-action values with a TransformerModel.

    Args:
        input_size: Feature dimension passed to the underlying model.
        output_size: Number of discrete actions (one predicted value per action).
        learning_rate: Step size for the Adam optimizer.
        gamma: Discount factor. It is stored on the instance but not used by
            any method of this class.
    """

    def __init__(self, input_size, output_size, learning_rate=0.001, gamma=0.95):
        self.input_size = input_size
        self.output_size = output_size
        self.gamma = gamma

        # Create the transformer model
        self.model = TransformerModel(input_size, output_size)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

    def predict(self, state):
        """Return the predicted action values for the first sample in ``state``.

        Args:
            state: Array-like or tensor accepted by the model; converted to float32.

        Returns:
            NumPy array holding the model output for the first sample.
        """
        state = torch.as_tensor(state, dtype=torch.float32)
        # torch.no_grad() does not turn dropout off; without eval() the predicted
        # values would be randomly perturbed on every call. Restore the previous
        # mode afterwards so predict() has no lasting effect on the model.
        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                prediction = self.model(state)
        finally:
            self.model.train(was_training)
        return prediction.numpy()[0]

    def train(self, states, actions, rewards):
        """Run one optimisation step on a batch.

        Args:
            states: Batch of model inputs (array-like or tensor).
            actions: Integer index of the action taken for each sample.
            rewards: Regression target for the value of each taken action.

        Returns:
            The mean-squared-error loss of this step as a Python float.
        """
        # as_tensor avoids the copy (and the UserWarning) that torch.tensor()
        # produces when the inputs are already tensors, e.g. from a DataLoader.
        states = torch.as_tensor(states, dtype=torch.float32)
        actions = torch.as_tensor(actions, dtype=torch.long)
        rewards = torch.as_tensor(rewards, dtype=torch.float32)

        self.model.train()

        # Pick, for each sample, the predicted value of the action that was taken
        predicted_values = self.model(states)
        predicted_q_values = predicted_values.gather(1, actions.unsqueeze(1)).squeeze(1)

        # The supplied rewards are used directly as the regression target; no
        # bootstrapped (temporal-difference) term is added here.
        target_q_values = rewards

        # Compute the loss (mean squared error between predicted and target values)
        loss = nn.MSELoss()(predicted_q_values, target_q_values)

        # Update the model
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()

In [None]:
# Function to perform a trading simulation
def run_simulation(prices, trader, initial_balance=1000, num_days=100, max_stock=5, epsilon=0.1, window=60):
    """Simulate epsilon-greedy trading over a price series.

    On each tradable day the previous ``window`` prices are given to
    ``trader.predict`` and an action is chosen: ``0`` buys as many shares as
    cash and ``max_stock`` allow, ``1`` sells the whole position, and any other
    value leaves the position unchanged.

    Args:
        prices: 1-D sequence of prices, indexed by day.
        trader: Object exposing ``predict(state)`` and ``output_size``.
        initial_balance: Starting cash.
        num_days: Number of days to simulate; capped at ``len(prices)``.
        max_stock: Maximum number of shares held at once.
        epsilon: Probability of taking a uniformly random action.
        window: Number of past prices in each state. It must match the input
            size the trader's model expects.

    Returns:
        List of ``(state, action, profit)`` tuples, one per tradable day.
        ``state`` has shape ``(1, 1, window)``. ``profit`` is the portfolio value
        (cash plus holdings at that day's price) minus ``initial_balance``, i.e.
        cumulative profit since the start rather than a per-day change.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    prices = np.asarray(prices, dtype=float)
    # Never step past the available data; indexing prices[day] beyond the end
    # would otherwise raise IndexError when num_days exceeds len(prices).
    last_day = min(num_days, len(prices))

    balance = initial_balance
    stock = 0
    history = []

    # The first tradable day is `window`: prices[0:window] is already a full history.
    for day in range(window, last_day):
        price = prices[day]
        state = prices[day - window:day].reshape(1, 1, window)  # Convert the history into a 3D tensor
        action_values = trader.predict(state)

        # Epsilon-greedy exploration
        if np.random.rand() < epsilon:
            action = int(np.random.choice(trader.output_size))
        else:
            action = int(np.argmax(action_values))

        # Perform the chosen action and update balance and stock
        if action == 0:  # Buy
            if balance >= price and stock < max_stock:
                stock_to_buy = min(int(balance / price), max_stock - stock)
                stock += stock_to_buy
                balance -= stock_to_buy * price
        elif action == 1:  # Sell
            if stock > 0:
                balance += stock * price
                stock = 0

        # Cumulative profit of the portfolio relative to the starting cash
        profit = (balance + stock * price) - initial_balance
        history.append((state, action, profit))

    return history

In [None]:
# Main function to run the algorithmic trading and training
if __name__ == "__main__":
    # Seed both NumPy and PyTorch so price data, exploration, weight
    # initialisation and dropout are reproducible from a fresh kernel.
    np.random.seed(42)
    torch.manual_seed(42)

    # Generate some random price data for demonstration
    prices = np.random.rand(200) * 100  # Assuming the prices are in the range [0, 100]

    # Define the input size and output size for the RLTrader
    input_size = 60  # History of last 60 days
    output_size = 3  # Buy, Sell, Hold

    # Initialize the RLTrader
    trader = RLTrader(input_size, output_size)

    # Training parameters
    num_episodes = 1000
    batch_size = 32
    epsilon = 0.1
    gamma = 0.95
    initial_balance = 1000

    for episode in range(num_episodes):
        history = run_simulation(prices, trader, initial_balance=initial_balance, epsilon=epsilon)
        states, actions, returns = zip(*history)

        # Compute discounted returns back-to-front. Appending and reversing once
        # avoids the quadratic cost of inserting at the head of a list.
        discounted_returns = []
        G = 0
        for r in reversed(returns):
            G = r + gamma * G
            discounted_returns.append(G)
        discounted_returns.reverse()

        # Normalize discounted returns
        discounted_returns = np.array(discounted_returns)
        discounted_returns = (discounted_returns - np.mean(discounted_returns)) / (np.std(discounted_returns) + 1e-9)

        # Stack the per-step (1, 1, 60) states and drop the extra singleton axis:
        # (N, 1, 1, 60) -> (N, 1, 60)
        states = np.array(states).squeeze(1)
        actions = np.array(actions)

        # Create DataLoader for training
        dataset = TensorDataset(
            torch.tensor(states, dtype=torch.float32),
            torch.tensor(actions, dtype=torch.long),
            torch.tensor(discounted_returns, dtype=torch.float32),
        )
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

        # Train the trader on the current episode's data
        for batch_states, batch_actions, batch_returns in dataloader:
            trader.train(batch_states, batch_actions, batch_returns)

        # Print progress
        if (episode + 1) % 100 == 0:
            print(f"Episode {episode + 1}/{num_episodes}")

    # Evaluate the trained agent on a test set. epsilon=0.0 disables exploration
    # so the result reflects the learned policy rather than random actions.
    test_prices = np.random.rand(100) * 100
    test_history = run_simulation(test_prices, trader, initial_balance=initial_balance, epsilon=0.0)

    # Each history entry carries cumulative profit relative to the starting cash,
    # so the last entry plus the starting cash is the final value (cash plus
    # holdings at the last price). Averaging the entries would not be a balance.
    final_profit = test_history[-1][2]
    print(f"Final balance: ${initial_balance + final_profit:.2f}")
