In [1]:
from networks.attention_agent import CausalAttentionAgent
import torch
from utils.trading_gym_env import TradingEnv
from utils.synthetic_data_service import SyntheticOHLCVGenerator
import pandas as pd
import matplotlib.pyplot as plt
from diffevo import DDIMScheduler, BayesianGenerator
from torch.nn.utils import parameters_to_vector, vector_to_parameters
from utils.fitess_funcs import batched_fitness_function
from api_wrappers.kraken_wrapper import KrakenWrapper
kw = KrakenWrapper()
dfs = kw.load_hist_files()



In [2]:
import numpy as np
import random

print(len(dfs))

def add_features(df):
    df.columns = [col.lower() for col in df.columns]
    print(df.head())
    # Ensure columns are numeric
    df['close'] = pd.to_numeric(df['close'], errors='coerce')
    df['high'] = pd.to_numeric(df['high'], errors='coerce')
    df['low'] = pd.to_numeric(df['low'], errors='coerce')
    df['vol'] = pd.to_numeric(df['vol'], errors='coerce')

    # Add log returns
    df['log_return'] = np.log(df['close']).diff()

    # Add moving averages
    df['ma_10'] = df['close'].rolling(window=10).mean()
    df['ma_50'] = df['close'].rolling(window=50).mean()

    # Add Fibonacci levels
    fib_ratios = [0.0, 0.236, 0.382, 0.5, 0.618, 1.0]
    for ratio in fib_ratios:
        df[f'fib_{int(ratio * 1000)}'] = np.nan
    for i in range(20, len(df)):
        high = df['high'].iloc[i-20:i].max()
        low = df['low'].iloc[i-20:i].min()
        for ratio in fib_ratios:
            level = high - (high - low) * ratio
            df.at[i, f'fib_{int(ratio * 1000)}'] = level

    # Drop NaN rows
    df = df.dropna().reset_index(drop=True)
    return df

# Add features to all dataframes
dfs = {asset: add_features(df) for asset, df in dfs.items()}
def split_train_test(df, train_ratio=0.8):
    train_size = int(len(df) * train_ratio)
    train_df = df.iloc[:train_size]
    test_df = df.iloc[train_size:]
    return train_df, test_df

# Split all dataframes into train/test sets
train_test_data = {asset: split_train_test(df) for asset, df in dfs.items()}
def prepare_data(df, seq_len):
    states = []
    price_changes = []

    for i in range(len(df) - seq_len):
        state = df.iloc[i:i+seq_len][['log_return', 'ma_10', 'ma_50']].values
        price_change = df.iloc[i+seq_len]['log_return']
        states.append(state)
        price_changes.append(price_change)

    states = torch.tensor(states, dtype=torch.float32)  # Shape: (num_samples, seq_len, num_features)
    price_changes = torch.tensor(price_changes, dtype=torch.float32)  # Shape: (num_samples,)
    return states, price_changes

# Prepare data for all assets
seq_len = 50
train_data = {asset: prepare_data(train, seq_len) for asset, (train, _) in train_test_data.items()}
test_data = {asset: prepare_data(test, seq_len) for asset, (_, test) in train_test_data.items()}

22
         date      open      high       low     close      vwap           vol
0  1694390400  0.249169  0.249169  0.236997  0.242000  0.241745  1.630805e+06
1  1694476800  0.242000  0.252356  0.240993  0.245998  0.248591  1.011622e+06
2  1694563200  0.245534  0.249705  0.243873  0.248728  0.247799  2.463036e+05
3  1694649600  0.248832  0.252286  0.246532  0.251329  0.249129  1.596386e+05
4  1694736000  0.251267  0.254644  0.237800  0.250528  0.244553  3.753199e+06
         date     open     high      low    close     vwap            vol
0  1694390400  0.09131  0.09202  0.08760  0.08844  0.08886   44115.023285
1  1694476800  0.08896  0.09198  0.08859  0.08965  0.09029  191172.611332
2  1694563200  0.08921  0.09195  0.08900  0.09089  0.09070   78639.702407
3  1694649600  0.09122  0.09379  0.09050  0.09347  0.09216  121614.765609
4  1694736000  0.09352  0.09671  0.09328  0.09630  0.09462   86333.150052
         date    open    high     low   close    vwap          vol
0  1694390400  1.2

  states = torch.tensor(states, dtype=torch.float32)  # Shape: (num_samples, seq_len, num_features)


In [3]:
def run(x_array, population, agent):
    rewards = []
    # Example of a random walk in the environment
    for xp in population:
        vector_to_parameters(torch.tensor(xp, dtype=torch.float32), agent.parameters())
        rewards.append(batched_fitness_function(agent, x_array))
    return rewards

# Define hyperparameters
POP_SIZE = 100
SCALING = 0.1

# Instantiate the model
STATE_DIM = train_data[list(train_data.keys())[0]][0].shape[-1]  # Number of features
ACTION_DIM = 2  # Buy, Sell
EP_LEN = 144
agent_model = CausalAttentionAgent(state_dim=STATE_DIM, action_dim=3, seq_len=seq_len)

# Initialize population
dim = parameters_to_vector(agent_model.parameters()).shape[0]
population = torch.randn(POP_SIZE, dim) * SCALING

# Fitness function
def fitness_function(population, agent, train_data, num_assets=10):
    """
    Evaluate the fitness of the population on a randomly selected batch of assets.

    Args:
        population: The population of agent parameters.
        agent: The trading agent model.
        train_data: Dictionary of training data for all assets.
        num_assets: Number of assets to randomly select for evaluation.

    Returns:
        List of rewards for each member of the population.
    """
    # Randomly select a batch of assets
    selected_assets = random.sample(list(train_data.keys()), num_assets)
    print(train_data)
    print(f"Selected assets for this generation: {selected_assets}")

    # Combine states and price changes for the selected assets
    batch_states = torch.cat([train_data[asset][0] for asset in selected_assets], dim=0)  # Combine states
    batch_price_changes = torch.cat([train_data[asset][1] for asset in selected_assets], dim=0)  # Combine price changes

    # Debug: Check shapes
    print("Batch states shape:", batch_states.shape)
    print("Batch price changes shape:", batch_price_changes.shape)

    # Ensure batch_states matches the model's expected input shape
    seq_len = agent.seq_len  # Sequence length expected by the model
    state_dim = agent.state_dim  # State dimension expected by the model
    batch_states = batch_states.view(-1, seq_len, state_dim)
    print("Batch states shape:", batch_states.shape)
    rewards = []
    for params in population:
        # Update the agent's parameters
        vector_to_parameters(params, agent.parameters())

        # Evaluate the agent on the batch
        reward = batched_fitness_function(agent, batch_states, batch_price_changes)
        rewards.append(reward.sum().item())  # Sum rewards across all assets in the batch

    return rewards

# Train with diffusion evolution
scheduler = DDIMScheduler(num_step=seq_len)
for step in range(100):  # Number of training steps
    rewards = fitness_function(population, agent_model, train_data)
    print(f"Step {step}, Best Reward: {max(rewards)}")
    population = scheduler.step(population, rewards)

{'ADAUSDT.txt': (tensor([[[ 0.0256,  0.2827,  0.2570],
         [-0.0323,  0.2862,  0.2580],
         [ 0.0479,  0.2906,  0.2592],
         ...,
         [ 0.0105,  0.5815,  0.4091],
         [-0.0439,  0.5940,  0.4149],
         [ 0.0377,  0.5998,  0.4211]],

        [[-0.0323,  0.2862,  0.2580],
         [ 0.0479,  0.2906,  0.2592],
         [ 0.0489,  0.2948,  0.2607],
         ...,
         [-0.0439,  0.5940,  0.4149],
         [ 0.0377,  0.5998,  0.4211],
         [-0.0465,  0.5994,  0.4265]],

        [[ 0.0479,  0.2906,  0.2592],
         [ 0.0489,  0.2948,  0.2607],
         [ 0.0188,  0.2998,  0.2623],
         ...,
         [ 0.0377,  0.5998,  0.4211],
         [-0.0465,  0.5994,  0.4265],
         [ 0.0251,  0.5991,  0.4325]],

        ...,

        [[ 0.0011,  0.7580,  0.8742],
         [-0.0518,  0.7456,  0.8674],
         [-0.0040,  0.7293,  0.8613],
         ...,
         [-0.0347,  0.6201,  0.7141],
         [-0.0041,  0.6175,  0.7114],
         [-0.0414,  0.6129,  0.71

AssertionError: Input tensor dimensions do not match the model's sequence length or state dimension.