In [13]:
from networks.attention_agent import CausalAttentionAgent
import torch
from utils.trading_gym_env import TradingEnv
from utils.synthetic_data_service import SyntheticOHLCVGenerator
import pandas as pd
import matplotlib.pyplot as plt
from diffevo import DDIMScheduler, BayesianGenerator
from torch.nn.utils import parameters_to_vector, vector_to_parameters
from utils.fitess_funcs import batched_fitness_function
from api_wrappers.kraken_wrapper import KrakenWrapper
# Build the Kraken wrapper and load the historical files it exposes.
# NOTE(review): `dfs` is later iterated with `.items()` as (asset, DataFrame)
# pairs, so load_hist_files() presumably returns a dict of DataFrames — confirm.
kw = KrakenWrapper()
dfs = kw.load_hist_files()

In [None]:
import numpy as np
import random

# Report how many entries were loaded into `dfs`.
print(len(dfs))

def add_features(df, fib_window=20):
    """Return a copy of a price frame augmented with engineered features.

    The input is not modified. Column names are lower-cased, the ``close``,
    ``high``, ``low`` and ``vol`` columns are coerced to numeric (unparseable
    values become NaN), and the following columns are added:

    * ``log_return`` - first difference of ``log(close)``.
    * ``ma_10`` / ``ma_50`` - simple moving averages of ``close``.
    * ``fib_0`` ... ``fib_1000`` - Fibonacci retracement levels computed from
      the highest ``high`` and lowest ``low`` of the ``fib_window`` rows
      strictly preceding each row.

    Args:
        df: DataFrame with (case-insensitive) ``close``, ``high``, ``low``
            and ``vol`` columns.
        fib_window: Number of preceding rows used for the retracement range.

    Returns:
        A new DataFrame with every row containing a NaN removed and a fresh
        0..n-1 index.
    """
    # Work on a copy: mutating the caller's frame made re-running the
    # notebook cell operate on already-processed data.
    df = df.copy()
    df.columns = [col.lower() for col in df.columns]
    print(df.head())

    # Ensure columns are numeric
    for col in ('close', 'high', 'low', 'vol'):
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Add log returns
    df['log_return'] = np.log(df['close']).diff()

    # Add moving averages
    df['ma_10'] = df['close'].rolling(window=10).mean()
    df['ma_50'] = df['close'].rolling(window=50).mean()

    # Add Fibonacci levels.
    # rolling(...).shift(1) gives, at row position i, the extreme over
    # positions [i - fib_window, i). It is purely positional, so it is correct
    # for any index (label-based `df.at[i, ...]` was only right for a
    # RangeIndex). min_periods=1 keeps the "ignore NaNs inside the window"
    # behaviour of Series.max()/min(); the first `fib_window` rows are masked
    # because they do not have a full look-back window.
    warmed_up = np.arange(len(df)) >= fib_window
    window_high = (
        df['high'].rolling(window=fib_window, min_periods=1).max().shift(1).where(warmed_up)
    )
    window_low = (
        df['low'].rolling(window=fib_window, min_periods=1).min().shift(1).where(warmed_up)
    )
    fib_ratios = [0.0, 0.236, 0.382, 0.5, 0.618, 1.0]
    for ratio in fib_ratios:
        df[f'fib_{int(ratio * 1000)}'] = window_high - (window_high - window_low) * ratio

    # Drop NaN rows (warm-up rows of the rolling features and coerced values)
    return df.dropna().reset_index(drop=True)

# Enrich every asset's frame with the engineered feature columns.
dfs = {name: add_features(frame) for name, frame in dfs.items()}
def split_train_test(df, train_ratio=0.8):
    """Split a frame positionally into a leading train part and trailing test part.

    Args:
        df: DataFrame to split; row order is preserved.
        train_ratio: Fraction of rows (truncated to an integer count) that
            goes into the train part.

    Returns:
        A ``(train_df, test_df)`` tuple of positional slices of ``df``.
    """
    cutoff = int(len(df) * train_ratio)
    return df.iloc[:cutoff], df.iloc[cutoff:]

# Partition each asset's frame into (train, test) using the default ratio.
train_test_data = {name: split_train_test(frame) for name, frame in dfs.items()}
def prepare_data(df, seq_len):
    """Turn a feature frame into sliding-window inputs and next-step targets.

    Sample ``i`` consists of rows ``i .. i + seq_len - 1`` of the
    ``log_return``, ``ma_10`` and ``ma_50`` columns; its target is the
    ``log_return`` of row ``i + seq_len``.

    Args:
        df: DataFrame containing ``log_return``, ``ma_10`` and ``ma_50``.
        seq_len: Number of consecutive rows per window (must be >= 1).

    Returns:
        ``(states, price_changes)`` as float32 tensors of shape
        ``(num_samples, seq_len, 3)`` and ``(num_samples,)`` where
        ``num_samples = max(len(df) - seq_len, 0)``.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    feature_cols = ['log_return', 'ma_10', 'ma_50']
    num_samples = len(df) - seq_len

    if num_samples <= 0:
        # Keep the trailing dimensions even when there is nothing to emit, so
        # callers can still read shape[-1] or torch.cat the result.
        return (
            torch.empty((0, seq_len, len(feature_cols)), dtype=torch.float32),
            torch.empty((0,), dtype=torch.float32),
        )

    # One conversion of the whole feature matrix instead of a Python list of
    # per-window arrays (which torch.tensor converts very slowly).
    features = torch.tensor(df[feature_cols].to_numpy(), dtype=torch.float32)

    # unfold -> (len(df) - seq_len + 1, num_features, seq_len); the last
    # window has no following row to serve as target, so it is dropped.
    states = features.unfold(0, seq_len, 1)[:num_samples].permute(0, 2, 1).contiguous()

    # Column 0 is log_return; the target of window i is row i + seq_len.
    price_changes = features[seq_len:, 0].clone()
    return states, price_changes

# Build windowed (states, price_changes) tensors per asset for both partitions.
seq_len = 50  # rows per input window
train_data = {
    name: prepare_data(train_part, seq_len)
    for name, (train_part, _) in train_test_data.items()
}
test_data = {
    name: prepare_data(test_part, seq_len)
    for name, (_, test_part) in train_test_data.items()
}

def run(x_array, population, agent):
    """Score every candidate parameter vector in ``population``.

    Each candidate is copied into a float32 tensor, loaded into ``agent`` and
    evaluated with ``batched_fitness_function(agent, x_array)``.

    NOTE(review): ``fitness_function`` in this file calls
    ``batched_fitness_function`` with three arguments while this helper passes
    two - confirm which signature is current.

    Args:
        x_array: Input handed unchanged to ``batched_fitness_function``.
        population: Iterable of flat parameter vectors.
        agent: Model whose parameters are overwritten for each candidate.

    Returns:
        List with one ``batched_fitness_function`` result per candidate.
    """
    scores = []
    for candidate in population:
        flat_params = torch.tensor(candidate, dtype=torch.float32)
        vector_to_parameters(flat_params, agent.parameters())
        scores.append(batched_fitness_function(agent, x_array))
    return scores

# Define hyperparameters
POP_SIZE = 100   # number of candidate parameter vectors
SCALING = 0.1    # std-dev scale of the initial random population

# Instantiate the model
# Feature count is read from the last dimension of any asset's states tensor.
STATE_DIM = next(iter(train_data.values()))[0].shape[-1]  # Number of features
# The model was always built with 3 actions while this constant said 2
# ("Buy, Sell") and was never used. The constant now matches the model.
# NOTE(review): presumably the third action is "hold" - confirm.
ACTION_DIM = 3
agent_model = CausalAttentionAgent(state_dim=STATE_DIM, action_dim=ACTION_DIM, seq_len=seq_len)

# Initialize population: one flat parameter vector per candidate
dim = parameters_to_vector(agent_model.parameters()).shape[0]
population = torch.randn(POP_SIZE, dim) * SCALING

# Fitness function
def fitness_function(population, agent, train_data, num_assets=10):
    """
    Evaluate the fitness of the population on a randomly selected batch of assets.

    Args:
        population: Iterable of flat parameter vectors, one per candidate.
        agent: The trading agent model; must expose ``seq_len`` and
            ``state_dim`` and is overwritten with each candidate's parameters.
        train_data: Dictionary mapping asset -> ``(states, price_changes)``.
        num_assets: Maximum number of assets to randomly select for
            evaluation; fewer are used when fewer are available.

    Returns:
        List of rewards (Python floats), one per member of the population.

    Raises:
        ValueError: If no asset has any samples, or if the combined states do
            not have shape ``(N, agent.seq_len, agent.state_dim)``.
    """
    # Assets too short to produce a single window contribute nothing; skip
    # them so the batch is never built from empty tensors only.
    eligible = [asset for asset, (states, _) in train_data.items() if states.numel() > 0]
    if not eligible:
        raise ValueError("train_data contains no asset with at least one sample.")

    # Randomly select a batch of assets (clamped: random.sample raises when
    # asked for more items than exist).
    selected_assets = random.sample(eligible, min(num_assets, len(eligible)))
    print(f"Selected assets for this generation: {selected_assets}")

    # Combine states and price changes for the selected assets
    batch_states = torch.cat([train_data[asset][0] for asset in selected_assets], dim=0)
    batch_price_changes = torch.cat([train_data[asset][1] for asset in selected_assets], dim=0)

    # Debug: Check shapes
    print("Batch states shape:", batch_states.shape)
    print("Batch price changes shape:", batch_price_changes.shape)

    # Validate instead of reshaping: a blind view(-1, seq_len, state_dim)
    # would silently scramble windows whenever the element count happens to
    # divide evenly.
    expected_tail = (agent.seq_len, agent.state_dim)
    if batch_states.dim() != 3 or tuple(batch_states.shape[1:]) != expected_tail:
        raise ValueError(
            f"Batch states have shape {tuple(batch_states.shape)}, expected "
            f"(N, {agent.seq_len}, {agent.state_dim}) to match the agent."
        )

    rewards = []
    # Only reward values are read here, so skip building autograd graphs.
    with torch.no_grad():
        for params in population:
            # Update the agent's parameters
            vector_to_parameters(params, agent.parameters())

            # Evaluate the agent on the batch
            reward = batched_fitness_function(agent, batch_states, batch_price_changes)
            rewards.append(reward.sum().item())  # Sum rewards across all assets in the batch

    return rewards

# Train with diffusion evolution
# The schedule length and the loop count share one constant; previously the
# scheduler was sized with `seq_len` (the input window length, 50) while the
# loop ran 100 iterations.
NUM_STEPS = 100
scheduler = DDIMScheduler(num_step=NUM_STEPS)
# NOTE(review): the upstream diffevo examples appear to drive the scheduler by
# iterating it (`for t, alpha in scheduler`) and building a BayesianGenerator
# from (population, fitness, alpha) at each step - confirm that
# `scheduler.step` exists in the installed version.
for step in range(NUM_STEPS):  # Number of training steps
    rewards = fitness_function(population, agent_model, train_data)
    print(f"Step {step}, Best Reward: {max(rewards)}")
    population = scheduler.step(population, rewards)

         date      open      high       low     close      vwap           vol  \
0  1698624000  0.295563  0.303000  0.292362  0.302535  0.299260  1.557035e+06   
1  1698710400  0.303000  0.304567  0.284645  0.292913  0.294084  1.557500e+06   
2  1698796800  0.292276  0.308700  0.284385  0.307276  0.298274  1.412384e+06   
3  1698883200  0.307400  0.329323  0.298880  0.322682  0.312514  3.469029e+06   
4  1698969600  0.322620  0.329543  0.311669  0.328814  0.319348  2.965634e+06   

   log_return     ma_10     ma_50     fib_0   fib_236   fib_382   fib_500  \
0    0.025625  0.282736  0.256987  0.299470  0.284939  0.275950  0.268685   
1   -0.032321  0.286173  0.258005  0.303000  0.287636  0.278132  0.270450   
2    0.047871  0.290570  0.259231  0.304567  0.288834  0.279100  0.271234   
3    0.048921  0.294778  0.260710  0.308700  0.291991  0.281654  0.273300   
4    0.018825  0.299768  0.262259  0.329323  0.308116  0.294997  0.284393   

    fib_618  fib_1000  
0  0.261420  0.237900  
1 

AssertionError: Input tensor dimensions do not match the model's sequence length or state dimension.