In [1]:
# SETUP ===================================
import jupyter
import warnings

from src.utils.system import boot, Notify

boot()
warnings.filterwarnings("ignore")



# PACKAGES ================================
import os
import torch
import joblib
import numpy as np
import pandas as pd
import seaborn as sns
import torch.nn as nn
import gymnasium as gym
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.preprocessing import  RobustScaler

# FRAMEWORK STUFF =========================
from src.defaults import TOP2_STOCK_BY_SECTOR, FEATURE_COLS,EPISODE_LENGTH
from src.data.feature_pipeline import load_base_dataframe
from src.experiments.experiment_tracker import ExperimentTracker
from src.env.base_timeseries_trading_env import BaseSequenceAwareTradingEnv,SequenceAwareAlphaTradingEnv,SequenceAwareBaselineTradingAgent,SequenceAwareCalmarTradingEnv,SequenceAwareCumulativeTradingEnv,SequenceAwareDrawdownTradingEnv,SequenceAwareHybridTradingEnv,SequenceAwareHybridTradingEnv,SequenceAwareSharpeTradingEnv,SequenceAwareSortinoTradingEnv

from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.policies import ActorCriticPolicy



In [2]:

# ========== CONFIG ==========
EXPERIENCE_NAME = "core_sequence_aware_agent_design"
RESULTS_PATH = f"data/experiments/{EXPERIENCE_NAME}_barebones_results.csv"
N_EPISODES = 20
N_SEEDS = 3
N_EVAL_EPISODES = 3
AGENT_TYPES = ['mlp', 'lstm', 'transformer_single', 'transformer_multi']
WINDOW_LENGTH = 10  # or any value you want
TOTAL_TIMESTEPS = EPISODE_LENGTH * 150
N_STEPS = EPISODE_LENGTH * 2

TRANSACTION_COST = 0

CONFIG = {
    "batch_size": EPISODE_LENGTH,
    "n_steps": 128,
    "total_timesteps": TOTAL_TIMESTEPS,   
}


"""
features_extractor_kwargs={
    'window_length': WINDOW_LENGTH,
    'n_features': len(FEATURE_COLS),
    'd_model': 32,
    'nhead': ...,
    'num_layers': ...,
}
"""

# --- Load data ---
ohlcv_df = load_base_dataframe()

# --- Experiment tracker ---
experiment_tracker = ExperimentTracker(EXPERIENCE_NAME)



In [3]:
def make_env(df, ticker, feature_cols, episode_length, window_length):
    df_ticker = df[df['symbol'] == ticker].copy()
    return CumulativeTradingEnv(
        df=df_ticker,
        feature_cols=feature_cols,
        episode_length=episode_length,
        transaction_cost=TRANSACTION_COST,
        window_length=window_length,
    )

In [4]:
class TransformerExtractor(BaseFeaturesExtractor):
    def __init__(self, observation_space, window_length, n_features, d_model=32, nhead=1, num_layers=1):
        super().__init__(observation_space, features_dim=d_model)
        self.window_length = window_length
        self.n_features = n_features
        self.embedding = nn.Linear(n_features, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

    def forward(self, obs):
        # obs: [batch, window_length * n_features]
        batch = obs.shape[0]
        # reshape flat vector to (batch, window_length, n_features)
        x = obs.view(batch, self.window_length, self.n_features)
        x = self.embedding(x)      # (batch, window_length, d_model)
        x = x.permute(1, 0, 2)    # (window_length, batch, d_model)
        x = self.transformer(x)    # (window_length, batch, d_model)
        # Use last token as pooled output
        return x[-1]              # (batch, d_model)

In [5]:
class TransformerPolicy(ActorCriticPolicy):
    def __init__(self, *args, nhead=1, num_layers=1, window_length=WINDOW_LENGTH, n_features=2, **kwargs):
        super().__init__(
            *args,
            features_extractor_class=TransformerExtractor,
            features_extractor_kwargs={
                'window_length': window_length,
                'n_features': n_features,
                'd_model': 32,
                'nhead': nhead,
                'num_layers': num_layers,
            },
            **kwargs
        )

In [6]:
# Test 8: Learnability
from src.env.realistic_synthetic_environment import realistic_synthetic_market_sample
class RandomAgent:
    def __init__(self, env):
        self.env = env
    def predict(self, obs, *args, **kwargs):
        return self.env.action_space.sample(), {}

class AlwaysLongAgent:
    def __init__(self, env):
        self.env = env
    def predict(self, obs, *args, **kwargs):
        return 1, {}  # Always go long
    
def evaluate_baseline_agent(env, agent, n_episodes=20, episode_sequence=None):
    rewards = []
    if episode_sequence:
        env.set_episode_sequence(episode_sequence)
    for _ in range(n_episodes):
        obs, _ = env.reset()
        done = False
        total_reward = 0
        while not done:
            action, _ = agent.predict(obs)
            obs, reward, done, _, _ = env.step(action)
            total_reward += reward
        rewards.append(total_reward)
    return np.mean(rewards), np.std(rewards)



In [7]:
from stable_baselines3 import PPO
from sb3_contrib import RecurrentPPO

def make_agent(agent_type, env, window_length, feature_cols, **kwargs):
    if agent_type == 'mlp':
        return PPO("MlpPolicy", env, verbose=0, **kwargs)
    elif agent_type == 'lstm':
        return RecurrentPPO("MlpLstmPolicy", env, verbose=0, **kwargs)
        #return PPO("MlpLstmPolicy", env, verbose=0, **kwargs)
    elif agent_type.startswith('transformer'):
        
        n_features = len(feature_cols)
       
        return PPO(
            TransformerPolicy,
            env,
            verbose=0,
            policy_kwargs={
                'window_length': window_length,
                'n_features': env.observation_space.shape[1],
                'nhead': 2,        # set as desired
                'num_layers': 2,   # set as desired
            },
            **kwargs
        )
    else:
        raise ValueError(f"Unknown agent type: {agent_type}")


In [14]:
def evaluate_agent(env, agent, n_episodes=10, episode_sequence=None, is_sb3=False):
    rewards = []
    if episode_sequence:
        env.set_episode_sequence(episode_sequence)
    for _ in range(n_episodes):
        obs,_ = env.reset()
        done = False
        total_reward = 0
        while not done:
            if is_sb3:
                action, _ = agent.predict(obs, deterministic=True)
            else:
                action, _ = agent.predict(obs)
            obs, reward, done, _, _ = env.step(action)
            total_reward += reward
        rewards.append(total_reward)
    return np.mean(rewards), np.std(rewards)

In [None]:
# ENVIRONMENT AND SEQUENCES ======================
df = ohlcv_df[(ohlcv_df['date'] >= "2023-01-01") & (ohlcv_df['date']<"2025-01-01")].copy()
df = ohlcv_df[ohlcv_df['symbol']== "AAPL"]
feature_cols = FEATURE_COLS
env = SequenceAwareCumulativeTradingEnv(
    df, feature_cols=feature_cols, episode_length=EPISODE_LENGTH, window_length=WINDOW_LENGTH, seed=314)

seq = env.generate_episode_sequences(train_steps=100)

# Baseline agents =================================
random_agent = RandomAgent(env)
always_long_agent = AlwaysLongAgent(env)
mean_rand, std_rand = evaluate_agent(env, random_agent, n_episodes=20, episode_sequence=seq)
mean_long, std_long = evaluate_agent(env, always_long_agent, n_episodes=20, episode_sequence=seq)
print(f"Random: mean {mean_rand:.4f}, std {std_rand:.4f}")
print(f"Always Long: mean {mean_long:.4f}, std {std_long:.4f}")

# RL agents =======================================
AGENT_TYPES = ['mlp', 'lstm', 'transformer_single', 'transformer_multi']
#AGENT_TYPES = ['transformer_single', 'transformer_multi']
for agent_type in AGENT_TYPES:
    print(f"\nTraining {agent_type} agent...")
    env = SequenceAwareCumulativeTradingEnv(
        df, feature_cols=feature_cols, episode_length=EPISODE_LENGTH, window_length=WINDOW_LENGTH, seed=314)
    env2 = SequenceAwareCumulativeTradingEnv(
        df, feature_cols=['return_1d','volume'], episode_length=EPISODE_LENGTH, window_length=WINDOW_LENGTH, seed=314)
    env.set_episode_sequence(seq)
    env2.set_episode_sequence(seq)
    print(f"\nTraining Env 1")
    model = make_agent(agent_type, env, window_length=WINDOW_LENGTH, feature_cols=feature_cols, n_steps=EPISODE_LENGTH, batch_size=4)
    model.learn(total_timesteps=TOTAL_TIMESTEPS)
    mean_rl, std_rl = evaluate_agent(env, model, n_episodes=10, episode_sequence=seq, is_sb3=True)
    print(f"ALL FEATURES {agent_type} agent: mean {mean_rl:.4f}, std {std_rl:.4f}")
    print(f"\nTraining Env 2")
    model = make_agent(agent_type, env2, window_length=WINDOW_LENGTH, feature_cols=["return_1d","volume"], n_steps=EPISODE_LENGTH, batch_size=4)
    model.learn(total_timesteps=TOTAL_TIMESTEPS)
    mean_rl, std_rl = evaluate_agent(en2v, model, n_episodes=10, episode_sequence=seq, is_sb3=True)
    print(f"2 FEATURES {agent_type} agent: mean {mean_rl:.4f}, std {std_rl:.4f}")


Random: mean -0.0240, std 0.1038
Always Long: mean 0.1616, std 0.0000

Training mlp agent...

Training Env 1


In [None]:
trainer.agent