In [1]:
# SETUP ===================================
# Runs ahead of the main package imports: project bootstrap first, then
# global warning suppression.
import jupyter
import warnings

from src.utils.system import boot, Notify

# Project-specific bootstrap from src.utils.system; its effect is not visible
# in this notebook. NOTE(review): confirm what it configures and whether it
# must run before the imports below.
boot()
# Adds an "ignore" filter for every warning category, so warnings raised by
# code executed after this point are suppressed.
warnings.filterwarnings("ignore")



# PACKAGES ================================
import os
import torch
import joblib
import numpy as np
import pandas as pd
import seaborn as sns
import torch.nn as nn
import gymnasium as gym
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.preprocessing import  RobustScaler

# FRAMEWORK STUFF =========================
from src.config import TOP2_STOCK_BY_SECTOR, FEATURE_COLS
from src.data.feature_pipeline import load_base_dataframe
from src.experiments.experiment_tracker import ExperimentTracker
from src.env.base_timeseries_trading_env import BaseSequenceAwareTradingEnv,SequenceAwareAlphaTradingEnv,SequenceAwareBaselineTradingAgent,SequenceAwareCalmarTradingEnv,SequenceAwareCumulativeTradingEnv,SequenceAwareDrawdownTradingEnv,SequenceAwareHybridTradingEnv,SequenceAwareHybridTradingEnv,SequenceAwareSharpeTradingEnv,SequenceAwareSortinoTradingEnv

from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.policies import ActorCriticPolicy



In [2]:

# ========== CONFIG ==========
# Experiment identity; the results path is derived from the experiment name.
EXPERIENCE_NAME = "core_sequence_aware_agent_design"
RESULTS_PATH = f"data/experiments/{EXPERIENCE_NAME}_barebones_results.csv"

# Run sizes: training episodes, number of seeds, evaluation episodes.
N_EPISODES, N_SEEDS, N_EVAL_EPISODES = 20, 3, 3

# Agent architectures to compare.
AGENT_TYPES = [
    'mlp',
    'lstm',
    'transformer_single',
    'transformer_multi',
]

# Observation window length (free to change).
WINDOW_LENGTH = 10

# Transaction cost forwarded to environments built by make_env.
TRANSACTION_COST = 0

# Training hyperparameters; key names mirror the PPO / learn() arguments
# used in the SB3 compatibility cell.
CONFIG = dict(batch_size=32, n_steps=128, total_timesteps=5000)


"""
features_extractor_kwargs={
    'window_length': WINDOW_LENGTH,
    'n_features': len(FEATURE_COLS),
    'd_model': 32,
    'nhead': ...,
    'num_layers': ...,
}
"""

# --- Load data ---
# Base frame for the notebook; the test cells below take a copy of it as `df`.
ohlcv_df = load_base_dataframe()

# --- Experiment tracker ---
# Tracker instance created from the experiment name defined in the config.
experiment_tracker = ExperimentTracker(EXPERIENCE_NAME)



In [3]:
def make_env(df, ticker, feature_cols, episode_length, window_length):
    """Build a cumulative-reward trading environment for a single ticker.

    Args:
        df: DataFrame with a 'symbol' column; only rows for `ticker` are used.
        ticker: Symbol to select from `df`.
        feature_cols: Feature column names forwarded to the environment.
        episode_length: Episode length forwarded to the environment.
        window_length: Observation window length forwarded to the environment.

    Returns:
        A `SequenceAwareCumulativeTradingEnv` built on a copy of the ticker's
        rows, using the notebook-level `TRANSACTION_COST`.

    Raises:
        ValueError: If `df` contains no rows for `ticker`.
    """
    # Copy so the environment never operates on a slice of the shared frame.
    df_ticker = df[df['symbol'] == ticker].copy()
    if df_ticker.empty:
        # Fail here with a clear message instead of deep inside the environment.
        raise ValueError(f"No rows found for ticker {ticker!r}")
    # Use the cumulative-reward environment class that this notebook imports.
    return SequenceAwareCumulativeTradingEnv(
        df=df_ticker,
        feature_cols=feature_cols,
        episode_length=episode_length,
        transaction_cost=TRANSACTION_COST,
        window_length=window_length,
    )

In [4]:
class TransformerExtractor(BaseFeaturesExtractor):
    """Transformer-encoder feature extractor for windowed observations.

    Each observation is treated as `window_length` timesteps of `n_features`
    values. Every timestep is linearly embedded to `d_model` dimensions, the
    sequence is passed through an `nn.TransformerEncoder`, and the encoding of
    the last timestep is returned as the feature vector.

    No positional encoding is added to the embedded sequence.

    Args:
        observation_space: Observation space handed to `BaseFeaturesExtractor`.
        window_length: Number of timesteps per observation.
        n_features: Number of values per timestep.
        d_model: Embedding width; also reported as `features_dim`.
        nhead: Attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, observation_space, window_length, n_features, d_model=32, nhead=1, num_layers=1):
        super().__init__(observation_space, features_dim=d_model)
        self.window_length = window_length
        self.n_features = n_features
        self.embedding = nn.Linear(n_features, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

    def forward(self, obs):
        """Encode a batch of observations.

        Args:
            obs: Tensor whose first dimension is the batch and whose remaining
                elements total `window_length * n_features` per sample.

        Returns:
            Tensor of shape (batch, d_model): the encoder output at the last
            timestep of each window.
        """
        batch = obs.shape[0]
        # reshape (rather than view) also accepts non-contiguous inputs, which
        # view rejects with a RuntimeError.
        x = obs.reshape(batch, self.window_length, self.n_features)
        x = self.embedding(x)      # (batch, window_length, d_model)
        # The encoder layer is built without batch_first, so it expects
        # sequence-first input.
        x = x.permute(1, 0, 2)     # (window_length, batch, d_model)
        x = self.transformer(x)    # (window_length, batch, d_model)
        # Use last token as pooled output
        return x[-1]               # (batch, d_model)

In [5]:
class TransformerPolicy(ActorCriticPolicy):
    """Actor-critic policy that uses `TransformerExtractor` for features.

    Keyword Args:
        nhead: Attention heads per encoder layer.
        num_layers: Number of encoder layers.
        window_length: Timesteps per observation window.
        n_features: Values per timestep.
        d_model: Embedding width of the extractor (defaults to 32).
        features_extractor_kwargs: Optional dict; its entries override the
            values derived from the keywords above.

    All other positional and keyword arguments are forwarded unchanged to
    `ActorCriticPolicy`.
    """

    def __init__(self, *args, nhead=1, num_layers=1, window_length=10, n_features=2, d_model=32, **kwargs):
        extractor_kwargs = {
            'window_length': window_length,
            'n_features': n_features,
            'd_model': d_model,
            'nhead': nhead,
            'num_layers': num_layers,
        }
        # Pull any caller-supplied extractor kwargs out of **kwargs so they do
        # not collide with the explicit keyword below; merge them instead.
        extractor_kwargs.update(kwargs.pop('features_extractor_kwargs', None) or {})
        super().__init__(
            *args,
            features_extractor_class=TransformerExtractor,
            features_extractor_kwargs=extractor_kwargs,
            **kwargs
        )

# Unit tests:
1. Output Shapes
2. Window Consistency (Padding at Episode Start)
3. Step Through Environment
4. SB3 Policy Compatibility
5. Transformer Policy Compatibility
6. Action Space and Reward Consistency
7. Episode Generator
8. Learning Ability (Agent Is Able to Learn)

In [6]:
# Test 1: Output Shapes

# Request the same window twice: once as a 2D array, once flattened.
df = ohlcv_df.copy()
feature_cols = FEATURE_COLS
shape_test_kwargs = {
    "feature_cols": feature_cols,
    "episode_length": 30,
    "window_length": 5,
}

env = BaseSequenceAwareTradingEnv(df, return_sequences=True, **shape_test_kwargs)
obs, _ = env.reset()
print("2D window shape:", obs.shape)  # Expect (5, obs_dim)

env_flat = BaseSequenceAwareTradingEnv(df, return_sequences=False, **shape_test_kwargs)
obs_flat, _ = env_flat.reset()
print("Flat window shape:", obs_flat.shape)  # Expect (5*obs_dim,)


2D window shape: (5, 25)
Flat window shape: (125,)


In [7]:
# Test 2: Window consistency
# Right after reset, the first two rows of the window must be identical
# (start-of-episode padding), and the window must have the expected shape.
env = BaseSequenceAwareTradingEnv(
    df,
    feature_cols=feature_cols,
    episode_length=10,
    window_length=5,
    return_sequences=True,
)
obs, _ = env.reset()

first_row, second_row = obs[0], obs[1]
assert np.allclose(first_row, second_row), "Padding at start should repeat first row"

# Row width = external feature columns + the env's internal features.
expected_shape = (5, len(feature_cols) + len(env.internal_features))
assert obs.shape == expected_shape
print("Padding and shape OK")


Padding and shape OK


In [8]:
# Test 3: Step Through Environment
# Random-action rollout that prints the observation shape and reward per step.

env = BaseSequenceAwareTradingEnv(
    df, feature_cols=feature_cols, episode_length=10, window_length=3, return_sequences=True
)
obs, _ = env.reset()
for i in range(8):
    action = env.action_space.sample()
    obs, reward, done, trunc, info = env.step(action)
    print(f"Step {i} | Obs shape: {obs.shape} | Reward: {reward:.5f}")
    # An episode is over on termination OR truncation; stop in either case
    # rather than stepping an environment that has already ended.
    if done or trunc:
        print("Episode done:", info)
        break

Step 0 | Obs shape: (3, 25) | Reward: -0.00000
Step 1 | Obs shape: (3, 25) | Reward: -0.00168
Step 2 | Obs shape: (3, 25) | Reward: 0.00280
Step 3 | Obs shape: (3, 25) | Reward: 0.00405
Step 4 | Obs shape: (3, 25) | Reward: -0.00380
Step 5 | Obs shape: (3, 25) | Reward: 0.00035
Step 6 | Obs shape: (3, 25) | Reward: 0.00595
Step 7 | Obs shape: (3, 25) | Reward: 0.00453


In [9]:
# Test 4: SB3 Policy Compatibility
# Fit a stock PPO MlpPolicy for a handful of steps on the flat-observation
# env (return_sequences=False) to confirm the env plugs into SB3.

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

env = BaseSequenceAwareTradingEnv(
    df,
    feature_cols=feature_cols,
    episode_length=30,
    window_length=5,
    return_sequences=False,
)

# DummyVecEnv takes a list of zero-argument factories; hand it the env built above.
env_factories = [lambda: env]
vec_env = DummyVecEnv(env_factories)

model = PPO("MlpPolicy", vec_env, n_steps=8, batch_size=4, verbose=0)
model.learn(total_timesteps=40)
print("SB3 PPO MLP works!")

Using cpu device
---------------------------
| time/              |    |
|    fps             | 25 |
|    iterations      | 1  |
|    time_elapsed    | 0  |
|    total_timesteps | 8  |
---------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 18          |
|    iterations           | 2           |
|    time_elapsed         | 0           |
|    total_timesteps      | 16          |
| train/                  |             |
|    approx_kl            | 0.009850413 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.1        |
|    explained_variance   | -33.8       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00883    |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0274     |
|    value_loss           | 0.0939      |
-----------------------------------------
-------------------------------

In [10]:
# Test 5: Transformer Policy Compatibility
# Run one forward pass of the custom extractor on a random flat batch to
# catch shape errors. The extractor receives (batch, window_length * n_features)
# and reshapes internally.

t5_batch, t5_window, t5_features = 2, 5, 8
t5_flat_dim = t5_window * t5_features

obs = np.random.randn(t5_batch, t5_flat_dim).astype(np.float32)
t5_space = gym.spaces.Box(-np.inf, np.inf, shape=(t5_flat_dim,), dtype=np.float32)
extractor = TransformerExtractor(t5_space, t5_window, t5_features)

# Inference only: no gradients needed for a shape check.
with torch.no_grad():
    torch_out = extractor(torch.from_numpy(obs))
print("Transformer output shape:", torch_out.shape)


Transformer output shape: torch.Size([2, 32])


In [11]:
# Test 6: Action Space and Reward Consistency
# Mini-episode to check action output and cumulative reward:

env = BaseSequenceAwareTradingEnv(
    df, feature_cols=feature_cols, episode_length=10, window_length=5, return_sequences=False
)
obs, _ = env.reset()
cumulative = 0
for _ in range(10):
    action = env.action_space.sample()
    obs, reward, done, trunc, info = env.step(action)
    cumulative += reward
    # The episode ends on termination OR truncation; report and stop in
    # either case instead of only when `done` is set.
    if done or trunc:
        print("Episode finished | Cumulative reward:", cumulative)
        print("Info dict:", info)
        break

Episode finished | Cumulative reward: 0.013414064869331254
Info dict: {'episode_sharpe': 0.07957961511099951, 'episode_sortino': 0.21686155982717858, 'episode_total_reward': 0.013414064869331254, 'cumulative_return': 0.013552485095466027, 'calmar': 2.277073226063145, 'max_drawdown': 0.0059517124615693865, 'win_rate': 1.0, 'alpha': -0.014927406833922907, 'returns': array([-0.00067249, -0.00168237,  0.00279924, -0.0041545 ,  0.00380186,
        0.00044898, -0.00595171,  0.00463172,  0.01439334]), 'downside': array([-0.00067249, -0.00168237, -0.0041545 , -0.00595171])}


In [12]:
# Test 7: Episode Generator
# Two independently constructed envs (no explicit seed is passed to either)
# must generate identical episode lists.

def _env_with_sequences():
    """Build an env with the test's fixed settings and generate its episode list."""
    fresh_env = BaseSequenceAwareTradingEnv(
        df, feature_cols=feature_cols, episode_length=10, window_length=5
    )
    return fresh_env, fresh_env.generate_episode_sequences(train_steps=1000)


env, seq1 = _env_with_sequences()
env2, seq2 = _env_with_sequences()
assert seq1 == seq2, "Episode sequences should be the same for same seed!"
print("Episode generator determinism OK")

Episode generator determinism OK


In [14]:
from src.env.realistic_synthetic_environment import realistic_synthetic_market_sample

In [15]:
# Build the synthetic market sample and preview its first rows
# (the bare last expression is rendered as the cell output).
synth_df = realistic_synthetic_market_sample()
synth_df.head()

Unnamed: 0,id,symbol,timestamp,date,open,high,low,close,volume,trade_count,...,vwap_change,trade_count_change,sector_id,industry_id,return_1d,vix,vix_norm,sp500,sp500_norm,market_return_1d
0,1,SYNTH,2022-01-01 05:00:00,2022-01-01,99.976862,100.234115,99.715568,99.878243,5281382,36083,...,0.000148,1.235928,8,51,0.001151,19.692173,-0.061565,4003.468478,0.086712,0.000596
1,2,SYNTH,2022-01-02 05:00:00,2022-01-02,100.015923,100.357089,99.515679,99.975101,3120798,80679,...,0.000148,1.235928,8,51,0.001909,19.195666,-0.160867,3985.664778,-0.358381,0.002774
2,3,SYNTH,2022-01-03 05:00:00,2022-01-03,100.051753,100.282568,99.978618,100.157867,6864185,52247,...,0.001518,-0.352409,8,51,0.001137,25.623677,1.124735,3989.162494,-0.270938,0.000185
3,4,SYNTH,2022-01-04 05:00:00,2022-01-04,100.108377,100.37218,99.794455,100.034944,4437793,54864,...,-0.000402,0.050089,8,51,-0.005435,17.953861,-0.409228,3999.072918,-0.023177,-0.003489
4,5,SYNTH,2022-01-05 05:00:00,2022-01-05,100.125001,100.469729,99.783784,100.230749,5463192,43558,...,0.000748,-0.206073,8,51,0.00371,19.772259,-0.045548,3998.807655,-0.029809,0.002478
