In [2]:
# SETUP: Imports & Paths ===========================
import jupyter
from src.utils.system import boot, Notify

boot()
import os
import joblib
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


from tqdm import tqdm

from src.data.feature_pipeline import basic_chart_features,load_base_dataframe
from src.predictability.easiness import rolling_sharpe, rolling_r2, rolling_info_ratio, rolling_autocorr
from src.predictability.pipeline import generate_universe_easiness_report
from IPython import display

from src.experiments.experiment_tracker import ExperimentTracker
from src.config import TOP2_STOCK_BY_SECTOR


from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from scipy.stats import skew, kurtosis, entropy
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import acf, acovf
from src.defaults import TOP2_STOCK_BY_SECTOR


from tqdm import tqdm
from sklearn.preprocessing import  RobustScaler
from IPython.display import display

# FRAMEWORK STUFF =========================

from src.data.feature_pipeline import load_base_dataframe
from src.experiments.experiment_tracker import ExperimentTracker
from src.env.base_trading_env import CumulativeTradingEnv,AlphaTradingEnv
from src.env.base_timeseries_trading_env import SequenceAwareAlphaTradingEnv,SequenceAwareCumulativeTradingEnv
from src.agent.base_model import TransformerPpo
from src.defaults import RANDOM_SEEDS
from src.utils.db import ConfigurableMixin

import warnings
warnings.filterwarnings("ignore")


In [3]:

# Experiment identity and the feature columns handed to the trading environment.
EXPERIENCE_NAME = "episode_learning_transferability"
FEATURE_COLS = ["return_1d", "volume", "vix"]

# Seeds and episode geometry; the observation window spans two episode lengths.
SEEDS = RANDOM_SEEDS
EPISODE_LENGTH = 21
WINDOW_LENGTH = 2 * EPISODE_LENGTH
N_UPDATES = 10

# Tickers left out of the study, kept in sorted order.
excluded_tickers = sorted(['CEG', 'GEHC', 'GEV', 'KVUE', 'SOLV'])

# `config` and `run_settings` are both stored verbatim in every result row,
# so the key order below is kept stable.
config = dict(
    excluded_tickers=excluded_tickers,
    episode_length=EPISODE_LENGTH,
    window_length=WINDOW_LENGTH,
    n_updates=N_UPDATES,
    agent="TransformerPpoAgent",
    environment="SequenceAwareCumulativeTradingEnv",
)
run_settings = dict(
    excluded_tickers=excluded_tickers,
    start_date='2022-01-01',
    end_date="2026-01-01",
    episode_length=EPISODE_LENGTH,
    window_length=WINDOW_LENGTH,
    n_updates=N_UPDATES,
)

# Config section


In [None]:
# LOAD OHLCV ==========================================


# Load the base frame and make sure `date` is a real datetime column.
ohlcv_df = load_base_dataframe()
ohlcv_df['date'] = pd.to_datetime(ohlcv_df['date'])
# Keep only the half-open study window [start_date, end_date).
# The explicit .copy() matters: the column assignments below must write to an
# independent frame rather than to a slice of the loaded one (the resulting
# SettingWithCopyWarning is otherwise hidden by warnings.filterwarnings("ignore")).
ohlcv_df = ohlcv_df[
    (ohlcv_df['date'] >= run_settings["start_date"])
    & (ohlcv_df['date'] < run_settings["end_date"])
].copy()
ohlcv_df['month'] = ohlcv_df['date'].dt.to_period('M')
# Neutral fill values so downstream code never sees NaN in these columns.
ohlcv_df['return_1d'] = ohlcv_df['return_1d'].fillna(0)
ohlcv_df['sector_id'] = ohlcv_df['sector_id'].fillna('unknown')
ohlcv_df['industry_id'] = ohlcv_df['industry_id'].fillna('unknown')

In [None]:
# SETUP ===================================
import jupyter
import warnings

from src.utils.system import boot, Notify



In [None]:
# Do NOT reload `ohlcv_df` here: the LOAD OHLCV cell already produced the frame
# restricted to run_settings["start_date"]/["end_date"] with NaNs filled, and a
# fresh load_base_dataframe() would silently discard that preprocessing while
# `run_settings` is still recorded in every result row.
notification = Notify(EXPERIENCE_NAME)

In [None]:
import torch
import torch.nn as nn
import numpy as np
import gymnasium as gym
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.policies import ActorCriticPolicy
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback
from sb3_contrib.common.recurrent.policies import RecurrentActorCriticPolicy

import torch
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

def generate_causal_mask(seq_len):
    """Return a (seq_len, seq_len) boolean mask that is True strictly above the diagonal.

    Entry [i, j] is True exactly when j > i, i.e. the positions a causal
    attention step at index i must not look at.
    """
    steps = torch.arange(seq_len)
    # Column index compared against row index: True where the column is in the future.
    return steps.unsqueeze(0) > steps.unsqueeze(1)

def sinusoidal_positional_encoding(seq_len, d_model, device):
    """Returns a (seq_len, d_model) matrix with classic Transformer sin-cos encoding.

    Even columns hold sin(position * freq) and odd columns hold
    cos(position * freq), with one frequency per column pair.

    Args:
        seq_len: number of positions (rows).
        d_model: encoding width (columns); may be even or odd.
        device: torch device the result is allocated on.

    Returns:
        A float tensor of shape (seq_len, d_model) on ``device``.
    """
    pe = torch.zeros(seq_len, d_model, device=device)
    position = torch.arange(0, seq_len, dtype=torch.float, device=device).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, d_model, 2, device=device).float() * (-torch.log(torch.tensor(10000.0)) / d_model))
    angles = position * div_term
    pe[:, 0::2] = torch.sin(angles)
    # With an odd d_model there is one fewer odd column than even columns, so
    # the cosine half uses only the first d_model // 2 frequencies; without the
    # slice the assignment fails with a shape mismatch.
    pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
    return pe

class TransformerFeatureExtractor(BaseFeaturesExtractor):
    """Feature extractor that encodes an observation sequence with a causal Transformer.

    The last axis of the observation space is projected to ``d_model``,
    sinusoidal positions are added, the sequence goes through a
    ``TransformerEncoder`` under a causal mask, and the encoding of the final
    step is returned as the feature vector (``features_dim == d_model``).
    """

    def __init__(self, observation_space, d_model=64, n_heads=4, n_layers=2):
        super().__init__(observation_space, features_dim=d_model)
        self.d_model = d_model

        # Per-step linear embedding of the raw features.
        n_input_features = observation_space.shape[-1]
        self.input_proj = nn.Linear(n_input_features, d_model)

        # Stack of identical encoder layers operating on (batch, seq, feature).
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=d_model, nhead=n_heads, batch_first=True),
            num_layers=n_layers,
        )

    def forward(self, obs):
        """Encode ``obs`` of shape (batch, seq_len, input_dim) into (batch, d_model)."""
        tokens = self.input_proj(obs)
        n_steps, dev = tokens.size(1), tokens.device

        # Broadcast the (seq_len, d_model) position table over the batch axis.
        positions = sinusoidal_positional_encoding(n_steps, self.d_model, dev)
        tokens = tokens + positions.unsqueeze(0)

        attn_mask = generate_causal_mask(n_steps).to(dev)
        encoded = self.transformer(tokens, mask=attn_mask)

        # The representation of the last step is used as the pooled output.
        return encoded[:, -1]

# Transformer Policy ===================================
class TransformerPolicy(RecurrentActorCriticPolicy):
    """``RecurrentActorCriticPolicy`` wired to ``TransformerFeatureExtractor``.

    All positional and keyword arguments are forwarded to the parent policy;
    the features-extractor class and its hyper-parameters are fixed here.
    """

    def __init__(self, *args, **kwargs):
        extractor_settings = {"d_model": 64, "n_heads": 4, "n_layers": 2}
        super().__init__(
            *args,
            **kwargs,
            features_extractor_class=TransformerFeatureExtractor,
            features_extractor_kwargs=extractor_settings,
        )

# Regime Augmentation Wrapper ===========================
class RegimeAugmentingWrapper(gym.ObservationWrapper):
    """Append a one-hot market-regime vector to every row of a 2-D observation.

    The wrapped environment must expose ``get_current_regime()`` returning an
    index usable to address a vector of length ``regime_dim``. The observation
    space is widened on its last axis by ``regime_dim`` columns and declared
    as float32; returned observations are cast to that declared dtype.
    """

    def __init__(self, env):
        super().__init__(env)
        self.regime_dim = 3  # One-hot: bull, bear, sideways
        obs_shape = self.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(obs_shape[0], obs_shape[1] + self.regime_dim),
            dtype=np.float32
        )

    def observation(self, obs):
        """Return ``obs`` with the current regime one-hot appended to each row."""
        regime = self.env.get_current_regime()  # 0,1,2 -> bull,bear,sideways
        out_dtype = self.observation_space.dtype
        # Build the one-hot in the declared dtype: a default float64 vector
        # would silently promote the concatenated observation to float64 and
        # contradict the float32 Box declared in __init__.
        one_hot = np.zeros(self.regime_dim, dtype=out_dtype)
        one_hot[regime] = 1.0
        one_hot = np.repeat(one_hot[None, :], obs.shape[0], axis=0)
        return np.concatenate([obs, one_hot], axis=-1).astype(out_dtype, copy=False)

In [None]:
import numpy as np
import time
from datetime import datetime
import os
from scipy.stats import ttest_ind, mannwhitneyu
import json
import hashlib
def encode_hash(model_id, environment_id, train_episode_id, test_episode_id, n_updates):
    """Return the SHA-256 hex digest identifying one train/test study.

    The five identifiers are serialised as JSON with sorted keys, so the digest
    depends only on their values.
    """
    identity = dict(
        n_updates=n_updates,
        model_id=model_id,
        environment_id=environment_id,
        train_episode_id=train_episode_id,
        test_episode_id=test_episode_id,
    )
    canonical = json.dumps(identity, sort_keys=True)
    digest = hashlib.sha256()
    digest.update(canonical.encode())
    return digest.hexdigest()
                         

def _rollout_total_reward(env, policy):
    """Play one episode on ``env`` and return the summed reward.

    ``policy(obs, state, episode_start)`` must return ``(action, new_state)``.
    The episode ends when the environment reports either termination or
    truncation.
    """
    obs, _ = env.reset()
    state = None
    episode_start = np.ones((1,), dtype=bool)
    rewards = []
    finished = False
    while not finished:
        action, state = policy(obs, state, episode_start)
        obs, reward, terminated, truncated, _ = env.step(action)
        rewards.append(reward)
        # Honour both end-of-episode flags; looping on `terminated` alone never
        # stops for an environment that ends episodes through truncation.
        finished = bool(terminated or truncated)
        episode_start = np.zeros((1,), dtype=bool)
    return np.sum(rewards)


def test_transferability(
    env_maker, agent_maker, df,
    episode_sequence_train, episode_sequence_test, feature_cols, episode_length,
    episode_id_train, episode_id_test,
    n_updates=8, window_length=10, random_seeds=(7, 314),
    verbose=True, alpha=0.05
):
    """
    Train agent on episode A, test on episode B, once per seed.
    Compares performance to a random policy on episode B.

    Results are appended to ``EXPERIENCE_NAME + '.csv'``. If a row with the same
    study hash (model id, environment id, episode ids, ``n_updates``) is already
    present, nothing is run and ``([], [], 0, 0, {})`` is returned.

    Args:
        env_maker: callable ``seed -> environment``.
        agent_maker: callable ``(environment, seed) -> model``; the object it
            returns must expose ``.agent`` and ``.db_id``.
        df: unused; kept so existing callers keep working.
        episode_sequence_train: episode sequence set on the training env.
        episode_sequence_test: episode sequence set on both evaluation envs.
        feature_cols: unused; the stored value is read from the training env.
        episode_length: steps per episode; training runs for
            ``episode_length * n_updates`` timesteps.
        episode_id_train, episode_id_test: ids recorded and hashed.
        n_updates: multiplier for the training budget.
        window_length: recorded in the stored result row.
        random_seeds: non-empty sequence of seeds, one independent run each.
        verbose: print one line per run.
        alpha: significance level for the outcome messages and the
            "favorable episode" notification.

    Returns:
        ``(agent_rewards, random_rewards, t_pval, mw_pval, study_result)``.

    Raises:
        ValueError: if ``random_seeds`` is empty.

    Notes:
        ``model.agent`` is assumed to follow the Stable-Baselines3 ``predict``
        API (``state`` / ``episode_start`` keywords) - TODO confirm for agents
        that are not SB3 models.
    """
    random_seeds = list(random_seeds)
    if not random_seeds:
        raise ValueError("random_seeds must contain at least one seed")

    total_timesteps = episode_length * n_updates

    # Throw-away instances: only their database ids are needed for the hash.
    probe_env = env_maker(1)
    probe_agent = agent_maker(probe_env, 1)
    run_hash = encode_hash(
        probe_agent.db_id, probe_env.db_id, episode_id_train, episode_id_test, n_updates
    )

    # Previously stored studies live in their own frame so the `df` argument is
    # never shadowed.
    csv_path = EXPERIENCE_NAME + '.csv'
    results_df = pd.read_csv(csv_path) if os.path.exists(csv_path) else pd.DataFrame()
    already_done = (
        not results_df.empty
        and 'hash' in results_df.columns
        and (results_df['hash'] == run_hash).any()
    )
    if already_done:
        return [], [], 0, 0, {}

    agent_rewards_B, random_rewards_B = [], []
    for run, seed in enumerate(random_seeds):
        np.random.seed(seed)

        # --- Train agent on Episode A ---
        envA = env_maker(seed)
        envA.set_episode_sequence(episode_sequence_train)
        model = agent_maker(envA, seed)
        agent = model.agent
        agent.learn(total_timesteps=total_timesteps)

        # --- Test trained agent on Episode B ---
        # The recurrent state is threaded through successive predict() calls;
        # without it the policy restarts from an empty memory on every step.
        envB = env_maker(seed)
        envB.set_episode_sequence(episode_sequence_test)
        agent_rewards_B.append(_rollout_total_reward(
            envB,
            lambda obs, state, start: agent.predict(obs, state=state, episode_start=start),
        ))

        # --- Random policy on Episode B (for reference) ---
        envB_rand = env_maker(seed)
        envB_rand.set_episode_sequence(episode_sequence_test)
        # np.random.seed() does not seed the action space's own generator, so
        # seed it explicitly to make the baseline reproducible.
        envB_rand.action_space.seed(int(seed))
        random_rewards_B.append(_rollout_total_reward(
            envB_rand,
            lambda obs, state, start: (envB_rand.action_space.sample(), None),
        ))

        if verbose:
            print(f"[Run {run+1}/{len(random_seeds)}] Agent (on B): {agent_rewards_B[-1]:.4f}, Random (on B): {random_rewards_B[-1]:.4f}")

    # --- Statistical testing on Episode B ---
    agent_rewards_B = np.array(agent_rewards_B)
    random_rewards_B = np.array(random_rewards_B)

    # Welch t-test (two-sided) and one-sided Mann-Whitney U.
    t_stat, t_pval = ttest_ind(agent_rewards_B, random_rewards_B, equal_var=False)
    mw_stat, mw_pval = mannwhitneyu(agent_rewards_B, random_rewards_B, alternative='greater')

    print("\n==== Transferability Significance Results (Episode B) ====")
    print(f"Agent mean (B): {agent_rewards_B.mean():.4f} ± {agent_rewards_B.std():.4f}")
    print(f"Random mean (B): {random_rewards_B.mean():.4f} ± {random_rewards_B.std():.4f}")
    print(f"T-test p-value: {t_pval:.4g}")
    print(f"Mann-Whitney U p-value: {mw_pval:.4g}")

    # Report each test on its own line; a single shared variable would always
    # end up holding only the Mann-Whitney verdict.
    if t_pval < alpha:
        print("Transferable: Agent outperforms random with **statistical significance** (t-test)!")
    else:
        print("No significant transfer detected (t-test).")
    if mw_pval < alpha:
        print("Transferable: Agent outperforms random with **statistical significance** (Mann-Whitney U)!")
    else:
        print("No significant transfer detected (Mann-Whitney U).")

    # Calendar date of the row at index `window_length` of each episode frame
    # (taken from the envs of the last run).
    train_date = str(envA.episode_df.iloc[envA.window_length]['date'].date())
    test_date = str(envB.episode_df.iloc[envB.window_length]['date'].date())

    study_result = {
        "experience_name": EXPERIENCE_NAME,
        "config": config,
        "agent_id": model.db_id,
        "environment_id": envA.db_id,
        "train_date": train_date,
        "test_date": test_date,
        "run_settings": run_settings,
        "train_episode_id": episode_id_train,
        "test_episode_id": episode_id_test,
        "feature_cols": envA.feature_cols,
        # Record the values this call actually used, not the notebook globals.
        "episode_length": episode_length,
        "window_length": window_length,
        "random_seeds": random_seeds,

        # NOTE: despite the "*_mean" key names, these three are medians.
        'advantage': np.median(agent_rewards_B) - np.median(random_rewards_B),
        "agent_rewards_mean": np.median(agent_rewards_B),
        "agent_rewards_std": agent_rewards_B.std(),

        "random_rewards_mean": np.median(random_rewards_B),
        "random_rewards_std": random_rewards_B.std(),
        "total_timesteps": total_timesteps,
        "t_pval": float(t_pval),
        "mw_pval": float(mw_pval),

        "symbol": episode_sequence_train[0][0],
        "date": episode_sequence_train[0][1],
        "hash": run_hash,
        "agent_rewards": json.dumps(agent_rewards_B.tolist()),
        "random_rewards": json.dumps(random_rewards_B.tolist())
    }
    # Only notify when both tests are significant; the previous `< 1.05`
    # threshold is true for every valid p-value.
    if t_pval < alpha and mw_pval < alpha:
        notification.success(f"Favorable episode for {episode_sequence_train[0][0]} @ {train_date}")

    results_df = pd.concat([results_df, pd.DataFrame([study_result])], ignore_index=True)
    results_df.to_csv(csv_path, index=False)
    return agent_rewards_B, random_rewards_B, t_pval, mw_pval, study_result

In [None]:
def env_maker(seed):
    """Build a ``SequenceAwareCumulativeTradingEnv`` over ``ohlcv_df`` for ``seed``.

    Feature columns, episode length and window length come from the notebook
    configuration; transaction costs are set to zero.
    """
    return SequenceAwareCumulativeTradingEnv(
        ohlcv_df,
        feature_cols=FEATURE_COLS,
        episode_length=EPISODE_LENGTH,
        window_length=WINDOW_LENGTH,
        transaction_cost=0,
        seed=seed,
    )
    
def agent_maker(environment, seed):
    """Wrap ``RecurrentPPO`` + ``TransformerPolicy`` in a ``TransformerPpo`` for ``environment``.

    Both ``n_steps`` and ``batch_size`` are set to one episode length; the run
    configuration carries the seed and silences logging (``verbose=0``).
    """
    ppo_settings = dict(n_steps=EPISODE_LENGTH, batch_size=EPISODE_LENGTH)
    launch_settings = dict(seed=seed, verbose=0)
    return TransformerPpo(
        RecurrentPPO,
        TransformerPolicy,
        environment,
        model_config=ppo_settings,
        run_config=launch_settings,
    )

In [None]:
import pandas as pd
from datetime import datetime, timedelta

def month_ranges(start_date_str, as_of=None):
    """List consecutive ``[month_start, next_month_start]`` pairs as 'YYYY-MM-DD' strings.

    The first pair starts at the first day of the month containing
    ``start_date_str``. The last pair starts at the first month-begin strictly
    after the reference date, so the list runs one month past the reference
    month.

    Args:
        start_date_str: any value ``pd.Timestamp`` accepts.
        as_of: reference date replacing "today"; ``None`` (the default) uses
            the current date, as before. Pass an explicit value to get the
            same output regardless of when the notebook is executed.

    Returns:
        A list of two-element lists; empty when the start month lies beyond
        the last generated month.
    """
    month_step = pd.offsets.MonthBegin(1)
    start = pd.Timestamp(start_date_str).replace(day=1)

    reference = pd.Timestamp.today() if as_of is None else pd.Timestamp(as_of)
    reference = reference.replace(hour=0, minute=0, second=0, microsecond=0)
    # First month-begin strictly after the reference date, then one more.
    next_month = (reference + month_step).replace(day=1)
    final_month = (next_month + month_step).replace(day=1)

    ranges = []
    d = start
    while d < final_month:
        next_d = (d + month_step).replace(day=1)
        ranges.append([d.strftime('%Y-%m-%d'), next_d.strftime('%Y-%m-%d')])
        d = next_d
    return ranges




In [None]:
# Sweep every ticker over consecutive month pairs: train on the episode that
# starts in the first month of the pair, evaluate on the one starting in the
# second month. Start/finish notifications are sent once per ticker.
pairs = month_ranges('2024-01-01')
tickers = TOP2_STOCK_BY_SECTOR
for ticker in tickers:
    run_start_time = time.time()
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
    notification.info(f"Train start for {ticker} | {timestamp}")

    for p in pairs:
        print(' '.join(['train', ticker, p[0], p[1]]))
        # Environment used only to resolve episodes from start dates; env_maker
        # builds it with the same arguments as the study environments.
        train_env = env_maker(RANDOM_SEEDS[0])
        _, start_train, id_train = train_env.get_episode_by_start_date(ticker, p[0])
        _, start_test, id_test = train_env.get_episode_by_start_date(ticker, p[1])

        train_sequence = [[ticker, start_train]]
        test_sequence = [[ticker, start_test]]
        agent_rewards, random_rewards, t_pval, mw_pval, results = test_transferability(
            env_maker=env_maker,
            agent_maker=agent_maker,
            df=ohlcv_df,
            episode_sequence_train=train_sequence,
            episode_sequence_test=test_sequence,
            feature_cols=FEATURE_COLS,
            episode_length=EPISODE_LENGTH,
            episode_id_train=id_train,
            episode_id_test=id_test,
            n_updates=N_UPDATES,
            window_length=WINDOW_LENGTH,
            random_seeds=RANDOM_SEEDS,
        )

    # Wall-clock time spent on this ticker across all month pairs.
    run_end_time = time.time()
    elapsed = run_end_time - run_start_time
    elapsed_str = f"{elapsed:.1f}s"
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')

    notification.info(f"Train complete for {ticker} | {timestamp} | Exec time: {elapsed_str}")


In [None]:
# Reload every study row persisted so far and show the frame as the cell output.
results = pd.read_csv(f"{EXPERIENCE_NAME}.csv")
results

In [None]:
# NOTE(review): `xx` is not defined anywhere in the visible notebook, so
# evaluating it presumably raises NameError and stops this cell before the call
# below - it looks like a manual stop guard for "Run All"; confirm before
# removing it.
xx
# Ad-hoc single study. `train_sequence`, `test_sequence`, `id_train` and
# `id_test` are not defined in this cell: they are assigned inside the
# ticker/month sweep loop of an earlier cell, so this call uses whatever values
# the most recently executed iteration left behind. It also rebinds `results`,
# which the preceding cell assigned from the CSV.
agent_rewards, random_rewards, t_pval, mw_pval,results = test_transferability(
    env_maker,
    agent_maker,
    ohlcv_df,
    train_sequence,
    test_sequence,
    feature_cols=FEATURE_COLS,
    episode_length=EPISODE_LENGTH,
    random_seeds=RANDOM_SEEDS,
    n_updates=N_UPDATES,
    window_length=WINDOW_LENGTH,
    episode_id_train=id_train,
    episode_id_test=id_test
)