# 01 - Environment Reward System Ablation Test

1. Compute rolling predictability metrics for each ticker
2. Visualize and compare scores across universe and time
3. Select the top-N most “learnable” tickers for the RL agent
4. Document all decisions, assumptions, and open questions


In [2]:
import jupyter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, mannwhitneyu
from src.env.base_trading_env import CumulativeTradingEnv


from src.utils.system import boot
import os
import numpy as np
import pandas as pd

boot()
from tqdm import tqdm
from src.data.feature_pipeline import load_base_dataframe
from src.predictability.easiness import rolling_sharpe, rolling_r2, rolling_info_ratio, rolling_autocorr
from src.predictability.pipeline import generate_universe_easiness_report
from IPython import display

from stable_baselines3 import PPO
from src.experiments.experiment_tracker import ExperimentTracker  
from src.env.base_trading_env import (  
    BaseTradingEnv, SharpeTradingEnv, SortinoTradingEnv, AlphaTradingEnv,
    DrawdownTradingEnv, CumulativeTradingEnv, CalmarTradingEnv, HybridTradingEnv,BaselineTradingAgent
)

# ---- 1. Define features ----
# Feature names handed to the environment as market features.
MARKET_FEATURES = [
    "day_of_month",
    "day_of_week",
    "order_flow",
    "candle_body",
    "upper_shadow",
    "lower_shadow",
    "price_change",
    "candle_change",
    "order_flow_change",
    "overnight_price_change",
    "volume_change",
    "vwap_change",
    "trade_count_change",
    "return_1d",
    "vix_norm",
    "market_return_1d",
]
# Feature names describing the agent's own state (position, PnL, ...).
INTERNAL_FEATURES = [
    "position",
    "holding_period",
    "cumulative_reward",
    "pct_time",
    "drawdown",
    "rel_perf",
    "unrealized_pnl",
    "entry_price",
    "time_in_position",
]

# Every variant is a (name, market_features, internal_features) triple.
ablation_variants = [
    ("all_features", MARKET_FEATURES, INTERNAL_FEATURES),
    ("market_only", MARKET_FEATURES, []),
    ("internal_only", [], INTERNAL_FEATURES),
]
# Drop-one variants: all market features plus every internal feature but one.
ablation_variants.extend(
    (
        f"no_{dropped}",
        MARKET_FEATURES,
        [kept for kept in INTERNAL_FEATURES if kept != dropped],
    )
    for dropped in INTERNAL_FEATURES
)
EXPERIMENT_NAME = "walkforward_ablation_base_env_internal_features"
# Symbols filtered out of both the train and the test frames.
EXCLUDED_TICKERS = ['CEG', 'GEHC', 'GEV', 'KVUE', 'SOLV']

# --- Training budget ---
TOTAL_TIMESTEPS = 20000
EPISODE_LENGTH = 50
# One more episode than strictly fits in the timestep budget.
TOTAL_TRAIN_EPISODES = int(TOTAL_TIMESTEPS / EPISODE_LENGTH) + 1
TOTAL_TEST_EPISODES = 5

SEED = 314
TRANSACTION_COST = 0  # alternative value left commented out by the author: 0.0001

# --- Walk-forward Splits ---
# Each entry is (train_start, train_end, test_start, test_end).
walk_forward_splits = [
    ("2023-01-01", "2023-07-01", "2023-09-01", "2023-12-01"),
    ("2024-01-01", "2024-07-01", "2024-09-01", "2024-12-01"),
]

# --- Ablation Variants ---

# CONFIG records the full feature universe; this name was previously
# referenced without being defined, which raised NameError on a fresh kernel.
ALL_FEATURES = MARKET_FEATURES + INTERNAL_FEATURES

CONFIG = {
    "feature_cols": ALL_FEATURES,
    "total_train_episodes": TOTAL_TRAIN_EPISODES,
    "total_test_episodes": TOTAL_TEST_EPISODES,
    "episode_length": EPISODE_LENGTH,
    "seed": SEED,
    "transaction_cost": TRANSACTION_COST,
    "total_timesteps": TOTAL_TIMESTEPS,
    "agent": "PPO",
}

# LOAD OHLCV ==========================================
ohlcv_df = load_base_dataframe()
ohlcv_df.tail()
# Untouched copy of the loaded frame.
_ohlcv = ohlcv_df.copy()

# Drop-one ablations
# Only register a drop-one variant when no variant with that name exists yet,
# and use the same (name, market_features, internal_features) shape as the
# other entries. The unguarded loop appended every "no_<feature>" variant a
# second time, as 2-tuples, so each would have been trained and counted twice.
registered_names = {variant[0] for variant in ablation_variants}
for f in INTERNAL_FEATURES:
    variant_name = f"no_{f}"
    if variant_name in registered_names:
        continue
    registered_names.add(variant_name)
    ablation_variants.append(
        (variant_name, MARKET_FEATURES, [i for i in INTERNAL_FEATURES if i != f])
    )

# ---- 3. Walk-forward evaluation function ----
def _generate_episode_sequences(df, episode_length, n_episodes, excluded_tickers, seed=314):
    """Draw a reproducible list of ``(ticker, start_index)`` episode starts.

    For each of ``n_episodes`` draws, a ticker is picked at random from the
    symbols in ``df`` that are not excluded, then a start index is picked so
    that ``episode_length`` rows plus one fit after it. Draws that land on a
    ticker with too few rows are skipped, so the result can be shorter than
    ``n_episodes``.
    """
    rng = np.random.default_rng(seed)
    eligible_tickers = [t for t in df['symbol'].unique() if t not in excluded_tickers]
    sequences = []
    for _ in range(n_episodes):
        ticker = rng.choice(eligible_tickers)
        stock_df = df[df['symbol'] == ticker]
        max_start = len(stock_df) - episode_length - 1
        if max_start < 1:
            continue
        start_idx = rng.integers(0, max_start)
        sequences.append((ticker, int(start_idx)))
    return sequences


def _run_episodes(env, choose_action, n_episodes, agent_label):
    """Roll out ``n_episodes`` and return each episode's final ``info`` dict.

    ``choose_action`` maps an observation to an action. Every returned dict is
    a copy of the last ``info`` of its episode, tagged with ``agent_label``
    under the ``'agent'`` key.
    """
    episode_infos = []
    for _ in range(n_episodes):
        obs, _reset_info = env.reset()
        finished = False
        while not finished:
            obs, _reward, terminated, truncated, info = env.step(choose_action(obs))
            # An episode is over when it terminates OR is truncated; checking
            # only the first flag can spin forever on a time-limit ending.
            finished = terminated or truncated
        final_info = info.copy()
        final_info['agent'] = agent_label
        episode_infos.append(final_info)
    return episode_infos


def evaluate_feature_set(feature_set, config, walk_forward_splits, RL_ENV_CLASS, BaselineAgentClass, ohlcv_df, EXCLUDED_TICKERS, EPISODE_LENGTH, TOTAL_TRAIN_EPISODES, TOTAL_TEST_EPISODES, SEED):
    """Train PPO on each walk-forward split and compare it with a random agent.

    Args:
        feature_set: Feature names passed to the environment as ``feature_cols``.
        config: Mapping; only ``config['total_timesteps']`` is read here.
        walk_forward_splits: Iterable of
            ``(train_start, train_end, test_start, test_end)`` date bounds.
            Start bounds are inclusive, end bounds exclusive.
        RL_ENV_CLASS: Environment class, constructed as
            ``RL_ENV_CLASS(df, feature_cols=..., episode_length=..., seed=...)``
            and expected to provide ``set_episode_sequence``.
        BaselineAgentClass: Accepted for interface compatibility; not used.
        ohlcv_df: Dataframe with at least ``'date'`` and ``'symbol'`` columns.
        EXCLUDED_TICKERS: Symbols removed from both train and test data.
        EPISODE_LENGTH: Episode length; also used as PPO's ``n_steps``.
        TOTAL_TRAIN_EPISODES: Number of training episode starts to draw.
        TOTAL_TEST_EPISODES: Number of test episode starts to draw.
        SEED: Seed for the environments, PPO and the random baseline.

    Returns:
        pandas.DataFrame with one row per evaluated episode (the environment's
        final ``info`` dict) plus ``'agent'`` (``"RL"`` or ``"Random"``) and
        ``'split'`` (``"<test_start>--<test_end>"``) columns. Empty when
        ``walk_forward_splits`` is empty.

    NOTE(review): the recorded run of this notebook failed with a KeyError
    listing the position/PnL-style feature names as "not in index". Confirm
    how RL_ENV_CLASS expects features it computes itself to be requested;
    they do not appear to be columns of ``ohlcv_df``.
    """
    all_rows = []
    for train_start, train_end, test_start, test_end in walk_forward_splits:
        # Data splits
        tradable = ~ohlcv_df['symbol'].isin(EXCLUDED_TICKERS)
        in_train = (ohlcv_df['date'] >= train_start) & (ohlcv_df['date'] < train_end)
        in_test = (ohlcv_df['date'] >= test_start) & (ohlcv_df['date'] < test_end)
        df_train = ohlcv_df[in_train & tradable].reset_index(drop=True)
        df_test = ohlcv_df[in_test & tradable].reset_index(drop=True)

        # Seed the episode draws from the split itself so that every feature
        # set is evaluated on the same episodes within a split.
        split_seed = int(pd.Timestamp(test_start).timestamp())
        train_seq = _generate_episode_sequences(df_train, EPISODE_LENGTH, TOTAL_TRAIN_EPISODES, EXCLUDED_TICKERS, seed=split_seed)
        test_seq = _generate_episode_sequences(df_test, EPISODE_LENGTH, TOTAL_TEST_EPISODES, EXCLUDED_TICKERS, seed=split_seed + 1)

        # RL Agent Training (PPO) -- instantiate env with current features
        train_env = RL_ENV_CLASS(df_train, feature_cols=feature_set, episode_length=EPISODE_LENGTH, seed=SEED)
        train_env.set_episode_sequence(train_seq)
        test_env = RL_ENV_CLASS(df_test, feature_cols=feature_set, episode_length=EPISODE_LENGTH, seed=SEED)
        test_env.set_episode_sequence(test_seq)

        # Train PPO agent (imported lazily so the module loads without SB3)
        from stable_baselines3 import PPO
        agent = PPO("MlpPolicy", train_env, verbose=0, n_steps=EPISODE_LENGTH, seed=SEED)
        agent.learn(total_timesteps=config['total_timesteps'])

        # Evaluate PPO agent
        rl_metrics = _run_episodes(
            test_env,
            lambda obs: agent.predict(obs, deterministic=True)[0],
            len(test_seq),
            "RL",
        )

        # Evaluate random agent on the same test episodes.
        # Assumes set_episode_sequence restarts the env's iteration over the
        # sequence -- TODO confirm against the env implementation.
        test_env.set_episode_sequence(test_seq)
        # Seed the sampler so the random baseline is reproducible across runs.
        test_env.action_space.seed(SEED)
        random_metrics = _run_episodes(
            test_env,
            lambda obs: test_env.action_space.sample(),
            len(test_seq),
            "Random",
        )

        # Store all results
        split_label = f"{test_start}--{test_end}"
        for row in rl_metrics + random_metrics:
            row.update({"split": split_label})
            all_rows.append(row)
    return pd.DataFrame(all_rows)

# ---- 4. Run ablation study ----
# A variant is either (name, features) or (name, market_features,
# internal_features). Star-unpacking accepts both shapes; the feature groups
# are flattened into the single list that evaluate_feature_set takes.
# (Unpacking into exactly two names fails on the 3-tuple variants.)
results_table = []
seen_ablations = set()
for ablation_name, *feature_groups in ablation_variants:
    if ablation_name in seen_ablations:
        # A variant registered twice would be trained twice and its episodes
        # double-counted in the per-ablation statistics.
        continue
    seen_ablations.add(ablation_name)
    feature_set = [feature for group in feature_groups for feature in group]

    print(f"\n==== Ablation: {ablation_name} ====")
    ablation_df = evaluate_feature_set(
        feature_set=feature_set,
        config=CONFIG,
        walk_forward_splits=walk_forward_splits,
        RL_ENV_CLASS=CumulativeTradingEnv,  # Use the chosen reward/env for now
        BaselineAgentClass=None,  # If needed for more baselines
        ohlcv_df=ohlcv_df,
        EXCLUDED_TICKERS=EXCLUDED_TICKERS,
        EPISODE_LENGTH=EPISODE_LENGTH,
        TOTAL_TRAIN_EPISODES=TOTAL_TRAIN_EPISODES,
        TOTAL_TEST_EPISODES=TOTAL_TEST_EPISODES,
        SEED=SEED,
    )
    ablation_df["ablation"] = ablation_name
    results_table.append(ablation_df)

# ---- 5. Combine all results
feature_ablation_df = pd.concat(results_table, ignore_index=True)

# ---- 6. Analyze and visualize ----
# One summary row per ablation: mean episode Sharpe of the RL and the random
# agent, plus p-values from Welch's t-test and a one-sided Mann-Whitney U test.
summary = []
for ablation, group in feature_ablation_df.groupby("ablation"):
    sharpe_by_agent = {
        label: group.loc[group['agent'] == label, 'episode_sharpe'].dropna()
        for label in ('RL', 'Random')
    }
    rl_sharpes = sharpe_by_agent['RL']
    random_sharpes = sharpe_by_agent['Random']

    if len(rl_sharpes) > 0:
        mean_rl = rl_sharpes.mean()
    else:
        mean_rl = np.nan
    if len(random_sharpes) > 0:
        mean_random = random_sharpes.mean()
    else:
        mean_random = np.nan

    # Both tests need at least two observations per group; otherwise the
    # p-values stay NaN.
    t_p, u_p = np.nan, np.nan
    if min(len(rl_sharpes), len(random_sharpes)) > 1:
        t_p = ttest_ind(rl_sharpes, random_sharpes, equal_var=False)[1]
        u_p = mannwhitneyu(rl_sharpes, random_sharpes, alternative='greater')[1]

    summary.append({
        "ablation": ablation,
        "mean_sharpe_rl": mean_rl,
        "mean_sharpe_random": mean_random,
        "t_pvalue": t_p,
        "u_pvalue": u_p,
        "RL>Random": mean_rl > mean_random,
    })

# Best-performing feature sets first.
summary_df = pd.DataFrame(summary).sort_values(by="mean_sharpe_rl", ascending=False)

# ---- 7. Visualization ----
# Horizontal bars: one per ablation, length = mean RL test Sharpe.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(x='mean_sharpe_rl', y='ablation', data=summary_df, ax=ax)
ax.set_title("Feature Ablation: Mean RL Sharpe by Feature Set")
ax.set_xlabel("Mean Test Sharpe (RL Agent)")
ax.set_ylabel("Ablation (Feature Set)")
fig.tight_layout()
plt.show()

# Same numbers as a markdown table, three significant digits.
print("\n### Feature Ablation Summary Table:")
print(summary_df.to_markdown(index=False, floatfmt=".3g"))



==== Ablation: all_features ====


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=50 and n_envs=1)


KeyError: "['position', 'holding_period', 'cumulative_reward', 'pct_time', 'drawdown', 'rel_perf', 'unrealized_pnl', 'entry_price', 'time_in_position'] not in index"