In [1]:
import jupyter

  from pandas.core import (


In [2]:
import os
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import gymnasium as gym

from tqdm import tqdm
from pathlib import Path
from datetime import datetime
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from scipy.stats import skew, kurtosis, entropy
from statsmodels.stats.diagnostic import acorr_ljungbox

from src.utils.system import boot
from src.defaults import RANDOM_SEEDS, TOP2_STOCK_BY_SECTOR
from tracker import OHLCV_DF, EpisodeTracker, EnvironmentTracker, AgentTracker



In [3]:
# Configuration ======================
# Symbols left out of the ticker universe (kept in sorted order).
excluded_tickers = sorted(("CEG", "GEHC", "GEV", "KVUE", "SOLV"))

# Regressor-related settings.
config = dict(
    regressor="RandomForestRegressor",
    n_estimators=300,
    random_state=314,
    transaction_cost=0,
)

# Settings for a single exploratory run in this notebook.
run_settings = dict(
    excluded_tickers=excluded_tickers,
    min_samples=10,
    cv_folds=3,
    lags=5,
    start_date="2023-12-31",
    end_date="2025-05-01",
    seed=314,
    episode_length=50,
    noise_feature_cols=["return_1d", "volume"],
    train_steps=50_000,
    lookback=0,
)

# System Boot =======================
# `boot` is imported from src.utils.system; whatever it returns is kept as DEVICE.
# NOTE(review): presumably this selects the compute device and/or seeds RNGs —
# confirm against src.utils.system (EpisodeBenchmark.run calls it as boot(seed)).
DEVICE = boot()



In [4]:
# Load and Prepare Data ================
# Work on a private copy so the shared OHLCV_DF is never touched by this notebook.
ohlcv_df = OHLCV_DF.copy()

# Filter Tickers ======================
# Every symbol present in the data, minus the excluded ones.
tickers = ohlcv_df["symbol"].unique()
tickers = tickers[np.isin(tickers, excluded_tickers, invert=True)]
# Debug override: the filtered universe above is discarded and only AAPL is used.
tickers = ["AAPL"]
# tickers = TOP2_STOCK_BY_SECTOR

# Trackers used to look up episodes, environments and agents.
ep_tracker, env_tracker, agent_tracker = (
    EpisodeTracker(),
    EnvironmentTracker(),
    AgentTracker(),
)

In [23]:
# Look up the episode for AAPL around 2025-01-01.
# NOTE(review): the arguments are positional here, while EpisodeBenchmark.run passes
# episode_length=..., lookback=..., mode="both" by keyword — confirm that 120 and
# "both" land on the intended parameters of findEpisode.
ep = ep_tracker.findEpisode("2025-01-01", "AAPL", 120, "both")
train_df = ep["train"]["df"]

# Environment anchored at the first row of the training episode.
env = env_tracker.findEnvironment(
    "v4",
    {
        "lookback": 0,
        # "scaling_strategy": "power",
        "n_timesteps": 120,
        "market_features": ["close", "price_change", "volume_change"],
    },
    ticker="AAPL",
    start_idx=ep["train"]["df_start_iloc"],
)

# PPO agent with an MLP policy and no extra configuration.
agent = agent_tracker.findAgent("PPO", "MlpPolicy", {})

In [17]:
# Bind the agent's model to the environment; per the cell output this yields a
# stable_baselines3 PPO instance. (The former bare `ep['train']` statement was
# removed: it was not the cell's last expression, so it was never displayed.)
model = agent["model"].boot(env["environment"])
model

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


<stable_baselines3.ppo.ppo.PPO at 0x1881585cd50>

In [7]:
# Train the model for 10,000 timesteps; the training logs are printed below.
# The call is the cell's last expression, so its return value is what gets displayed.
model.learn(total_timesteps=10_000)
#env['environment'].reset()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 119      |
|    ep_rew_mean     | -0.293   |
| time/              |          |
|    fps             | 368      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 119          |
|    ep_rew_mean          | -0.227       |
| time/                   |              |
|    fps                  | 375          |
|    iterations           | 2            |
|    time_elapsed         | 10           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0039481735 |
|    clip_fraction        | 0.00503      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.69        |
|    explained_variance   | -0.107       |
|    learning_r

<stable_baselines3.ppo.ppo.PPO at 0x18822e54a90>

In [19]:
# Display the environment record returned by env_tracker.findEnvironment.
# NOTE(review): the output below reports version 'v2' although the defining cell
# requests "v4", and the execution counts are out of order (In [23] vs In [19]) —
# the shown output presumably comes from an earlier kernel state; re-run to confirm.
env

{'env_id': 13,
 'id': 13,
 'version': 'v2',
 'config': {'lookback': 0,
  'market_features': ['close', 'price_change', 'volume_change'],
  'n_timesteps': 120,
  'seed': 314,
  'ticker': 'AAPL',
  'start_idx': 631},
 'environment': <environments.PositionTradingEnvV2 at 0x18813f28890>}

In [9]:
# Helper functions ====================
import numpy as np

def compute_returns_curve(curve):
    """Return the period-over-period simple returns of a value curve.

    Args:
        curve: 1-D sequence of values (e.g. wallet value per step).

    Returns:
        Array with one element fewer than ``curve``: each step's change
        divided by the value at the start of that step.
    """
    step_changes = np.diff(curve)
    starting_values = curve[:-1]
    return step_changes / starting_values

def sharpe_ratio(returns, risk_free_rate=0.0):
    """Compute the (non-annualised) Sharpe ratio of a series of returns.

    Args:
        returns: Sequence or array of per-period returns. Plain lists are
            accepted, matching ``sortino_ratio``.
        risk_free_rate: Per-period risk-free rate subtracted from every return.

    Returns:
        Mean excess return divided by the standard deviation of the excess
        returns (plus 1e-8 to avoid division by zero). ``nan`` for empty input.
    """
    # Convert first so list input does not fail on `list - float`.
    excess = np.asarray(returns, dtype=float) - risk_free_rate
    if excess.size == 0:
        # Same value np.mean would give, without the RuntimeWarning.
        return np.nan
    return np.mean(excess) / (np.std(excess) + 1e-8)

def sortino_ratio(returns, risk_free_rate=0.0):
    """Compute a (non-annualised) Sortino-style ratio of a series of returns.

    The denominator is the standard deviation of the negative excess returns
    only, floored at 1e-8.

    Args:
        returns: Sequence or array of per-period returns.
        risk_free_rate: Per-period risk-free rate subtracted from every return.

    Returns:
        Mean excess return divided by the floored downside deviation.
    """
    returns = np.array(returns)
    excess = returns - risk_free_rate
    downside = excess[excess < 0]

    # Floor the deviation in every case: it is zero not only when there is no
    # downside at all, but also when there is a single negative return or all
    # negative returns are identical — both previously divided by zero.
    downside_std = np.std(downside) if len(downside) > 0 else 0.0
    downside_std = max(downside_std, 1e-8)

    return np.mean(excess) / downside_std


def calmar_ratio(returns_curve):
    """Compute a Calmar-style ratio from a value curve.

    Args:
        returns_curve: 1-D array of values over time (first element is the
            starting value).

    Returns:
        Total return over the curve divided by the largest drop below the
        running peak, where the drop is expressed relative to the starting
        value (plus 1e-8 to avoid division by zero).
    """
    start_value = returns_curve[0]
    running_peak = np.maximum.accumulate(returns_curve)
    worst_drop = np.max(running_peak - returns_curve) / start_value
    growth = returns_curve[-1] / start_value - 1
    return growth / (worst_drop + 1e-8)

def central_tendency_difference(mean, median, std):
    """Return the absolute mean-median gap scaled by the absolute std.

    Args:
        mean: Mean of the distribution.
        median: Median of the distribution.
        std: Standard deviation used as the scale (its sign is ignored).

    Returns:
        ``|mean - median| / |std|``.
    """
    gap = abs(mean - median)
    scale = abs(std)
    return gap / scale

In [10]:
import os
import json
import hashlib
from collections import defaultdict
# CSV file in which completed benchmark runs are persisted.
STORAGE_PATH = "data/experiments/episode_benchmark_engine/runs.csv"

# Values shared by the episode and environment sections below.
LOOKBACK = 0
EPISODE_LENGTH = 50

# Symbols left out of the benchmark (kept in sorted order).
EXCLUDED_TICKERS = sorted(("CEG", "GEHC", "GEV", "KVUE", "SOLV"))

# Regressor-related settings.
CONFIG = {
    "regressor": "RandomForestRegressor",
    "n_estimators": 300,
    "random_state": 314,
    "transaction_cost": 0,
}

# Default settings for EpisodeBenchmark: one section each for the episode,
# the environment and the agent, plus a few top-level values.
RUN_SETTINGS = {
    "excluded_tickers": EXCLUDED_TICKERS,
    "cv_folds": 3,
    "lags": 5,
    "seed": 314,
    "total_timesteps": 50_000,
    "episode": {
        "episode_length": EPISODE_LENGTH,
        "lookback": LOOKBACK,
    },
    "environment": {
        "market_features": ["close", "price_change", "volume_change"],
        "version": "v2",
        "lookback": LOOKBACK,
        "episode_length": EPISODE_LENGTH,
        "transaction_cost": 0,
    },
    "agent": {
        "model_class": "PPO",
        "policy_class": "MlpPolicy",
        "config": {
            "verbose": 1,
            "ent_coef": 0.1,
            "policy_kwargs": {"net_arch": [64, 64]},
        },
    },
}


class EpisodeBenchmark:
    """Train an agent on monthly episodes per ticker and record diagnostics.

    For every ticker and every month in its OHLCV data, `run` looks up a
    train/test episode pair, trains the configured agent on the train
    environment, evaluates it on both environments, and appends the result
    to the CSV at STORAGE_PATH. Runs already present in that CSV (same run
    hash and seed) are skipped.
    """

    def __init__(
        self,
        tickers=["AAPL"],
        config=CONFIG,
        run_settings=RUN_SETTINGS,
        start_date="2024-01-01",
    ):
        """Set up data, settings and trackers, then load previously stored runs.

        Args:
            tickers: Iterable of symbols to benchmark.
            config: Regressor-related settings; stored on the instance.
            run_settings: Nested settings with at least the keys
                "total_timesteps", "episode", "environment" and "agent".
            start_date: Only rows with a date after this value are used.
        """
        self.ohlcv_df = OHLCV_DF.copy()
        # Copy so the shared (mutable) default list is never aliased.
        self.tickers = list(tickers)
        self.start_date = start_date

        # Honour the arguments; previously the module-level CONFIG / RUN_SETTINGS
        # were always used and the parameters were silently ignored.
        self.config = config
        # Store the market features sorted (order-independent run hashes) without
        # mutating the caller's / module-level dictionary in place.
        self.run_settings = {
            **run_settings,
            "environment": {
                **run_settings["environment"],
                "market_features": sorted(run_settings["environment"]["market_features"]),
            },
        }
        self.ep_tracker = EpisodeTracker()
        self.env_tracker = EnvironmentTracker()
        self.agent_tracker = AgentTracker()

        self.boot()

    def boot(self):
        """Load completed runs from STORAGE_PATH, or start with empty state.

        Populates `completed_runs_df`, `completed_hashes` (set of run hashes)
        and `seen_seeds` (run hash -> set of seeds already executed).
        """
        self.seen_seeds = defaultdict(set)
        if os.path.exists(STORAGE_PATH):
            self.completed_runs_df = pd.read_csv(STORAGE_PATH)
            self.completed_hashes = set(self.completed_runs_df["run_hash"].unique())
            hash_seed_pairs = zip(
                self.completed_runs_df["run_hash"], self.completed_runs_df["seed"]
            )
            for run_hash, seed in hash_seed_pairs:
                self.seen_seeds[run_hash].add(seed)
        else:
            self.completed_runs_df = pd.DataFrame()
            self.completed_hashes = set()

    def compute_run_hash(self, agent_id, train_episode_id, train_environment_id):
        """Return an MD5 hex digest identifying a run configuration.

        The digest covers the agent, train-episode and train-environment ids
        plus the timesteps, lookback, episode length and (sorted) market
        features taken from `self.run_settings`.
        """
        # sorted() instead of list.sort(): hashing must not mutate the settings.
        market_features = sorted(self.run_settings["environment"]["market_features"])
        payload = {
            "agent_id": agent_id,
            "episode_id": train_episode_id,
            "environment_id": train_environment_id,
            "timesteps": self.run_settings["total_timesteps"],
            "lookback": self.run_settings["episode"]["lookback"],
            "episode_length": self.run_settings["episode"]["episode_length"],
            # Kept as a JSON string inside the payload so hashes stay identical
            # to those already stored in the CSV.
            "market_features": json.dumps(market_features),
        }
        return hashlib.md5(json.dumps(payload, sort_keys=True).encode()).hexdigest()

    @staticmethod
    def _residual_stats(residuals, mode, label):
        """Summary statistics of one residual series, keyed by mode and label."""
        if len(residuals) > 10:
            # With more than 10 samples, min(10, len - 1) is always 10.
            ljung_pval = acorr_ljungbox(residuals, lags=[10], return_df=True).iloc[0]["lb_pvalue"]
        else:
            ljung_pval = np.nan

        prefix = f"{mode}_resid_{label}"
        return {
            f"{prefix}_std": np.std(residuals),
            f"{prefix}_skew": skew(residuals),
            f"{prefix}_kurtosis": kurtosis(residuals),
            f"{prefix}_acf1": pd.Series(residuals).autocorr(lag=1),
            f"{prefix}_mean": np.mean(residuals),
            f"{prefix}_median": np.median(residuals),
            f"{prefix}_max": np.max(residuals),
            f"{prefix}_min": np.min(residuals),
            f"{mode}_ljung_{label}_pval": ljung_pval,
        }

    def extract_agent_diagnostics(self, env, model, mode="train"):
        """Run the agent through one episode and compute diagnostics.

        Args:
            env: Environment exposing `reset`/`step` plus the attributes
                `wallet_progress`, `market_progress`, `total_reward` and
                `wallet`, which are read after the episode.
            model: Object with `predict(obs, deterministic=True)`.
            mode: Prefix for every key of the returned dict.

        Returns:
            Dict with totals, statistics of the agent-vs-oracle and
            agent-vs-market residuals, and Sharpe/Sortino/Calmar ratios for
            the agent and the market.
        """
        rewards = []
        residuals_oracle = []
        obs = env.reset()[0]
        done = False

        while not done:
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env.step(action)
            # Either flag ends the episode; ignoring `truncated` could loop
            # forever on an environment that only signals truncation.
            done = terminated or truncated
            rewards.append(reward)

            oracle_score = info.get("oracle_score", None)
            if oracle_score is not None:
                residuals_oracle.append(oracle_score - reward)

        # Agent vs Oracle residuals; without oracle scores, fall back to the
        # reward's deviation from its own 5-step rolling mean.
        if residuals_oracle:
            r_oracle = np.array(residuals_oracle)
        else:
            smoothed = pd.Series(rewards).rolling(window=5, min_periods=1).mean()
            r_oracle = np.array(rewards) - smoothed.values

        # Agent vs Market residuals
        agent_curve = np.array(env.wallet_progress)
        market_curve = np.array(env.market_progress)
        r_market = agent_curve - market_curve

        # Per-step percentage returns
        agent_pct_returns = pd.Series(agent_curve).pct_change().dropna().values
        market_pct_returns = pd.Series(market_curve).pct_change().dropna().values

        return {
            f"{mode}_total_reward": env.total_reward,
            f"{mode}_wallet": env.wallet,
            f"{mode}_market": env.market_progress[-1],
            **self._residual_stats(r_oracle, mode, "oracle"),
            **self._residual_stats(r_market, mode, "market"),
            # Financial performance
            f"{mode}_sharpe": sharpe_ratio(agent_pct_returns),
            f"{mode}_sortino": sortino_ratio(agent_pct_returns),
            f"{mode}_calmar": calmar_ratio(agent_curve),
            f"{mode}_market_sharpe": sharpe_ratio(market_pct_returns),
            f"{mode}_market_sortino": sortino_ratio(market_pct_returns),
            f"{mode}_market_calmar": calmar_ratio(market_curve),
        }

    def _persist_result(self, record):
        """Append one result row to the in-memory table and rewrite the CSV."""
        self.completed_runs_df = pd.concat(
            [self.completed_runs_df, pd.DataFrame([record])], ignore_index=True
        )
        self.completed_hashes.add(record["run_hash"])
        self.seen_seeds[record["run_hash"]].add(record["seed"])

        # Make sure the target directory exists; otherwise a finished training
        # run would be lost to an OSError swallowed by run()'s error handler.
        storage_dir = os.path.dirname(STORAGE_PATH)
        if storage_dir:
            os.makedirs(storage_dir, exist_ok=True)
        self.completed_runs_df.to_csv(STORAGE_PATH, index=False)

    def _run_single(self, symbol, month, seed):
        """Train and evaluate one (symbol, month) pair.

        Returns:
            The run record (including model and environments), or None when
            this run hash and seed were already completed.
        """
        settings = self.run_settings
        episode_length = settings["episode"]["episode_length"]
        lookback = settings["episode"]["lookback"]
        market_features = settings["environment"]["market_features"]
        env_version = settings["environment"].get("version", "v2")
        total_timesteps = settings["total_timesteps"]

        target_date = str(month) + "-01"
        episodes = self.ep_tracker.findEpisode(
            target_date,
            symbol,
            episode_length=episode_length,
            lookback=lookback,
            mode="both",
        )
        train_episode = episodes["train"]
        test_episode = episodes["test"]

        # NOTE(review): fresh trackers are created per run (as before) instead of
        # reusing self.env_tracker / self.agent_tracker — confirm whether the
        # instance-level trackers could be used here.
        env_tracker = EnvironmentTracker()
        train_env_config = {
            "ticker": symbol,
            "n_timesteps": episode_length,
            "lookback": lookback,
            "market_features": market_features,
            "seed": seed,
            "start_idx": train_episode["df_start_iloc"],  # type: ignore
        }
        test_env_config = {
            **train_env_config,
            "start_idx": test_episode["df_start_iloc"],  # type: ignore
        }

        env_info = env_tracker.findEnvironment(version=env_version, config=train_env_config)
        train_env = env_info["environment"]
        test_env = env_tracker.findEnvironment(
            version=env_version, config=test_env_config
        )["environment"]

        agent = AgentTracker().findAgent(**settings["agent"])

        run_hash = self.compute_run_hash(
            agent_id=agent["id"],
            train_episode_id=train_episode["id"],
            train_environment_id=env_info["id"],
        )
        if run_hash in self.completed_hashes and seed in self.seen_seeds[run_hash]:
            return None  # Already completed with this seed.

        model = agent["model"].boot(train_env)
        model.learn(total_timesteps=total_timesteps)

        diagnostics = {
            **self.extract_agent_diagnostics(train_env, model, mode="train"),
            **self.extract_agent_diagnostics(test_env, model, mode="test"),
        }

        self._persist_result(
            {
                "run_hash": run_hash,
                "seed": seed,
                "target_date": target_date,
                "agent": settings["agent"]["model_class"],
                "policy": settings["agent"]["policy_class"],
                "env_version": env_info["version"],
                "train_episode_id": train_episode["id"],
                "test_episode_id": test_episode["id"],
                "total_timesteps": total_timesteps,
                "ticker": symbol,
                "environment_id": env_info["id"],
                "agent_id": agent["id"],
                "episode_length": episode_length,
                "lookback": lookback,
                "market_features": json.dumps(market_features),
                **diagnostics,
            }
        )

        return {
            "agent": settings["agent"]["model_class"],
            "policy": settings["agent"]["policy_class"],
            "env_version": env_info["version"],
            "train_episode_id": train_episode["id"],
            "test_episode_id": test_episode["id"],
            "total_timesteps": total_timesteps,
            "ticker": symbol,
            "target_date": target_date,
            "environment_id": env_info["id"],
            "agent_id": agent["id"],
            "model": model,
            "train_env": train_env,
            "test_env": test_env,
            **diagnostics,
        }

    def run(self, tickers=None):
        """Benchmark every month of every ticker and return the new runs.

        Args:
            tickers: Symbols to process; defaults to the instance's tickers.

        Returns:
            List of run records (one per newly executed run), each holding the
            trained model, both environments and the diagnostics. Runs that
            were already completed or that raised an error are not included.
        """
        if tickers is None:
            tickers = self.tickers

        seed = self.run_settings.get("seed", 314)
        # This is the module-level boot() from src.utils.system, not self.boot().
        boot(seed)

        episode_length = self.run_settings["episode"]["episode_length"]
        runs = []

        for symbol in tqdm(tickers):
            symbol_df = self.ohlcv_df[self.ohlcv_df["symbol"] == symbol].sort_values("date")
            symbol_df = symbol_df[symbol_df["date"] > self.start_date]
            # Drop the last `episode_length` rows — presumably so every month
            # considered still leaves room for a full episode.
            symbol_df = symbol_df.iloc[:-episode_length]

            for month in symbol_df["month"].unique():
                try:
                    run_record = self._run_single(symbol, month, seed)
                except Exception as e:
                    # Deliberate best-effort: one failing month must not abort
                    # the whole benchmark.
                    print(f"Skipping {symbol} {month} due to error: {e}")
                    continue
                if run_record is not None:
                    runs.append(run_record)

        return runs


In [20]:
# Instantiate the benchmark for the TOP2_STOCK_BY_SECTOR ticker collection
# (imported from src.defaults). Construction also loads any previously stored
# runs from STORAGE_PATH via EpisodeBenchmark.boot().
eb = EpisodeBenchmark(tickers=TOP2_STOCK_BY_SECTOR)

In [21]:
# The full benchmark run is disabled here; note that later cells read `runs`,
# which is only defined if the line below is re-enabled.
#runs = eb.run()
# Evaluate the already-trained `model` on the single environment from above
# (mode defaults to "train", hence the "train_" key prefix in the output).
eb.extract_agent_diagnostics(env['environment'],model)

{'train_total_reward': -0.2112629259249607,
 'train_wallet': 1,
 'train_market': 1.123126949949466,
 'train_resid_oracle_std': 0.024695197241272563,
 'train_resid_oracle_skew': 1.3407164336676156,
 'train_resid_oracle_kurtosis': 8.731594161080057,
 'train_resid_oracle_acf1': 0.04875954464630813,
 'train_resid_oracle_mean': 0.00045379600177894815,
 'train_resid_oracle_median': -0.0012578296121428774,
 'train_resid_oracle_max': 0.13934752896326452,
 'train_resid_oracle_min': -0.09039298601881147,
 'train_ljung_oracle_pval': 0.023079967734685555,
 'train_resid_market_std': 0.04563653121163207,
 'train_resid_market_skew': -0.9359093868439381,
 'train_resid_market_kurtosis': 0.7607640259387449,
 'train_resid_market_acf1': 0.9613263481148321,
 'train_resid_market_mean': -0.00804295880148433,
 'train_resid_market_median': 0.0008788504635935945,
 'train_resid_market_max': 0.08937909214747108,
 'train_resid_market_min': -0.1381992354000967,
 'train_ljung_market_pval': 1.6997133151070983e-120,
 

In [None]:
"""
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 119      |
|    ep_rew_mean     | -0.293   |
| time/              |          |
|    fps             | 368      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 119          |
|    ep_rew_mean          | -0.227       |
| time/                   |              |
|    fps                  | 375          |
|    iterations           | 2            |
|    time_elapsed         | 10           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0039481735 |
|    clip_fraction        | 0.00503      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.69        |
|    explained_variance   | -0.107       |
|    learning_rate        | 0.0003       |
|    loss                 | 0.146        |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00425     |
|    value_loss           | 0.253        |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 119         |
|    ep_rew_mean          | -0.0435     |
| time/                   |             |
|    fps                  | 344         |
|    iterations           | 3           |
|    time_elapsed         | 17          |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.004291957 |
|    clip_fraction        | 0.00679     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.688      |
|    explained_variance   | 0.169       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0454     |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.00449    |
|    value_loss           | 0.221       |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 119         |
|    ep_rew_mean          | 0.179       |
| time/                   |             |
|    fps                  | 353         |
|    iterations           | 4           |
|    time_elapsed         | 23          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.008619598 |
|    clip_fraction        | 0.0177      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.684      |
|    explained_variance   | 0.14        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0335     |
|    n_updates            | 30          |
|    policy_gradient_loss | -0.00589    |
|    value_loss           | 0.206       |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 119          |
|    ep_rew_mean          | 0.386        |
| time/                   |              |
|    fps                  | 353          |
|    iterations           | 5            |
|    time_elapsed         | 28           |
|    total_timesteps      | 10240        |
| train/                  |              |
|    approx_kl            | 0.0033814711 |
|    clip_fraction        | 0.0268       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.679       |
|    explained_variance   | 0.103        |
|    learning_rate        | 0.0003       |
|    loss                 | 9.68e-05     |
|    n_updates            | 40           |
|    policy_gradient_loss | -0.00903     |
|    value_loss           | 0.169        |
------------------------------------------
<stable_baselines3.ppo.ppo.PPO at 0x18822e54a90>

{'train_total_reward': -0.2112629259249607,
 'train_wallet': 1,
 'train_market': 1.123126949949466,
 'train_resid_oracle_std': 0.024695197241272563,
 'train_resid_oracle_skew': 1.3407164336676156,
 'train_resid_oracle_kurtosis': 8.731594161080057,
 'train_resid_oracle_acf1': 0.04875954464630813,
 'train_resid_oracle_mean': 0.00045379600177894815,
 'train_resid_oracle_median': -0.0012578296121428774,
 'train_resid_oracle_max': 0.13934752896326452,
 'train_resid_oracle_min': -0.09039298601881147,
 'train_ljung_oracle_pval': 0.023079967734685555,
 'train_resid_market_std': 0.04563653121163207,
 'train_resid_market_skew': -0.9359093868439381,
 'train_resid_market_kurtosis': 0.7607640259387449,
 'train_resid_market_acf1': 0.9613263481148321,
 'train_resid_market_mean': -0.00804295880148433,
 'train_resid_market_median': 0.0008788504635935945,
 'train_resid_market_max': 0.08937909214747108,
 'train_resid_market_min': -0.1381992354000967,
 'train_ljung_market_pval': 1.6997133151070983e-120,
 'train_sharpe': 0.0,
 'train_sortino': 0.0,
 'train_calmar': 0.0,
 'train_market_sharpe': 0.08434086382473578,
 'train_market_sortino': 0.10215646954792426,
 'train_market_calmar': 1.0155852732451096}

# V4 ============================================
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 119      |
|    ep_rew_mean     | -0.293   |
| time/              |          |
|    fps             | 368      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 119          |
|    ep_rew_mean          | -0.227       |
| time/                   |              |
|    fps                  | 375          |
|    iterations           | 2            |
|    time_elapsed         | 10           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0039481735 |
|    clip_fraction        | 0.00503      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.69        |
|    explained_variance   | -0.107       |
|    learning_rate        | 0.0003       |
|    loss                 | 0.146        |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00425     |
|    value_loss           | 0.253        |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 119         |
|    ep_rew_mean          | -0.0435     |
| time/                   |             |
|    fps                  | 344         |
|    iterations           | 3           |
|    time_elapsed         | 17          |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.004291957 |
|    clip_fraction        | 0.00679     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.688      |
|    explained_variance   | 0.169       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0454     |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.00449    |
|    value_loss           | 0.221       |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 119         |
|    ep_rew_mean          | 0.179       |
| time/                   |             |
|    fps                  | 353         |
|    iterations           | 4           |
|    time_elapsed         | 23          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.008619598 |
|    clip_fraction        | 0.0177      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.684      |
|    explained_variance   | 0.14        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0335     |
|    n_updates            | 30          |
|    policy_gradient_loss | -0.00589    |
|    value_loss           | 0.206       |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 119          |
|    ep_rew_mean          | 0.386        |
| time/                   |              |
|    fps                  | 353          |
|    iterations           | 5            |
|    time_elapsed         | 28           |
|    total_timesteps      | 10240        |
| train/                  |              |
|    approx_kl            | 0.0033814711 |
|    clip_fraction        | 0.0268       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.679       |
|    explained_variance   | 0.103        |
|    learning_rate        | 0.0003       |
|    loss                 | 9.68e-05     |
|    n_updates            | 40           |
|    policy_gradient_loss | -0.00903     |
|    value_loss           | 0.169        |
------------------------------------------

{'train_total_reward': 1.8404246654035108,
 'train_wallet': 1.2311124381229703,
 'train_market': 1.2610675191320353,
 'train_resid_oracle_std': 0.1649787826127421,
 'train_resid_oracle_skew': 7.100542170671588,
 'train_resid_oracle_kurtosis': 71.28572593394061,
 'train_resid_oracle_acf1': -0.03614584193924006,
 'train_resid_oracle_mean': -6.761922913547766e-05,
 'train_resid_oracle_median': -0.0005808469933746441,
 'train_resid_oracle_max': 1.5933017925434414,
 'train_resid_oracle_min': -0.4090312514457644,
 'train_ljung_oracle_pval': 0.43397976725966475,
 'train_resid_market_std': 0.004207810947213421,
 'train_resid_market_skew': 3.914946935632663,
 'train_resid_market_kurtosis': 19.557611931096172,
 'train_resid_market_acf1': 0.6085714183637173,
 'train_resid_market_mean': -0.004342682837053663,
 'train_resid_market_median': -0.006046241177076084,
 'train_resid_market_max': 0.022354143814353078,
 'train_resid_market_min': -0.006476324087659302,
 'train_ljung_market_pval': 4.222512971005281e-58,
 'train_sharpe': 0.09010650230498658,
 'train_sortino': 0.32005405023559785,
 'train_calmar': 1.8007265566029227,
 'train_market_sharpe': 0.09126171812151522,
 'train_market_sortino': 0.32781071367581516,
 'train_market_calmar': 1.8345812189354533}
<stable_baselines3.ppo.ppo.PPO at 0x18822e54a90>
"""

In [None]:
# NOTE(review): `xxxx` is not defined in any visible cell, so this raises
# NameError — presumably a deliberate tripwire that halts "Run All" before the
# analysis cells below. TODO confirm, and replace with an explicit guard if so.
xxxx

In [None]:
# One row per benchmark run; requires `runs` from `eb.run()`.
pddf = pd.DataFrame(runs)
# Show the train/test episode ids side by side as a pair of Series.
(pddf["train_episode_id"], pddf["test_episode_id"])

In [None]:
# Keep `test_total_reward` as the target and drop every other test_* column,
# so only train-side columns are correlated against it.
cols_to_drop = [
    col
    for col in pddf.columns
    if col != "test_total_reward" and col.startswith("test_")
]
pddf_cleaned = pddf.drop(columns=cols_to_drop)

# Correlation of each remaining numeric column with the test reward, strongest first.
correlations = (
    pddf_cleaned
    .corr(numeric_only=True)["test_total_reward"]
    .sort_values(ascending=False)
)
correlations

In [None]:
# Outcome columns followed by a hand-picked set of train-side diagnostics.
pddf[
    [
        "train_wallet",
        "test_wallet",
        "train_market",
        "test_market",
        "test_total_reward",
        "train_total_reward",
        "train_sortino",
        "train_calmar",
        "train_resid_market_acf1",
        "train_resid_oracle_kurtosis",
        "train_sharpe",
        "train_resid_oracle_max",
    ]
]

In [None]:
# Train episode id of every run (the original referenced the undefined name
# `ppdf`, a typo for the `pddf` frame built above, and raised NameError).
pddf['train_episode_id']