In [1]:
!pip install sb3-contrib torch

Collecting sb3-contrib
  Downloading sb3_contrib-2.6.0-py3-none-any.whl.metadata (4.1 kB)
Collecting stable_baselines3<3.0,>=2.6.0 (from sb3-contrib)
  Downloading stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.

In [2]:
import math
import numpy as np
import pandas as pd
from itertools import combinations

from gymnasium import Env
from gymnasium.spaces import Box, Dict

import torch as th
import torch.nn as nn

In [3]:
import pandas as pd

def get_forex_data():
    # Load the dataset
    data_set = pd.read_csv('/kaggle/input/hourly-rates-v2/Hourly_Rates_v2 (1).csv', na_values='ND', parse_dates=['Timestamp'])
    
    # Interpolate missing values to handle missing data
    data_set.interpolate(inplace=True)
    
    # Select only the required columns
    df = data_set[['Timestamp','EURUSD', 'GBPUSD','JPYUSD']].copy()
    
    
    return df
def get_pair_price_from_row(row, pair, base_currency):
    """
    Given a row (a pandas Series) and a pair (tuple of two currencies),
    return the exchange rate defined as:
      - If the second currency equals the base, then the price is assumed to be available
        in the column f"{non_base}{base_currency}".
      - If the first currency equals the base, then the price is 1/(price from f"{other}{base_currency}").
      - Otherwise, compute the cross rate as (price of A in base)/(price of B in base).
    """
    A, B = pair
    if A == B:
        return 1.0
    if B == base_currency:
        return row[f"{A}{base_currency}"]
    elif A == base_currency:
        return 1.0 / row[f"{B}{base_currency}"]
    else:
        return row[f"{A}{base_currency}"] / row[f"{B}{base_currency}"]

In [4]:
import math
import numpy as np
import pandas as pd
from gymnasium import Env
from gymnasium.spaces import Box

class State:
    def __init__(self, bars_count=30, verbose=False):
        self.bars_count = bars_count
        self._prices = None
        self._offset = None
        self.balance = None
        self.trade_max_percentage = None
        self.verbose = verbose
        # These will be set in reset:
        self.portfolio = None        # dict mapping currency -> amount (in native units)
        self.base_currency = None
        self.pairs = None            # list of tuples of currency pairs (sorted)
        self.portfolio_currencies = None  # list of all currencies in portfolio (including base)
        self.leverage = 15  # default leverage ratio

    def reset(self, prices, offset, initial_balance, trade_max_percentage,
              pairs, base_currency, portfolio_currencies):
        # Ensure offset is high enough to allow for bars_count history.
        assert offset >= self.bars_count - 1, "Offset must allow for sufficient historical data"
        self._prices = prices.copy()
        self._offset = offset
        self.balance = initial_balance
        self.trade_max_percentage = trade_max_percentage
        self.base_currency = base_currency
        self.pairs = pairs
        self.portfolio_currencies = portfolio_currencies
        # Initialize portfolio: all funds in base_currency; zero in others.
        self.portfolio = {curr: (initial_balance if curr == base_currency else 0) 
                          for curr in portfolio_currencies}
        self.ammortized_values = {curr: 0 for curr in portfolio_currencies}
    def get_pair_price(self, row, pair):
        return get_pair_price_from_row(row, pair, self.base_currency)

    def encode(self):
        """
        Build an observation dictionary that includes:
          - 'past_prices': a float32 array of shape (bars_count, num_pairs) computed dynamically
          - 'portfolio': a float32 array of portfolio fractions (computed in base currency)
        """
        # Get the historical rows needed.
        historical = self._prices.iloc[self._offset - self.bars_count + 1: self._offset + 1]
        bars = self.bars_count
        num_pairs = len(self.pairs)
        past_prices = np.zeros((bars, num_pairs), dtype=np.float32)
        # Compute price for each pair for every historical row.
        for i, (_, row) in enumerate(historical.iterrows()):
            for j, pair in enumerate(self.pairs):
                past_prices[i, j] = self.get_pair_price(row, pair)
        
        # Compute current portfolio value in base currency using the latest row.
        current_row = historical.iloc[-1]
        total_value = 0.0
        portfolio_values = {}
        for curr, amt in self.portfolio.items():
            if curr == self.base_currency:
                val = amt
            else:
                val = amt / current_row[f"{curr}{self.base_currency}"]
            portfolio_values[curr] = val
            total_value += val
        # Compute portfolio fractions in a fixed (sorted) order.
        sorted_curr = sorted(self.portfolio.keys())
        portfolio_frac = np.array([portfolio_values[c] for c in sorted_curr], dtype=np.float32)
        portfolio_frac = portfolio_frac / (total_value + 1e-8)

        # Add amortized values in sorted order, normalized (e.g., by total value)
        amortized_vals = np.array([self.ammortized_values[c] for c in sorted_curr], dtype=np.float32)
        # amortized_vals = amortized_vals / (total_value + 1e-8)  # normalize
        
        return {"past_prices": past_prices, "portfolio": portfolio_frac, "amortized": amortized_vals}
        
        # return {"past_prices": past_prices, "portfolio": portfolio_frac}

    @property
    def shape(self):
        """
        Returns a dict mapping observation keys to their shapes.
        """
        return {
            "past_prices": (self.bars_count, len(self.pairs)),
            "portfolio": (len(self.portfolio),),
            "amortized": (len(self.ammortized_values),)
        }

    def step(self, action, reward_type="InDirect"):
        """
        Process a trade action vector (one action per pair). For each pair (A,B)
        in self.pairs (sorted lexicographically), interpret a positive action as
        "buy A using B" and a negative action as "sell A for B." Trades are capped
        by a maximum trade amount (based on current balance, leverage, and trade_max_percentage).
        Returns the reward.
        """
        reward = 0
        current_row = self._prices.iloc[self._offset]
        next_row = self._prices.iloc[self._offset + 1]
        num_pairs = len(self.pairs)
        # For each pair, compute a max trade amount (dividing available leverage across pairs)
        per_pair_trade = self.balance * (self.leverage / num_pairs) * self.trade_max_percentage
        # Loop over each pair and perform the trade.
        for i, (A, B) in enumerate(self.pairs):
            # if the action is negative, we swap the currencies
            if action[i] < 0:
                A, B = B, A
            a_base_price = self.get_pair_price(current_row, (A, self.base_currency))
            b_base_price = self.get_pair_price(current_row, (B, self.base_currency))
            # Buy currency A using currency B.
            trade_amount = per_pair_trade * abs(action[i])
            trade_amount = min(trade_amount, trade_amount + (self.portfolio[B]/ b_base_price))
            if trade_amount > 0:
                # You spend trade_amount of B to get (trade_amount/price) of A.
                self.portfolio[A] += trade_amount * a_base_price
                self.portfolio[B] -= trade_amount * b_base_price
                if reward_type =="InDirect":
                    if A == self.base_currency:
                        if self.ammortized_values[B]>0 and self.portfolio[B]>0:
                            reward += math.log(1 / ((self.ammortized_values[B]/self.portfolio[B]) * b_base_price))
                        else:
                            reward = 0
                    self.ammortized_values[A] += trade_amount
                    self.ammortized_values[B] -= trade_amount
                if self.verbose:
                    print(f"{current_row['Timestamp']}: Spent {trade_amount* b_base_price:.2f} {B} to buy {trade_amount* a_base_price:.2f} {A}")
        # Compute new total portfolio value in base currency using next_row prices.
        new_value = 0.0
        for curr, amt in self.portfolio.items():
            if curr == self.base_currency:
                val = amt
            else:
                val = amt / next_row[f"{curr}{self.base_currency}"]
            new_value += val
        if(reward_type == "Direct"):
            # Compute direct reward as the log return.
            if new_value > 0 and self.balance > 0:
                reward = math.log(new_value / self.balance)
            else:
                reward = 0
        self.balance = new_value
        self._offset += 1
        done = self._offset >= len(self._prices) - 2
        info = {"balance": self.balance, "portfolio": self.portfolio}
        return reward, done, info

In [5]:
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

class LSTMFeatureExtractor(BaseFeaturesExtractor):
    def __init__(self, observation_space: dict, features_dim=128, lstm_hidden_size=64):
        """
        observation_space: expects a Dict with keys:
          - "past_prices": Tensor of shape (bars_count, num_pairs)
          - "portfolio": Tensor of shape (portfolio_dim,)
        features_dim: dimension of the final feature vector.
        lstm_hidden_size: hidden state size for the LSTM processing the price sequence.
        """
        super(LSTMFeatureExtractor, self).__init__(observation_space, features_dim)
        
        # Get shapes from the observation space.
        self.bars_count, self.num_pairs = observation_space.spaces["past_prices"].shape
        self.portfolio_dim = observation_space.spaces["portfolio"].shape[0]

        # LSTM to process the price sequence.
        self.lstm = nn.LSTM(
            input_size=self.num_pairs,
            hidden_size=lstm_hidden_size,
            batch_first=True
        )

        # After computing indicators (RSI & MACD) for each pair, we have 2*num_pairs values.
        self.indicator_dim = 2 * self.num_pairs

        # Fully-connected layer to process portfolio info.
        self.portfolio_fc = nn.Sequential(
            nn.Linear(self.portfolio_dim, 32),
            nn.ReLU()
        )
        
        # Final fully-connected layer to combine all features.
        # The input dimension is: lstm_hidden_size + indicator_dim + 32.
        self.combined_fc = nn.Sequential(
            nn.Linear(lstm_hidden_size + self.indicator_dim + 32, features_dim),
            nn.ReLU()
        )
    def compute_ema(self, prices: th.Tensor, span: int) -> th.Tensor:
        """
        Compute the exponential moving average (EMA) for the given prices.
        prices: Tensor of shape (batch_size, bars_count, num_pairs)
        span: period span for the EMA.
        Returns:
            A tensor of shape (batch_size, num_pairs) containing the final EMA value.
        """
        alpha = 2.0 / (span + 1)
        batch_size, T, num_pairs = prices.shape
        # Initialize with the first time-step.
        ema = prices[:, 0, :]  # shape: (batch_size, num_pairs)
        # Iteratively update the EMA over time.
        for t in range(1, T):
            ema = alpha * prices[:, t, :] + (1 - alpha) * ema
        return ema  # shape: (batch_size, num_pairs)

    def compute_macd(self, prices: th.Tensor) -> th.Tensor:
        """
        Compute the MACD (Moving Average Convergence Divergence) indicator.
        Uses a short-term EMA (span 12) and a long-term EMA (span 26).
        Returns:
            A tensor of shape (batch_size, num_pairs) representing the MACD.
        """
        ema_short = self.compute_ema(prices, span=12)
        ema_long = self.compute_ema(prices, span=26)
        macd = ema_short - ema_long
        return macd

    def compute_rsi(self, prices: th.Tensor, period: int = 14) -> th.Tensor:
        """
        Compute the Relative Strength Index (RSI) using Wilder's smoothing method.
        prices: Tensor of shape (batch_size, bars_count, num_pairs)
        period: period for computing RSI (default: 14)
        Returns:
            A tensor of shape (batch_size, num_pairs) representing the RSI.
        """
        # Compute differences along the time dimension.
        diff = prices[:, 1:, :] - prices[:, :-1, :]  # shape: (batch_size, bars_count-1, num_pairs)
        # Separate gains and losses.
        gain = th.clamp(diff, min=0)
        loss = -th.clamp(diff, max=0)
        batch_size, T_minus_1, num_pairs = gain.shape
        # Adjust period if the sequence is too short.
        period = min(period, T_minus_1)
        # Initialize the average gain and loss using the first 'period' values.
        avg_gain = gain[:, :period, :].mean(dim=1)
        avg_loss = loss[:, :period, :].mean(dim=1)
        # Update the averages using Wilder's smoothing method.
        for t in range(period, T_minus_1):
            current_gain = gain[:, t, :]
            current_loss = loss[:, t, :]
            avg_gain = (avg_gain * (period - 1) + current_gain) / period
            avg_loss = (avg_loss * (period - 1) + current_loss) / period
        rs = avg_gain / (avg_loss + 1e-8)
        rsi = 100 - (100 / (1 + rs))
        return rsi

    def forward(self, observations: dict) -> th.Tensor:
        """
        observations: dict with keys "past_prices" and "portfolio"
          - past_prices: Tensor of shape (batch_size, bars_count, num_pairs)
          - portfolio: Tensor of shape (batch_size, portfolio_dim)
        """
        past_prices = observations["past_prices"]
        # we pass price differences through the LSTM
        price_diffs = past_prices[:, 1:, :] - past_prices[:, :-1, :]  # Shape: (batch_size, bars_count-1, num_pairs)
        portfolio = observations["portfolio"]

        # Process the price sequence through the LSTM.
        _, (h_n, _) = self.lstm(price_diffs)
        lstm_last = h_n.squeeze(0)  # shape: (batch_size, lstm_hidden_size)

        # Compute the technical indicators.
        rsi = self.compute_rsi(past_prices, period=14)   # shape: (batch_size, num_pairs)
        macd = self.compute_macd(past_prices)              # shape: (batch_size, num_pairs)
        # Directly concatenate the raw indicator outputs.
        indicators = th.cat([rsi, macd], dim=1)            # shape: (batch_size, 2*num_pairs)

        # Process portfolio information.
        portfolio_features = self.portfolio_fc(portfolio)

        # Combine LSTM output, raw indicators, and portfolio features.
        combined = th.cat([lstm_last, indicators, portfolio_features], dim=1)
        return self.combined_fc(combined)


2025-05-13 19:00:30.728368: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747162830.959074      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747162831.022724      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [6]:
class ForexTradingEnv(Env):
    def __init__(self, df, currencies=["EUR","GBP"], base_currency="USD", initial_balance=1000,verbose=False,
                 bars_count=30, max_steps=5000):
        """
        df: pandas DataFrame containing price data. It is assumed to have columns like "EURUSD", "GBPUSD", etc.
        currencies: list of non-base currencies (e.g. ["EUR","GBP"]).
        base_currency: the base currency (e.g. "USD").
        """
        super(ForexTradingEnv, self).__init__()
        self.df = df.copy()
        self.initial_balance = initial_balance
        self.bars_count = bars_count
        self.max_steps = max_steps
        self.steps = 0
        self.base_currency = base_currency
        self.currencies = currencies  # non-base currencies

        # The portfolio will include the base and the other currencies.
        self.portfolio_currencies = sorted(list(set([base_currency] + currencies)))

        # Create all unique pairs from portfolio currencies.
        # (For example, if portfolio currencies are ["EUR", "GBP", "USD"],
        # the pairs will be: ("EUR","GBP"), ("EUR","USD"), ("GBP","USD"))
        pairs = []
        for comb in combinations(self.portfolio_currencies, 2):
            pair = tuple(sorted(comb))
            pairs.append(pair)
        pairs = sorted(pairs)
        self.pairs = pairs

        # Define the dynamic action space: one continuous action per pair.
        self.action_space = Box(low=-1, high=1, shape=(len(self.pairs),), dtype=np.float32)

        # Define the observation space as a Dict with two keys.
        self.observation_space = Dict({
            "past_prices": Box(low=0, high=np.inf, shape=(bars_count, len(self.pairs)), dtype=np.float32),
            "portfolio": Box(low=0, high=1, shape=(len(self.portfolio_currencies),), dtype=np.float32)
        })

        self.state = State(bars_count=self.bars_count,verbose=verbose)

    def seed(self, seed):
        np.random.seed(seed)

    def reset(self, seed=None):
        super().reset(seed=seed)
        self.steps = 0
        rng = np.random.default_rng(seed)
        # Choose an offset ensuring at least bars_count historical rows.
        offset = rng.integers(self.bars_count - 1, len(self.df) - 1)
        self.state.reset(prices=self.df, offset=offset,
                         initial_balance=self.initial_balance,
                         trade_max_percentage=0.2,
                         pairs=self.pairs,
                         base_currency=self.base_currency,
                         portfolio_currencies=self.portfolio_currencies)
        return self.state.encode(), {}

    def step(self, action):
        reward, terminated, info = self.state.step(action)
        truncated = self.state._offset >= len(self.df) - 1
        observation = self.state.encode()
        self.steps += 1
        if self.steps >= self.max_steps:
            terminated = True
        return observation, reward, terminated, truncated, info

    def render(self, mode='human'):
        if mode != 'human':
            raise NotImplementedError("Only 'human' rendering mode is supported.")
        print(f"Step: {self.state._offset}")
        print(f"Portfolio: {self.state.portfolio}")
        print(f"Balance: {self.state.balance}")

In [7]:
import os
import math
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecNormalize
import torch
import warnings
import zipfile

warnings.simplefilter(action="ignore", category=FutureWarning)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set up the Recurrent PPO model (with LSTM Policy)
data = get_forex_data()
split_index = int(0.8 * len(data))
train_data = data.iloc[:split_index]
test_data = data.iloc[split_index:]

kwargs = {"df": train_data}
train_envs = make_vec_env(ForexTradingEnv, n_envs=4, env_kwargs=kwargs)
train_envs = VecNormalize(train_envs, norm_obs=True, norm_reward=True, clip_obs=10.0)

# adding custom feature extractor
policy_kwargs = dict(
    features_extractor_class=LSTMFeatureExtractor,
    features_extractor_kwargs=dict(features_dim=256, lstm_hidden_size=128),
    lstm_hidden_size=256,
    n_lstm_layers=1,
    shared_lstm=False,
    enable_critic_lstm=True,
)
# model_load_path = '/kaggle/working/rppo_forex_indirect_v3.zip'
model_save_path = "rppo_forex_indirect_v3_2.zip"  # Updated model path for R-PPO

model_dir = '/kaggle/input/rppo_forex_indirect_v5_fixed/other/default/1'
zip_path = '/kaggle/working/rppo_forex_indirect_v3_3.zip'
log_dir = "rppo_logs/"

def zip_model_directory(directory_path, zip_path):
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(directory_path):
            for file in files:
                file_path = os.path.join(root, file)
                arcname = os.path.relpath(file_path, start=directory_path)
                zipf.write(file_path, arcname)

zip_model_directory(model_dir, zip_path)
print(f"Zipped model saved to {zip_path}")

# Load the existing model if it exists, otherwise create a new one
if os.path.exists(zip_path):
    print(f"Loading existing model from {zip_path}...")
    model = RecurrentPPO.load(zip_path, env=train_envs, device=device, tensorboard_log=log_dir)
else:
    print("No existing model found. Training from scratch...")
    model = RecurrentPPO(
        "MlpLstmPolicy", 
        train_envs, 
        tensorboard_log=log_dir,
        verbose=1, 
        device=device, 
        policy_kwargs=policy_kwargs,
        n_steps=256) 

# Train the model
# try:
#     model.learn(total_timesteps=3000000) #10000000 
# except KeyboardInterrupt:
#     model.save(model_save_path)
#     print(f"Training interrupted. Model saved to {model_save_path}")

# Save the model after training
model.save(model_save_path)
print(f"Model saved to {model_save_path}")


Zipped model saved to /kaggle/working/rppo_forex_indirect_v3_3.zip
Loading existing model from /kaggle/working/rppo_forex_indirect_v3_3.zip...
Model saved to rppo_forex_indirect_v3_2.zip


In [8]:
# def calculate_performance_metrics(balance_history, trading_days=252):
#     balance = np.array(balance_history)
#     # Avoid division by zero or empty arrays
#     if len(balance) < 2:
#         return {}
#     returns = np.diff(balance) / balance[:-1]
#     total_return = balance[-1] / balance[0] - 1
    
#     # Annualized return using the formula: (final/initial)^(trading_days/num_periods) - 1
#     annualized_return = (balance[-1] / balance[0]) ** (trading_days / len(returns)) - 1
#     # Daily volatility (standard deviation of returns)
#     daily_vol = np.std(returns)
#     # Sharpe ratio (assuming risk-free rate = 0)
#     sharpe_ratio = np.mean(returns) / daily_vol * np.sqrt(trading_days) if daily_vol != 0 else np.nan
    
#     # Maximum drawdown: max loss from a peak to a trough
#     cumulative_max = np.maximum.accumulate(balance)
#     drawdowns = (balance - cumulative_max) / cumulative_max
#     max_drawdown = np.min(drawdowns)
    
#     # Sortino ratio: similar to Sharpe but considers only downside risk
#     negative_returns = returns[returns < 0]
#     downside_std = np.std(negative_returns) if negative_returns.size > 0 else 0
#     sortino_ratio = np.mean(returns) / downside_std * np.sqrt(trading_days) if downside_std != 0 else np.nan
#     return {
#         "Total Return": total_return,
#         "Annualized Return": annualized_return,
#         "Daily Volatility": daily_vol,
#         "Sharpe Ratio": sharpe_ratio,
#         "Max Drawdown": max_drawdown,
#         "Sortino Ratio": sortino_ratio,
#     }

# import numpy as np

def calculate_performance_metrics(balance_history, trading_hours_per_year=8760):
    balance = np.array(balance_history)
    
    # Avoid division by zero or empty arrays
    if len(balance) < 2:
        return {}
    
    # Hourly returns
    returns = np.diff(balance) / balance[:-1]
    total_return = balance[-1] / balance[0] - 1

    # Annualized return based on hourly compounding
    annualized_return = (balance[-1] / balance[0]) ** (trading_hours_per_year / len(returns)) - 1
    
    # Hourly volatility (standard deviation of returns)
    hourly_vol = np.std(returns)
    
    # Sharpe ratio (assuming risk-free rate = 0)
    sharpe_ratio = np.mean(returns) / hourly_vol * np.sqrt(trading_hours_per_year) if hourly_vol != 0 else np.nan
    
    # Maximum drawdown
    cumulative_max = np.maximum.accumulate(balance)
    drawdowns = (balance - cumulative_max) / cumulative_max
    max_drawdown = np.min(drawdowns)
    
    # Sortino ratio (using only downside risk)
    negative_returns = returns[returns < 0]
    downside_std = np.std(negative_returns) if negative_returns.size > 0 else 0
    sortino_ratio = np.mean(returns) / downside_std * np.sqrt(trading_hours_per_year) if downside_std != 0 else np.nan
    
    return {
        "Total Return": total_return,
        "Annualized Return": annualized_return,
        "Daily Volatility": hourly_vol,
        "Sharpe Ratio": sharpe_ratio,
        "Max Drawdown": max_drawdown,
        "Sortino Ratio": sortino_ratio,
    }


In [9]:
import numpy as np
from stable_baselines3.common.vec_env import VecNormalize
from stable_baselines3.common.env_util import make_vec_env
import plotly.graph_objects as go

# Storage for all accepted performance metrics
all_metrics = []
required_envs = 1000
batch_size = 50
min_steps = 200

while len(all_metrics) < required_envs:
    # Step 1: Create environments
    test_kwargs = {
        "df": data,
        "max_steps": 5000,
        "verbose": False
    }
    test_envs = make_vec_env(ForexTradingEnv, n_envs=batch_size, env_kwargs=test_kwargs)
    test_envs = VecNormalize(test_envs, norm_obs=True, norm_reward=True, clip_obs=10.0)
    obs = test_envs.reset()

    # Step 2: Setup state and history
    lstm_states = None
    episode_starts = np.ones((test_envs.num_envs,), dtype=bool)
    portfolio_history = [{"Balance": []} for _ in range(test_envs.num_envs)]

    # Step 3: Run environments until one terminates
    while True:
        action, lstm_states = model.predict(
            obs, state=lstm_states, episode_start=episode_starts, deterministic=True
        )
        obs, rewards, dones, infos = test_envs.step(action)

        for i, info in enumerate(infos):
            portfolio_history[i]["Balance"].append(info["balance"])

        episode_starts = dones
        if dones.any():
            break

    # Step 4: Validate step count
    steps_per_env = [len(ph["Balance"]) for ph in portfolio_history]
    if min(steps_per_env) < min_steps:
        print("Batch rejected due to short run.")
        continue

    print(f"Batch accepted: {len(all_metrics)} + {batch_size} envs")

    # Step 5: Compute and store metrics
    for i in range(batch_size):
        metrics = calculate_performance_metrics(portfolio_history[i]["Balance"])
        all_metrics.append(metrics)

# Step 6: Compute percentile statistics
def percentile_summary(metrics_list, metric_key, percentiles=[25, 50, 75, 90]):
    values = [m[metric_key] for m in metrics_list]
    return np.percentile(values, percentiles).tolist()

metric_keys = [
    "Total Return", "Annualized Return", "Daily Volatility",
    "Sharpe Ratio", "Max Drawdown", "Sortino Ratio"
]

table_data = {
    "Metric": [],
    "25th Percentile": [],
    "50th Percentile (Median)": [],
    "75th Percentile": [],
    "90th Percentile": []
}

for key in metric_keys:
    ps = percentile_summary(all_metrics, key)
    table_data["Metric"].append(key)
    table_data["25th Percentile"].append(f"{ps[0]*100:.2f}%" if "Return" in key or "Drawdown" in key else f"{ps[0]:.2f}")
    table_data["50th Percentile (Median)"].append(f"{ps[1]*100:.2f}%" if "Return" in key or "Drawdown" in key else f"{ps[1]:.2f}")
    table_data["75th Percentile"].append(f"{ps[2]*100:.2f}%" if "Return" in key or "Drawdown" in key else f"{ps[2]:.2f}")
    table_data["90th Percentile"].append(f"{ps[3]*100:.2f}%" if "Return" in key or "Drawdown" in key else f"{ps[3]:.2f}")

# Step 7: Plot percentile summary as a table
summary_table = go.Figure(data=[go.Table(
    header=dict(
        values=list(table_data.keys()),
        fill_color='paleturquoise',
        align='left'
    ),
    cells=dict(
        values=list(table_data.values()),
        fill_color='lavender',
        align='left'
    )
)])
summary_table.update_layout(
    title = f"Performance Metrics Percentile Summary (n={required_envs})",
    margin=dict(l=20, r=20, t=50, b=20)
)
summary_table.show()


Batch accepted: 0 + 50 envs
Batch accepted: 50 + 50 envs
Batch accepted: 100 + 50 envs
Batch rejected due to short run.
Batch accepted: 150 + 50 envs
Batch accepted: 200 + 50 envs
Batch accepted: 250 + 50 envs
Batch accepted: 300 + 50 envs
Batch accepted: 350 + 50 envs
Batch accepted: 400 + 50 envs
Batch accepted: 450 + 50 envs
Batch rejected due to short run.
Batch accepted: 500 + 50 envs
Batch accepted: 550 + 50 envs
Batch accepted: 600 + 50 envs
Batch accepted: 650 + 50 envs
Batch rejected due to short run.
Batch accepted: 700 + 50 envs
Batch accepted: 750 + 50 envs
Batch accepted: 800 + 50 envs
Batch accepted: 850 + 50 envs
Batch accepted: 900 + 50 envs
Batch accepted: 950 + 50 envs



the imp module is deprecated in favour of importlib and slated for removal in Python 3.12; see the module's documentation for alternative uses



In [10]:
# test_kwargs = {
#     "df": data,
#     "max_steps": 5000,
#     "verbose": False
# }

# # Create the vectorized environment using the dynamic kwargs.
# test_envs = make_vec_env(ForexTradingEnv, n_envs=50, env_kwargs=test_kwargs)
# test_envs = VecNormalize(test_envs, norm_obs=True, norm_reward=True, clip_obs=10.0)
# obs = test_envs.reset()

# # Dynamically determine the portfolio keys from the first environment.
# env0 = test_envs.envs[0].unwrapped
# portfolio_keys = sorted(list(env0.state.portfolio.keys()))
# # Optionally, add a key for overall balance.
# portfolio_keys.append("Balance")

# # Initialize portfolio history for each environment.
# portfolio_history = [{key: [] for key in portfolio_keys} for _ in range(test_envs.num_envs)]

# # Initialize LSTM states and episode start signals.
# lstm_states = None
# episode_starts = np.ones((test_envs.num_envs,), dtype=bool)

# # Main evaluation loop.
# while True:
#     # Get the predicted action from the model.
#     action, lstm_states = model.predict(
#         obs, state=lstm_states, episode_start=episode_starts, deterministic=True
#     )

#     # Step the environment.
#     obs, rewards, dones, infos = test_envs.step(action)

#     # Record portfolio values for each environment dynamically.
#     for i, info in enumerate(infos):
#         portfolio = info["portfolio"]
#         for key in portfolio_history[i].keys():
#             if key == "Balance":
#                 portfolio_history[i][key].append(info["balance"])
#             else:
#                 portfolio_history[i][key].append(portfolio.get(key, 0))

#     # Update episode start signals (reset LSTM states for new episodes).
#     episode_starts = dones

#     # Break out when any episode is done.
#     if dones.any():
#         break

# print("Evaluation complete!")


In [11]:
# import plotly.graph_objects as go
# import numpy as np

# # List to collect summary metrics for each environment
# summary_metrics = []

# # Loop over environments and plot individual figures with metrics annotations
# for i in range(test_envs.num_envs):
#     balance_history = portfolio_history[i]["Balance"]
#     metrics = calculate_performance_metrics(balance_history)
    
#     # Add environment identifier to the metrics dictionary
#     metrics['Environment'] = f"Env {i + 1}"
#     summary_metrics.append(metrics)
# table_headers = [
#     "Environment", "Total Return", "Annualized Return", 
#     "Daily Volatility", "Sharpe Ratio", "Max Drawdown", 
#     "Sortino Ratio"
# ]

# # Prepare lists for each column
# envs = []
# total_returns = []
# annualized_returns = []
# daily_vols = []
# sharpe_ratios = []
# max_drawdowns = []
# sortino_ratios = []


# for met in summary_metrics:
#     envs.append(met.get("Environment", ""))
#     total_returns.append(met.get("Total Return", 0))
#     annualized_returns.append(met.get("Annualized Return", 0))
#     daily_vols.append(met.get("Daily Volatility", 0))
#     sharpe_ratios.append(met.get("Sharpe Ratio", 0))
#     max_drawdowns.append(met.get("Max Drawdown", 0))
#     sortino_ratios.append(met.get("Sortino Ratio", 0))

# avg_total_return = np.mean(total_returns)
# avg_annualized_return = np.mean(annualized_returns)
# avg_daily_vol = np.mean(daily_vols)
# avg_sharpe_ratio = np.mean(sharpe_ratios)
# avg_max_drawdown = np.mean(max_drawdowns)
# avg_sortino_ratio = np.mean(sortino_ratios)

# # Format values as strings with correct number formatting
# total_returns = [f"{x*100:.2f}%" for x in total_returns] + [f"{avg_total_return*100:.2f}%"]
# annualized_returns = [f"{x*100:.2f}%" for x in annualized_returns] + [f"{avg_annualized_return*100:.2f}%"]
# daily_vols = [f"{x*100:.2f}%" for x in daily_vols] + [f"{avg_daily_vol*100:.2f}%"]
# sharpe_ratios = [f"{x:.2f}" for x in sharpe_ratios] + [f"{avg_sharpe_ratio:.2f}"]
# max_drawdowns = [f"{x*100:.2f}%" for x in max_drawdowns] + [f"{avg_max_drawdown*100:.2f}%"]
# sortino_ratios = [f"{x:.2f}" for x in sortino_ratios] + [f"{avg_sortino_ratio:.2f}"]

# # Append "Average" row at the end
# envs.append("Average")

# # Create a Plotly Table for summary statistics
# summary_table = go.Figure(data=[go.Table(
#     header=dict(
#         values=table_headers,
#         fill_color='paleturquoise',
#         align='left',
#         font=dict(size=12)
#     ),
#     cells=dict(
#         values=[
#             envs, total_returns, annualized_returns, 
#             daily_vols, sharpe_ratios, max_drawdowns, sortino_ratios
#         ],
#         fill_color='lavender',
#         align='left',
#         font=dict(size=11)
#     ))
# ])

# summary_table.update_layout(
#     title="Summary Performance Metrics Across Environments",
#     margin=dict(l=20, r=20, t=50, b=20)
# )

# summary_table.show()

# bal = go.Figure()
# for i in range(test_envs.num_envs):
#     bal.add_trace(go.Scatter(y=portfolio_history[i]["Balance"], mode="lines", name=f"Env {i}",))
# bal.update_layout(
#         title=f"Blance Progression",
#         xaxis_title="Steps",
#         yaxis_title="Portfolio Value",
#         hovermode="x unified",
#     )
# bal.show()
# for i in range(test_envs.num_envs):
#     fig = go.Figure()
#     for key in portfolio_history[i].keys():
#         if key == 'Environment':
#             continue
#         fig.add_trace(go.Scatter(y=portfolio_history[i][key], mode="lines", name=key))
#     # Set legend position to the right
#     fig.update_layout(legend=dict(x=1.05, y=1))

#     # Create annotations for performance metrics, placed on the right, below the legend
#     annotations = []
#     y_pos = 0.7  # Starting y-position for metrics annotations
#     for key, value in summary_metrics[i].items():
#         if key == "Environment":
#             continue  # Skip environment name in annotations

#         # Format as percentage if key contains 'Return' or 'Drawdown'
#         text = f"{key}: {value:.2%}" if "Return" in key or "Drawdown" in key else f"{key}: {value:.2f}"
#         annotations.append(dict(
#             xref="paper", yref="paper",
#             x=1.0, y=y_pos,  # Position to the right of the plot
#             xanchor="left", yanchor="top",
#             text=text,
#             showarrow=False,
#             font=dict(size=10),
#             align="left"
#         ))
#         y_pos -= 0.05  # Adjust vertical spacing for next metric

#     fig.update_layout(
#         title=f"Portfolio Progression in Environment {i + 1}",
#         xaxis_title="Steps",
#         yaxis_title="Portfolio Value",
#         hovermode="x unified",
#         annotations=annotations
#     )

#     fig.show()

