# Toy version

In [1]:
import numpy as np
import pandas as pd

import torch
import torch.optim as optim
from torch.nn import functional as F


from scripts.Agent import iRDPGAgent
from scripts.PRBuffer import Episode, PRBuffer, generate_demonstration_episodes, collect_episode
from scripts.Env import POMDPTEnv, dt_policy, intraday_greedy_actions

from tqdm import trange, tqdm

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
df = pd.read_csv('data/INTC_1Min_2024-02-01_2025-02-01.csv')
df.set_index('timestamp', inplace=True)
df = df.loc['2024-02-01':'2024-05-01']
df.to_csv('data/train_three_months.csv')

In [4]:
df = pd.read_csv('data/train_three_months.csv')

In [5]:
env = POMDPTEnv(df, window_size=1) 

In [6]:
len(intraday_greedy_actions(env))

1955


24242

In [8]:
env.current_step

np.int64(12121)

In [5]:
def load_model(agent, filename="trained_irdpg.pth"):
    checkpoint = torch.load(filename, map_location=agent.device)

    agent.actor_gru.load_state_dict(checkpoint["actor_gru"])
    agent.actor_fc.load_state_dict(checkpoint["actor_fc"])
    agent.critic_gru.load_state_dict(checkpoint["critic_gru"])
    agent.critic_fc.load_state_dict(checkpoint["critic_fc"])
    agent.target_actor.load_state_dict(checkpoint["target_actor_gru"])
    agent.target_actor_fc.load_state_dict(checkpoint["target_actor_fc"])
    agent.target_critic.load_state_dict(checkpoint["target_critic_gru"])
    agent.target_critic_fc.load_state_dict(checkpoint["target_critic_fc"])

    return agent

In [9]:
env = POMDPTEnv(df, window_size=1) 
agent = iRDPGAgent(obs_dim=env.observation_space.shape[0], device=device)
agent = load_model(agent, filename="models/01/trained_irdpg_500.pth")

TypeError: '>=' not supported between instances of 'int' and 'datetime.date'

In [8]:
import matplotlib.pyplot as plt

In [9]:
def evaluate(env, agent, model_path, n_episodes=10):
    # Load trained model
    agent = load_model(agent, model_path)
    agent.eval()
    
    metrics = {
        "total_returns": [],
        "sharpe_ratios": [],
        "volatilities": [],
        "max_drawdowns": []
    }
    
    for _ in range(n_episodes):
        episode_returns = []
        episode_values = []
        obs = env.reset()
        h_actor = None
        done = False
        peak_value = env.initial_balance
        max_drawdown = 0

        while not done:
            with torch.no_grad():
                action, h_actor = agent.act(obs, h_actor, add_noise=False)
            
            obs, reward, done, _ = env.step(action)
            
            # Track metrics
            current_value = env.balance
            episode_returns.append(reward)
            episode_values.append(current_value)
            
            # Calculate drawdown
            peak_value = max(peak_value, current_value)
            drawdown = (peak_value - current_value) / peak_value
            max_drawdown = max(max_drawdown, drawdown)

        # Episode metrics
        returns = np.array(episode_returns)
        metrics["total_returns"].append(env.cumulative_profit / env.initial_balance)
        metrics["sharpe_ratios"].append(returns.mean() / (returns.std() + 1e-9))
        metrics["volatilities"].append(returns.std())
        metrics["max_drawdowns"].append(max_drawdown)
    
    # Aggregate results
    print(f"Evaluation over {n_episodes} episodes:")
    print(f"Average Total Return: {np.mean(metrics['total_returns']):.2%}")
    print(f"Average Sharpe Ratio: {np.mean(metrics['sharpe_ratios']):.2f}")
    print(f"Average Volatility: {np.mean(metrics['volatilities']):.4f}")
    print(f"Average Max Drawdown: {np.mean(metrics['max_drawdowns']):.2%}")
    
    # Plot cumulative returns
    plt.figure(figsize=(10, 6))
    for i in range(min(3, n_episodes)):  # Plot first 3 episodes
        plt.plot(np.cumsum(episode_returns[i]), label=f"Episode {i+1}")
    plt.xlabel("Time Step")
    plt.ylabel("Cumulative Return")
    plt.title("Evaluation: Cumulative Returns")
    plt.legend()
    plt.savefig("evaluation_returns.png")
    plt.close()
    
    return metrics

evaluate(env, agent, 'models/trained_irdpg_500.pth', n_episodes=3)

Evaluation over 3 episodes:
Average Total Return: -5.32%
Average Sharpe Ratio: 0.03
Average Volatility: 239.9122
Average Max Drawdown: 2.66%


{'total_returns': [np.float64(-0.026580847810374467),
  np.float64(-0.053161695620749495),
  np.float64(-0.07974254343112411)],
 'sharpe_ratios': [np.float64(0.026412928732093707),
  np.float64(0.026412928732093707),
  np.float64(0.026412928732093707)],
 'volatilities': [np.float64(239.91218113917725),
  np.float64(239.91218113917725),
  np.float64(239.91218113917725)],
 'max_drawdowns': [np.float64(0.026580847810348206),
  np.float64(0.026580847810348206),
  np.float64(0.026580847810348206)]}

In [10]:
import numpy as np
import matplotlib.pyplot as plt
import torch

def evaluate(agent, env, num_episodes=10):
    """
    Evaluate the agent's performance over multiple episodes.
    
    Args:
        agent (iRDPGAgent): Trained agent
        env (TradingEnv): Trading environment
        num_episodes (int): Number of evaluation episodes
        
    Returns:
        dict: Aggregated evaluation metrics
    """
    agent.eval()
    metrics = {
        'cumulative_returns': [],
        'annualized_sharpe': [],
        'max_drawdowns': [],
        'win_rate': [],
        'profit_factor': [],
        'num_trades': [],
        'action_distribution': [],
        'portfolio_values': []
    }

    for _ in range(num_episodes):
        obs = env.reset()
        h_actor = None
        done = False
        
        # Episode tracking
        portfolio = [env.initial_balance]
        positions = []
        actions = []
        trade_returns = []
        current_balance = env.initial_balance
        prev_position = env.position
        num_trades = 0

        while not done:
            with torch.no_grad():
                action, h_actor = agent.act(obs, h_actor, add_noise=False)
            
            # Store action and position
            actions.append(action.cpu().numpy())
            positions.append(env.position)
            
            # Execute step
            next_obs, reward, done, _ = env.step(action)
            
            # Calculate returns
            new_balance = env.balance
            trade_return = new_balance - current_balance
            trade_returns.append(trade_return)
            portfolio.append(new_balance)
            
            # Track position changes
            if env.position != prev_position:
                num_trades += 1
            prev_position = env.position
            
            current_balance = new_balance
            obs = next_obs

        # Calculate metrics
        returns = np.array(trade_returns)
        portfolio = np.array(portfolio)
        
        # Cumulative return
        cumulative_return = (portfolio[-1] / portfolio[0] - 1) * 100
        
        # Sharpe ratio (annualized)
        if len(returns) > 1:
            sharpe = (np.mean(returns) / np.std(returns)) * np.sqrt(252)
        else:
            sharpe = 0.0
            
        # Max drawdown
        peak = np.maximum.accumulate(portfolio)
        drawdown = (peak - portfolio) / peak
        max_drawdown = np.max(drawdown) * 100 if len(drawdown) > 0 else 0.0
        
        # Win rate and profit factor
        wins = returns[returns > 0]
        losses = returns[returns < 0]
        win_rate = len(wins) / len(returns) * 100 if len(returns) > 0 else 0.0
        profit_factor = np.sum(wins) / np.abs(np.sum(losses)) if len(losses) > 0 else np.inf
        
        # Store metrics
        metrics['cumulative_returns'].append(cumulative_return)
        metrics['annualized_sharpe'].append(sharpe)
        metrics['max_drawdowns'].append(max_drawdown)
        metrics['win_rate'].append(win_rate)
        metrics['profit_factor'].append(profit_factor)
        metrics['num_trades'].append(num_trades)
        metrics['action_distribution'].append(np.mean(actions, axis=0))
        metrics['portfolio_values'].append(portfolio)

    # Generate report
    report = {
        'cumulative_return (%)': f"{np.mean(metrics['cumulative_returns']):.2f} ± {np.std(metrics['cumulative_returns']):.2f}",
        'sharpe_ratio': f"{np.nanmean(metrics['annualized_sharpe']):.2f} ± {np.nanstd(metrics['annualized_sharpe']):.2f}",
        'max_drawdown (%)': f"{np.mean(metrics['max_drawdowns']):.2f} ± {np.std(metrics['max_drawdowns']):.2f}",
        'win_rate (%)': f"{np.mean(metrics['win_rate']):.2f} ± {np.std(metrics['win_rate']):.2f}",
        'profit_factor': f"{np.mean([p for p in metrics['profit_factor'] if p != np.inf]):.2f}",
        'avg_trades_per_episode': f"{np.mean(metrics['num_trades']):.1f} ± {np.std(metrics['num_trades']):.1f}",
    }

    # Plotting
    plot_equity_curves(metrics['portfolio_values'])
    plot_action_distribution(metrics['action_distribution'])

    return report

def plot_equity_curves(portfolio_values, num_curves=5):
    """Plot first few equity curves"""
    plt.figure(figsize=(12, 6))
    for i, pv in enumerate(portfolio_values[:num_curves]):
        plt.plot(pv / pv[0], label=f'Episode {i+1}')
    plt.title('Normalized Equity Curves')
    plt.xlabel('Time Step')
    plt.ylabel('Portfolio Value (Multiple of Initial)')
    plt.legend()
    plt.grid(True)
    plt.savefig('equity_curves.png')
    plt.close()

def plot_action_distribution(action_distributions):
    """Plot average action probabilities"""
    avg_actions = np.mean(action_distributions, axis=0)
    plt.figure(figsize=(8, 4))
    plt.bar(['Long', 'Short'], avg_actions)
    plt.title('Average Action Distribution')
    plt.ylabel('Probability')
    plt.ylim(0, 1)
    plt.savefig('action_distribution.png')
    plt.close()

In [13]:
agent = load_model(agent, filename="models/01/trained_irdpg_500.pth")

In [14]:
evaluate(agent, env, num_episodes=10)

{'cumulative_return (%)': '2.65 ± 0.01',
 'sharpe_ratio': '115.90 ± 16.05',
 'max_drawdown (%)': '0.00 ± 0.00',
 'win_rate (%)': '99.75 ± 0.16',
 'profit_factor': '623.75',
 'avg_trades_per_episode': '20.2 ± 21.0'}