In [2]:
import sys
sys.path.append("..")

import numpy as np
import pandas as pd

# Helper classes for data processing, state representation
from reinforcetrader.data_pipeline import RawDataLoader, FeatureBuilder
from reinforcetrader.state import EpisodeStateLoader
from reinforcetrader.dqn_agent import DRLAgent

# Import other utilities
from reinforcetrader.utils.backtest_engine import BackTester

# Helper method for displaying large dataframes
from IPython.display import HTML
pd.set_option("display.max_rows", None)
from IPython.display import HTML, display

def display_df(df, rows=10, height=300):
    """Render the first ``rows`` rows of ``df`` inside a scrollable HTML box.

    Args:
        df: DataFrame to preview; only ``df.head(rows)`` is rendered.
        rows: Number of leading rows to include (default 10).
        height: Height of the scrollable container. A number is interpreted
            as pixels; any other value is converted to a string and used
            as the CSS height as-is (e.g. ``"50vh"``). The default of 300
            reproduces the previously hard-coded ``300px``.
    """
    # Numbers become pixel values so the default matches the old fixed height
    css_height = f"{height}px" if isinstance(height, (int, float)) else str(height)
    table_html = df.head(rows).to_html()
    # A fixed-height div with overflow:auto puts the scrollbar next to the DataFrame
    display(HTML(f"<div style='height: {css_height}; overflow: auto; width: 98%'>{table_html}</div>"))

# Test DQN Agent Performance on OOS Data
To run the backtest on out-of-sample (OOS) data, we must first set up two things: replicate the Walk-Forward Validation (WFV) state loader, and load the trained model and its configuration.

## Get WFV State Loader

In [3]:
# Set up the raw-data loader for the DJI index
# (per the original note, the raw data comes from the Yahoo Finance API)
data_loader = RawDataLoader(
    start_date='1999-08-01',
    end_date='2025-09-08',
    index='DJI',
    verbose=False,
)

# Historical prices for the tickers and for the benchmark
# (per the original note, these are served from the local cache)
ticker_data, benchmark_data = data_loader.get_hist_prices()

# Compute the features used for state representation, reward computation, etc.
feature_builder = FeatureBuilder(
    ticker_data=ticker_data,
    benchmark_data=benchmark_data,
    f_prefix='DJI',
)
feature_builder.build_features()

# Keep the computed features and the feature index ranges; later cells use both
features_data = feature_builder.get_features()
feature_indices = feature_builder.get_feature_indices()

Building ticker features: 100%|█████████████████████████████████████| 28/28 [00:02<00:00, 10.68it/s]


File already exists, skipping save: ../data/processed/DJI_tickers_features_2000-04-07_2025-09-04.csv


In [4]:
# Walk-Forward Validation (WFV) configuration: split boundaries and window sizes
WFV_config = dict(
    mode='expanding',  # supports 'expanding' and 'moving'
    train_start='2000-04-07',
    train_end='2019-09-30',
    test_start='2019-10-01',
    test_end='2025-07-31',
    train_window_size=1200,
    val_window_size=630,
)

In [5]:
# Build the episode state loader from the features, their index ranges,
# and the WFV configuration
state_loader = EpisodeStateLoader(
    features_data,
    feature_indices,
    WFV_config,
)

## Load the Trained Agent

In [6]:
# RL agent configuration.
# NOTE: Many of these parameters are NOT used in testing; they are only part
# of the agent's definition. Per the original note, defining them is optional.
agent_config = {
    # State input
    'num_features': len(feature_indices['State']),

    # Optimisation / regularisation
    'learning_rate': 0.001,
    'dropout_p': 0.1,

    # Replay memory and state window
    'memory_buffer_len': 200_000,
    'state_matrix_window': 60,

    # Exploration schedule
    'epsilon_start': 1.0,
    'epsilon_min': 0.05,
    'epsilon_boost_factor': 0.3,
    'decay_updates': 50_000,

    # Learning loop
    'discount_factor': 0.95,
    'batch_size': 256,
    'replay_start_size': 5_000,
    'train_interval': 1,
}

# Directories handed to the agent's test run (plots, logs and test outputs)
test_config = dict(
    plots_dir='../plots/',
    logs_dir='../logs/',
    outputs_dir='../data/test/',
)

# Path of the trained model checkpoint to evaluate (change it here)
MODEL_PATH = '../model_checkpoints/DDQN_DSR_AE1.keras'

# Instantiate the DRL agent from the configuration and the saved model
agent = DRLAgent(
    agent_config=agent_config,
    reward_type='DSR',
    model_path=MODEL_PATH,
)

I0000 00:00:1766375911.895471     484 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3584 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


Loading model from ../model_checkpoints/DDQN_DSR_AE1.keras


## Test Agent Performance From 2020-2025

In [None]:
# There is only one "episode" in the test, so its id is 0
TEST_EPISODE_ID = 0

# Run the test and collect the signals and prices for the stock
signals, prices = agent.test(state_loader, TEST_EPISODE_ID, test_config)

Testing episode 0:   7%|███▏                                         | 2/28 [01:19<16:26, 37.96s/it]

In [None]:
# Preview the signals dataframe; it is expected to include the predicted
# action and its q-value (logit value)
display_df(signals, rows=10)

In [None]:
# Backtest the signals against prices to measure the agent's performance
# and compare it to the baselines
backtester = BackTester(
    signals,
    prices,
)
backtester.run_backtest()

## Agent behavior: Per Ticker Analysis