In [1]:
%matplotlib inline
from vnpy.factor.backtesting.backtesting import BacktestEngine
from vnpy.factor.setting import get_backtest_data_cache_path, get_backtest_report_path


backtest_engine = BacktestEngine(
    factor_module_name="vnpy.factor.factors",
    output_data_dir_for_analyser_reports=get_backtest_report_path(),
    output_data_dir_for_calculator_cache=get_backtest_data_cache_path()
)

[vnpy.trader.setting] Updated SETTINGS from vt_setting.json
[32m2025-06-04 14:48:31.636[0m | [1mINFO[0m | [36mFactorMakerBacktestOrchestrator[0m | [1m[FactorMakerBacktestOrchestrator] Successfully imported factor module: 'vnpy.factor.factors'[0m
[32m2025-06-04 14:48:31.636[0m | [1mINFO[0m | [36mFactorMakerBacktestOrchestrator[0m | [1m[FactorMakerBacktestOrchestrator] FactorMakerBacktestOrchestrator initialized.[0m


In [2]:
vt_symbols = ['btcusdt.BINANCE', 'ethusdt.BINANCE', 'xrpusdt.BINANCE']

In [3]:
backtest_engine.output_data_dir_for_calculator_cache

PosixPath('/Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/cache/backtest_factor_data_cache')

In [4]:
from vnpy.factor.setting import FACTOR_DEFINITIONS_FILEPATH
from vnpy.factor.utils.factor_utils import load_factor_setting


factor_definations = load_factor_setting(FACTOR_DEFINITIONS_FILEPATH)

In [5]:
macd_factor_defination = factor_definations[2]
macd_factor_defination

{'class_name': 'MACDFactor',
 'factor_name': 'MACDFactor',
 'factor_key': 'factor_1m_macdfactor@fast_period_12-signal_period_9-slow_period_26',
 'freq': '1m',
 'params': {'fast_period': 12, 'slow_period': 26, 'signal_period': 9},
 'dependencies_factor': [{'class_name': 'EMAFactor',
   'factor_name': 'fast_ema',
   'factor_key': 'factor_1m_emafactor@period_12',
   'freq': '1m',
   'params': {'period': 12},
   'dependencies_factor': [],
   'dependencies_freq': [],
   'dependencies_symbol': [],
   'dependencies_exchange': [],
   'last_run_datetime': '2025-06-02 11:17:40',
   'factor_mode': 'LIVE'},
  {'class_name': 'EMAFactor',
   'factor_name': 'slow_ema',
   'factor_key': 'factor_1m_emafactor@period_26',
   'freq': '1m',
   'params': {'period': 26},
   'dependencies_factor': [],
   'dependencies_freq': [],
   'dependencies_symbol': [],
   'dependencies_exchange': [],
   'last_run_datetime': '2025-06-02 11:17:40',
   'factor_mode': 'LIVE'}],
 'dependencies_freq': [],
 'dependencies_symbo

In [6]:
import importlib

from vnpy.factor.utils.factor_utils import init_factors


factor_module = importlib.import_module("vnpy.factor.factors")
macd_factor = init_factors(
    module_for_primary_classes=factor_module,
    settings_data=[macd_factor_defination],
    dependencies_module_lookup_for_instances=factor_module
)[0]

In [7]:
macd_factor.get_nested_params_for_optimizer()

{'fast_period': 12,
 'slow_period': 26,
 'signal_period': 9,
 'fast_ema.period': 12,
 'slow_ema.period': 26}

In [8]:
target_factor_instance, flattened_factors = backtest_engine._init_and_flatten_factor(macd_factor, vt_symbols)

[32m2025-06-04 14:48:31.681[0m | [1mINFO[0m | [36mFactorMakerBacktestOrchestrator[0m | [1m[FactorMakerBacktestOrchestrator] Initializing and flattening factor based on definition. Symbols: ['btcusdt.BINANCE', 'ethusdt.BINANCE', 'xrpusdt.BINANCE'][0m
[32m2025-06-04 14:48:31.682[0m | [1mINFO[0m | [36mFactorMakerBacktestOrchestrator[0m | [1m[FactorMakerBacktestOrchestrator] Target factor instance created: factor_1m_macdfactor@fast_period_12-signal_period_9-slow_period_26[0m
[32m2025-06-04 14:48:31.683[0m | [34m[1mDEBUG[0m | [36mFactorMakerBacktestOrchestrator[0m | [34m[1m[FactorMakerBacktestOrchestrator] Factor tree flattened. Total factors in graph: 3[0m


In [9]:
calculator = backtest_engine._create_calculator()

[32m2025-06-04 14:48:31.687[0m | [1mINFO[0m | [36mFactorMakerFactorCalculator[0m | [1m[FactorMakerFactorCalculator] FactorCalculator initialized. Factor cache dir: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/cache/backtest_factor_data_cache[0m
[32m2025-06-04 14:48:31.688[0m | [1mINFO[0m | [36mFactorMakerFactorCalculator[0m | [1m[FactorMakerFactorCalculator] Factor cache directory ensured at: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/cache/backtest_factor_data_cache[0m


In [10]:
from datetime import datetime

import numpy as np
import polars as pl
from typing import List, Dict

# Schema for the intermediate flat DataFrame
_OHLCV_FLAT_SCHEMA = {
    "datetime": pl.Datetime(time_unit="us"),
    "symbol": pl.Utf8,
    "open": pl.Float64,
    "high": pl.Float64,
    "low": pl.Float64,
    "close": pl.Float64,
    "volume": pl.Float64,
}


def generate_fake_ohlcv_wide_dict(
    start_date: datetime,
    end_date: datetime,
    interval: str,
    vt_symbols: List[str]
) -> Dict[str, pl.DataFrame]:
    """
    Generates a dictionary of Polars DataFrames with fake OHLCV data.
    Each key in the dictionary is an OHLCV type ("open", "high", "low", "close", "volume").
    Each DataFrame has a "datetime" column and columns for each symbol in vt_symbols,
    containing the respective OHLCV values.

    Args:
        start_date: The start datetime for the data generation.
        end_date: The end datetime for the data generation.
        interval: Polars interval string for fake data generation (e.g., "1m", "1h").
        vt_symbols: List of symbol strings (e.g., ["BTCUSDT", "ETHUSDT"]).

    Returns:
        Dict[str, pl.DataFrame]: A dictionary where keys are OHLCV types and
                                 values are "wide" DataFrames. Returns an empty
                                 dictionary if no data can be generated.
    """
    if not vt_symbols:
        return {}

    # --- Inlined logic from _generate_fake_ohlcv_flat_df ---
    flat_df: pl.DataFrame
    try:
        fake_dates = pl.datetime_range(
            start=start_date,
            end=end_date,
            interval=interval,
            time_unit="us",  # Assuming interval is in minutes
            eager=True,
            time_zone="UTC"
        )
    except Exception: # Handle potential errors in datetime_range
        return {} # Return empty dict if date range generation fails

    if len(fake_dates) == 0:
        return {} # Return empty dict if no dates are generated

    num_fake_rows_per_symbol = len(fake_dates)
    all_rows_data = []

    for symbol in vt_symbols:
        # Generate somewhat realistic OHLCV data
        close_prices = np.random.rand(num_fake_rows_per_symbol) * 100 + 50
        open_prices = close_prices + (np.random.rand(num_fake_rows_per_symbol) - 0.5) * 10
        high_prices = np.maximum(open_prices, close_prices) + np.random.rand(num_fake_rows_per_symbol) * 5
        low_prices = np.minimum(open_prices, close_prices) - np.random.rand(num_fake_rows_per_symbol) * 5
        low_prices = np.maximum(low_prices, 0.01) # Ensure low is not zero or negative
        volumes = np.random.rand(num_fake_rows_per_symbol) * 1000 + 500

        for i in range(num_fake_rows_per_symbol):
            all_rows_data.append({
                "datetime": fake_dates[i],
                "symbol": symbol,
                "open": open_prices[i],
                "high": high_prices[i],
                "low": low_prices[i],
                "close": close_prices[i],
                "volume": volumes[i],
            })

    if not all_rows_data: # Should only happen if vt_symbols was empty, but that's checked above
        return {}

    flat_df = pl.DataFrame(all_rows_data, schema=_OHLCV_FLAT_SCHEMA)
    # --- End of inlined logic ---

    if flat_df.is_empty(): # Double check, though previous checks should cover this
        return {}

    ohlcv_dict: Dict[str, pl.DataFrame] = {}
    ohlcv_types = ["open", "high", "low", "close", "volume"]

    # Get unique sorted datetimes once for potential error case in pivot
    unique_sorted_datetimes = flat_df.get_column("datetime").unique().sort()

    for ohlcv_type in ohlcv_types:
        try:
            pivoted_df = flat_df.pivot(
                index="datetime",
                on="symbol",  # Changed "columns" to "on"
                values=ohlcv_type
            )
            # Sort by datetime just in case pivoting changes order
            pivoted_df = pivoted_df.sort("datetime")
            ohlcv_dict[ohlcv_type] = pivoted_df
        except Exception as e:
            # Handle potential errors during pivoting
            print(f"Error pivoting data for {ohlcv_type}: {e}. Creating empty structure for this type.")
            # Create an empty DataFrame with the expected structure
            empty_df_for_type = pl.DataFrame({"datetime": unique_sorted_datetimes})
            for sym in vt_symbols:
                 empty_df_for_type = empty_df_for_type.with_columns(pl.lit(None, dtype=pl.Float64).alias(sym))
            ohlcv_dict[ohlcv_type] = empty_df_for_type

    return ohlcv_dict

In [11]:
from datetime import timedelta


end_dt = datetime.now()
start_dt = end_dt - timedelta(days=5) # Generate for the last 30 minutes
time_interval = "1m" # 5-minute interval

ohlcv_data_dictionary = generate_fake_ohlcv_wide_dict(
    start_date=start_dt,
    end_date=end_dt,
    interval=time_interval,
    vt_symbols=vt_symbols
)

In [12]:
backtest_engine.memory_bar = ohlcv_data_dictionary
backtest_engine.num_data_rows = backtest_engine.memory_bar["close"].height

In [13]:
ohlcv_data_dictionary

{'open': shape: (7_201, 4)
 ┌────────────────────────────┬─────────────────┬─────────────────┬─────────────────┐
 │ datetime                   ┆ btcusdt.BINANCE ┆ ethusdt.BINANCE ┆ xrpusdt.BINANCE │
 │ ---                        ┆ ---             ┆ ---             ┆ ---             │
 │ datetime[μs]               ┆ f64             ┆ f64             ┆ f64             │
 ╞════════════════════════════╪═════════════════╪═════════════════╪═════════════════╡
 │ 2025-05-30 14:48:31.703517 ┆ 72.322443       ┆ 50.286653       ┆ 102.966383      │
 │ 2025-05-30 14:49:31.703517 ┆ 69.714506       ┆ 119.285236      ┆ 140.2193        │
 │ 2025-05-30 14:50:31.703517 ┆ 69.937841       ┆ 64.731664       ┆ 127.119094      │
 │ 2025-05-30 14:51:31.703517 ┆ 127.440508      ┆ 118.994463      ┆ 98.30896        │
 │ 2025-05-30 14:52:31.703517 ┆ 55.283141       ┆ 148.617473      ┆ 63.70866        │
 │ …                          ┆ …               ┆ …               ┆ …               │
 │ 2025-06-04 14:44:31.7035

In [14]:
factor_df = backtest_engine._run_factor_computation(
        calculator=calculator,
        target_factor_instance=target_factor_instance,
        flattened_factors=flattened_factors,
        vt_symbols_for_run=vt_symbols,  # Use the symbols for this specific run
    )

[32m2025-06-04 14:48:31.891[0m | [1mINFO[0m | [36mFactorMakerBacktestOrchestrator[0m | [1m[FactorMakerBacktestOrchestrator] Starting factor value computation phase...[0m
[32m2025-06-04 14:48:31.893[0m | [34m[1mDEBUG[0m | [36mFactorMakerFactorCalculator[0m | [34m[1m[FactorMakerFactorCalculator] Aligning vt_symbols in factor 'factor_1m_emafactor@period_12' to ['btcusdt.BINANCE', 'ethusdt.BINANCE', 'xrpusdt.BINANCE'][0m
[32m2025-06-04 14:48:31.895[0m | [34m[1mDEBUG[0m | [36mFactorMakerFactorCalculator[0m | [34m[1m[FactorMakerFactorCalculator] Aligning vt_symbols in factor 'factor_1m_emafactor@period_26' to ['btcusdt.BINANCE', 'ethusdt.BINANCE', 'xrpusdt.BINANCE'][0m
[32m2025-06-04 14:48:31.896[0m | [1mINFO[0m | [36mFactorMakerFactorCalculator[0m | [1m[FactorMakerFactorCalculator] Starting calculation for: factor_1m_macdfactor@fast_period_12-signal_period_9-slow_period_26 with symbols ['btcusdt.BINANCE', 'ethusdt.BINANCE', 'xrpusdt.BINANCE'][0m
[32m2025

In [15]:
factor_df

datetime,btcusdt.BINANCE,ethusdt.BINANCE,xrpusdt.BINANCE
"datetime[μs, UTC]",f64,f64,f64
2025-05-30 14:48:31.703517 UTC,0.0,0.0,0.0
2025-05-30 14:49:31.703517 UTC,-0.366008,4.641576,2.302186
2025-05-30 14:50:31.703517 UTC,-0.727703,3.413422,2.9254
2025-05-30 14:51:31.703517 UTC,3.108542,6.036053,1.258957
2025-05-30 14:52:31.703517 UTC,0.700206,9.327731,-1.854376
…,…,…,…
2025-06-04 14:44:31.703517 UTC,1.45069,-1.218276,1.350531
2025-06-04 14:45:31.703517 UTC,1.855304,-1.324281,-1.237633
2025-06-04 14:46:31.703517 UTC,-2.214612,-4.113984,-0.339514
2025-06-04 14:47:31.703517 UTC,-1.414318,-0.017893,-2.909403


In [16]:
calculator.close()

[32m2025-06-04 14:48:32.280[0m | [1mINFO[0m | [36mFactorMakerFactorCalculator[0m | [1m[FactorMakerFactorCalculator] FactorCalculator closed.[0m
[32m2025-06-04 14:48:32.338[0m | [34m[1mDEBUG[0m | [36mFactorMakerFactorCalculator[0m | [34m[1m[FactorMakerFactorCalculator] GC performed.[0m
Factor memory file cleared and re-initialized: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/cache/backtest_factor_data_cache/factor_1m_emafactor@period_12.arrow
Factor memory file cleared and re-initialized: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/cache/backtest_factor_data_cache/factor_1m_emafactor@period_26.arrow
Factor memory file cleared and re-initialized: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/cache/backtest_factor_data_cache/factor_1m_macdfactor@fast_period_12-signal_period_9-slow_period_26.arrow
[32m2025-06-04 14:48:32.340[0m | [34m[1mDEBUG[0m | [36mFactorMakerFactorCalculator[0m | [34m[1m[FactorMakerFactorCal

In [17]:
market_close_prices_df = backtest_engine.memory_bar["close"].clone()

In [18]:
actual_analysis_start_dt = factor_df.select(pl.col('datetime').min()).item()
actual_analysis_end_dt = factor_df.select(pl.col('datetime').max()).item()

In [19]:
report_path = backtest_engine._run_factor_analysis(
    factor_df=factor_df,
    market_close_prices_df=market_close_prices_df,
    target_factor_instance=target_factor_instance,
    analysis_start_dt=actual_analysis_start_dt,
    analysis_end_dt=actual_analysis_end_dt,
    num_quantiles=2,
    returns_look_ahead_period=1,
    long_percentile_threshold=0.5,
    short_percentile_threshold=0.5,
    report_filename_prefix='test',
)

[32m2025-06-04 14:48:32.359[0m | [1mINFO[0m | [36mFactorMakerBacktestOrchestrator[0m | [1m[FactorMakerBacktestOrchestrator] Starting factor analysis phase...[0m
[32m2025-06-04 14:48:32.361[0m | [1mINFO[0m | [36mFactorMakerFactorAnalyser[0m | [1m[FactorMakerFactorAnalyser] FactorAnalyser initialized. Report dir: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/reports/backtest_reports[0m
[32m2025-06-04 14:48:32.362[0m | [1mINFO[0m | [36mFactorMakerFactorAnalyser[0m | [1m[FactorMakerFactorAnalyser] Report directory ensured at: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/reports/backtest_reports[0m
shape: (7_201, 4)
┌────────────────────────────┬─────────────────┬─────────────────┬─────────────────┐
│ datetime                   ┆ btcusdt.BINANCE ┆ ethusdt.BINANCE ┆ xrpusdt.BINANCE │
│ ---                        ┆ ---             ┆ ---             ┆ ---             │
│ datetime[μs]               ┆ f64             ┆ f64             ┆ f6

In [20]:
market_close_prices_df = backtest_engine.memory_bar["close"].join(
    factor_df.select(pl.col("datetime")),
    on="datetime",
    how="inner",
)



market_close_prices_df.select(
    [pl.col("datetime")] + # Start with the datetime column
    [
        (
            pl.col(symbol).pct_change().shift(-1)
            .otherwise(0.0)
            .fill_null(0.0) # Handles nulls from shift(-1) or original pct_change if any
            .alias(symbol)
        )
        for symbol in backtest_engine.vt_symbols
    ]
).sort("datetime")

SchemaError: datatypes of join keys don't match - `datetime`: datetime[μs] on left does not match `datetime`: datetime[μs, UTC] on right

Resolved plan until failure:

	---> FAILED HERE RESOLVING 'join' <---
DF ["datetime"]; PROJECT */1 COLUMNS

In [21]:
backtest_engine.memory_bar["close"]

datetime,btcusdt.BINANCE,ethusdt.BINANCE,xrpusdt.BINANCE
datetime[μs],f64,f64,f64
2025-05-30 14:48:31.703517,75.32033,50.323127,99.757349
2025-05-30 14:49:31.703517,69.585112,123.054973,135.831781
2025-05-30 14:50:31.703517,67.198489,62.201816,124.95987
2025-05-30 14:51:31.703517,130.22661,118.451291,95.316162
2025-05-30 14:52:31.703517,57.43686,149.734169,64.163991
…,…,…,…
2025-06-04 14:44:31.703517,129.329856,70.796505,147.343227
2025-06-04 14:45:31.703517,132.032638,96.113968,85.814321
2025-06-04 14:46:31.703517,66.368801,52.728696,124.871295
2025-06-04 14:47:31.703517,117.543443,141.160633,76.071138


In [22]:
factor_df

datetime,btcusdt.BINANCE,ethusdt.BINANCE,xrpusdt.BINANCE
"datetime[μs, UTC]",f64,f64,f64
2025-05-30 14:48:31.703517 UTC,0.0,0.0,0.0
2025-05-30 14:49:31.703517 UTC,-0.366008,4.641576,2.302186
2025-05-30 14:50:31.703517 UTC,-0.727703,3.413422,2.9254
2025-05-30 14:51:31.703517 UTC,3.108542,6.036053,1.258957
2025-05-30 14:52:31.703517 UTC,0.700206,9.327731,-1.854376
…,…,…,…
2025-06-04 14:44:31.703517 UTC,1.45069,-1.218276,1.350531
2025-06-04 14:45:31.703517 UTC,1.855304,-1.324281,-1.237633
2025-06-04 14:46:31.703517 UTC,-2.214612,-4.113984,-0.339514
2025-06-04 14:47:31.703517 UTC,-1.414318,-0.017893,-2.909403
