In [1]:
%matplotlib inline
from vnpy.factor.backtesting.backtesting import BacktestEngine
from vnpy.factor.setting import get_backtest_data_cache_path, get_backtest_report_path


# Build the backtest engine: factor classes are resolved from the given module,
# and report / cache output locations come from the vnpy factor settings helpers.
backtest_engine = BacktestEngine(
    factor_module_name="vnpy.factor.factors",
    output_data_dir_for_analyser_reports=get_backtest_report_path(),
    output_data_dir_for_calculator_cache=get_backtest_data_cache_path()
)

[vnpy.trader.setting] Updated SETTINGS from vt_setting.json
[32m2025-06-05 05:45:38.784[0m | [1mINFO[0m | [36mFactorMakerBacktestOrchestrator[0m | [1m[FactorMakerBacktestOrchestrator] Successfully imported factor module: 'vnpy.factor.factors'[0m
[32m2025-06-05 05:45:38.785[0m | [1mINFO[0m | [36mFactorMakerBacktestOrchestrator[0m | [1m[FactorMakerBacktestOrchestrator] FactorMakerBacktestOrchestrator initialized.[0m


In [2]:
# Symbols used for every stage of this walkthrough, written as "<symbol>.<exchange>".
vt_symbols = ['btcusdt.BINANCE', 'ethusdt.BINANCE', 'xrpusdt.BINANCE']

In [3]:
# Sanity check: show the calculator cache directory the engine was configured with.
backtest_engine.output_data_dir_for_calculator_cache

PosixPath('/Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/cache/backtest_factor_data_cache')

In [4]:
from vnpy.factor.setting import FACTOR_DEFINITIONS_FILEPATH
from vnpy.factor.utils.factor_utils import load_factor_setting


# Load the factor definitions from the configured definitions file.
factor_definations = load_factor_setting(FACTOR_DEFINITIONS_FILEPATH)

In [5]:
# Pick the MACD definition by class name instead of by list position, so this
# cell keeps working if the definitions file is reordered or extended.
macd_factor_defination = next(
    (
        definition
        for definition in factor_definations
        if definition.get("class_name") == "MACDFactor"
    ),
    None,
)
if macd_factor_defination is None:
    # Fail here with a clear message rather than later inside init_factors.
    raise LookupError("No factor definition with class_name 'MACDFactor' was found")
macd_factor_defination

{'class_name': 'MACDFactor',
 'factor_name': 'MACDFactor',
 'factor_key': 'factor_1m_macdfactor@fast_period_12-signal_period_9-slow_period_26',
 'freq': '1m',
 'params': {'signal_period': 9},
 'dependencies_factor': [{'class_name': 'EMAFactor',
   'factor_name': 'fast_ema',
   'factor_key': 'factor_1m_emafactor@period_12',
   'freq': '1m',
   'params': {'period': 12},
   'dependencies_factor': [],
   'dependencies_freq': [],
   'dependencies_symbol': [],
   'dependencies_exchange': [],
   'last_run_datetime': '2025-06-04 15:35:49',
   'factor_mode': 'LIVE'},
  {'class_name': 'EMAFactor',
   'factor_name': 'slow_ema',
   'factor_key': 'factor_1m_emafactor@period_26',
   'freq': '1m',
   'params': {'period': 26},
   'dependencies_factor': [],
   'dependencies_freq': [],
   'dependencies_symbol': [],
   'dependencies_exchange': [],
   'last_run_datetime': '2025-06-04 15:35:49',
   'factor_mode': 'LIVE'}],
 'dependencies_freq': [],
 'dependencies_symbol': [],
 'dependencies_exchange': [],


In [6]:
import importlib

from vnpy.factor.utils.factor_utils import init_factors


# Instantiate the MACD factor from its definition. The same module is used to
# look up both the primary factor class and the classes of its dependencies.
factor_module = importlib.import_module("vnpy.factor.factors")
macd_factor = init_factors(
    module_for_primary_classes=factor_module,
    settings_data=[macd_factor_defination],
    dependencies_module_lookup_for_instances=factor_module
)[0]  # a single definition was passed in, so take the first returned instance

In [7]:
# Show the factor's parameters together with those of its dependency factors
# (dependency parameters appear under dotted "<factor_name>.<param>" keys).
macd_factor.get_nested_params_for_optimizer()

{'signal_period': 9, 'fast_ema.period': 12, 'slow_ema.period': 26}

In [8]:
# Stage 1 of the backtest, run by hand through the engine's private helper:
# prepare the target factor for these symbols and flatten its dependency tree.
target_factor_instance, flattened_factors = backtest_engine._init_and_flatten_factor(macd_factor, vt_symbols)

[32m2025-06-05 05:45:38.870[0m | [1mINFO[0m | [36mFactorMakerBacktestOrchestrator[0m | [1m[FactorMakerBacktestOrchestrator] Initializing and flattening factor based on definition. Symbols: ['btcusdt.BINANCE', 'ethusdt.BINANCE', 'xrpusdt.BINANCE'][0m
[32m2025-06-05 05:45:38.872[0m | [1mINFO[0m | [36mFactorMakerBacktestOrchestrator[0m | [1m[FactorMakerBacktestOrchestrator] Target factor instance created: factor_1m_macdfactor@signal_period_9[0m
[32m2025-06-05 05:45:38.873[0m | [34m[1mDEBUG[0m | [36mFactorMakerBacktestOrchestrator[0m | [34m[1m[FactorMakerBacktestOrchestrator] Factor tree flattened. Total factors in graph: 3[0m


In [9]:
# Stage 2: create the factor calculator via the engine's private helper.
# It must be closed explicitly once the computation is done.
calculator = backtest_engine._create_calculator()

[32m2025-06-05 05:45:38.878[0m | [1mINFO[0m | [36mFactorMakerFactorCalculator[0m | [1m[FactorMakerFactorCalculator] FactorCalculator initialized. Factor cache dir: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/cache/backtest_factor_data_cache[0m
[32m2025-06-05 05:45:38.879[0m | [1mINFO[0m | [36mFactorMakerFactorCalculator[0m | [1m[FactorMakerFactorCalculator] Factor cache directory ensured at: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/cache/backtest_factor_data_cache[0m


In [10]:
from datetime import datetime

import numpy as np
import polars as pl
from typing import List, Dict

# Column schema of the long ("flat") OHLCV frame: one row per (datetime, symbol),
# with microsecond UTC timestamps and float64 price / volume fields.
_OHLCV_FLAT_SCHEMA = {
    "datetime": pl.Datetime(time_unit="us", time_zone='UTC'),
    "symbol": pl.Utf8,
    **dict.fromkeys(("open", "high", "low", "close", "volume"), pl.Float64),
}


def generate_fake_ohlcv_wide_dict(
    start_date: datetime,
    end_date: datetime,
    interval: str,
    vt_symbols: List[str],
    seed: "int | None" = None,
) -> Dict[str, pl.DataFrame]:
    """
    Generate random OHLCV bars as a dictionary of "wide" Polars DataFrames.

    The result has one entry per OHLCV field ("open", "high", "low", "close",
    "volume"). Each DataFrame has a "datetime" column (microsecond precision,
    UTC) followed by one Float64 column per symbol, with rows in ascending
    datetime order.

    Values are independent uniform draws per bar (not a random walk):
    close in [50, 150), open within +/-5 of close, high at or above
    max(open, close), low at or below min(open, close) and never below 0.01,
    volume in [500, 1500).

    Args:
        start_date: First timestamp of the generated range.
        end_date: Upper bound (inclusive) of the generated range.
        interval: Polars interval string for the bar spacing (e.g. "1m", "1h").
        vt_symbols: Symbols to generate columns for. Repeated entries are
            ignored after their first occurrence.
        seed: Optional seed for the random generator. Pass an integer to get
            reproducible data; None (the default) draws fresh random data.

    Returns:
        Dict[str, pl.DataFrame]: Mapping from OHLCV field name to its wide
        DataFrame. An empty dictionary is returned when there are no symbols,
        when the date range is empty, or when the date range cannot be built
        (in which case a warning describing the failure is emitted).
    """
    import warnings  # local import: keeps this definition self-contained in the notebook

    # Every symbol becomes a column name, and column names must be unique.
    symbols = list(dict.fromkeys(vt_symbols)) if vt_symbols else []
    if not symbols:
        return {}

    try:
        timestamps = pl.datetime_range(
            start=start_date,
            end=end_date,
            interval=interval,
            time_unit="us",  # microsecond precision for the generated timestamps
            time_zone="UTC",
            eager=True,
        )
    except Exception as exc:
        # Keep the historical "return {} on failure" contract, but make the
        # reason visible instead of swallowing it silently.
        warnings.warn(
            f"Could not build datetime range ({start_date!r} -> {end_date!r}, "
            f"interval={interval!r}): {exc}",
            stacklevel=2,
        )
        return {}

    n_rows = len(timestamps)
    if n_rows == 0:
        return {}

    rng = np.random.default_rng(seed)
    ohlcv_types = ("open", "high", "low", "close", "volume")

    # Build each wide frame directly, column by column. The timestamps are
    # shared and already ascending, so no long-to-wide pivot or sort is needed.
    wide_columns = {field: {"datetime": timestamps} for field in ohlcv_types}

    for symbol in symbols:
        close_prices = rng.random(n_rows) * 100 + 50
        open_prices = close_prices + (rng.random(n_rows) - 0.5) * 10
        high_prices = np.maximum(open_prices, close_prices) + rng.random(n_rows) * 5
        low_prices = np.minimum(open_prices, close_prices) - rng.random(n_rows) * 5
        low_prices = np.maximum(low_prices, 0.01)  # keep lows strictly positive
        volumes = rng.random(n_rows) * 1000 + 500

        per_field_values = (open_prices, high_prices, low_prices, close_prices, volumes)
        for field, values in zip(ohlcv_types, per_field_values):
            wide_columns[field][symbol] = values

    return {field: pl.DataFrame(columns) for field, columns in wide_columns.items()}

In [11]:
from datetime import timedelta


# NOTE(review): datetime.now() is a naive local timestamp, while the generator
# builds its range with time_zone="UTC" — confirm that labelling is intended.
end_dt = datetime.now()
start_dt = end_dt - timedelta(days=5) # Generate data for the last 5 days
time_interval = "1m" # 1-minute bars

ohlcv_data_dictionary = generate_fake_ohlcv_wide_dict(
    start_date=start_dt,
    end_date=end_dt,
    interval=time_interval,
    vt_symbols=vt_symbols
)

In [12]:
# Inject the fake bars straight into the engine's state instead of loading real
# data, and record how many rows are available (taken from the "close" frame).
backtest_engine.memory_bar = ohlcv_data_dictionary
backtest_engine.num_data_rows = backtest_engine.memory_bar["close"].height

In [13]:
# Inspect the generated data: one wide DataFrame per OHLCV field.
ohlcv_data_dictionary

{'open': shape: (7_201, 4)
 ┌────────────────────────────────┬─────────────────┬─────────────────┬─────────────────┐
 │ datetime                       ┆ btcusdt.BINANCE ┆ ethusdt.BINANCE ┆ xrpusdt.BINANCE │
 │ ---                            ┆ ---             ┆ ---             ┆ ---             │
 │ datetime[μs, UTC]              ┆ f64             ┆ f64             ┆ f64             │
 ╞════════════════════════════════╪═════════════════╪═════════════════╪═════════════════╡
 │ 2025-05-31 05:45:38.897487 UTC ┆ 92.378695       ┆ 141.924696      ┆ 78.634087       │
 │ 2025-05-31 05:46:38.897487 UTC ┆ 99.487872       ┆ 80.028162       ┆ 125.192295      │
 │ 2025-05-31 05:47:38.897487 UTC ┆ 49.392837       ┆ 117.956937      ┆ 79.854268       │
 │ 2025-05-31 05:48:38.897487 UTC ┆ 55.552963       ┆ 115.965107      ┆ 88.856818       │
 │ 2025-05-31 05:49:38.897487 UTC ┆ 117.603122      ┆ 92.229428       ┆ 98.193345       │
 │ …                              ┆ …               ┆ …               ┆ …

In [14]:
# Stage 3: compute values for the target factor using the calculator and the
# flattened factor graph prepared in the earlier cells.
factor_df = backtest_engine._run_factor_computation(
    calculator=calculator,
    target_factor_instance=target_factor_instance,
    flattened_factors=flattened_factors,
    vt_symbols_for_run=vt_symbols,  # symbols this particular run is limited to
)

[32m2025-06-05 05:45:39.034[0m | [1mINFO[0m | [36mFactorMakerBacktestOrchestrator[0m | [1m[FactorMakerBacktestOrchestrator] Starting factor value computation phase...[0m
[32m2025-06-05 05:45:39.036[0m | [34m[1mDEBUG[0m | [36mFactorMakerFactorCalculator[0m | [34m[1m[FactorMakerFactorCalculator] Aligning vt_symbols in factor 'factor_1m_emafactor@period_12' to ['btcusdt.BINANCE', 'ethusdt.BINANCE', 'xrpusdt.BINANCE'][0m
[32m2025-06-05 05:45:39.036[0m | [34m[1mDEBUG[0m | [36mFactorMakerFactorCalculator[0m | [34m[1m[FactorMakerFactorCalculator] Aligning vt_symbols in factor 'factor_1m_emafactor@period_26' to ['btcusdt.BINANCE', 'ethusdt.BINANCE', 'xrpusdt.BINANCE'][0m
[32m2025-06-05 05:45:39.037[0m | [1mINFO[0m | [36mFactorMakerFactorCalculator[0m | [1m[FactorMakerFactorCalculator] Starting calculation for: factor_1m_macdfactor@signal_period_9 with symbols ['btcusdt.BINANCE', 'ethusdt.BINANCE', 'xrpusdt.BINANCE'][0m
[32m2025-06-05 05:45:39.037[0m | [34

In [15]:
# Inspect the computed factor values (wide layout: datetime plus one column per symbol).
factor_df

datetime,btcusdt.BINANCE,ethusdt.BINANCE,xrpusdt.BINANCE
"datetime[μs, UTC]",f64,f64,f64
2025-05-31 05:45:38.897487 UTC,0.0,0.0,0.0
2025-05-31 05:46:38.897487 UTC,-0.080398,-4.485348,3.182308
2025-05-31 05:47:38.897487 UTC,-2.96846,-4.513495,2.145178
2025-05-31 05:48:38.897487 UTC,-4.148103,-4.360179,1.590452
2025-05-31 05:49:38.897487 UTC,-0.823006,-5.155451,2.091
…,…,…,…
2025-06-05 05:41:38.897487 UTC,1.208305,1.223247,0.086871
2025-06-05 05:42:38.897487 UTC,2.102586,0.217333,2.325433
2025-06-05 05:43:38.897487 UTC,2.868933,-1.705968,3.040766
2025-06-05 05:44:38.897487 UTC,3.769433,-0.650993,1.992774


In [16]:
# Factor values are computed; close the calculator now that it is no longer needed.
calculator.close()

[32m2025-06-05 05:45:39.397[0m | [1mINFO[0m | [36mFactorMakerFactorCalculator[0m | [1m[FactorMakerFactorCalculator] FactorCalculator closed.[0m
[32m2025-06-05 05:45:39.445[0m | [34m[1mDEBUG[0m | [36mFactorMakerFactorCalculator[0m | [34m[1m[FactorMakerFactorCalculator] GC performed.[0m
Factor memory file cleared and re-initialized: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/cache/backtest_factor_data_cache/factor_1m_emafactor@period_12.arrow
Factor memory file cleared and re-initialized: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/cache/backtest_factor_data_cache/factor_1m_emafactor@period_26.arrow
Factor memory file cleared and re-initialized: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/cache/backtest_factor_data_cache/factor_1m_macdfactor@signal_period_9.arrow
[32m2025-06-05 05:45:39.447[0m | [34m[1mDEBUG[0m | [36mFactorMakerFactorCalculator[0m | [34m[1m[FactorMakerFactorCalculator] FactorMemory instance

In [17]:
# Hand the analysis its own copy of the wide close-price frame held by the engine.
market_close_prices_df = backtest_engine.memory_bar["close"].clone()

In [18]:
# The analysis window spans the earliest and latest timestamps that actually
# appear in the computed factor values.
actual_analysis_start_dt = factor_df.get_column("datetime").min()
actual_analysis_end_dt = factor_df.get_column("datetime").max()

In [19]:
# Stage 4: run the factor analysis over the computed values and close prices,
# restricted to the window derived from factor_df.
# NOTE(review): the result is presumably the path of the generated report — confirm.
report_path = backtest_engine._run_factor_analysis(
    factor_df=factor_df,
    market_close_prices_df=market_close_prices_df,
    target_factor_instance=target_factor_instance,
    analysis_start_dt=actual_analysis_start_dt,
    analysis_end_dt=actual_analysis_end_dt,
    num_quantiles=2,
    returns_look_ahead_period=1,
    long_percentile_threshold=0.5,
    short_percentile_threshold=0.5,
    report_filename_prefix='test',
)

[32m2025-06-05 05:45:39.464[0m | [1mINFO[0m | [36mFactorMakerBacktestOrchestrator[0m | [1m[FactorMakerBacktestOrchestrator] Starting factor analysis phase...[0m
[32m2025-06-05 05:45:39.465[0m | [1mINFO[0m | [36mFactorMakerFactorAnalyser[0m | [1m[FactorMakerFactorAnalyser] FactorAnalyser initialized. Report dir: /Users/chenzhao/Documents/crypto_vnpy/vnpy/tests/.vnpy/factor/reports/backtest_reports[0m
[32m2025-06-05 05:45:39.466[0m | [1mINFO[0m | [36mFactorMakerFactorAnalyser[0m | [1m[FactorMakerFactorAnalyser] Starting analysis for factor: factor_1m_macdfactor@signal_period_9[0m
[32m2025-06-05 05:45:39.466[0m | [1mINFO[0m | [36mFactorMakerFactorAnalyser[0m | [1m[FactorMakerFactorAnalyser] Preparing 1-period symbol forward returns data...[0m
[32m2025-06-05 05:45:39.468[0m | [1mINFO[0m | [36mFactorMakerFactorAnalyser[0m | [1m[FactorMakerFactorAnalyser] Symbol forward returns prepared for 3 symbols.[0m
[32m2025-06-05 05:45:39.470[0m | [1mINFO[0m 