diff --git a/nautilus_trader/backtest/data.pxd b/nautilus_trader/backtest/data.pxd index 448a642753a..255b786421f 100644 --- a/nautilus_trader/backtest/data.pxd +++ b/nautilus_trader/backtest/data.pxd @@ -58,6 +58,7 @@ cdef class BacktestDataProducer(DataClient): cdef datetime[:] _quote_timestamps cdef int _quote_index cdef int _quote_index_last + cdef QuoteTick _next_quote_tick cdef unsigned short[:] _trade_symbols cdef str[:] _trade_prices @@ -67,14 +68,12 @@ cdef class BacktestDataProducer(DataClient): cdef datetime[:] _trade_timestamps cdef int _trade_index cdef int _trade_index_last - cdef TradeTick _next_trade_tick - cdef QuoteTick _next_quote_tick cdef readonly list execution_resolutions cdef readonly datetime min_timestamp cdef readonly datetime max_timestamp - cdef readonly bint has_data + cdef readonly bint has_tick_data cpdef void setup(self, datetime start, datetime stop) except * cpdef void reset(self) except * diff --git a/nautilus_trader/backtest/data.pyx b/nautilus_trader/backtest/data.pyx index c6036e8ca3c..6136a78be63 100644 --- a/nautilus_trader/backtest/data.pyx +++ b/nautilus_trader/backtest/data.pyx @@ -417,6 +417,7 @@ cdef class BacktestDataProducer(DataClient): self._quote_timestamps = None self._quote_index = 0 self._quote_index_last = 0 + self._next_quote_tick = None self._trade_symbols = None self._trade_prices = None @@ -426,8 +427,9 @@ cdef class BacktestDataProducer(DataClient): self._trade_timestamps = None self._trade_index = 0 self._trade_index_last = 0 + self._next_trade_tick = None - self.has_data = False + self.has_tick_data = False self._log.info(f"Prepared {len(self._quote_tick_data):,} total tick rows in " f"{round((datetime.utcnow() - timing_start_total).total_seconds(), 2)}s.") @@ -511,11 +513,11 @@ cdef class BacktestDataProducer(DataClient): # Prepare initial tick self._iterate_trade_ticks() - self.has_data = True + self.has_tick_data = True self._log.info(f"Data stream size: {format_bytes(total_size)}") - cdef Tick next_tick(self): # TODO: Refactor + cdef Tick next_tick(self): cdef Tick next_tick # Quote ticks only if self._next_trade_tick is None: @@ -565,7 +567,7 @@ cdef class BacktestDataProducer(DataClient): else: self._next_quote_tick = None if self._next_trade_tick is None: - self.has_data = False + self.has_tick_data = False cdef inline void _iterate_trade_ticks(self) except *: if self._trade_index <= self._trade_index_last: @@ -574,7 +576,7 @@ cdef class BacktestDataProducer(DataClient): else: self._next_trade_tick = None if self._next_quote_tick is None: - self.has_data = False + self.has_tick_data = False # -- COMMANDS -------------------------------------------------------------------------------------- @@ -610,7 +612,7 @@ cdef class BacktestDataProducer(DataClient): self._trade_index = 0 self._trade_index_last = len(self._quote_tick_data) - 1 - self.has_data = False + self.has_tick_data = False self._log.info("Reset.") diff --git a/nautilus_trader/backtest/engine.pyx b/nautilus_trader/backtest/engine.pyx index ce5d3cfe796..de9bdbb66f3 100644 --- a/nautilus_trader/backtest/engine.pyx +++ b/nautilus_trader/backtest/engine.pyx @@ -359,7 +359,7 @@ cdef class BacktestEngine: cdef Tick tick # -- MAIN BACKTEST LOOP -----------------------------------------------# - while self.data_client.has_data: + while self.data_client.has_tick_data: tick = self.data_client.next_tick() self._advance_time(tick.timestamp) self.exchange.process_tick(tick) diff --git a/nautilus_trader/data/aggregation.pyx b/nautilus_trader/data/aggregation.pyx index 2f29fdb0089..b12c0a9087e 100644 --- a/nautilus_trader/data/aggregation.pyx +++ b/nautilus_trader/data/aggregation.pyx @@ -476,6 +476,7 @@ cdef class TimeBarAggregator(BarAggregator): cpdef datetime get_start_time(self): cdef datetime now = self._clock.utc_now() + cdef int step = self.bar_type.spec.step if self.bar_type.spec.aggregation == BarAggregation.SECOND: return datetime( year=now.year, @@ -483,7 +484,7 @@ cdef class TimeBarAggregator(BarAggregator): day=now.day, hour=now.hour, minute=now.minute, - second=now.second, + second=now.second - (now.second % step), tzinfo=now.tzinfo, ) elif self.bar_type.spec.aggregation == BarAggregation.MINUTE: @@ -492,7 +493,7 @@ cdef class TimeBarAggregator(BarAggregator): month=now.month, day=now.day, hour=now.hour, - minute=now.minute, + minute=now.minute - (now.minute % step), tzinfo=now.tzinfo, ) elif self.bar_type.spec.aggregation == BarAggregation.HOUR: @@ -500,14 +501,14 @@ cdef class TimeBarAggregator(BarAggregator): year=now.year, month=now.month, day=now.day, - hour=now.hour, + hour=now.hour - (now.hour % step), tzinfo=now.tzinfo, ) elif self.bar_type.spec.aggregation == BarAggregation.DAY: return datetime( year=now.year, month=now.month, - day=now.day, + day=now.day - (now.day % step), ) else: # Design time error diff --git a/nautilus_trader/data/wrangling.pyx b/nautilus_trader/data/wrangling.pyx index 6ff5993c19c..4d1d2129998 100644 --- a/nautilus_trader/data/wrangling.pyx +++ b/nautilus_trader/data/wrangling.pyx @@ -13,6 +13,8 @@ # limitations under the License. # ------------------------------------------------------------------------------------------------- +import random + import pandas as pd from cpython.datetime cimport datetime @@ -72,7 +74,7 @@ cdef class QuoteTickDataWrangler: Condition.type_or_none(data_bars_bid, dict, "bid_data") Condition.type_or_none(data_bars_ask, dict, "ask_data") - if data_ticks is not None and len(data_ticks) > 0: + if data_ticks is not None and not data_ticks.empty: self._data_ticks = as_utc_index(data_ticks) else: Condition.not_none(data_bars_bid, "data_bars_bid") @@ -85,7 +87,7 @@ cdef class QuoteTickDataWrangler: self.processed_data = [] self.resolution = BarAggregation.UNDEFINED - def pre_process(self, int symbol_indexer): + def pre_process(self, int symbol_indexer, random_seed=None): """ Pre-process the tick data in preparation for building ticks. @@ -93,9 +95,15 @@ cdef class QuoteTickDataWrangler: ---------- symbol_indexer : int The symbol indexer for the built ticks. + random_seed : int, optional + The random seed for shuffling order of high and low ticks from bar + data. If random_seed is None then won't shuffle. """ - if self._data_ticks is not None and len(self._data_ticks) > 0: + if random_seed is not None: + Condition.type(random_seed, int, "random_seed") + + if self._data_ticks is not None and not self._data_ticks.empty: # Build ticks from data self.processed_data = self._data_ticks self.processed_data["symbol"] = symbol_indexer @@ -137,11 +145,12 @@ cdef class QuoteTickDataWrangler: Condition.not_none(bars_bid, "bars_bid") Condition.not_none(bars_ask, "bars_ask") - Condition.true(len(bars_bid) > 0, "len(bars_bid) > 0") - Condition.true(len(bars_ask) > 0, "len(bars_ask) > 0") + Condition.false(bars_bid.empty, "bars_bid.empty") + Condition.false(bars_ask.empty, "bars_ask.empty") Condition.true(all(bars_bid.index) == all(bars_ask.index), "bars_bid.index == bars_ask.index") Condition.true(bars_bid.shape == bars_ask.shape, "bars_bid.shape == bars_ask.shape") + # Ensure index is tz-aware UTC bars_bid = as_utc_index(bars_bid) bars_ask = as_utc_index(bars_ask) @@ -184,11 +193,6 @@ cdef class QuoteTickDataWrangler: df_ticks_l = pd.DataFrame(data=data_low) df_ticks_c = pd.DataFrame(data=data_close) - # TODO: Pending refactoring - df_ticks_o.index = bars_bid.index.shift(periods=-300, freq="ms") - df_ticks_h.index = bars_bid.index.shift(periods=-200, freq="ms") - df_ticks_l.index = bars_bid.index.shift(periods=-100, freq="ms") - # Pre-process prices into formatted strings price_cols = ["bid", "ask"] df_ticks_o[price_cols] = df_ticks_o[price_cols].applymap(lambda x: f'{x:.{self.instrument.price_precision}f}') @@ -203,10 +207,25 @@ cdef class QuoteTickDataWrangler: df_ticks_l[size_cols] = df_ticks_l[size_cols].applymap(lambda x: f'{x:.{self.instrument.size_precision}f}') df_ticks_c[size_cols] = df_ticks_c[size_cols].applymap(lambda x: f'{x:.{self.instrument.size_precision}f}') + df_ticks_o.index = df_ticks_o.index.shift(periods=-300, freq="ms") + df_ticks_h.index = df_ticks_h.index.shift(periods=-200, freq="ms") + df_ticks_l.index = df_ticks_l.index.shift(periods=-100, freq="ms") + # Merge tick data df_ticks_final = pd.concat([df_ticks_o, df_ticks_h, df_ticks_l, df_ticks_c]) df_ticks_final.sort_index(axis=0, kind="mergesort", inplace=True) + cdef int i + # Randomly shift high low prices + if random_seed is not None: + random.seed(random_seed) + for i in range(0, len(df_ticks_o)): + if random.getrandbits(1): + high = df_ticks_h.iloc[i] + low = df_ticks_l.iloc[i] + df_ticks_final.iloc[i + 1] = low + df_ticks_final.iloc[i + 2] = high + # Build ticks from data self.processed_data = df_ticks_final self.processed_data["symbol"] = symbol_indexer diff --git a/tests/acceptance_tests/test_backtest_acceptance.py b/tests/acceptance_tests/test_backtest_acceptance.py index a5f5af177c7..846d47f3697 100644 --- a/tests/acceptance_tests/test_backtest_acceptance.py +++ b/tests/acceptance_tests/test_backtest_acceptance.py @@ -87,9 +87,9 @@ def test_run_ema_cross_strategy(self): self.engine.run(strategies=[strategy]) # Assert - Should return expected PNL - self.assertEqual(2688, strategy.fast_ema.count) + self.assertEqual(2689, strategy.fast_ema.count) self.assertEqual(115043, self.engine.iteration) - self.assertEqual(Money(1005961.63, USD), self.engine.portfolio.account(self.venue).balance()) + self.assertEqual(Money(997688.53, USD), self.engine.portfolio.account(self.venue).balance()) def test_rerun_ema_cross_strategy_returns_identical_performance(self): # Arrange @@ -133,10 +133,10 @@ def test_run_multiple_strategies(self): self.engine.run(strategies=[strategy1, strategy2]) # Assert - self.assertEqual(2688, strategy1.fast_ema.count) - self.assertEqual(2688, strategy2.fast_ema.count) + self.assertEqual(2689, strategy1.fast_ema.count) + self.assertEqual(2689, strategy2.fast_ema.count) self.assertEqual(115043, self.engine.iteration) - self.assertEqual(Money(959831.42, USD), self.engine.portfolio.account(self.venue).balance()) + self.assertEqual(Money(948357.20, USD), self.engine.portfolio.account(self.venue).balance()) class BacktestAcceptanceTestsGBPUSDWithBars(unittest.TestCase): @@ -191,9 +191,9 @@ def test_run_ema_cross_with_minute_bar_spec(self): self.engine.run(strategies=[strategy]) # Assert - self.assertEqual(8352, strategy.fast_ema.count) + self.assertEqual(8353, strategy.fast_ema.count) self.assertEqual(120467, self.engine.iteration) - self.assertEqual(Money(945548.59, GBP), self.engine.portfolio.account(self.venue).balance()) + self.assertEqual(Money(947965.44, GBP), self.engine.portfolio.account(self.venue).balance()) class BacktestAcceptanceTestsAUDUSDWithTicks(unittest.TestCase): diff --git a/tests/unit_tests/trading/test_trading_trader.py b/tests/unit_tests/trading/test_trading_trader.py index ff1873e4670..a795ff486ee 100644 --- a/tests/unit_tests/trading/test_trading_trader.py +++ b/tests/unit_tests/trading/test_trading_trader.py @@ -17,8 +17,8 @@ from nautilus_trader.analysis.performance import PerformanceAnalyzer from nautilus_trader.backtest.config import BacktestConfig -from nautilus_trader.backtest.data import BacktestDataProducer from nautilus_trader.backtest.data import BacktestDataContainer +from nautilus_trader.backtest.data import BacktestDataProducer from nautilus_trader.backtest.exchange import SimulatedExchange from nautilus_trader.backtest.execution import BacktestExecClient from nautilus_trader.backtest.loaders import InstrumentLoader