Skip to content

Commit

Permalink
Overhaul backtest data processing
Browse files Browse the repository at this point in the history
- Preparation for TradeTick processing
- [WIP] Implement random shuffling of high/low for bars
- Fix TimeBarAggregator start times
  • Loading branch information
cjdsellers committed Nov 17, 2020
1 parent e1f2787 commit 4ce0d82
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 32 deletions.
5 changes: 2 additions & 3 deletions nautilus_trader/backtest/data.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ cdef class BacktestDataProducer(DataClient):
cdef datetime[:] _quote_timestamps
cdef int _quote_index
cdef int _quote_index_last
cdef QuoteTick _next_quote_tick

cdef unsigned short[:] _trade_symbols
cdef str[:] _trade_prices
Expand All @@ -67,14 +68,12 @@ cdef class BacktestDataProducer(DataClient):
cdef datetime[:] _trade_timestamps
cdef int _trade_index
cdef int _trade_index_last

cdef TradeTick _next_trade_tick
cdef QuoteTick _next_quote_tick

cdef readonly list execution_resolutions
cdef readonly datetime min_timestamp
cdef readonly datetime max_timestamp
cdef readonly bint has_data
cdef readonly bint has_tick_data

cpdef void setup(self, datetime start, datetime stop) except *
cpdef void reset(self) except *
Expand Down
14 changes: 8 additions & 6 deletions nautilus_trader/backtest/data.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ cdef class BacktestDataProducer(DataClient):
self._quote_timestamps = None
self._quote_index = 0
self._quote_index_last = 0
self._next_quote_tick = None

self._trade_symbols = None
self._trade_prices = None
Expand All @@ -426,8 +427,9 @@ cdef class BacktestDataProducer(DataClient):
self._trade_timestamps = None
self._trade_index = 0
self._trade_index_last = 0
self._next_trade_tick = None

self.has_data = False
self.has_tick_data = False

self._log.info(f"Prepared {len(self._quote_tick_data):,} total tick rows in "
f"{round((datetime.utcnow() - timing_start_total).total_seconds(), 2)}s.")
Expand Down Expand Up @@ -511,11 +513,11 @@ cdef class BacktestDataProducer(DataClient):
# Prepare initial tick
self._iterate_trade_ticks()

self.has_data = True
self.has_tick_data = True

self._log.info(f"Data stream size: {format_bytes(total_size)}")

cdef Tick next_tick(self): # TODO: Refactor
cdef Tick next_tick(self):
cdef Tick next_tick
# Quote ticks only
if self._next_trade_tick is None:
Expand Down Expand Up @@ -565,7 +567,7 @@ cdef class BacktestDataProducer(DataClient):
else:
self._next_quote_tick = None
if self._next_trade_tick is None:
self.has_data = False
self.has_tick_data = False

cdef inline void _iterate_trade_ticks(self) except *:
if self._trade_index <= self._trade_index_last:
Expand All @@ -574,7 +576,7 @@ cdef class BacktestDataProducer(DataClient):
else:
self._next_trade_tick = None
if self._next_quote_tick is None:
self.has_data = False
self.has_tick_data = False

# -- COMMANDS --------------------------------------------------------------------------------------

Expand Down Expand Up @@ -610,7 +612,7 @@ cdef class BacktestDataProducer(DataClient):
self._trade_index = 0
self._trade_index_last = len(self._quote_tick_data) - 1

self.has_data = False
self.has_tick_data = False

self._log.info("Reset.")

Expand Down
2 changes: 1 addition & 1 deletion nautilus_trader/backtest/engine.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ cdef class BacktestEngine:

cdef Tick tick
# -- MAIN BACKTEST LOOP -----------------------------------------------#
while self.data_client.has_data:
while self.data_client.has_tick_data:
tick = self.data_client.next_tick()
self._advance_time(tick.timestamp)
self.exchange.process_tick(tick)
Expand Down
9 changes: 5 additions & 4 deletions nautilus_trader/data/aggregation.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -476,14 +476,15 @@ cdef class TimeBarAggregator(BarAggregator):

cpdef datetime get_start_time(self):
cdef datetime now = self._clock.utc_now()
cdef int step = self.bar_type.spec.step
if self.bar_type.spec.aggregation == BarAggregation.SECOND:
return datetime(
year=now.year,
month=now.month,
day=now.day,
hour=now.hour,
minute=now.minute,
second=now.second,
second=now.second - (now.second % step),
tzinfo=now.tzinfo,
)
elif self.bar_type.spec.aggregation == BarAggregation.MINUTE:
Expand All @@ -492,22 +493,22 @@ cdef class TimeBarAggregator(BarAggregator):
month=now.month,
day=now.day,
hour=now.hour,
minute=now.minute,
minute=now.minute - (now.minute % step),
tzinfo=now.tzinfo,
)
elif self.bar_type.spec.aggregation == BarAggregation.HOUR:
return datetime(
year=now.year,
month=now.month,
day=now.day,
hour=now.hour,
hour=now.hour - (now.hour % step),
tzinfo=now.tzinfo,
)
elif self.bar_type.spec.aggregation == BarAggregation.DAY:
return datetime(
year=now.year,
month=now.month,
day=now.day,
day=now.day - (now.day % step),
)
else:
# Design time error
Expand Down
39 changes: 29 additions & 10 deletions nautilus_trader/data/wrangling.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
# limitations under the License.
# -------------------------------------------------------------------------------------------------

import random

import pandas as pd

from cpython.datetime cimport datetime
Expand Down Expand Up @@ -72,7 +74,7 @@ cdef class QuoteTickDataWrangler:
Condition.type_or_none(data_bars_bid, dict, "bid_data")
Condition.type_or_none(data_bars_ask, dict, "ask_data")

if data_ticks is not None and len(data_ticks) > 0:
if data_ticks is not None and not data_ticks.empty:
self._data_ticks = as_utc_index(data_ticks)
else:
Condition.not_none(data_bars_bid, "data_bars_bid")
Expand All @@ -85,17 +87,23 @@ cdef class QuoteTickDataWrangler:
self.processed_data = []
self.resolution = BarAggregation.UNDEFINED

def pre_process(self, int symbol_indexer):
def pre_process(self, int symbol_indexer, random_seed=None):
"""
Pre-process the tick data in preparation for building ticks.
Parameters
----------
symbol_indexer : int
The symbol indexer for the built ticks.
random_seed : int, optional
The random seed used to shuffle the order of the high and low ticks
built from bar data. If None, the high and low ticks are not shuffled.
"""
if self._data_ticks is not None and len(self._data_ticks) > 0:
if random_seed is not None:
Condition.type(random_seed, int, "random_seed")

if self._data_ticks is not None and not self._data_ticks.empty:
# Build ticks from data
self.processed_data = self._data_ticks
self.processed_data["symbol"] = symbol_indexer
Expand Down Expand Up @@ -137,11 +145,12 @@ cdef class QuoteTickDataWrangler:

Condition.not_none(bars_bid, "bars_bid")
Condition.not_none(bars_ask, "bars_ask")
Condition.true(len(bars_bid) > 0, "len(bars_bid) > 0")
Condition.true(len(bars_ask) > 0, "len(bars_ask) > 0")
Condition.false(bars_bid.empty, "bars_bid.empty")
Condition.false(bars_ask.empty, "bars_ask.empty")
Condition.true(all(bars_bid.index) == all(bars_ask.index), "bars_bid.index == bars_ask.index")
Condition.true(bars_bid.shape == bars_ask.shape, "bars_bid.shape == bars_ask.shape")

# Ensure index is tz-aware UTC
bars_bid = as_utc_index(bars_bid)
bars_ask = as_utc_index(bars_ask)

Expand Down Expand Up @@ -184,11 +193,6 @@ cdef class QuoteTickDataWrangler:
df_ticks_l = pd.DataFrame(data=data_low)
df_ticks_c = pd.DataFrame(data=data_close)

# TODO: Pending refactoring
df_ticks_o.index = bars_bid.index.shift(periods=-300, freq="ms")
df_ticks_h.index = bars_bid.index.shift(periods=-200, freq="ms")
df_ticks_l.index = bars_bid.index.shift(periods=-100, freq="ms")

# Pre-process prices into formatted strings
price_cols = ["bid", "ask"]
df_ticks_o[price_cols] = df_ticks_o[price_cols].applymap(lambda x: f'{x:.{self.instrument.price_precision}f}')
Expand All @@ -203,10 +207,25 @@ cdef class QuoteTickDataWrangler:
df_ticks_l[size_cols] = df_ticks_l[size_cols].applymap(lambda x: f'{x:.{self.instrument.size_precision}f}')
df_ticks_c[size_cols] = df_ticks_c[size_cols].applymap(lambda x: f'{x:.{self.instrument.size_precision}f}')

df_ticks_o.index = df_ticks_o.index.shift(periods=-300, freq="ms")
df_ticks_h.index = df_ticks_h.index.shift(periods=-200, freq="ms")
df_ticks_l.index = df_ticks_l.index.shift(periods=-100, freq="ms")

# Merge tick data
df_ticks_final = pd.concat([df_ticks_o, df_ticks_h, df_ticks_l, df_ticks_c])
df_ticks_final.sort_index(axis=0, kind="mergesort", inplace=True)

cdef int i
# Randomly swap the order of the high and low ticks
if random_seed is not None:
random.seed(random_seed)
for i in range(0, len(df_ticks_o)):
if random.getrandbits(1):
high = df_ticks_h.iloc[i]
low = df_ticks_l.iloc[i]
df_ticks_final.iloc[i + 1] = low
df_ticks_final.iloc[i + 2] = high

# Build ticks from data
self.processed_data = df_ticks_final
self.processed_data["symbol"] = symbol_indexer
Expand Down
14 changes: 7 additions & 7 deletions tests/acceptance_tests/test_backtest_acceptance.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def test_run_ema_cross_strategy(self):
self.engine.run(strategies=[strategy])

# Assert - Should return expected PNL
self.assertEqual(2688, strategy.fast_ema.count)
self.assertEqual(2689, strategy.fast_ema.count)
self.assertEqual(115043, self.engine.iteration)
self.assertEqual(Money(1005961.63, USD), self.engine.portfolio.account(self.venue).balance())
self.assertEqual(Money(997688.53, USD), self.engine.portfolio.account(self.venue).balance())

def test_rerun_ema_cross_strategy_returns_identical_performance(self):
# Arrange
Expand Down Expand Up @@ -133,10 +133,10 @@ def test_run_multiple_strategies(self):
self.engine.run(strategies=[strategy1, strategy2])

# Assert
self.assertEqual(2688, strategy1.fast_ema.count)
self.assertEqual(2688, strategy2.fast_ema.count)
self.assertEqual(2689, strategy1.fast_ema.count)
self.assertEqual(2689, strategy2.fast_ema.count)
self.assertEqual(115043, self.engine.iteration)
self.assertEqual(Money(959831.42, USD), self.engine.portfolio.account(self.venue).balance())
self.assertEqual(Money(948357.20, USD), self.engine.portfolio.account(self.venue).balance())


class BacktestAcceptanceTestsGBPUSDWithBars(unittest.TestCase):
Expand Down Expand Up @@ -191,9 +191,9 @@ def test_run_ema_cross_with_minute_bar_spec(self):
self.engine.run(strategies=[strategy])

# Assert
self.assertEqual(8352, strategy.fast_ema.count)
self.assertEqual(8353, strategy.fast_ema.count)
self.assertEqual(120467, self.engine.iteration)
self.assertEqual(Money(945548.59, GBP), self.engine.portfolio.account(self.venue).balance())
self.assertEqual(Money(947965.44, GBP), self.engine.portfolio.account(self.venue).balance())


class BacktestAcceptanceTestsAUDUSDWithTicks(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit_tests/trading/test_trading_trader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

from nautilus_trader.analysis.performance import PerformanceAnalyzer
from nautilus_trader.backtest.config import BacktestConfig
from nautilus_trader.backtest.data import BacktestDataProducer
from nautilus_trader.backtest.data import BacktestDataContainer
from nautilus_trader.backtest.data import BacktestDataProducer
from nautilus_trader.backtest.exchange import SimulatedExchange
from nautilus_trader.backtest.execution import BacktestExecClient
from nautilus_trader.backtest.loaders import InstrumentLoader
Expand Down

0 comments on commit 4ce0d82

Please sign in to comment.