diff --git a/README.rst b/README.rst index e0612e28e..031da9e77 100644 --- a/README.rst +++ b/README.rst @@ -13,13 +13,14 @@ computational trading targeted at `hardcore Linux users `_ . we use as much bleeding edge tech as possible including (but not limited to): - latest python for glue_ -- trio_ for `structured concurrency`_ -- tractor_ for distributed, multi-core, real-time streaming -- marketstore_ for historical and real-time tick data persistence and sharing -- techtonicdb_ for L2 book storage +- trio_ & tractor_ for our distributed, multi-core, real-time streaming + `structured concurrency`_ runtime B) - Qt_ for pristine high performance UIs - pyqtgraph_ for real-time charting -- ``numpy`` and ``numba`` for `fast numerics`_ +- ``polars`` ``numpy`` and ``numba`` for `fast numerics`_ +- `apache arrow and parquet`_ for time series history management + persistence and sharing +- (prototyped) techtonicdb_ for L2 book storage .. |travis| image:: https://img.shields.io/travis/pikers/piker/master.svg :target: https://travis-ci.org/pikers/piker @@ -31,6 +32,7 @@ we use as much bleeding edge tech as possible including (but not limited to): .. _Qt: https://www.qt.io/ .. _pyqtgraph: https://github.com/pyqtgraph/pyqtgraph .. _glue: https://numpy.org/doc/stable/user/c-info.python-as-glue.html#using-python-as-glue +.. _apache arrow and parquet: https://arrow.apache.org/faq/ .. _fast numerics: https://zerowithdot.com/python-numpy-and-pandas-performance/ .. _comp_trader: https://jfaleiro.wordpress.com/2019/10/09/computational-trader/ @@ -58,23 +60,20 @@ building the best looking, most reliable, keyboard friendly trading platform is the dream; join the cause. -install -******* -``piker`` is currently under heavy pre-alpha development and as such -should be cloned from this repo and hacked on directly. +sane install with `poetry` +************************** +TODO! -for a development install:: - git clone git@github.com:pikers/piker.git - cd piker - virtualenv env - source ./env/bin/activate - pip install -r requirements.txt -e . +rigorous install on ``nixos`` using ``poetry2nix`` +************************************************** +TODO! 
-install for nixos -***************** -for users of `NixOS` we offer a development shell envoirment that can be -loaded with:: + +hacky install on nixos +********************** +`NixOS` is our core devs' distro of choice for which we offer +a stringently defined development shell envoirment that can be loaded with:: nix-shell develop.nix @@ -85,91 +84,19 @@ run:: once after loading the shell -install for tinas -***************** -for windows peeps you can start by installing all the prerequisite software: - -- install git with all default settings - https://git-scm.com/download/win -- install anaconda all default settings - https://www.anaconda.com/products/individual -- install microsoft build tools (check the box for Desktop development for C++, you might be able to uncheck some optional downloads) - https://visualstudio.microsoft.com/visual-cpp-build-tools/ -- install visual studio code default settings - https://code.visualstudio.com/download - - -then, `crack a conda shell`_ and run the following commands:: - - mkdir code # create code directory - cd code # change directory to code - git clone https://github.com/pikers/piker.git # downloads piker installation package from github - cd piker # change directory to piker - - conda create -n pikonda # creates conda environment named pikonda - conda activate pikonda # activates pikonda - - conda install -c conda-forge python-levenshtein # in case it is not already installed - conda install pip # may already be installed - pip # will show if pip is installed - - pip install -e . -r requirements.txt # install piker in editable mode - -test Piker to see if it is working:: - - piker -b binance chart btcusdt.binance # formatting for loading a chart - piker -b kraken -b binance chart xbtusdt.kraken - piker -b kraken -b binance -b ib chart qqq.nasdaq.ib - piker -b ib chart tsla.nasdaq.ib - -potential error:: - - FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\user\\AppData\\Roaming\\piker\\brokers.toml' - -solution: - -- navigate to file directory above (may be different on your machine, location should be listed in the error code) -- copy and paste file from 'C:\\Users\\user\\code\\data/brokers.toml' or create a blank file using notepad at the location above - -Visual Studio Code setup: - -- now that piker is installed we can set up vscode as the default terminal for running piker and editing the code -- open Visual Studio Code -- file --> Add Folder to Workspace --> C:\Users\user\code\piker (adds piker directory where all piker files are located) -- file --> Save Workspace As --> save it wherever you want and call it whatever you want, this is going to be your default workspace for running and editing piker code -- ctrl + shift + p --> start typing Python: Select Interpetter --> when the option comes up select it --> Select at the workspace level --> select the one that shows ('pikonda') -- change the default terminal to cmd.exe instead of powershell (default) -- now when you create a new terminal VScode should automatically activate you conda env so that piker can be run as the first command after a new terminal is created - -also, try out fancyzones as part of powertoyz for a decent tiling windows manager to manage all the cool new software you are going to be running. - -.. _conda installed: https:// -.. _C++ build toolz: https:// -.. _crack a conda shell: https:// -.. _vscode: https:// - -.. link to the tina guide -.. 
_setup a coolio tiled wm console: https:// - -provider support -**************** -for live data feeds the in-progress set of supported brokers is: - -- IB_ via ``ib_insync``, also see our `container docs`_ -- binance_ and kraken_ for crypto over their public websocket API -- questrade_ (ish) which comes with effectively free L1 - -coming soon... - -- webull_ via the reverse engineered public API -- yahoo via yliveticker_ - -if you want your broker supported and they have an API let us know. - -.. _IB: https://interactivebrokers.github.io/tws-api/index.html -.. _container docs: https://github.com/pikers/piker/tree/master/dockering/ib -.. _questrade: https://www.questrade.com/api/documentation -.. _kraken: https://www.kraken.com/features/api#public-market-data -.. _binance: https://github.com/pikers/piker/pull/182 -.. _webull: https://github.com/tedchou12/webull -.. _yliveticker: https://github.com/yahoofinancelive/yliveticker -.. _coinbase: https://docs.pro.coinbase.com/#websocket-feed + +install wild-west style via `pip` +********************************* +``piker`` is currently under heavy pre-alpha development and as such +should be cloned from this repo and hacked on directly. + +for a development install:: + + git clone git@github.com:pikers/piker.git + cd piker + virtualenv env + source ./env/bin/activate + pip install -r requirements.txt -e . check out our charts diff --git a/develop.nix b/develop.nix index 30ae2f08e..205ea1143 100644 --- a/develop.nix +++ b/develop.nix @@ -6,12 +6,22 @@ stdenv.mkDerivation { # System requirements. readline - # Python requirements (enough to get a virtualenv going). - python310Full + # TODO: hacky non-poetry install stuff we need to get rid of!! virtualenv setuptools - pyqt5 pip + + # obviously, and see below for hacked linking + pyqt5 + + # Python requirements (enough to get a virtualenv going). + python310Full + + # numerics deps + python310Packages.python-Levenshtein + python310Packages.fastparquet + python310Packages.polars + ]; src = null; shellHook = '' diff --git a/docs/tina_readme.rst b/docs/tina_readme.rst new file mode 100644 index 000000000..b9e60dcee --- /dev/null +++ b/docs/tina_readme.rst @@ -0,0 +1,91 @@ +### NOTE this is likely out of date given it was written some +(years) time ago by a user that has since not really partaken in +contributing since. + +install for tinas +***************** +for windows peeps you can start by installing all the prerequisite software: + +- install git with all default settings - https://git-scm.com/download/win +- install anaconda all default settings - https://www.anaconda.com/products/individual +- install microsoft build tools (check the box for Desktop development for C++, you might be able to uncheck some optional downloads) - https://visualstudio.microsoft.com/visual-cpp-build-tools/ +- install visual studio code default settings - https://code.visualstudio.com/download + + +then, `crack a conda shell`_ and run the following commands:: + + mkdir code # create code directory + cd code # change directory to code + git clone https://github.com/pikers/piker.git # downloads piker installation package from github + cd piker # change directory to piker + + conda create -n pikonda # creates conda environment named pikonda + conda activate pikonda # activates pikonda + + conda install -c conda-forge python-levenshtein # in case it is not already installed + conda install pip # may already be installed + pip # will show if pip is installed + + pip install -e . 
-r requirements.txt # install piker in editable mode + +test Piker to see if it is working:: + + piker -b binance chart btcusdt.binance # formatting for loading a chart + piker -b kraken -b binance chart xbtusdt.kraken + piker -b kraken -b binance -b ib chart qqq.nasdaq.ib + piker -b ib chart tsla.nasdaq.ib + +potential error:: + + FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\user\\AppData\\Roaming\\piker\\brokers.toml' + +solution: + +- navigate to file directory above (may be different on your machine, location should be listed in the error code) +- copy and paste file from 'C:\\Users\\user\\code\\data/brokers.toml' or create a blank file using notepad at the location above + +Visual Studio Code setup: + +- now that piker is installed we can set up vscode as the default terminal for running piker and editing the code +- open Visual Studio Code +- file --> Add Folder to Workspace --> C:\Users\user\code\piker (adds piker directory where all piker files are located) +- file --> Save Workspace As --> save it wherever you want and call it whatever you want, this is going to be your default workspace for running and editing piker code +- ctrl + shift + p --> start typing Python: Select Interpetter --> when the option comes up select it --> Select at the workspace level --> select the one that shows ('pikonda') +- change the default terminal to cmd.exe instead of powershell (default) +- now when you create a new terminal VScode should automatically activate you conda env so that piker can be run as the first command after a new terminal is created + +also, try out fancyzones as part of powertoyz for a decent tiling windows manager to manage all the cool new software you are going to be running. + +.. _conda installed: https:// +.. _C++ build toolz: https:// +.. _crack a conda shell: https:// +.. _vscode: https:// + +.. link to the tina guide +.. _setup a coolio tiled wm console: https:// + +provider support +**************** +for live data feeds the in-progress set of supported brokers is: + +- IB_ via ``ib_insync``, also see our `container docs`_ +- binance_ and kraken_ for crypto over their public websocket API +- questrade_ (ish) which comes with effectively free L1 + +coming soon... + +- webull_ via the reverse engineered public API +- yahoo via yliveticker_ + +if you want your broker supported and they have an API let us know. + +.. _IB: https://interactivebrokers.github.io/tws-api/index.html +.. _container docs: https://github.com/pikers/piker/tree/master/dockering/ib +.. _questrade: https://www.questrade.com/api/documentation +.. _kraken: https://www.kraken.com/features/api#public-market-data +.. _binance: https://github.com/pikers/piker/pull/182 +.. _webull: https://github.com/tedchou12/webull +.. _yliveticker: https://github.com/yahoofinancelive/yliveticker +.. _coinbase: https://docs.pro.coinbase.com/#websocket-feed + + diff --git a/piker/_cacheables.py b/piker/_cacheables.py index 9be4d079e..434573648 100644 --- a/piker/_cacheables.py +++ b/piker/_cacheables.py @@ -20,9 +20,6 @@ ''' from collections import OrderedDict -from contextlib import ( - asynccontextmanager as acm, -) from typing import ( Awaitable, Callable, @@ -30,12 +27,8 @@ TypeVar, ) -from tractor.trionics import maybe_open_context - -from .brokers import get_brokermod from .log import get_logger - log = get_logger(__name__) T = TypeVar("T") @@ -104,21 +97,3 @@ async def decorated( return decorated return decorator - - -# TODO: move this to `.brokers.utils`.. 
-@acm -async def open_cached_client( - brokername: str, -) -> 'Client': # noqa - ''' - Get a cached broker client from the current actor's local vars. - - If one has not been setup do it and cache it. - - ''' - brokermod = get_brokermod(brokername) - async with maybe_open_context( - acm_func=brokermod.get_client, - ) as (cache_hit, client): - yield client diff --git a/piker/accounting/_mktinfo.py b/piker/accounting/_mktinfo.py index 046195caf..2d2ebccdc 100644 --- a/piker/accounting/_mktinfo.py +++ b/piker/accounting/_mktinfo.py @@ -121,7 +121,7 @@ class Asset(Struct, frozen=True): # NOTE: additional info optionally packed in by the backend, but # should not be explicitly required in our generic API. - info: dict = {} # make it frozen? + info: dict | None = None # TODO? # _to_dict_skip = {'info'} diff --git a/piker/accounting/cli.py b/piker/accounting/cli.py index ee91d1b32..0b18a3eb0 100644 --- a/piker/accounting/cli.py +++ b/piker/accounting/cli.py @@ -19,8 +19,9 @@ ''' from typing import ( - Any, + AsyncContextManager, ) +from types import ModuleType from rich.console import Console from rich.markdown import Markdown @@ -33,6 +34,7 @@ open_piker_runtime, ) from ..clearing._messages import BrokerdPosition +from ..config import load_ledger from ..calc import humanize @@ -45,7 +47,11 @@ def broker_init( **start_actor_kwargs, -) -> dict: +) -> tuple[ + ModuleType, + dict, + AsyncContextManager, +]: ''' Given an input broker name, load all named arguments which can be passed to a daemon + context spawn for @@ -83,13 +89,9 @@ def broker_init( from ..brokers._daemon import _setup_persistent_brokerd return ( + brokermod, start_actor_kwargs, # to `ActorNursery.start_actor()` - _setup_persistent_brokerd, # service task ep - getattr( # trades endpoint - brokermod, - 'trades_dialogue', - None, - ), + _setup_persistent_brokerd, # deamon service task ep ) @@ -119,10 +121,11 @@ def sync( console.print(md) return - start_kwargs, _, trades_ep = broker_init( + brokermod, start_kwargs, deamon_ep = broker_init( brokername, loglevel=loglevel, ) + brokername: str = brokermod.name async def main(): @@ -136,96 +139,116 @@ async def main(): tractor.open_nursery() as an, ): - log.info( - f'Piker runtime up as {actor.uid}@{sockaddr}' - ) - - portal = await an.start_actor( - loglevel=loglevel, - debug_mode=pdb, - **start_kwargs, - ) - - if ( - brokername == 'paper' - or trades_ep is None - ): - from ..clearing import _paper_engine as paper - open_trades_endpoint = paper.open_paperboi( - fqme=None, # tell paper to not start clearing loop - broker=brokername, - loglevel=loglevel, + try: + log.info( + f'Piker runtime up as {actor.uid}@{sockaddr}' ) - else: - # open live brokerd trades endpoint - open_trades_endpoint = portal.open_context( - trades_ep, + + portal = await an.start_actor( loglevel=loglevel, + debug_mode=pdb, + **start_kwargs, ) - positions: dict[str, Any] - accounts: list[str] - async with ( - open_trades_endpoint as ( - brokerd_ctx, - (positions, accounts), - ), - ): - assert len(accounts) == 1 - summary: str = ( - '[dim underline]Piker Position Summary[/] ' - f'[dim blue underline]{brokername}[/]' - '[dim].[/]' - f'[blue underline]{account}[/]' - f'[dim underline] -> total pps: [/]' - f'[green]{len(positions)}[/]\n' + from ..clearing import ( + open_brokerd_dialog, ) - for ppdict in positions: - ppmsg = BrokerdPosition(**ppdict) - size = ppmsg.size - if size: - ppu: float = round( - ppmsg.avg_price, - ndigits=2, - ) - cost_basis: str = humanize(size * ppu) - h_size: str = humanize(size) - - if size < 
0: - pcolor = 'red' - else: - pcolor = 'green' - - # sematic-highlight of fqme - fqme = ppmsg.symbol - tokens = fqme.split('.') - styled_fqme = f'[blue underline]{tokens[0]}[/]' - for tok in tokens[1:]: - styled_fqme += '[dim].[/]' - styled_fqme += f'[dim blue underline]{tok}[/]' - - # TODO: instead display in a ``rich.Table``? - summary += ( - styled_fqme + - '[dim]: [/]' - f'[{pcolor}]{h_size}[/]' - '[dim blue]u @[/]' - f'[{pcolor}]{ppu}[/]' - '[dim blue] = [/]' - f'[{pcolor}]$ {cost_basis}\n[/]' + brokerd_stream: tractor.MsgStream + + async with open_brokerd_dialog( + brokermod, + portal, + exec_mode=( + 'paper' if account == 'paper' + else 'live' + ), + loglevel=loglevel, + ) as ( + brokerd_stream, + pp_msg_table, + accounts, + ): + try: + assert len(accounts) == 1 + if not pp_msg_table: + ld, fpath = load_ledger(brokername, account) + assert not ld, f'WTF did we fail to parse ledger:\n{ld}' + + console.print( + '[yellow]' + 'No pps found for ' + f'`{brokername}.{account}` ' + 'account!\n\n' + '[/][underline]' + 'None of the following ledger files exist:\n\n[/]' + f'{fpath.as_uri()}\n' + ) + return + + pps_by_symbol: dict[str, BrokerdPosition] = pp_msg_table[ + brokername, + account, + ] + + summary: str = ( + '[dim underline]Piker Position Summary[/] ' + f'[dim blue underline]{brokername}[/]' + '[dim].[/]' + f'[blue underline]{account}[/]' + f'[dim underline] -> total pps: [/]' + f'[green]{len(pps_by_symbol)}[/]\n' ) - - console.print(summary) - - # exit via ctx cancellation. - await brokerd_ctx.cancel(timeout=1) - # TODO: once ported to newer tractor branch we should - # be able to do a loop like this: - # while brokerd_ctx.cancel_called_remote is None: - # await trio.sleep(0.01) - # await brokerd_ctx.cancel() - - await portal.cancel_actor() + # for ppdict in positions: + for fqme, ppmsg in pps_by_symbol.items(): + # ppmsg = BrokerdPosition(**ppdict) + size = ppmsg.size + if size: + ppu: float = round( + ppmsg.avg_price, + ndigits=2, + ) + cost_basis: str = humanize(size * ppu) + h_size: str = humanize(size) + + if size < 0: + pcolor = 'red' + else: + pcolor = 'green' + + # sematic-highlight of fqme + fqme = ppmsg.symbol + tokens = fqme.split('.') + styled_fqme = f'[blue underline]{tokens[0]}[/]' + for tok in tokens[1:]: + styled_fqme += '[dim].[/]' + styled_fqme += f'[dim blue underline]{tok}[/]' + + # TODO: instead display in a ``rich.Table``? + summary += ( + styled_fqme + + '[dim]: [/]' + f'[{pcolor}]{h_size}[/]' + '[dim blue]u @[/]' + f'[{pcolor}]{ppu}[/]' + '[dim blue] = [/]' + f'[{pcolor}]$ {cost_basis}\n[/]' + ) + + console.print(summary) + + finally: + # exit via ctx cancellation. + brokerd_ctx: tractor.Context = brokerd_stream._ctx + await brokerd_ctx.cancel(timeout=1) + + # TODO: once ported to newer tractor branch we should + # be able to do a loop like this: + # while brokerd_ctx.cancel_called_remote is None: + # await trio.sleep(0.01) + # await brokerd_ctx.cancel() + + finally: + await portal.cancel_actor() trio.run(main) diff --git a/piker/brokers/__init__.py b/piker/brokers/__init__.py index 93393654b..986905e2c 100644 --- a/piker/brokers/__init__.py +++ b/piker/brokers/__init__.py @@ -17,10 +17,34 @@ """ Broker clients, daemons and general back end machinery. 
""" +from contextlib import ( + asynccontextmanager as acm, +) from importlib import import_module from types import ModuleType -__brokers__ = [ +from tractor.trionics import maybe_open_context + +from ._util import ( + log, + BrokerError, + SymbolNotFound, + NoData, + DataUnavailable, + DataThrottle, + resproc, +) + +__all__: list[str] = [ + 'BrokerError', + 'SymbolNotFound', + 'NoData', + 'DataUnavailable', + 'DataThrottle', + 'resproc', +] + +__brokers__: list[str] = [ 'binance', 'ib', 'kraken', @@ -58,3 +82,28 @@ def iter_brokermods(): ''' for name in __brokers__: yield get_brokermod(name) + + +@acm +async def open_cached_client( + brokername: str, + **kwargs, + +) -> 'Client': # noqa + ''' + Get a cached broker client from the current actor's local vars. + + If one has not been setup do it and cache it. + + ''' + brokermod = get_brokermod(brokername) + async with maybe_open_context( + acm_func=brokermod.get_client, + kwargs=kwargs, + + ) as (cache_hit, client): + + if cache_hit: + log.info(f'Reusing existing {client}') + + yield client diff --git a/piker/brokers/_daemon.py b/piker/brokers/_daemon.py index 8a81b1d67..368e81164 100644 --- a/piker/brokers/_daemon.py +++ b/piker/brokers/_daemon.py @@ -19,9 +19,12 @@ ``brokerd``. ''' +from __future__ import annotations from contextlib import ( asynccontextmanager as acm, ) +from typing import TYPE_CHECKING +import exceptiongroup as eg import tractor import trio @@ -29,6 +32,9 @@ from . import _util from . import get_brokermod +if TYPE_CHECKING: + from ..data import _FeedsBus + # `brokerd` enabled modules # TODO: move this def to the `.data` subpkg.. # NOTE: keeping this list as small as possible is part of our caps-sec @@ -58,6 +64,10 @@ async def _setup_persistent_brokerd( the broker backend as needed. ''' + # NOTE: we only need to setup logging once (and only) here + # since all hosted daemon tasks will reference this same + # log instance's (actor local) state and thus don't require + # any further (level) configuration on their own B) log = _util.get_console_log( loglevel or tractor.current_actor().loglevel, name=f'{_util.subsys}.{brokername}', @@ -65,24 +75,40 @@ async def _setup_persistent_brokerd( # set global for this actor to this new process-wide instance B) _util.log = log - from piker.data.feed import ( - _bus, - get_feed_bus, - ) - global _bus - assert not _bus - - async with trio.open_nursery() as service_nursery: - # assign a nursery to the feeds bus for spawning - # background tasks from clients - get_feed_bus(brokername, service_nursery) - - # unblock caller - await ctx.started() - - # we pin this task to keep the feeds manager active until the - # parent actor decides to tear it down - await trio.sleep_forever() + from piker.data import feed + assert not feed._bus + + # allocate a nursery to the bus for spawning background + # tasks to service client IPC requests, normally + # `tractor.Context` connections to explicitly required + # `brokerd` endpoints such as: + # - `stream_quotes()`, + # - `manage_history()`, + # - `allocate_persistent_feed()`, + # - `open_symbol_search()` + # NOTE: see ep invocation details inside `.data.feed`. 
+ try: + async with trio.open_nursery() as service_nursery: + bus: _FeedsBus = feed.get_feed_bus( + brokername, + service_nursery, + ) + assert bus is feed._bus + + # unblock caller + await ctx.started() + + # we pin this task to keep the feeds manager active until the + # parent actor decides to tear it down + await trio.sleep_forever() + + except eg.ExceptionGroup: + # TODO: likely some underlying `brokerd` IPC connection + # broke so here we handle a respawn and re-connect attempt! + # This likely should pair with development of the OCO task + # nusery in dev over @ `tractor` B) + # https://github.com/goodboy/tractor/pull/363 + raise async def spawn_brokerd( diff --git a/piker/brokers/binance.py b/piker/brokers/binance.py index 48b28d6f6..a8791ae9f 100644 --- a/piker/brokers/binance.py +++ b/piker/brokers/binance.py @@ -48,52 +48,47 @@ MktPair, digits_to_dec, ) -from .._cacheables import open_cached_client -from ._util import ( +from . import ( resproc, SymbolNotFound, DataUnavailable, + open_cached_client, ) from ._util import ( - log, + get_logger, get_console_log, ) -from ..data.types import Struct -from ..data.validate import FeedInit -from ..data._web_bs import ( +from piker.data.types import Struct +from piker.data.validate import FeedInit +from piker.data import def_iohlcv_fields +from piker.data._web_bs import ( open_autorecon_ws, NoBsWs, ) +log = get_logger(__name__) + + _url = 'https://api.binance.com' # Broker specific ohlc schema (rest) -_ohlc_dtype = [ - ('index', int), - ('time', int), - ('open', float), - ('high', float), - ('low', float), - ('close', float), - ('volume', float), - ('bar_wap', float), # will be zeroed by sampler if not filled - - # XXX: some additional fields are defined in the docs: - # https://binance-docs.github.io/apidocs/spot/en/#kline-candlestick-data +# XXX TODO? some additional fields are defined in the docs: +# https://binance-docs.github.io/apidocs/spot/en/#kline-candlestick-data +# _ohlc_dtype = [ # ('close_time', int), # ('quote_vol', float), # ('num_trades', int), # ('buy_base_vol', float), # ('buy_quote_vol', float), # ('ignore', float), -] +# ] # UI components allow this to be declared such that additional # (historical) fields can be exposed. -ohlc_dtype = np.dtype(_ohlc_dtype) +# ohlc_dtype = np.dtype(_ohlc_dtype) _show_wap_in_history = False @@ -330,7 +325,7 @@ async def bars( bar.typecast() row = [] - for j, (name, ftype) in enumerate(_ohlc_dtype[1:]): + for j, (name, ftype) in enumerate(def_iohlcv_fields[1:]): # TODO: maybe we should go nanoseconds on all # history time stamps? @@ -343,7 +338,10 @@ async def bars( new_bars.append((i,) + tuple(row)) - array = np.array(new_bars, dtype=_ohlc_dtype) if as_np else bars + array = np.array( + new_bars, + dtype=def_iohlcv_fields, + ) if as_np else bars return array @@ -356,7 +354,7 @@ async def get_client() -> Client: # validation type -class AggTrade(Struct): +class AggTrade(Struct, frozen=True): e: str # Event type E: int # Event time s: str # Symbol @@ -445,25 +443,30 @@ async def stream_messages( # decode/encode, see: # https://jcristharif.com/msgspec/structs.html#type-validation msg = AggTrade(**msg) - msg.typecast() yield 'trade', { 'symbol': msg.s, 'last': msg.p, 'brokerd_ts': time.time(), 'ticks': [{ 'type': 'trade', - 'price': msg.p, - 'size': msg.q, + 'price': float(msg.p), + 'size': float(msg.q), 'broker_ts': msg.T, }], } def make_sub(pairs: list[str], sub_name: str, uid: int) -> dict[str, str]: - """Create a request subscription packet dict. 
+ ''' + Create a request subscription packet dict. - https://binance-docs.github.io/apidocs/spot/en/#live-subscribing-unsubscribing-to-streams - """ + - spot: + https://binance-docs.github.io/apidocs/spot/en/#live-subscribing-unsubscribing-to-streams + + - futes: + https://binance-docs.github.io/apidocs/futures/en/#websocket-market-streams + + ''' return { 'method': 'SUBSCRIBE', 'params': [ diff --git a/piker/brokers/cli.py b/piker/brokers/cli.py index 1bfb05d67..5ebca3e71 100644 --- a/piker/brokers/cli.py +++ b/piker/brokers/cli.py @@ -21,6 +21,7 @@ from functools import partial from operator import attrgetter from operator import itemgetter +from types import ModuleType import click import trio @@ -241,7 +242,7 @@ def quote(config, tickers): ''' # global opts - brokermod = config['brokermods'][0] + brokermod = list(config['brokermods'].values())[0] quotes = trio.run(partial(core.stocks_quote, brokermod, tickers)) if not quotes: @@ -268,7 +269,7 @@ def bars(config, symbol, count): ''' # global opts - brokermod = config['brokermods'][0] + brokermod = list(config['brokermods'].values())[0] # broker backend should return at the least a # list of candle dictionaries @@ -303,7 +304,7 @@ def record(config, rate, name, dhost, filename): ''' # global opts - brokermod = config['brokermods'][0] + brokermod = list(config['brokermods'].values())[0] loglevel = config['loglevel'] log = config['log'] @@ -368,7 +369,7 @@ def optsquote(config, symbol, date): ''' # global opts - brokermod = config['brokermods'][0] + brokermod = list(config['brokermods'].values())[0] quotes = trio.run( partial( @@ -385,26 +386,70 @@ def optsquote(config, symbol, date): @cli.command() @click.argument('tickers', nargs=-1, required=True) @click.pass_obj -def symbol_info(config, tickers): +def mkt_info( + config: dict, + tickers: list[str], +): ''' Print symbol quotes to the console ''' - # global opts - brokermod = config['brokermods'][0] + from msgspec.json import encode, decode + from ..accounting import MktPair + from ..service import ( + open_piker_runtime, + ) - quotes = trio.run(partial(core.symbol_info, brokermod, tickers)) - if not quotes: - log.error(f"No quotes could be found for {tickers}?") + # global opts + brokermods: dict[str, ModuleType] = config['brokermods'] + + mkts: list[MktPair] = [] + async def main(): + + async with open_piker_runtime( + name='mkt_info_query', + # loglevel=loglevel, + debug_mode=True, + + ) as (_, _): + for fqme in tickers: + bs_fqme, _, broker = fqme.rpartition('.') + brokermod: ModuleType = brokermods[broker] + mkt, bs_pair = await core.mkt_info( + brokermod, + bs_fqme, + ) + mkts.append((mkt, bs_pair)) + + trio.run(main) + + if not mkts: + log.error( + f'No market info could be found for {tickers}' + ) return - if len(quotes) < len(tickers): - syms = tuple(map(itemgetter('symbol'), quotes)) + if len(mkts) < len(tickers): + syms = tuple(map(itemgetter('fqme'), mkts)) for ticker in tickers: if ticker not in syms: - brokermod.log.warn(f"Could not find symbol {ticker}?") - - click.echo(colorize_json(quotes)) + log.warn(f"Could not find symbol {ticker}?") + + + # TODO: use ``rich.Table`` intead here! 
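+    # As a hedged aside on the ``rich.Table`` TODO above (an illustration
+    # only, not part of this change): a table rendering could look roughly
+    # like the commented sketch below. Only ``mkt.fqme`` and the ``bs_pair``
+    # values come from this code; the title and column names are assumptions.
+    #
+    #   from rich.console import Console
+    #   from rich.table import Table
+    #
+    #   table = Table(title='market info')
+    #   table.add_column('fqme')
+    #   table.add_column('backend pair type')
+    #   for mkt, bs_pair in mkts:
+    #       table.add_row(mkt.fqme, str(type(bs_pair)))
+    #
+    #   Console().print(table)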
+ for mkt, bs_pair in mkts: + click.echo( + '\n' + '----------------------------------------------------\n' + f'{type(bs_pair)}\n' + '----------------------------------------------------\n' + f'{colorize_json(bs_pair.to_dict())}\n' + '----------------------------------------------------\n' + f'as piker `MktPair` with fqme: {mkt.fqme}\n' + '----------------------------------------------------\n' + # NOTE: roundtrip to json codec for console print + f'{colorize_json(decode(encode(mkt)))}' + ) @cli.command() @@ -416,7 +461,7 @@ def search(config, pattern): ''' # global opts - brokermods = config['brokermods'] + brokermods = list(config['brokermods'].values()) # define tractor entrypoint async def main(func): diff --git a/piker/brokers/core.py b/piker/brokers/core.py index b3651c1d4..a9a83e7cd 100644 --- a/piker/brokers/core.py +++ b/piker/brokers/core.py @@ -29,7 +29,8 @@ from ._util import log from . import get_brokermod from ..service import maybe_spawn_brokerd -from .._cacheables import open_cached_client +from . import open_cached_client +from ..accounting import MktPair async def api(brokername: str, methname: str, **kwargs) -> dict: @@ -116,15 +117,19 @@ async def bars( return await client.bars(symbol, **kwargs) -async def symbol_info( +async def mkt_info( brokermod: ModuleType, - symbol: str, + fqme: str, **kwargs, -) -> Dict[str, Dict[str, Dict[str, Any]]]: - """Return symbol info from broker. - """ - async with brokermod.get_client() as client: - return await client.symbol_info(symbol, **kwargs) + +) -> MktPair: + ''' + Return MktPair info from broker including src and dst assets. + + ''' + return await brokermod.get_mkt_info( + fqme.replace(brokermod.name, '') + ) async def search_w_brokerd(name: str, pattern: str) -> dict: diff --git a/piker/brokers/deribit/__init__.py b/piker/brokers/deribit/__init__.py index f5c48b58d..4c0c18507 100644 --- a/piker/brokers/deribit/__init__.py +++ b/piker/brokers/deribit/__init__.py @@ -21,8 +21,6 @@ from piker.log import get_logger -log = get_logger(__name__) - from .api import ( get_client, ) @@ -30,13 +28,15 @@ open_history_client, open_symbol_search, stream_quotes, - backfill_bars + # backfill_bars, ) # from .broker import ( - # trades_dialogue, + # open_trade_dialog, # norm_trade_records, # ) +log = get_logger(__name__) + __all__ = [ 'get_client', # 'trades_dialogue', diff --git a/piker/brokers/deribit/api.py b/piker/brokers/deribit/api.py index 4159b18a7..93d4c498f 100644 --- a/piker/brokers/deribit/api.py +++ b/piker/brokers/deribit/api.py @@ -18,43 +18,33 @@ Deribit backend. 
''' -import json -import time import asyncio - -from contextlib import asynccontextmanager as acm, AsyncExitStack -from functools import partial +from contextlib import ( + asynccontextmanager as acm, +) from datetime import datetime -from typing import Any, Optional, Iterable, Callable +from functools import partial +import time +from typing import ( + Any, + Optional, + Callable, +) import pendulum -import asks import trio -from trio_typing import Nursery, TaskStatus +from trio_typing import TaskStatus from fuzzywuzzy import process as fuzzy import numpy as np - -from piker.data.types import Struct -from piker.data._web_bs import ( - NoBsWs, - open_autorecon_ws, - open_jsonrpc_session -) - -from .._util import resproc - -from piker import config -from piker.log import get_logger - from tractor.trionics import ( broadcast_receiver, - BroadcastReceiver, maybe_open_context ) from tractor import to_asyncio - +# XXX WOOPS XD +# yeah you'll need to install it since it was removed in #489 by +# accident; well i thought we had removed all usage.. from cryptofeed import FeedHandler - from cryptofeed.defines import ( DERIBIT, L1_BOOK, TRADES, @@ -62,6 +52,17 @@ ) from cryptofeed.symbols import Symbol +from piker.data.types import Struct +from piker.data import def_iohlcv_fields +from piker.data._web_bs import ( + open_jsonrpc_session +) + + +from piker import config +from piker.log import get_logger + + log = get_logger(__name__) @@ -75,26 +76,13 @@ _testnet_ws_url = 'wss://test.deribit.com/ws/api/v2' -# Broker specific ohlc schema (rest) -_ohlc_dtype = [ - ('index', int), - ('time', int), - ('open', float), - ('high', float), - ('low', float), - ('close', float), - ('volume', float), - ('bar_wap', float), # will be zeroed by sampler if not filled -] - - class JSONRPCResult(Struct): jsonrpc: str = '2.0' id: int result: Optional[dict] = None error: Optional[dict] = None - usIn: int - usOut: int + usIn: int + usOut: int usDiff: int testnet: bool @@ -405,7 +393,7 @@ async def bars( new_bars.append((i,) + tuple(row)) - array = np.array(new_bars, dtype=_ohlc_dtype) if as_np else klines + array = np.array(new_bars, dtype=def_iohlcv_fields) if as_np else klines return array async def last_trades( diff --git a/piker/brokers/deribit/feed.py b/piker/brokers/deribit/feed.py index a94204020..ca6a3f54d 100644 --- a/piker/brokers/deribit/feed.py +++ b/piker/brokers/deribit/feed.py @@ -30,7 +30,7 @@ import numpy as np import tractor -from piker._cacheables import open_cached_client +from piker.brokers import open_cached_client from piker.log import get_logger, get_console_log from piker.data import ShmArray from piker.brokers._util import ( @@ -39,7 +39,6 @@ ) from cryptofeed import FeedHandler - from cryptofeed.defines import ( DERIBIT, L1_BOOK, TRADES, OPTION, CALL, PUT ) diff --git a/piker/brokers/ib/_util.py b/piker/brokers/ib/_util.py index 585ea18dc..f23aa99b4 100644 --- a/piker/brokers/ib/_util.py +++ b/piker/brokers/ib/_util.py @@ -29,14 +29,13 @@ import tractor -from .._util import log +from .._util import get_logger if TYPE_CHECKING: - from .api import ( - MethodProxy, - ib_Client - ) + from .api import Client + from ib_insync import IB +log = get_logger('piker.brokers.ib') _reset_tech: Literal[ 'vnc', @@ -50,7 +49,8 @@ async def data_reset_hack( - vnc_host: str, + # vnc_host: str, + client: Client, reset_type: Literal['data', 'connection'], ) -> None: @@ -81,9 +81,20 @@ async def data_reset_hack( that need to be wrangle. 
''' + ib_client: IB = client.ib + + # look up any user defined vnc socket address mapped from + # a particular API socket port. + api_port: str = str(ib_client.client.port) + vnc_host: str + vnc_port: int + vnc_host, vnc_port = client.conf['vnc_addrs'].get( + api_port, + ('localhost', 3003) + ) no_setup_msg:str = ( - 'No data reset hack test setup for {vnc_host}!\n' + f'No data reset hack test setup for {vnc_host}!\n' 'See setup @\n' 'https://github.com/pikers/piker/tree/master/piker/brokers/ib' ) @@ -96,6 +107,7 @@ async def data_reset_hack( partial( vnc_click_hack, host=vnc_host, + port=vnc_port, ) ) except OSError: @@ -104,7 +116,7 @@ async def data_reset_hack( return False try: - import i3ipc + import i3ipc # noqa (since a deps dynamic check) except ModuleNotFoundError: log.warning(no_setup_msg) return False @@ -128,7 +140,8 @@ async def data_reset_hack( async def vnc_click_hack( - host: str = 'localhost', + host: str, + port: int, reset_type: str = 'data' ) -> None: ''' @@ -154,8 +167,12 @@ async def vnc_click_hack( async with asyncvnc.connect( host, - port=3003, + port=port, + + # TODO: doesn't work see: + # https://github.com/barneygale/asyncvnc/issues/7 # password='ibcansmbz', + ) as client: # move to middle of screen @@ -169,6 +186,11 @@ async def vnc_click_hack( def i3ipc_xdotool_manual_click_hack() -> None: + ''' + Do the data reset hack but expecting a local X-window using `xdotool`. + + ''' + import i3ipc i3 = i3ipc.Connection() # TODO: might be worth offering some kinda api for grabbing diff --git a/piker/brokers/ib/api.py b/piker/brokers/ib/api.py index 8636ddd26..171578aa9 100644 --- a/piker/brokers/ib/api.py +++ b/piker/brokers/ib/api.py @@ -51,9 +51,18 @@ from tractor import to_asyncio import pendulum from eventkit import Event -import ib_insync as ibis -from ib_insync.contract import ( +from ib_insync import ( + client as ib_client, + IB, Contract, + Crypto, + Commodity, + Forex, + Future, + ContFuture, + Stock, +) +from ib_insync.contract import ( ContractDetails, Option, ) @@ -70,15 +79,39 @@ Wrapper, RequestError, ) -from ib_insync.client import Client as ib_Client import numpy as np +# TODO: in hindsight, probably all imports should be +# non-relative for backends so that non-builting backends +# can be easily modelled after this style B) from piker import config from piker.brokers._util import ( log, get_logger, ) -from piker.data._source import base_ohlc_dtype + +_bar_load_dtype: list[tuple[str, type]] = [ + # NOTE XXX: only part that's diff + # from our default fields where + # time is normally an int. + # TODO: can we just cast to this + # at np.ndarray load time? + ('time', float), + + ('open', float), + ('high', float), + ('low', float), + ('close', float), + ('volume', float), + ('count', int), +] + +# Broker specific ohlc schema which includes a vwap field +_ohlc_dtype: list[tuple[str, type]] = _bar_load_dtype.copy() +_ohlc_dtype.insert( + 0, + ('index', int), +) _time_units = { @@ -136,7 +169,7 @@ def execDetails( return super().execDetails(reqId, contract, execu) -class NonShittyIB(ibis.IB): +class NonShittyIB(IB): ''' The beginning of overriding quite a few decisions in this lib. 
@@ -155,7 +188,7 @@ def __init__(self): # XXX: just to override this wrapper self.wrapper = NonShittyWrapper(self) - self.client = ib_Client(self.wrapper) + self.client = ib_client.Client(self.wrapper) self.client._logger = get_logger( 'ib_insync.client', ) @@ -295,7 +328,7 @@ def bars_to_np(bars: list) -> np.ndarray: nparr = np.array( np_ready, - dtype=base_ohlc_dtype, + dtype=_bar_load_dtype, ) assert nparr['time'][0] == bars[0].date.timestamp() assert nparr['time'][-1] == bars[-1].date.timestamp() @@ -351,9 +384,15 @@ class Client: def __init__( self, - ib: ibis.IB, + ib: IB, + config: dict[str, Any], ) -> None: + + # stash `brokers.toml` config on client for user settings + # as needed throughout this backend (eg. vnc sockaddr). + self.conf = config + self.ib = ib self.ib.RaiseRequestErrors = True @@ -398,7 +437,7 @@ async def bars( # optional "duration of time" equal to the # length of the returned history frame. - duration: Optional[str] = None, + duration: str | None = None, **kwargs, @@ -450,6 +489,8 @@ async def bars( # whatToShow='MIDPOINT', # whatToShow='TRADES', ) + + # tail case if no history for range or none prior. if not bars: # NOTE: there's 2 cases here to handle (and this should be # read alongside the implementation of @@ -464,6 +505,32 @@ async def bars( # rewrite the method in the first case? right now there's no # way to detect a timeout. + # NOTE XXX: ensure minimum duration in bars B) + # => we recursively call this method until we get at least + # as many bars such that they sum in aggregate to the the + # desired total time (duration) at most. + elif ( + end_dt + and ( + (len(bars) * sample_period_s) < dt_duration.in_seconds() + ) + ): + log.warning( + f'Recursing to get more bars from {end_dt} for {dt_duration}' + ) + end_dt -= dt_duration + ( + r_bars, + r_arr, + r_duration, + ) = await self.bars( + fqme, + start_dt=start_dt, + end_dt=end_dt, + ) + r_bars.extend(bars) + bars = r_bars + nparr = bars_to_np(bars) return bars, nparr, dt_duration @@ -580,7 +647,7 @@ async def search_symbols( # try get all possible contracts for symbol as per, # https://interactivebrokers.github.io/tws-api/basic_contracts.html#fut - con = ibis.Future( + con = Future( symbol=sym, exchange=exch, ) @@ -628,11 +695,11 @@ async def get_fute( # it's the "front" contract returned here if front: con = (await self.ib.qualifyContractsAsync( - ibis.ContFuture(symbol, exchange=exchange) + ContFuture(symbol, exchange=exchange) ))[0] else: con = (await self.ib.qualifyContractsAsync( - ibis.Future( + Future( symbol, exchange=exchange, lastTradeDateOrContractMonth=expiry, @@ -651,7 +718,7 @@ async def get_con( return self._cons[conid] except KeyError: con: Contract = await self.ib.qualifyContractsAsync( - ibis.Contract(conId=conid) + Contract(conId=conid) ) self._cons[conid] = con return con @@ -762,7 +829,7 @@ async def find_contracts( # if '/' in symbol: # currency = '' # symbol, currency = symbol.split('/') - con = ibis.Forex( + con = Forex( pair=''.join((symbol, currency)), currency=currency, ) @@ -771,12 +838,12 @@ async def find_contracts( # commodities elif exch == 'CMDTY': # eg. 
XAUUSD.CMDTY con_kwargs, bars_kwargs = _adhoc_symbol_map[symbol] - con = ibis.Commodity(**con_kwargs) + con = Commodity(**con_kwargs) con.bars_kwargs = bars_kwargs # crypto$ elif exch == 'PAXOS': # btc.paxos - con = ibis.Crypto( + con = Crypto( symbol=symbol, currency=currency, ) @@ -798,7 +865,7 @@ async def find_contracts( primaryExchange = exch exch = 'SMART' - con = ibis.Stock( + con = Stock( symbol=symbol, exchange=exch, primaryExchange=primaryExchange, @@ -896,7 +963,7 @@ async def get_quote( done, pending = await asyncio.wait( [ready], - timeout=0.1, + timeout=0.01, ) if ready in done: break @@ -1104,9 +1171,9 @@ def con2fqme( symbol = con.localSymbol.replace(' ', '') case ( - ibis.Commodity() + Commodity() # search API endpoint returns std con box.. - | ibis.Contract(secType='CMDTY') + | Contract(secType='CMDTY') ): # commodities and forex don't have an exchange name and # no real volume so we have to calculate the price @@ -1115,7 +1182,7 @@ def con2fqme( # no real volume on this tract calc_price = True - case ibis.Forex() | ibis.Contract(secType='CASH'): + case Forex() | Contract(secType='CASH'): dst, src = con.localSymbol.split('.') symbol = ''.join([dst, src]) suffix = con.exchange or 'idealpro' @@ -1192,7 +1259,7 @@ async def load_aio_clients( # the API TCP in `ib_insync` connection can be flaky af so instead # retry a few times to get the client going.. connect_retries: int = 3, - connect_timeout: float = 1, + connect_timeout: float = 10, disconnect_on_exit: bool = True, ) -> dict[str, Client]: @@ -1206,7 +1273,7 @@ async def load_aio_clients( ''' global _accounts2clients, _client_cache, _scan_ignore - conf = get_config() + conf: dict[str, Any] = get_config() ib = None client = None @@ -1257,7 +1324,7 @@ async def load_aio_clients( ): continue - ib = NonShittyIB() + ib: IB = NonShittyIB() for i in range(connect_retries): try: @@ -1272,7 +1339,7 @@ async def load_aio_clients( timeout=connect_timeout, ) # create and cache client - client = Client(ib) + client = Client(ib=ib, config=conf) # update all actor-global caches log.info(f"Caching client for {sockaddr}") @@ -1291,7 +1358,7 @@ async def load_aio_clients( ) as ce: _err = ce log.warning( - f'Failed to connect on {port} for {i} time with,\n' + f'Failed to connect on {host}:{port} for {i} time with,\n' f'{ib.client.apiError.value()}\n' 'retrying with a new client id..') @@ -1376,7 +1443,7 @@ async def open_client_proxies() -> tuple[ # TODO: maybe this should be the default in tractor? 
key=tractor.current_actor().uid, - ) as (cache_hit, (clients, from_aio)), + ) as (cache_hit, (clients, _)), AsyncExitStack() as stack ): @@ -1405,7 +1472,7 @@ def get_preferred_data_client( ''' conf = get_config() - data_accounts = conf['prefer_data_account'] + data_accounts: list[str] = conf['prefer_data_account'] for name in data_accounts: client = clients.get(f'ib.{name}') diff --git a/piker/brokers/ib/broker.py b/piker/brokers/ib/broker.py index 73477c0ad..d6c361334 100644 --- a/piker/brokers/ib/broker.py +++ b/piker/brokers/ib/broker.py @@ -64,7 +64,6 @@ open_pps, PpTable, ) -from .._util import get_console_log from piker.clearing._messages import ( Order, Status, @@ -217,7 +216,7 @@ async def recv_trade_updates( client.inline_errors(to_trio) # sync with trio task - to_trio.send_nowait(None) + to_trio.send_nowait(client.ib) def push_tradesies( eventkit_obj, @@ -513,8 +512,9 @@ async def open_trade_event_stream( async with tractor.to_asyncio.open_channel_from( recv_trade_updates, client=client, - ) as (first, trade_event_stream): + ) as (_, trade_event_stream): + # assert ibclient is client.ib task_status.started(trade_event_stream) await trio.sleep_forever() @@ -523,13 +523,10 @@ async def open_trade_event_stream( async def trades_dialogue( ctx: tractor.Context, - loglevel: str = None, + # loglevel: str = None, ) -> AsyncIterator[dict[str, Any]]: - # XXX: required to propagate ``tractor`` loglevel to piker logging - get_console_log(loglevel or tractor.current_actor().loglevel) - accounts_def = config.load_accounts(['ib']) global _client_cache diff --git a/piker/brokers/ib/feed.py b/piker/brokers/ib/feed.py index b4edae17f..07e9b44f2 100644 --- a/piker/brokers/ib/feed.py +++ b/piker/brokers/ib/feed.py @@ -30,13 +30,14 @@ from math import isnan import time from typing import ( + Any, Callable, - Optional, Awaitable, ) from async_generator import aclosing from fuzzywuzzy import process as fuzzy +import ib_insync as ibis import numpy as np import pendulum import tractor @@ -50,10 +51,10 @@ ) from .api import ( # _adhoc_futes_set, + Client, con2fqme, log, load_aio_clients, - ibis, MethodProxy, open_client_proxies, get_preferred_data_client, @@ -72,6 +73,7 @@ from piker.data.validate import FeedInit +# XXX NOTE: See available types table docs: # https://interactivebrokers.github.io/tws-api/tick_types.html tick_types = { 77: 'trade', @@ -91,9 +93,9 @@ # ``ib_insync`` already packs these into # quotes under the following fields. - # 55: 'trades_per_min', # `'tradeRate'` - # 56: 'vlm_per_min', # `'volumeRate'` - # 89: 'shortable', # `'shortableShares'` + 55: 'trades_per_min', # `'tradeRate'` + 56: 'vlm_per_min', # `'volumeRate'` + 89: 'shortable_units', # `'shortableShares'` } @@ -180,8 +182,8 @@ async def open_history_client( async def get_hist( timeframe: float, - end_dt: Optional[datetime] = None, - start_dt: Optional[datetime] = None, + end_dt: datetime | None = None, + start_dt: datetime | None = None, ) -> tuple[np.ndarray, str]: nonlocal max_timeout, mean, count @@ -192,6 +194,7 @@ async def get_hist( fqme, timeframe, end_dt=end_dt, + start_dt=start_dt, ) latency = time.time() - query_start if ( @@ -275,7 +278,7 @@ async def wait_on_data_reset( # ) # try to wait on the reset event(s) to arrive, a timeout # will trigger a retry up to 6 times (for now). 
- client = proxy._aio_ns.ib.client + client: Client = proxy._aio_ns done = trio.Event() with trio.move_on_after(timeout) as cs: @@ -284,10 +287,10 @@ async def wait_on_data_reset( log.warning( 'Sending DATA RESET request:\n' - f'{client}' + f'{client.ib.client}' ) res = await data_reset_hack( - vnc_host=client.host, + client=client, reset_type=reset_type, ) @@ -325,6 +328,7 @@ async def wait_on_data_reset( _data_resetter_task: trio.Task | None = None _failed_resets: int = 0 + async def get_bars( proxy: MethodProxy, @@ -333,6 +337,7 @@ async def get_bars( # blank to start which tells ib to look up the latest datum end_dt: str = '', + start_dt: str | None = '', # TODO: make this more dynamic based on measured frame rx latency? # how long before we trigger a feed reset (seconds) @@ -387,15 +392,31 @@ async def query(): bars, bars_array, dt_duration = out + # not enough bars signal, likely due to venue + # operational gaps. + too_little: bool = False if ( - not bars - and end_dt - ): - log.warning( - f'History is blank for {dt_duration} from {end_dt}' + end_dt + and ( + not bars + or (too_little := + start_dt + and (len(bars) * timeframe) + < dt_duration.in_seconds() + ) ) - end_dt -= dt_duration - continue + ): + if ( + end_dt + or too_little + ): + log.warning( + f'History is blank for {dt_duration} from {end_dt}' + ) + end_dt -= dt_duration + continue + + raise NoData(f'{end_dt}') if bars_array is None: raise SymbolNotFound(fqme) @@ -544,6 +565,7 @@ async def query(): await reset_done.wait() _data_resetter_task = None if unset_resetter else _data_resetter_task + assert result return result, data_cs is not None @@ -602,13 +624,12 @@ async def _setup_quote_stream( ''' global _quote_streams - to_trio.send_nowait(None) - async with load_aio_clients( disconnect_on_exit=False, ) as accts2clients: caccount_name, client = get_preferred_data_client(accts2clients) contract = contract or (await client.find_contract(symbol)) + to_trio.send_nowait(contract) # cuz why not ticker: Ticker = client.ib.reqMktData(contract, ','.join(opts)) # NOTE: it's batch-wise and slow af but I guess could @@ -700,7 +721,9 @@ async def open_aio_quote_stream( symbol=symbol, contract=contract, - ) as (first, from_aio): + ) as (contract, from_aio): + + assert contract # cache feed for later consumers _quote_streams[symbol] = from_aio @@ -783,7 +806,6 @@ async def get_mkt_info( # bs_fqme, _, broker = fqme.partition('.') proxy: MethodProxy - get_details: bool = False if proxy is not None: client_ctx = nullcontext(proxy) else: @@ -800,7 +822,6 @@ async def get_mkt_info( raise # TODO: more consistent field translation - init_info: dict = {} atype = _asset_type_map[con.secType] if atype == 'commodity': @@ -912,7 +933,8 @@ async def stream_quotes( con: Contract = details.contract first_ticker: Ticker = await proxy.get_quote(contract=con) first_quote: dict = normalize(first_ticker) - log.runtime(f'FIRST QUOTE: {first_quote}') + + log.warning(f'FIRST QUOTE: {first_quote}') # TODO: we should instead spawn a task that waits on a feed to start # and let it wait indefinitely..instead of this hard coded stuff. 
@@ -1027,7 +1049,6 @@ async def reset_on_feed(): async for ticker in stream: quote = normalize(ticker) fqme = quote['fqme'] - # print(f'sending {fqme}:\n{quote}') await send_chan.send({fqme: quote}) # ugh, clear ticks since we've consumed them @@ -1045,7 +1066,7 @@ async def open_symbol_search( await ctx.started({}) async with ( - open_client_proxies() as (proxies, clients), + open_client_proxies() as (proxies, _), open_data_client() as data_proxy, ): async with ctx.open_stream() as stream: diff --git a/piker/brokers/kraken/__init__.py b/piker/brokers/kraken/__init__.py index cd04c950f..0589981b7 100644 --- a/piker/brokers/kraken/__init__.py +++ b/piker/brokers/kraken/__init__.py @@ -25,11 +25,6 @@ wrapping around ``ib_insync``. ''' - -from piker.log import get_logger - -log = get_logger(__name__) - from .api import ( get_client, ) @@ -44,8 +39,10 @@ norm_trade_records, ) + __all__ = [ 'get_client', + 'get_mkt_info', 'trades_dialogue', 'open_history_client', 'open_symbol_search', diff --git a/piker/brokers/kraken/api.py b/piker/brokers/kraken/api.py index 1ebdb7597..de2be68c1 100644 --- a/piker/brokers/kraken/api.py +++ b/piker/brokers/kraken/api.py @@ -41,6 +41,7 @@ from piker import config from piker.data.types import Struct +from piker.data import def_iohlcv_fields from piker.accounting._mktinfo import ( Asset, digits_to_dec, @@ -52,29 +53,15 @@ DataThrottle, ) from piker.accounting import Transaction -from . import log +from piker.log import get_logger + +log = get_logger('piker.brokers.kraken') # // _url = 'https://api.kraken.com/0' - - -# Broker specific ohlc schema which includes a vwap field -_ohlc_dtype = [ - ('index', int), - ('time', int), - ('open', float), - ('high', float), - ('low', float), - ('close', float), - ('volume', float), - ('count', int), - ('bar_wap', float), -] - -# UI components allow this to be declared such that additional -# (historical) fields can be exposed. -ohlc_dtype = np.dtype(_ohlc_dtype) - +# TODO: this is the only backend providing this right? +# in which case we should drop it from the defaults and +# instead make a custom fields descr in this module! _show_wap_in_history = True _symbol_info_translation: dict[str, str] = { 'tick_decimals': 'pair_decimals', @@ -622,11 +609,11 @@ async def bars( new_bars.append( (i,) + tuple( ftype(bar[j]) for j, (name, ftype) in enumerate( - _ohlc_dtype[1:] + def_iohlcv_fields[1:] ) ) ) - array = np.array(new_bars, dtype=_ohlc_dtype) if as_np else bars + array = np.array(new_bars, dtype=def_iohlcv_fields) if as_np else bars return array except KeyError: errmsg = json['error'][0] diff --git a/piker/brokers/kraken/broker.py b/piker/brokers/kraken/broker.py index 28f5d026c..fc2eff62d 100644 --- a/piker/brokers/kraken/broker.py +++ b/piker/brokers/kraken/broker.py @@ -63,8 +63,8 @@ BrokerdPosition, BrokerdStatus, ) -from . 
import log from .api import ( + log, Client, BrokerError, get_client, @@ -422,7 +422,6 @@ def trades2pps( @tractor.context async def trades_dialogue( ctx: tractor.Context, - loglevel: str = None, ) -> AsyncIterator[dict[str, Any]]: diff --git a/piker/brokers/kraken/feed.py b/piker/brokers/kraken/feed.py index 526590fe0..dc70672fc 100644 --- a/piker/brokers/kraken/feed.py +++ b/piker/brokers/kraken/feed.py @@ -42,8 +42,10 @@ Asset, MktPair, ) -from piker._cacheables import ( +from piker.brokers import ( open_cached_client, +) +from piker._cacheables import ( async_lifo_cache, ) from piker.brokers._util import ( @@ -54,8 +56,8 @@ from piker.data.types import Struct from piker.data.validate import FeedInit from piker.data._web_bs import open_autorecon_ws, NoBsWs -from . import log from .api import ( + log, Client, Pair, ) diff --git a/piker/brokers/kucoin.py b/piker/brokers/kucoin.py index 8cf06300e..b2953467b 100755 --- a/piker/brokers/kucoin.py +++ b/piker/brokers/kucoin.py @@ -55,14 +55,17 @@ digits_to_dec, MktPair, ) -from piker.data.validate import FeedInit from piker import config -from piker._cacheables import ( +from piker.brokers import ( open_cached_client, +) +from piker._cacheables import ( async_lifo_cache, ) from piker.log import get_logger +from piker.data.validate import FeedInit from piker.data.types import Struct +from piker.data import def_iohlcv_fields from piker.data._web_bs import ( open_autorecon_ws, NoBsWs, @@ -71,17 +74,6 @@ log = get_logger(__name__) -_ohlc_dtype = [ - ('index', int), - ('time', int), - ('open', float), - ('high', float), - ('low', float), - ('close', float), - ('volume', float), - ('bar_wap', float), # will be zeroed by sampler if not filled -] - class KucoinMktPair(Struct, frozen=True): ''' @@ -465,18 +457,27 @@ async def _get_bars( ('low', float), ('close', float), ('volume', float), - ('bar_wap', float), # will be zeroed by sampler if not filled ] ''' # Generate generic end and start time if values not passed # Currently gives us 12hrs of data - if end_dt is None: + if ( + end_dt is None + and start_dt is None + ): end_dt = pendulum.now('UTC').add(minutes=1) + start_dt = end_dt.start_of('minute').subtract(minutes=limit) + + if ( + start_dt + and end_dt is None + ): + # just set end to limit's worth in future + end_dt = start_dt.start_of('minute').add(minutes=limit) - if start_dt is None: - start_dt = end_dt.start_of( - 'minute').subtract(minutes=limit) + else: + start_dt = end_dt.start_of('minute').subtract(minutes=limit) start_dt = int(start_dt.timestamp()) end_dt = int(end_dt.timestamp()) @@ -530,12 +531,12 @@ async def _get_bars( # volume float(bar[5]), # bar_wap - 0.0, + # 0.0, ) ) array = np.array( - new_bars, dtype=_ohlc_dtype) if as_np else bars + new_bars, dtype=def_iohlcv_fields) if as_np else bars return array diff --git a/piker/brokers/questrade.py b/piker/brokers/questrade.py index 1d447b23b..31133f232 100644 --- a/piker/brokers/questrade.py +++ b/piker/brokers/questrade.py @@ -40,7 +40,8 @@ import asks from ..calc import humanize, percent_change -from .._cacheables import open_cached_client, async_lifo_cache +from . import open_cached_client +from piker._cacheables import async_lifo_cache from .. 
import config from ._util import resproc, BrokerError, SymbolNotFound from ..log import ( diff --git a/piker/clearing/__init__.py b/piker/clearing/__init__.py index b2cc5fa7e..ec796ac9d 100644 --- a/piker/clearing/__init__.py +++ b/piker/clearing/__init__.py @@ -23,11 +23,15 @@ open_ems, OrderClient, ) +from ._ems import ( + open_brokerd_dialog, +) __all__ = [ 'open_ems', 'OrderClient', + 'open_brokerd_dialog', ] diff --git a/piker/clearing/_client.py b/piker/clearing/_client.py index 65a21fef7..436b4f8e1 100644 --- a/piker/clearing/_client.py +++ b/piker/clearing/_client.py @@ -37,7 +37,6 @@ Cancel, BrokerdPosition, ) -from ..brokers import get_brokermod if TYPE_CHECKING: from ._messages import ( @@ -132,6 +131,8 @@ def _mk_cancel_msg( f'Maybe there is a stale entry or line?\n' f'You should report this as a bug!' ) + return + fqme = str(cmd.symbol) return Cancel( oid=uuid, @@ -244,13 +245,6 @@ async def open_ems( loglevel=loglevel, ) as portal: - mod = get_brokermod(broker) - if ( - not getattr(mod, 'trades_dialogue', None) - or mode == 'paper' - ): - mode = 'paper' - from ._ems import _emsd_main async with ( # connect to emsd diff --git a/piker/clearing/_ems.py b/piker/clearing/_ems.py index e41ddbf17..1bb57ae79 100644 --- a/piker/clearing/_ems.py +++ b/piker/clearing/_ems.py @@ -24,6 +24,7 @@ # ChainMap, ) from contextlib import asynccontextmanager as acm +from decimal import Decimal from math import isnan from pprint import pformat import time @@ -34,6 +35,7 @@ Callable, Hashable, Optional, + TYPE_CHECKING, ) from bidict import bidict @@ -48,16 +50,10 @@ from ..data._normalize import iterticks from ..accounting._mktinfo import ( unpack_fqme, - float_digits, -) -from ..data.feed import ( - Feed, - Flume, - maybe_open_feed, + dec_digits, ) from ..ui._notify import notify_from_ems_status_msg from ..data.types import Struct -from . import _paper_engine as paper from ._messages import ( Order, Status, @@ -70,6 +66,12 @@ BrokerdPosition, ) +if TYPE_CHECKING: + from ..data.feed import ( + Feed, + Flume, + ) + # TODO: numba all of this def mk_check( @@ -129,11 +131,16 @@ class DarkBook(Struct): triggers: dict[ str, # symbol dict[ - str, # uuid + str, # uuid for triggerable execution tuple[ Callable[[float], bool], # predicate - str, # name - dict, # cmd / msg type + tuple[str, ...], # tickfilter + dict | Order, # cmd / msg type + + # live submission constraint parameters + float, # percent_away max price diff + float, # abs_diff_away max price diff + int, # min_tick_digits to round the clearable price ] ] ] = {} @@ -176,7 +183,8 @@ async def clear_dark_triggers( async for quotes in quote_stream: # start = time.time() for sym, quote in quotes.items(): - execs = book.triggers.get(sym, {}) + # TODO: make this a msg-compat struct + execs: tuple = book.triggers.get(sym, {}) for tick in iterticks( quote, # dark order price filter(s) @@ -199,7 +207,8 @@ async def clear_dark_triggers( # TODO: send this msg instead? 
cmd, percent_away, - abs_diff_away + abs_diff_away, + price_tick_digits, ) in ( tuple(execs.items()) ): @@ -232,8 +241,11 @@ async def clear_dark_triggers( size=size, ): bfqme: str = symbol.replace(f'.{broker}', '') - submit_price = price + abs_diff_away - resp = 'triggered' # hidden on client-side + submit_price: float = round( + price + abs_diff_away, + ndigits=price_tick_digits, + ) + resp: str = 'triggered' # hidden on client-side log.info( f'Dark order triggered for price {price}\n' @@ -263,11 +275,11 @@ async def clear_dark_triggers( ) # remove exec-condition from set - log.info(f'removing pred for {oid}') - pred = execs.pop(oid, None) - if not pred: + log.info(f'Removing trigger for {oid}') + trigger: tuple | None = execs.pop(oid, None) + if not trigger: log.warning( - f'pred for {oid} was already removed!?' + f'trigger for {oid} was already removed!?' ) # update actives @@ -307,15 +319,177 @@ class TradesRelay(Struct): # map of symbols to dicts of accounts to pp msgs positions: dict[ - # brokername, acctid + # brokername, acctid -> tuple[str, str], - list[BrokerdPosition], + # fqme -> msg + dict[str, BrokerdPosition], ] # allowed account names accounts: tuple[str] +@acm +async def open_brokerd_dialog( + brokermod: ModuleType, + portal: tractor.Portal, + exec_mode: str, + fqme: str | None = None, + loglevel: str | None = None, + +) -> tuple[ + tractor.MsgStream, + # {(brokername, accountname) -> {fqme -> msg}} + dict[(str, str), dict[str, BrokerdPosition]], + list[str], +]: + ''' + Open either a live trades control dialog or a dialog with a new + paper engine instance depending on live trading support for the + broker backend, configuration, or client code usage. + + ''' + broker: str = brokermod.name + + def mk_paper_ep(): + from . import _paper_engine as paper_mod + + nonlocal brokermod, exec_mode + + # for logging purposes + brokermod = paper_mod + + # for paper mode we need to mock this trades response feed + # so we load bidir stream to a new sub-actor running + # a paper-simulator clearing engine. + + # load the paper trading engine + log.info(f'{broker}: Entering `paper` trading mode') + + # load the paper trading engine as a subactor of this emsd + # actor to simulate the real IPC load it'll have when also + # pulling data from feeds + if not fqme: + log.warning( + f'Paper engine activate for {broker} but no fqme provided?' + ) + + return paper_mod.open_paperboi( + fqme=fqme, + broker=broker, + loglevel=loglevel, + ) + + # take the first supported ep we detect + # on the backend mod. + trades_endpoint: Callable + for ep_name in [ + 'open_trade_dialog', # probably final name? + 'trades_dialogue', # legacy + ]: + trades_endpoint = getattr( + brokermod, + ep_name, + None, + ) + if trades_endpoint: + break + else: + log.warning( + f'No live trading EP found: {brokermod.name}?' 
+ ) + exec_mode: str = 'paper' + + if ( + trades_endpoint is not None + or exec_mode != 'paper' + ): + # open live brokerd trades endpoint + open_trades_endpoint = portal.open_context( + trades_endpoint, + ) + + @acm + async def maybe_open_paper_ep(): + if exec_mode == 'paper': + async with mk_paper_ep() as msg: + yield msg + return + + # open trades-dialog endpoint with backend broker + async with open_trades_endpoint as msg: + ctx, first = msg + + # runtime indication that the backend can't support live + # order ctrl yet, so boot the paperboi B0 + if first == 'paper': + async with mk_paper_ep() as msg: + yield msg + return + else: + # working live ep case B) + yield msg + return + + pps_by_broker_account: dict[(str, str), BrokerdPosition] = {} + + async with ( + maybe_open_paper_ep() as ( + brokerd_ctx, + (position_msgs, accounts), + ), + brokerd_ctx.open_stream() as brokerd_trades_stream, + ): + # XXX: really we only want one stream per `emsd` + # actor to relay global `brokerd` order events + # unless we're going to expect each backend to + # relay only orders affiliated with a particular + # ``trades_dialogue()`` session (seems annoying + # for implementers). So, here we cache the relay + # task and instead of running multiple tasks + # (which will result in multiples of the same + # msg being relayed for each EMS client) we just + # register each client stream to this single + # relay loop in the dialog table. + + # begin processing order events from the target + # brokerd backend by receiving order submission + # response messages, normalizing them to EMS + # messages and relaying back to the piker order + # client set. + + # locally cache and track positions per account with + # a nested table of msgs: + # tuple(brokername, acctid) -> + # (fqme: str -> + # `BrokerdPosition`) + for msg in position_msgs: + + msg = BrokerdPosition(**msg) + log.info( + f'loading pp for {brokermod.__name__}:\n' + f'{pformat(msg.to_dict())}', + ) + + # TODO: state any mismatch here? + account: str = msg.account + assert account in accounts + + pps_by_broker_account.setdefault( + (broker, account), + {}, + )[msg.symbol] = msg + + # should be unique entries, verdad! + assert len(set(accounts)) == len(accounts) + + yield ( + brokerd_trades_stream, + pps_by_broker_account, + accounts, + ) + + class Router(Struct): ''' Order router which manages and tracks per-broker dark book, @@ -347,6 +521,7 @@ class Router(Struct): ] = defaultdict(set) # TODO: mapping of ems dialog ids to msg flow history + # - use the new ._util.OrderDialogs? # msgflows: defaultdict[ # str, # ChainMap[dict[str, dict]], @@ -407,118 +582,25 @@ async def maybe_open_brokerd_dialog( yield relay return - def mk_paper_ep(): - nonlocal brokermod, exec_mode - - # for logging purposes - brokermod = paper - - # for paper mode we need to mock this trades response feed - # so we load bidir stream to a new sub-actor running - # a paper-simulator clearing engine. 
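A hedged usage sketch of the newly factored-out ``open_brokerd_dialog()``; the broker module, portal and fqme are assumed to already be in hand and the relay body is elided::

    # sketch: consuming the new `open_brokerd_dialog()` acm from an EMS
    # task; `brokermod`, `portal` and `fqme` are assumed inputs.
    from types import ModuleType

    import tractor

    from piker.clearing import open_brokerd_dialog

    async def relay_brokerd_events(
        brokermod: ModuleType,
        portal: tractor.Portal,
        fqme: str,
    ) -> None:
        async with open_brokerd_dialog(
            brokermod=brokermod,
            portal=portal,
            exec_mode='paper',       # or live when the backend supports it
            fqme=fqme,
            loglevel='info',
        ) as (
            brokerd_stream,          # tractor.MsgStream of order events
            pps_by_broker_account,   # {(broker, acct): {fqme: BrokerdPosition}}
            accounts,                # allowed account names
        ):
            async for msg in brokerd_stream:
                ...                  # normalize + relay to EMS clients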
- - # load the paper trading engine - exec_mode = 'paper' - log.info(f'{broker}: Entering `paper` trading mode') - - # load the paper trading engine as a subactor of this emsd - # actor to simulate the real IPC load it'll have when also - # pulling data from feeds - return paper.open_paperboi( - fqme=fqme, - loglevel=loglevel, - ) - - trades_endpoint = getattr(brokermod, 'trades_dialogue', None) - if ( - trades_endpoint is not None - or exec_mode != 'paper' - ): - # open live brokerd trades endpoint - open_trades_endpoint = portal.open_context( - trades_endpoint, - loglevel=loglevel, - ) - - else: - exec_mode: str = 'paper' - - @acm - async def maybe_open_paper_ep(): - if exec_mode == 'paper': - async with mk_paper_ep() as msg: - yield msg - return - - # open trades-dialog endpoint with backend broker - async with open_trades_endpoint as msg: - ctx, first = msg - - # runtime indication that the backend can't support live - # order ctrl yet, so boot the paperboi B0 - if first == 'paper': - async with mk_paper_ep() as msg: - yield msg - return - else: - # working live ep case B) - yield msg - return - - positions: list[BrokerdPosition] - accounts: tuple[str] - async with ( - maybe_open_paper_ep() as ( - brokerd_ctx, - (positions, accounts), - ), - brokerd_ctx.open_stream() as brokerd_trades_stream, + async with open_brokerd_dialog( + brokermod=brokermod, + portal=portal, + exec_mode=exec_mode, + fqme=fqme, + loglevel=loglevel, + + ) as ( + brokerd_stream, + pp_msg_table, + accounts, ): - # XXX: really we only want one stream per `emsd` - # actor to relay global `brokerd` order events - # unless we're going to expect each backend to - # relay only orders affiliated with a particular - # ``trades_dialogue()`` session (seems annoying - # for implementers). So, here we cache the relay - # task and instead of running multiple tasks - # (which will result in multiples of the same - # msg being relayed for each EMS client) we just - # register each client stream to this single - # relay loop in the dialog table. - - # begin processing order events from the target - # brokerd backend by receiving order submission - # response messages, normalizing them to EMS - # messages and relaying back to the piker order - # client set. - - # locally cache and track positions per account with - # a nested table of msgs: - # tuple(brokername, acctid) -> - # (fqme: str -> - # `BrokerdPosition`) + # create a new relay and sync it's state according + # to brokerd-backend reported position msgs. relay = TradesRelay( - brokerd_stream=brokerd_trades_stream, - positions={}, - accounts=accounts, + brokerd_stream=brokerd_stream, + positions=pp_msg_table, + accounts=tuple(accounts), ) - for msg in positions: - - msg = BrokerdPosition(**msg) - log.info( - f'loading pp for {brokermod.__name__}:\n' - f'{pformat(msg.to_dict())}', - ) - - # TODO: state any mismatch here? - account = msg.account - assert account in accounts - - relay.positions.setdefault( - (broker, account), - {}, - )[msg.symbol] = msg - self.relays[broker] = relay # this context should block here indefinitely until @@ -550,12 +632,17 @@ async def open_trade_relays( indefinitely. ''' + from ..data.feed import maybe_open_feed + async with ( maybe_open_feed( [fqme], loglevel=loglevel, ) as feed, ): + # extract expanded fqme in case input was of a less + # qualified form, eg. 
xbteur.kraken -> xbteur.spot.kraken + fqme: str = list(feed.flumes.keys())[0] brokername, _, _, _ = unpack_fqme(fqme) brokermod = feed.mods[brokername] broker = brokermod.name @@ -590,7 +677,7 @@ async def open_trade_relays( client_ready = trio.Event() task_status.started( - (relay, feed, client_ready) + (fqme, relay, feed, client_ready) ) # sync to the client side by waiting for the stream @@ -1141,14 +1228,15 @@ async def process_client_order_cmds( and status.resp == 'dark_open' ): # remove from dark book clearing - entry = dark_book.triggers[fqme].pop(oid, None) + entry: tuple | None = dark_book.triggers[fqme].pop(oid, None) if entry: ( pred, tickfilter, cmd, percent_away, - abs_diff_away + abs_diff_away, + min_tick_digits, ) = entry # tell client side that we've cancelled the @@ -1283,33 +1371,36 @@ async def process_client_order_cmds( # TODO: make this configurable from our top level # config, prolly in a .clearing` section? spread_slap: float = 5 - min_tick = float(flume.mkt.size_tick) - min_tick_digits = float_digits(min_tick) + min_tick = Decimal(flume.mkt.price_tick) + min_tick_digits: int = dec_digits(min_tick) + + tickfilter: tuple[str, ...] + percent_away: float if action == 'buy': tickfilter = ('ask', 'last', 'trade') - percent_away = 0.005 + percent_away: float = 0.005 # TODO: we probably need to scale this based # on some near term historical spread # measure? - abs_diff_away = round( + abs_diff_away = float(round( spread_slap * min_tick, ndigits=min_tick_digits, - ) + )) elif action == 'sell': tickfilter = ('bid', 'last', 'trade') - percent_away = -0.005 - abs_diff_away = round( + percent_away: float = -0.005 + abs_diff_away: float = float(round( -spread_slap * min_tick, ndigits=min_tick_digits, - ) + )) else: # alert tickfilter = ('trade', 'utrade', 'last') - percent_away = 0 - abs_diff_away = 0 + percent_away: float = 0 + abs_diff_away: float = 0 # submit execution/order to EMS scan loop # NOTE: this may result in an override of an existing @@ -1321,7 +1412,8 @@ async def process_client_order_cmds( tickfilter, req, percent_away, - abs_diff_away + abs_diff_away, + min_tick_digits, ) resp = 'dark_open' @@ -1378,13 +1470,13 @@ async def cached_mngr( loglevel: str = 'info', ): - relay, feed, client_ready = await _router.nursery.start( + fqme, relay, feed, client_ready = await _router.nursery.start( _router.open_trade_relays, fqme, exec_mode, loglevel, ) - yield relay, feed, client_ready + yield fqme, relay, feed, client_ready async with tractor.trionics.maybe_open_context( acm_func=cached_mngr, @@ -1397,13 +1489,13 @@ async def cached_mngr( key=cache_on_fqme_unless_paper, ) as ( cache_hit, - (relay, feed, client_ready) + (fqme, relay, feed, client_ready) ): if cache_hit: log.info(f'Reusing existing trades relay for {fqme}:\n' f'{relay}\n') - yield relay, feed, client_ready + yield fqme, relay, feed, client_ready @tractor.context @@ -1486,7 +1578,7 @@ async def _emsd_main( fqme, exec_mode, loglevel, - ) as (relay, feed, client_ready): + ) as (fqme, relay, feed, client_ready): brokerd_stream = relay.brokerd_stream dark_book = _router.get_dark_book(broker) diff --git a/piker/clearing/_messages.py b/piker/clearing/_messages.py index 126cd3476..219749716 100644 --- a/piker/clearing/_messages.py +++ b/piker/clearing/_messages.py @@ -23,7 +23,6 @@ # deque, # ) from typing import ( - Optional, Literal, ) @@ -140,7 +139,7 @@ class Status(Struct): # this maps normally to the ``BrokerdOrder.reqid`` below, an id # normally allocated internally by the backend broker routing system - 
reqid: Optional[int | str] = None + reqid: int | str | None = None # the (last) source order/request msg if provided # (eg. the Order/Cancel which causes this msg) and @@ -153,7 +152,7 @@ class Status(Struct): # event that wasn't originated by piker's emsd (eg. some external # trading system which does it's own order control but that you # might want to "track" using piker UIs/systems). - src: Optional[str] = None + src: str | None = None # set when a cancel request msg was set for this order flow dialog # but the brokerd dialog isn't yet in a cancelled state. @@ -181,7 +180,7 @@ class BrokerdCancel(Struct): # for setting a unique order id then this value will be relayed back # on the emsd order request stream as the ``BrokerdOrderAck.reqid`` # field - reqid: Optional[int | str] = None + reqid: int | str | None = None action: str = 'cancel' @@ -205,7 +204,7 @@ class BrokerdOrder(Struct): # for setting a unique order id then this value will be relayed back # on the emsd order request stream as the ``BrokerdOrderAck.reqid`` # field - reqid: Optional[int | str] = None + reqid: int | str | None = None # --------------- @@ -233,17 +232,18 @@ class BrokerdOrderAck(Struct): class BrokerdStatus(Struct): - reqid: int | str time_ns: int + reqid: int | str status: Literal[ 'open', 'canceled', - 'fill', 'pending', 'error', + 'closed', ] - account: str + # TODO: do we need this? + account: str | None = None, name: str = 'status' filled: float = 0.0 reason: str = '' @@ -259,24 +259,24 @@ class BrokerdStatus(Struct): class BrokerdFill(Struct): ''' - A single message indicating a "fill-details" event from the broker - if avaiable. + A single message indicating a "fill-details" event from the + broker if avaiable. ''' # brokerd timestamp required for order mode arrow placement on x-axis # TODO: maybe int if we force ns? # we need to normalize this somehow since backends will use their # own format and likely across many disparate epoch clocks... + time_ns: int broker_time: float reqid: int | str - time_ns: int # order exeuction related size: float price: float name: str = 'fill' - action: Optional[str] = None + action: str | None = None broker_details: dict = {} # meta-data (eg. commisions etc.) @@ -288,12 +288,14 @@ class BrokerdError(Struct): ''' oid: str - symbol: str reason: str + # TODO: drop this right? + symbol: str | None = None + # if no brokerd order request was actually submitted (eg. we errored # at the ``pikerd`` layer) then there will be ``reqid`` allocated. 
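To make the reordered/now-optional fields concrete, a small sketch of the wire shapes implied by the updated ``BrokerdStatus`` / ``BrokerdFill`` Structs above (field names are taken from the defs, all concrete values are hypothetical)::

    # sketch: dict "wire shapes" matching the updated msg Structs above;
    # every concrete value here is hypothetical.
    import time

    brokerd_status = {
        'name': 'status',
        'time_ns': time.time_ns(),
        'reqid': 'req-1',
        'status': 'open',            # open|canceled|pending|error|closed
        'account': 'paper',          # now optional
        'filled': 0.0,
        'reason': '',
    }

    brokerd_fill = {
        'name': 'fill',
        'time_ns': time.time_ns(),   # now leads the time fields
        'broker_time': time.time(),
        'reqid': 'req-1',
        'size': 1.0,
        'price': 100.0,
        'action': 'buy',
        'broker_details': {},
    }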
- reqid: Optional[int | str] = None + reqid: int | str | None = None name: str = 'error' broker_details: dict = {} diff --git a/piker/clearing/_paper_engine.py b/piker/clearing/_paper_engine.py index 44171dbc0..34e7ec58e 100644 --- a/piker/clearing/_paper_engine.py +++ b/piker/clearing/_paper_engine.py @@ -124,7 +124,7 @@ async def submit_limit( # for dark orders since we want the dark_executed # to trigger first thus creating a lookup entry # in the broker trades event processing loop - await trio.sleep(0.05) + await trio.sleep(0.01) if ( action == 'sell' @@ -191,7 +191,7 @@ async def submit_cancel( self._sells[symbol].pop(oid, None) # TODO: net latency model - await trio.sleep(0.05) + await trio.sleep(0.01) msg = BrokerdStatus( status='canceled', @@ -224,7 +224,7 @@ async def fake_fill( ''' # TODO: net latency model - await trio.sleep(0.05) + await trio.sleep(0.01) fill_time_ns = time.time_ns() fill_time_s = time.time() @@ -527,7 +527,7 @@ async def handle_order_requests( @tractor.context -async def trades_dialogue( +async def open_trade_dialog( ctx: tractor.Context, broker: str, @@ -574,7 +574,7 @@ async def trades_dialogue( if fqme: bs_fqme, _, broker = fqme.rpartition('.') mkt, _ = await brokermod.get_mkt_info(bs_fqme) - mkt_by_fqme[fqme] = mkt + mkt_by_fqme[mkt.fqme] = mkt # for each sym in the ledger load it's `MktPair` info for tid, txdict in ledger.data.items(): @@ -695,21 +695,21 @@ async def open_paperboi( async with ( tractor.find_actor(service_name) as portal, - tractor.open_nursery() as tn, + tractor.open_nursery() as an, ): # NOTE: only spawn if no paperboi already is up since we likely # don't need more then one actor for simulated order clearing # per broker-backend. if portal is None: log.info('Starting new paper-engine actor') - portal = await tn.start_actor( + portal = await an.start_actor( service_name, enable_modules=[__name__] ) we_spawned = True async with portal.open_context( - trades_dialogue, + open_trade_dialog, broker=broker, fqme=fqme, loglevel=loglevel, diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index 706101357..a812555e7 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -20,6 +20,7 @@ ''' import os from contextlib import AsyncExitStack +from types import ModuleType import click import trio @@ -100,7 +101,6 @@ async def main(): registry_addr=reg_addr, ) as service_mngr, # normally delivers a ``Services`` handle - trio.open_nursery() as n, AsyncExitStack() as stack, ): @@ -154,6 +154,8 @@ def cli( assert os.path.isdir(configdir), f"`{configdir}` is not a valid path" config._override_config_dir(configdir) + # TODO: for typer see + # https://typer.tiangolo.com/tutorial/commands/context/ ctx.ensure_object(dict) if not brokers: @@ -161,7 +163,9 @@ def cli( from piker.brokers import __brokers__ brokers = __brokers__ - brokermods = [get_brokermod(broker) for broker in brokers] + brokermods: dict[str, ModuleType] = { + broker: get_brokermod(broker) for broker in brokers + } assert brokermods reg_addr: None | tuple[str, int] = None @@ -227,12 +231,15 @@ async def list_services(): def _load_clis() -> None: from ..service import marketstore # noqa - from ..service import elastic - from ..data import cli # noqa + from ..service import elastic # noqa from ..brokers import cli # noqa from ..ui import cli # noqa from ..watchlists import cli # noqa + # typer implemented + from ..storage import cli # noqa + from ..accounting import cli # noqa + # load downstream cli modules _load_clis() diff --git a/piker/config.py b/piker/config.py index 
e2c63ea49..0220f3e6d 100644 --- a/piker/config.py +++ b/piker/config.py @@ -187,6 +187,15 @@ def _conf_fn_w_ext( return f'{name}.toml' +def get_conf_dir() -> Path: + ''' + Return the user configuration directory ``Path`` + on the local filesystem. + + ''' + return _config_dir + + def get_conf_path( conf_name: str = 'brokers', @@ -300,9 +309,9 @@ def load_account( ) -> tuple[dict, Path]: ''' Load a accounting (with positions) file from - $PIKER_CONFIG_DIR/accounting/account...toml. + $CONFIG_DIR/accounting/account...toml - Where normally $PIKER_CONFIG_DIR = ~/.config/piker/ + Where normally $CONFIG_DIR = ~/.config/piker/ and we implicitly create a accounting subdir which should normally be linked to a git repo managed by the user B) @@ -357,7 +366,13 @@ def load_ledger( acctid: str, ) -> tuple[dict, Path]: + ''' + Load a ledger (TOML) file from user's config directory: + $CONFIG_DIR/accounting/ledgers/trades__.toml + Return its `dict`-content and file path. + + ''' ldir: Path = _config_dir / 'accounting' / 'ledgers' if not ldir.is_dir(): ldir.mkdir() diff --git a/piker/data/__init__.py b/piker/data/__init__.py index ba6af4caf..cd0a11833 100644 --- a/piker/data/__init__.py +++ b/piker/data/__init__.py @@ -22,12 +22,6 @@ sharing live streams over a network. """ -import tractor -import trio - -from ._util import ( - get_console_log, -) from ._normalize import iterticks from ._sharedmem import ( maybe_open_shm_array, @@ -36,6 +30,10 @@ get_shm_token, ShmArray, ) +from ._source import ( + def_iohlcv_fields, + def_ohlcv_fields, +) from .feed import ( open_feed, ) @@ -49,4 +47,6 @@ 'attach_shm_array', 'open_shm_array', 'get_shm_token', + 'def_iohlcv_fields', + 'def_ohlcv_fields', ] diff --git a/piker/data/_formatters.py b/piker/data/_formatters.py index ef892c7a6..4fbe3151c 100644 --- a/piker/data/_formatters.py +++ b/piker/data/_formatters.py @@ -222,6 +222,7 @@ def format_to_1d( profiler: Profiler, slice_to_inview: bool = True, + force_full_realloc: bool = False, ) -> tuple[ np.ndarray, @@ -248,7 +249,10 @@ def format_to_1d( # we first need to allocate xy data arrays # from the source data. - if self.y_nd is None: + if ( + self.y_nd is None + or force_full_realloc + ): self.xy_nd_start = shm._first.value self.xy_nd_stop = shm._last.value self.x_nd, self.y_nd = self.allocate_xy_nd( diff --git a/piker/data/_pathops.py b/piker/data/_pathops.py index 48a11f402..a17f289a4 100644 --- a/piker/data/_pathops.py +++ b/piker/data/_pathops.py @@ -1,5 +1,5 @@ # piker: trading gear for hackers -# Copyright (C) 2018-present Tyler Goodlet (in stewardship of piker0) +# Copyright (C) 2018-present Tyler Goodlet (in stewardship of pikers) # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -289,158 +289,3 @@ def ohlc_flatten( num=len(flat), ) return x, flat - - -def slice_from_time( - arr: np.ndarray, - start_t: float, - stop_t: float, - step: float, # sampler period step-diff - -) -> slice: - ''' - Calculate array indices mapped from a time range and return them in - a slice. - - Given an input array with an epoch `'time'` series entry, calculate - the indices which span the time range and return in a slice. Presume - each `'time'` step increment is uniform and when the time stamp - series contains gaps (the uniform presumption is untrue) use - ``np.searchsorted()`` binary search to look up the appropriate - index. 
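Since the ``load_ledger()`` docstring above only hints at the filename layout, here is a hedged sketch of how the per-account ledger path is assumed to be composed; the broker/account parts of the name are hypothetical placeholders, not confirmed by this hunk::

    # sketch: assumed composition of the ledger path referenced in the
    # `load_ledger()` docstring above; `broker`/`acctid` are placeholders.
    from pathlib import Path

    def ledger_path(
        config_dir: Path,     # eg. what `get_conf_dir()` returns
        broker: str,
        acctid: str,
    ) -> Path:
        ldir: Path = config_dir / 'accounting' / 'ledgers'
        return ldir / f'trades_{broker}_{acctid}.toml'

    # eg. ~/.config/piker/accounting/ledgers/trades_kraken_spot.toml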
- - ''' - profiler = Profiler( - msg='slice_from_time()', - disabled=not pg_profile_enabled(), - ms_threshold=ms_slower_then, - ) - - times = arr['time'] - t_first = floor(times[0]) - t_last = ceil(times[-1]) - - # the greatest index we can return which slices to the - # end of the input array. - read_i_max = arr.shape[0] - - # compute (presumed) uniform-time-step index offsets - i_start_t = floor(start_t) - read_i_start = floor(((i_start_t - t_first) // step)) - 1 - - i_stop_t = ceil(stop_t) - - # XXX: edge case -> always set stop index to last in array whenever - # the input stop time is detected to be greater then the equiv time - # stamp at that last entry. - if i_stop_t >= t_last: - read_i_stop = read_i_max - else: - read_i_stop = ceil((i_stop_t - t_first) // step) + 1 - - # always clip outputs to array support - # for read start: - # - never allow a start < the 0 index - # - never allow an end index > the read array len - read_i_start = min( - max(0, read_i_start), - read_i_max - 1, - ) - read_i_stop = max( - 0, - min(read_i_stop, read_i_max), - ) - - # check for larger-then-latest calculated index for given start - # time, in which case we do a binary search for the correct index. - # NOTE: this is usually the result of a time series with time gaps - # where it is expected that each index step maps to a uniform step - # in the time stamp series. - t_iv_start = times[read_i_start] - if ( - t_iv_start > i_start_t - ): - # do a binary search for the best index mapping to ``start_t`` - # given we measured an overshoot using the uniform-time-step - # calculation from above. - - # TODO: once we start caching these per source-array, - # we can just overwrite ``read_i_start`` directly. - new_read_i_start = np.searchsorted( - times, - i_start_t, - side='left', - ) - - # TODO: minimize binary search work as much as possible: - # - cache these remap values which compensate for gaps in the - # uniform time step basis where we calc a later start - # index for the given input ``start_t``. - # - can we shorten the input search sequence by heuristic? 
- # up_to_arith_start = index[:read_i_start] - - if ( - new_read_i_start <= read_i_start - ): - # t_diff = t_iv_start - start_t - # print( - # f"WE'RE CUTTING OUT TIME - STEP:{step}\n" - # f'start_t:{start_t} -> 0index start_t:{t_iv_start}\n' - # f'diff: {t_diff}\n' - # f'REMAPPED START i: {read_i_start} -> {new_read_i_start}\n' - # ) - read_i_start = new_read_i_start - - t_iv_stop = times[read_i_stop - 1] - if ( - t_iv_stop > i_stop_t - ): - # t_diff = stop_t - t_iv_stop - # print( - # f"WE'RE CUTTING OUT TIME - STEP:{step}\n" - # f'calced iv stop:{t_iv_stop} -> stop_t:{stop_t}\n' - # f'diff: {t_diff}\n' - # # f'SHOULD REMAP STOP: {read_i_start} -> {new_read_i_start}\n' - # ) - new_read_i_stop = np.searchsorted( - times[read_i_start:], - # times, - i_stop_t, - side='right', - ) - - if ( - new_read_i_stop <= read_i_stop - ): - read_i_stop = read_i_start + new_read_i_stop + 1 - - # sanity checks for range size - # samples = (i_stop_t - i_start_t) // step - # index_diff = read_i_stop - read_i_start + 1 - # if index_diff > (samples + 3): - # breakpoint() - - # read-relative indexes: gives a slice where `shm.array[read_slc]` - # will be the data spanning the input time range `start_t` -> - # `stop_t` - read_slc = slice( - int(read_i_start), - int(read_i_stop), - ) - - profiler( - 'slicing complete' - # f'{start_t} -> {abs_slc.start} | {read_slc.start}\n' - # f'{stop_t} -> {abs_slc.stop} | {read_slc.stop}\n' - ) - - # NOTE: if caller needs absolute buffer indices they can - # slice the buffer abs index like so: - # index = arr['index'] - # abs_indx = index[read_slc] - # abs_slc = slice( - # int(abs_indx[0]), - # int(abs_indx[-1]), - # ) - - return read_slc diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index 20bf9b493..641edf539 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -230,6 +230,7 @@ async def broadcast( self, period_s: float, time_stamp: float | None = None, + info: dict | None = None, ) -> None: ''' @@ -258,10 +259,14 @@ async def broadcast( try: for stream in (subs - sent): try: - await stream.send({ + msg = { 'index': time_stamp or last_ts, 'period': period_s, - }) + } + if info: + msg.update(info) + + await stream.send(msg) sent.add(stream) except ( @@ -287,9 +292,19 @@ async def broadcast( ) @classmethod - async def broadcast_all(self) -> None: - for period_s in self.subscribers: - await self.broadcast(period_s) + async def broadcast_all( + self, + info: dict | None = None, + ) -> None: + + # NOTE: take a copy of subs since removals can happen + # during the broadcast checkpoint which can cause + # a `RuntimeError` on interation of the underlying `dict`. + for period_s in list(self.subscribers): + await self.broadcast( + period_s, + info=info, + ) @tractor.context @@ -359,14 +374,21 @@ async def register_with_sampler( # except broadcast requests from the subscriber async for msg in stream: - if msg == 'broadcast_all': - await Sampler.broadcast_all() + if 'broadcast_all' in msg: + await Sampler.broadcast_all( + info=msg['broadcast_all'], + ) finally: if ( sub_for_broadcasts and subs ): - subs.remove(stream) + try: + subs.remove(stream) + except KeyError: + log.warning( + f'{stream._ctx.chan.uid} sub already removed!?' + ) else: # if no shms are passed in we just wait until cancelled # by caller. 
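The ``broadcast_all`` request is now a dict so callers can attach arbitrary ``info`` that gets merged into every step-event msg; a minimal sketch of both sides (the stream objects and the fqme are assumed/hypothetical)::

    # sketch: the dict-shaped 'broadcast_all' request and how the sampler
    # side merges the attached `info` into each step-event msg.
    import tractor

    async def request_rebroadcast(
        sampler_stream: tractor.MsgStream,
    ) -> None:
        # client/backfiller side: attach context about why we're asking
        await sampler_stream.send({
            'broadcast_all': {
                'backfilling': ('btcusdt.spot.binance', 60),  # hypothetical
            },
        })

    def merge_info(
        period_s: float,
        time_stamp: float,
        info: dict | None = None,
    ) -> dict:
        # sampler side: base step msg plus any extra info
        msg = {
            'index': time_stamp,
            'period': period_s,
        }
        if info:
            msg.update(info)
        return msg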
@@ -463,6 +485,8 @@ async def open_sample_stream( cache_key: str | None = None, allow_new_sampler: bool = True, + ensure_is_active: bool = False, + ) -> AsyncIterator[dict[str, float]]: ''' Subscribe to OHLC sampling "step" events: when the time aggregation @@ -505,11 +529,20 @@ async def open_sample_stream( }, ) as (ctx, first) ): - async with ( - ctx.open_stream() as istream, + if ensure_is_active: + assert len(first) > 1 - # TODO: we don't need this task-bcasting right? - # istream.subscribe() as istream, + async with ( + ctx.open_stream( + allow_overruns=True, + ) as istream, + + # TODO: we DO need this task-bcasting so that + # for eg. the history chart update loop eventually + # receceives all backfilling event msgs such that + # the underlying graphics format arrays are + # re-allocated until all history is loaded! + istream.subscribe() as istream, ): yield istream @@ -591,14 +624,14 @@ async def sample_and_broadcast( 'high', 'low', 'close', - 'bar_wap', # can be optionally provided + # 'bar_wap', # can be optionally provided 'volume', ]][-1] = ( o, max(high, last), min(low, last), last, - quote.get('bar_wap', 0), + # quote.get('bar_wap', 0), volume, ) @@ -707,13 +740,21 @@ async def sample_and_broadcast( ) -# a working tick-type-classes template +# tick-type-classes template for all possible "lowest level" events +# that can can be emitted by the "top of book" L1 queues and +# price-matching (with eventual clearing) in a double auction +# market (queuing) system. _tick_groups = { 'clears': {'trade', 'dark_trade', 'last'}, 'bids': {'bid', 'bsize'}, 'asks': {'ask', 'asize'}, } +# XXX alo define the flattened set of all such "fundamental ticks" +# so that it can be used as filter, eg. in the graphics display +# loop to compute running windowed y-ranges B) +_auction_ticks: set[str] = set.union(*_tick_groups.values()) + def frame_ticks( first_quote: dict, diff --git a/piker/data/_sharedmem.py b/piker/data/_sharedmem.py index 2ed1c8922..78f66f634 100644 --- a/piker/data/_sharedmem.py +++ b/piker/data/_sharedmem.py @@ -33,19 +33,10 @@ import tractor from ._util import log -from ._source import base_iohlc_dtype +from ._source import def_iohlcv_fields from .types import Struct -# how much is probably dependent on lifestyle -_secs_in_day = int(60 * 60 * 24) -# we try for a buncha times, but only on a run-every-other-day kinda week. -_days_worth = 16 -_default_size = _days_worth * _secs_in_day -# where to start the new data append index -_rt_buffer_start = int((_days_worth - 1) * _secs_in_day) - - def cuckoff_mantracker(): ''' Disable all ``multiprocessing``` "resource tracking" machinery since @@ -70,7 +61,6 @@ def ensure_running(self): mantracker._resource_tracker = ManTracker() mantracker.register = mantracker._resource_tracker.register mantracker.ensure_running = mantracker._resource_tracker.ensure_running - # ensure_running = mantracker._resource_tracker.ensure_running mantracker.unregister = mantracker._resource_tracker.unregister mantracker.getfd = mantracker._resource_tracker.getfd @@ -168,7 +158,7 @@ def _make_token( to access a shared array. ''' - dtype = base_iohlc_dtype if dtype is None else dtype + dtype = def_iohlcv_fields if dtype is None else dtype return _Token( shm_name=key, shm_first_index_name=key + "_first", @@ -258,7 +248,6 @@ def array(self) -> np.ndarray: # to load an empty array.. 
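A small sketch of using the flattened ``_auction_ticks`` set introduced above as a display-loop filter (the sample quote and its non-auction tick type are hypothetical)::

    # sketch: filter a quote's ticks down to the "fundamental" auction
    # events using the flattened set defined above.
    _tick_groups = {
        'clears': {'trade', 'dark_trade', 'last'},
        'bids': {'bid', 'bsize'},
        'asks': {'ask', 'asize'},
    }
    _auction_ticks: set[str] = set.union(*_tick_groups.values())

    def auction_ticks_only(quote: dict) -> list[dict]:
        return [
            tick for tick in quote.get('ticks', [])
            if tick.get('type') in _auction_ticks
        ]

    quote = {'ticks': [
        {'type': 'ask', 'price': 100.1, 'size': 2},
        {'type': 'tsize', 'size': 1},          # not an auction tick
        {'type': 'trade', 'price': 100.1, 'size': 1},
    ]}
    assert len(auction_ticks_only(quote)) == 2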
if len(a) == 0 and self._post_init: raise RuntimeError('Empty array race condition hit!?') - # breakpoint() return a @@ -323,7 +312,7 @@ def push( field_map: Optional[dict[str, str]] = None, prepend: bool = False, update_first: bool = True, - start: Optional[int] = None, + start: int | None = None, ) -> int: ''' @@ -365,7 +354,11 @@ def push( # tries to access ``.array`` (which due to the index # overlap will be empty). Pretty sure we've fixed it now # but leaving this here as a reminder. - if prepend and update_first and length: + if ( + prepend + and update_first + and length + ): assert index < self._first.value if ( @@ -439,10 +432,10 @@ def flush(self) -> None: def open_shm_array( - - key: Optional[str] = None, - size: int = _default_size, # see above - dtype: Optional[np.dtype] = None, + size: int, + key: str | None = None, + dtype: np.dtype | None = None, + append_start_index: int | None = None, readonly: bool = False, ) -> ShmArray: @@ -507,10 +500,13 @@ def open_shm_array( # ``ShmArray._start.value: int = 0`` and the yet-to-be written # real-time section will start at ``ShmArray.index: int``. - # this sets the index to 3/4 of the length of the buffer - # leaving a "days worth of second samples" for the real-time - # section. - last.value = first.value = _rt_buffer_start + # this sets the index to nearly 2/3rds into the the length of + # the buffer leaving at least a "days worth of second samples" + # for the real-time section. + if append_start_index is None: + append_start_index = round(size * 0.616) + + last.value = first.value = append_start_index shmarr = ShmArray( array, @@ -524,7 +520,6 @@ def open_shm_array( # "unlink" created shm on process teardown by # pushing teardown calls onto actor context stack - stack = tractor.current_actor().lifetime_stack stack.callback(shmarr.close) stack.callback(shmarr.destroy) @@ -619,7 +614,10 @@ def attach_shm_array( def maybe_open_shm_array( key: str, - dtype: Optional[np.dtype] = None, + size: int, + dtype: np.dtype | None = None, + append_start_index: int | None = None, + readonly: bool = False, **kwargs, ) -> tuple[ShmArray, bool]: @@ -640,11 +638,16 @@ def maybe_open_shm_array( use ``attach_shm_array``. ''' - size = kwargs.pop('size', _default_size) try: # see if we already know this key token = _known_tokens[key] - return attach_shm_array(token=token, **kwargs), False + return ( + attach_shm_array( + token=token, + readonly=readonly, + ), + False, + ) except KeyError: log.debug(f"Could not find {key} in shms cache") if dtype: @@ -663,8 +666,16 @@ def maybe_open_shm_array( # Attempt to open a block and expect # to fail if a block has been allocated # on the OS by someone else. - return open_shm_array(key=key, dtype=dtype, **kwargs), True - + return ( + open_shm_array( + key=key, + size=size, + dtype=dtype, + append_start_index=append_start_index, + readonly=readonly, + ), + True, + ) def try_read( array: np.ndarray diff --git a/piker/data/_source.py b/piker/data/_source.py index d1d8be023..fc22d6f4e 100644 --- a/piker/data/_source.py +++ b/piker/data/_source.py @@ -23,26 +23,42 @@ import numpy as np -ohlc_fields = [ - ('time', float), +def_iohlcv_fields: list[tuple[str, type]] = [ + + # YES WE KNOW, this isn't needed in polars but we use it for doing + # ring-buffer like pre/append ops our our `ShmArray` real-time + # numpy-array buffering system such that there is a master index + # that can be used for index-arithmetic when write data to the + # "middle" of the array. See the ``tractor.ipc.shm`` pkg for more + # details. 
+ ('index', int), + + # presume int for epoch stamps since it's most common + # and makes the most sense to avoid float rounding issues. + # TODO: if we want higher reso we should use the new + # ``time.time_ns()`` in python 3.10+ + ('time', int), ('open', float), ('high', float), ('low', float), ('close', float), ('volume', float), - ('bar_wap', float), -] -ohlc_with_index = ohlc_fields.copy() -ohlc_with_index.insert(0, ('index', int)) + # TODO: can we elim this from default field set to save on mem? + # i think only kraken really uses this in terms of what we get from + # their ohlc history API? + # ('bar_wap', float), # shouldn't be default right? +] -# our minimum structured array layout for ohlc data -base_iohlc_dtype = np.dtype(ohlc_with_index) -base_ohlc_dtype = np.dtype(ohlc_fields) +# remove index field +def_ohlcv_fields: list[tuple[str, type]] = def_iohlcv_fields.copy() +def_ohlcv_fields.pop(0) +assert (len(def_iohlcv_fields) - len(def_ohlcv_fields)) == 1 # TODO: for now need to construct this manually for readonly arrays, see # https://github.com/numba/numba/issues/4511 # from numba import from_dtype +# base_ohlc_dtype = np.dtype(def_ohlc_fields) # numba_ohlc_dtype = from_dtype(base_ohlc_dtype) # map time frame "keys" to seconds values diff --git a/piker/data/_timeseries.py b/piker/data/_timeseries.py new file mode 100644 index 000000000..f43e0c73a --- /dev/null +++ b/piker/data/_timeseries.py @@ -0,0 +1,326 @@ +# piker: trading gear for hackers +# Copyright (C) 2018-present Tyler Goodlet (in stewardship of pikers) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Financial time series processing utilities usually +pertaining to OHLCV style sampled data. + +Routines are generally implemented in either ``numpy`` or ``polars`` B) + +''' +from __future__ import annotations +from typing import Literal +from math import ( + ceil, + floor, +) + +import numpy as np +import polars as pl + +from ._sharedmem import ShmArray +from .._profile import ( + Profiler, + pg_profile_enabled, + ms_slower_then, +) + + +def slice_from_time( + arr: np.ndarray, + start_t: float, + stop_t: float, + step: float, # sampler period step-diff + +) -> slice: + ''' + Calculate array indices mapped from a time range and return them in + a slice. + + Given an input array with an epoch `'time'` series entry, calculate + the indices which span the time range and return in a slice. Presume + each `'time'` step increment is uniform and when the time stamp + series contains gaps (the uniform presumption is untrue) use + ``np.searchsorted()`` binary search to look up the appropriate + index. + + ''' + profiler = Profiler( + msg='slice_from_time()', + disabled=not pg_profile_enabled(), + ms_threshold=ms_slower_then, + ) + + times = arr['time'] + t_first = floor(times[0]) + t_last = ceil(times[-1]) + + # the greatest index we can return which slices to the + # end of the input array. 
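Given the default iOHLCV field list above and the now-explicit ``size`` / ``append_start_index`` args on ``open_shm_array()`` from the ``_sharedmem`` hunks, a hedged allocation sketch (the key and sizing numbers are hypothetical)::

    # sketch: allocate a realtime 1s OHLCV shm buffer using the default
    # field list and the reworked explicit-size API; key and sizing are
    # hypothetical.
    import numpy as np

    from piker.data import def_iohlcv_fields
    from piker.data._sharedmem import open_shm_array

    secs_in_day = 60 * 60 * 24
    days_worth = 3

    shm = open_shm_array(
        key='btcusdt.spot.binance.rt',          # hypothetical key
        size=days_worth * secs_in_day,          # 3 days of 1s samples
        dtype=np.dtype(def_iohlcv_fields),
        # leave ~1 day of append room in front of the realtime section
        append_start_index=(days_worth - 1) * secs_in_day,
    )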
+ read_i_max = arr.shape[0] + + # compute (presumed) uniform-time-step index offsets + i_start_t = floor(start_t) + read_i_start = floor(((i_start_t - t_first) // step)) - 1 + + i_stop_t = ceil(stop_t) + + # XXX: edge case -> always set stop index to last in array whenever + # the input stop time is detected to be greater then the equiv time + # stamp at that last entry. + if i_stop_t >= t_last: + read_i_stop = read_i_max + else: + read_i_stop = ceil((i_stop_t - t_first) // step) + 1 + + # always clip outputs to array support + # for read start: + # - never allow a start < the 0 index + # - never allow an end index > the read array len + read_i_start = min( + max(0, read_i_start), + read_i_max - 1, + ) + read_i_stop = max( + 0, + min(read_i_stop, read_i_max), + ) + + # check for larger-then-latest calculated index for given start + # time, in which case we do a binary search for the correct index. + # NOTE: this is usually the result of a time series with time gaps + # where it is expected that each index step maps to a uniform step + # in the time stamp series. + t_iv_start = times[read_i_start] + if ( + t_iv_start > i_start_t + ): + # do a binary search for the best index mapping to ``start_t`` + # given we measured an overshoot using the uniform-time-step + # calculation from above. + + # TODO: once we start caching these per source-array, + # we can just overwrite ``read_i_start`` directly. + new_read_i_start = np.searchsorted( + times, + i_start_t, + side='left', + ) + + # TODO: minimize binary search work as much as possible: + # - cache these remap values which compensate for gaps in the + # uniform time step basis where we calc a later start + # index for the given input ``start_t``. + # - can we shorten the input search sequence by heuristic? 
+ # up_to_arith_start = index[:read_i_start] + + if ( + new_read_i_start <= read_i_start + ): + # t_diff = t_iv_start - start_t + # print( + # f"WE'RE CUTTING OUT TIME - STEP:{step}\n" + # f'start_t:{start_t} -> 0index start_t:{t_iv_start}\n' + # f'diff: {t_diff}\n' + # f'REMAPPED START i: {read_i_start} -> {new_read_i_start}\n' + # ) + read_i_start = new_read_i_start + + t_iv_stop = times[read_i_stop - 1] + if ( + t_iv_stop > i_stop_t + ): + # t_diff = stop_t - t_iv_stop + # print( + # f"WE'RE CUTTING OUT TIME - STEP:{step}\n" + # f'calced iv stop:{t_iv_stop} -> stop_t:{stop_t}\n' + # f'diff: {t_diff}\n' + # # f'SHOULD REMAP STOP: {read_i_start} -> {new_read_i_start}\n' + # ) + new_read_i_stop = np.searchsorted( + times[read_i_start:], + # times, + i_stop_t, + side='right', + ) + + if ( + new_read_i_stop <= read_i_stop + ): + read_i_stop = read_i_start + new_read_i_stop + 1 + + # sanity checks for range size + # samples = (i_stop_t - i_start_t) // step + # index_diff = read_i_stop - read_i_start + 1 + # if index_diff > (samples + 3): + # breakpoint() + + # read-relative indexes: gives a slice where `shm.array[read_slc]` + # will be the data spanning the input time range `start_t` -> + # `stop_t` + read_slc = slice( + int(read_i_start), + int(read_i_stop), + ) + + profiler( + 'slicing complete' + # f'{start_t} -> {abs_slc.start} | {read_slc.start}\n' + # f'{stop_t} -> {abs_slc.stop} | {read_slc.stop}\n' + ) + + # NOTE: if caller needs absolute buffer indices they can + # slice the buffer abs index like so: + # index = arr['index'] + # abs_indx = index[read_slc] + # abs_slc = slice( + # int(abs_indx[0]), + # int(abs_indx[-1]), + # ) + + return read_slc + + +def detect_null_time_gap( + shm: ShmArray, + imargin: int = 1, + +) -> tuple[float, float] | None: + ''' + Detect if there are any zero-epoch stamped rows in + the presumed 'time' field-column. + + Filter to the gap and return a surrounding index range. + + NOTE: for now presumes only ONE gap XD + + ''' + zero_pred: np.ndarray = shm.array['time'] == 0 + zero_t: np.ndarray = shm.array[zero_pred] + if zero_t.size: + istart, iend = zero_t['index'][[0, -1]] + start, end = shm._array['time'][ + [istart - imargin, iend + imargin] + ] + return ( + istart - imargin, + start, + end, + iend + imargin, + ) + + return None + + +t_unit: Literal[ + 'days', + 'hours', + 'minutes', + 'seconds', + 'miliseconds', + 'microseconds', + 'nanoseconds', +] + + +def with_dts( + df: pl.DataFrame, + time_col: str = 'time', +) -> pl.DataFrame: + ''' + Insert datetime (casted) columns to a (presumably) OHLC sampled + time series with an epoch-time column keyed by ``time_col``. + + ''' + return df.with_columns([ + pl.col(time_col).shift(1).suffix('_prev'), + pl.col(time_col).diff().alias('s_diff'), + pl.from_epoch(pl.col(time_col)).alias('dt'), + ]).with_columns([ + pl.from_epoch(pl.col(f'{time_col}_prev')).alias('dt_prev'), + pl.col('dt').diff().alias('dt_diff'), + ]) #.with_columns( + # pl.col('dt').diff().dt.days().alias('days_dt_diff'), + # ) + + +def detect_time_gaps( + df: pl.DataFrame, + + time_col: str = 'time', + # epoch sampling step diff + expect_period: float = 60, + + # datetime diff unit and gap value + # crypto mkts + # gap_dt_unit: t_unit = 'minutes', + # gap_thresh: int = 1, + + # legacy stock mkts + gap_dt_unit: t_unit = 'days', + gap_thresh: int = 2, + +) -> pl.DataFrame: + ''' + Filter to OHLC datums which contain sample step gaps. + + For eg. legacy markets which have venue close gaps and/or + actual missing data segments. 
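A hedged usage sketch of the new ``polars`` gap helpers defined above, assuming the module path ``piker.data._timeseries`` from this new file and whatever ``polars`` version this branch pins; the tiny frame is synthetic::

    # sketch: flag sampling gaps in a synthetic 1m OHLCV frame using the
    # helpers from the new `piker/data/_timeseries.py` module above.
    import polars as pl

    from piker.data._timeseries import detect_time_gaps

    df = pl.DataFrame({
        # note the ~24hr jump after the 3rd row
        'time': [60, 120, 180, 86_580],
        'open': [1.0, 1.0, 1.0, 1.0],
        'high': [1.0, 1.0, 1.0, 1.0],
        'low': [1.0, 1.0, 1.0, 1.0],
        'close': [1.0, 1.0, 1.0, 1.0],
        'volume': [0.0, 0.0, 0.0, 0.0],
    })

    gaps = detect_time_gaps(
        df,
        expect_period=60,
        gap_dt_unit='hours',   # crypto-style venue: flag anything > 1hr
        gap_thresh=1,
    )
    assert gaps.height == 1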
+ + ''' + dt_gap_col: str = f'{gap_dt_unit}_diff' + return with_dts( + df + ).filter( + pl.col('s_diff').abs() > expect_period + ).with_columns( + getattr( + pl.col('dt_diff').dt, + gap_dt_unit, # NOTE: must be valid ``Expr.dt.`` + )().alias(dt_gap_col) + ).filter( + pl.col(dt_gap_col).abs() > gap_thresh + ) + + +def detect_price_gaps( + df: pl.DataFrame, + gt_multiplier: float = 2., + price_fields: list[str] = ['high', 'low'], + +) -> pl.DataFrame: + ''' + Detect gaps in clearing price over an OHLC series. + + 2 types of gaps generally exist; up gaps and down gaps: + + - UP gap: when any next sample's lo price is strictly greater + then the current sample's hi price. + + - DOWN gap: when any next sample's hi price is strictly + less then the current samples lo price. + + ''' + # return df.filter( + # pl.col('high') - ) > expect_period, + # ).select([ + # pl.dt.datetime(pl.col(time_col).shift(1)).suffix('_previous'), + # pl.all(), + # ]).select([ + # pl.all(), + # (pl.col(time_col) - pl.col(f'{time_col}_previous')).alias('diff'), + # ]) + ... diff --git a/piker/data/cli.py b/piker/data/cli.py deleted file mode 100644 index 59db1037e..000000000 --- a/piker/data/cli.py +++ /dev/null @@ -1,255 +0,0 @@ -# piker: trading gear for hackers -# Copyright (C) 2018-present Tyler Goodlet (in stewardship of piker0) - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -""" -marketstore cli. - -""" -import trio -import tractor -import click - -from ..service.marketstore import ( - # get_client, - # stream_quotes, - ingest_quote_stream, - # _url, - # _tick_tbk_ids, - # mk_tbk, -) -from ..cli import cli -from .. import watchlists as wl -from ._util import ( - log, -) - - -@cli.command() -@click.option( - '--url', - default='ws://localhost:5993/ws', - help='HTTP URL of marketstore instance' -) -@click.argument('names', nargs=-1) -@click.pass_obj -def ms_stream( - config: dict, - names: list[str], - url: str, -) -> None: - ''' - Connect to a marketstore time bucket stream for (a set of) symbols(s) - and print to console. - - ''' - async def main(): - # async for quote in stream_quotes(symbols=names): - # log.info(f"Received quote:\n{quote}") - ... - - trio.run(main) - - -# @cli.command() -# @click.option( -# '--url', -# default=_url, -# help='HTTP URL of marketstore instance' -# ) -# @click.argument('names', nargs=-1) -# @click.pass_obj -# def ms_destroy(config: dict, names: list[str], url: str) -> None: -# """Destroy symbol entries in the local marketstore instance. -# """ -# async def main(): -# nonlocal names -# async with get_client(url) as client: -# -# if not names: -# names = await client.list_symbols() -# -# # default is to wipe db entirely. 
-# answer = input( -# "This will entirely wipe you local marketstore db @ " -# f"{url} of the following symbols:\n {pformat(names)}" -# "\n\nDelete [N/y]?\n") -# -# if answer == 'y': -# for sym in names: -# # tbk = _tick_tbk.format(sym) -# tbk = tuple(sym, *_tick_tbk_ids) -# print(f"Destroying {tbk}..") -# await client.destroy(mk_tbk(tbk)) -# else: -# print("Nothing deleted.") -# -# tractor.run(main) - - -@cli.command() -@click.option( - '--tsdb_host', - default='localhost' -) -@click.option( - '--tsdb_port', - default=5993 -) -@click.argument('symbols', nargs=-1) -@click.pass_obj -def storesh( - config, - tl, - host, - port, - symbols: list[str], -): - ''' - Start an IPython shell ready to query the local marketstore db. - - ''' - from piker.data.marketstore import open_tsdb_client - from piker.service import open_piker_runtime - - async def main(): - nonlocal symbols - - async with open_piker_runtime( - 'storesh', - enable_modules=['piker.service._ahab'], - ): - symbol = symbols[0] - - async with open_tsdb_client(symbol): - # TODO: ask if user wants to write history for detected - # available shm buffers? - from tractor.trionics import ipython_embed - await ipython_embed() - - trio.run(main) - - -@cli.command() -@click.option( - '--host', - default='localhost' -) -@click.option( - '--port', - default=5993 -) -@click.option( - '--delete', - '-d', - is_flag=True, - help='Delete history (1 Min) for symbol(s)', -) -@click.argument('symbols', nargs=-1) -@click.pass_obj -def storage( - config, - host, - port, - symbols: list[str], - delete: bool, - -): - ''' - Start an IPython shell ready to query the local marketstore db. - - ''' - from piker.service.marketstore import open_tsdb_client - from piker.service import open_piker_runtime - - async def main(): - nonlocal symbols - - async with open_piker_runtime( - 'tsdb_storage', - enable_modules=['piker.service._ahab'], - ): - symbol = symbols[0] - async with open_tsdb_client(symbol) as storage: - if delete: - for fqme in symbols: - syms = await storage.client.list_symbols() - - resp60s = await storage.delete_ts(fqme, 60) - - msgish = resp60s.ListFields()[0][1] - if 'error' in str(msgish): - - # TODO: MEGA LOL, apparently the symbols don't - # flush out until you refresh something or other - # (maybe the WALFILE)... #lelandorlulzone, classic - # alpaca(Rtm) design here .. - # well, if we ever can make this work we - # probably want to dogsplain the real reason - # for the delete errurz..llululu - if fqme not in syms: - log.error(f'Pair {fqme} dne in DB') - - log.error(f'Deletion error: {fqme}\n{msgish}') - - resp1s = await storage.delete_ts(fqme, 1) - msgish = resp1s.ListFields()[0][1] - if 'error' in str(msgish): - log.error(f'Deletion error: {fqme}\n{msgish}') - - trio.run(main) - - -@cli.command() -@click.option('--test-file', '-t', help='Test quote stream file') -@click.option('--tl', is_flag=True, help='Enable tractor logging') -@click.argument('name', nargs=1, required=True) -@click.pass_obj -def ingest(config, name, test_file, tl): - ''' - Ingest real-time broker quotes and ticks to a marketstore instance. 
- - ''' - # global opts - loglevel = config['loglevel'] - tractorloglevel = config['tractorloglevel'] - # log = config['log'] - - watchlist_from_file = wl.ensure_watchlists(config['wl_path']) - watchlists = wl.merge_watchlist(watchlist_from_file, wl._builtins) - symbols = watchlists[name] - - grouped_syms = {} - for sym in symbols: - symbol, _, provider = sym.rpartition('.') - if provider not in grouped_syms: - grouped_syms[provider] = [] - - grouped_syms[provider].append(symbol) - - async def entry_point(): - async with tractor.open_nursery() as n: - for provider, symbols in grouped_syms.items(): - await n.run_in_actor( - ingest_quote_stream, - name='ingest_marketstore', - symbols=symbols, - brokername=provider, - tries=1, - actorloglevel=loglevel, - loglevel=tractorloglevel - ) - - tractor.run(entry_point) diff --git a/piker/data/feed.py b/piker/data/feed.py index 1714cf193..ea7f360b9 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -39,7 +39,6 @@ Optional, Awaitable, Sequence, - Union, ) import trio @@ -145,8 +144,7 @@ def get_subs( key: str, ) -> set[ tuple[ - Union[tractor.MsgStream, trio.MemorySendChannel], - # tractor.Context, + tractor.MsgStream | trio.MemorySendChannel, float | None, # tick throttle in Hz ] ]: @@ -161,7 +159,6 @@ def add_subs( key: str, subs: set[tuple[ tractor.MsgStream | trio.MemorySendChannel, - # tractor.Context, float | None, # tick throttle in Hz ]], ) -> set[tuple]: @@ -169,7 +166,7 @@ def add_subs( Add a ``set`` of consumer subscription entries for the given key. ''' - _subs = self._subscribers[key] + _subs: set[tuple] = self._subscribers[key] _subs.update(subs) return _subs @@ -183,7 +180,7 @@ def remove_subs( Remove a ``set`` of consumer subscription entries for key. ''' - _subs = self.get_subs(key) + _subs: set[tuple] = self.get_subs(key) _subs.difference_update(subs) return _subs @@ -193,7 +190,7 @@ def remove_subs( def get_feed_bus( brokername: str, - nursery: Optional[trio.Nursery] = None, + nursery: trio.Nursery | None = None, ) -> _FeedsBus: ''' @@ -226,6 +223,7 @@ async def allocate_persistent_feed( loglevel: str, start_stream: bool = True, + init_timeout: float = 616, task_status: TaskStatus[FeedInit] = trio.TASK_STATUS_IGNORED, @@ -267,22 +265,23 @@ async def allocate_persistent_feed( # TODO: probably make a struct msg type for this as well # since eventually we do want to have more efficient IPC.. first_quote: dict[str, Any] - ( - init_msgs, - first_quote, - ) = await bus.nursery.start( - partial( - mod.stream_quotes, - send_chan=send, - feed_is_live=feed_is_live, - - # NOTE / TODO: eventualy we may support providing more then - # one input here such that a datad daemon can multiplex - # multiple live feeds from one task, instead of getting - # a new request (and thus new task) for each subscription. - symbols=[symstr], + with trio.fail_after(init_timeout): + ( + init_msgs, + first_quote, + ) = await bus.nursery.start( + partial( + mod.stream_quotes, + send_chan=send, + feed_is_live=feed_is_live, + + # NOTE / TODO: eventualy we may support providing more then + # one input here such that a datad daemon can multiplex + # multiple live feeds from one task, instead of getting + # a new request (and thus new task) for each subscription. + symbols=[symstr], + ) ) - ) # TODO: this is indexed by symbol for now since we've planned (for # some time) to expect backends to handle single @@ -340,7 +339,7 @@ async def allocate_persistent_feed( # yield back control to starting nursery once we receive either # some history or a real-time quote. 
- log.info(f'waiting on history to load: {fqme}') + log.info(f'loading OHLCV history: {fqme}') await some_data_ready.wait() flume = Flume( @@ -370,7 +369,8 @@ async def allocate_persistent_feed( mkt.bs_fqme: flume, }) - # signal the ``open_feed_bus()`` caller task to continue + # signal the ``open_feed_bus()`` caller task to continue since + # we now have (some) history pushed to the shm buffer. task_status.started(init) if not start_stream: @@ -718,7 +718,7 @@ async def install_brokerd_search( async with portal.open_context( brokermod.open_symbol_search - ) as (ctx, cache): + ) as (ctx, _): # shield here since we expect the search rpc to be # cancellable by the user as they see fit. @@ -907,6 +907,7 @@ async def open_feed( for fqme, flume_msg in flumes_msg_dict.items(): flume = Flume.from_msg(flume_msg) + # assert flume.mkt.fqme == fqme feed.flumes[fqme] = flume diff --git a/piker/data/history.py b/piker/data/history.py index ebfe8c65d..99fd425df 100644 --- a/piker/data/history.py +++ b/piker/data/history.py @@ -19,41 +19,38 @@ ''' from __future__ import annotations -from collections import ( - Counter, -) +# from collections import ( +# Counter, +# ) from datetime import datetime from functools import partial -import time +# import time from types import ModuleType from typing import ( Callable, - Optional, TYPE_CHECKING, ) import trio from trio_typing import TaskStatus import tractor -import pendulum +from pendulum import ( + Duration, + from_timestamp, +) import numpy as np -from .. import config from ..accounting import ( MktPair, ) from ._util import ( log, ) -from ..service import ( - check_for_service, -) from ._sharedmem import ( maybe_open_shm_array, ShmArray, - _secs_in_day, ) -from ._source import base_iohlc_dtype +from ._source import def_iohlcv_fields from ._sampling import ( open_sample_stream, ) @@ -62,177 +59,180 @@ ) if TYPE_CHECKING: - from ..service.marketstore import Storage + from bidict import bidict + from ..service.marketstore import StorageClient from .feed import _FeedsBus +# `ShmArray` buffer sizing configuration: +_mins_in_day = int(60 * 24) +# how much is probably dependent on lifestyle +# but we reco a buncha times (but only on a +# run-every-other-day kinda week). +_secs_in_day = int(60 * _mins_in_day) +_days_in_week: int = 7 + +_days_worth: int = 3 +_default_hist_size: int = 6 * 365 * _mins_in_day +_hist_buffer_start = int( + _default_hist_size - round(7 * _mins_in_day) +) + +_default_rt_size: int = _days_worth * _secs_in_day +# NOTE: start the append index in rt buffer such that 1 day's worth +# can be appenened before overrun. +_rt_buffer_start = int((_days_worth - 1) * _secs_in_day) + + def diff_history( array: np.ndarray, - timeframe: int, - start_dt: datetime, - end_dt: datetime, - last_tsdb_dt: datetime | None = None + append_until_dt: datetime | None = None, + prepend_until_dt: datetime | None = None, ) -> np.ndarray: # no diffing with tsdb dt index possible.. 
- if last_tsdb_dt is None: + if ( + prepend_until_dt is None + and append_until_dt is None + ): return array - time = array['time'] - return array[time > last_tsdb_dt.timestamp()] + times = array['time'] + if append_until_dt: + return array[times < append_until_dt.timestamp()] + else: + return array[times >= prepend_until_dt.timestamp()] -async def start_backfill( - mod: ModuleType, - mkt: MktPair, - shm: ShmArray, - timeframe: float, - sampler_stream: tractor.MsgStream, - feed_is_live: trio.Event, - last_tsdb_dt: Optional[datetime] = None, - storage: Optional[Storage] = None, - write_tsdb: bool = True, - tsdb_is_up: bool = False, +async def shm_push_in_between( + shm: ShmArray, + to_push: np.ndarray, + prepend_index: int, - task_status: TaskStatus[tuple] = trio.TASK_STATUS_IGNORED, + update_start_on_prepend: bool = False, ) -> int: + shm.push( + to_push, + prepend=True, + + # XXX: only update the ._first index if no tsdb + # segment was previously prepended by the + # parent task. + update_first=update_start_on_prepend, + + # XXX: only prepend from a manually calculated shm + # index if there was already a tsdb history + # segment prepended (since then the + # ._first.value is going to be wayyy in the + # past!) + start=( + prepend_index + if not update_start_on_prepend + else None + ), + ) + # XXX: extremely important, there can be no checkpoints + # in the block above to avoid entering new ``frames`` + # values while we're pipelining the current ones to + # memory... + array = shm.array + zeros = array[array['low'] == 0] + if ( + 0 < zeros.size < 1000 + ): + tractor.breakpoint() - hist: Callable[ - [int, datetime, datetime], - tuple[np.ndarray, str] - ] - config: dict[str, int] - async with mod.open_history_client( - mkt, - ) as (hist, config): - log.info(f'{mod} history client returned backfill config: {config}') - - # get latest query's worth of history all the way - # back to what is recorded in the tsdb - array, start_dt, end_dt = await hist( - timeframe, - end_dt=None, - ) - times = array['time'] - # sample period step size in seconds - step_size_s = ( - pendulum.from_timestamp(times[-1]) - - pendulum.from_timestamp(times[-2]) - ).seconds - - # if the market is open (aka we have a live feed) but the - # history sample step index seems off we report the surrounding - # data and drop into a bp. this case shouldn't really ever - # happen if we're doing history retrieval correctly. - if ( - step_size_s == 60 - and feed_is_live.is_set() - ): - inow = round(time.time()) - diff = inow - times[-1] - if abs(diff) > 60: - surr = array[-6:] - diff_in_mins = round(diff/60., ndigits=2) - log.warning( - f'STEP ERROR `{mkt.fqme}` for period {step_size_s}s:\n' - f'Off by `{diff}` seconds (or `{diff_in_mins}` mins)\n' - 'Surrounding 6 time stamps:\n' - f'{list(surr["time"])}\n' - 'Here is surrounding 6 samples:\n' - f'{surr}\nn' - ) +async def start_backfill( + get_hist, + mod: ModuleType, + mkt: MktPair, + shm: ShmArray, + timeframe: float, - # uncomment this for a hacker who wants to investigate - # this case manually.. 
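To make the new ``diff_history()`` semantics concrete, a small sketch (synthetic frame, hypothetical cutoff) of trimming a pulled frame so only datums newer than the point we are backfilling until get prepended::

    # sketch: drop rows older than the backfill cutoff (eg. the tsdb's
    # latest stored datum) before prepending a pulled frame into shm.
    import numpy as np
    from pendulum import from_timestamp

    from piker.data.history import diff_history

    frame = np.array(
        [(i, 60 * i, 1., 1., 1., 1., 0.) for i in range(10)],
        dtype=[
            ('index', int), ('time', int), ('open', float), ('high', float),
            ('low', float), ('close', float), ('volume', float),
        ],
    )
    cutoff = from_timestamp(300)     # hypothetical `backfill_until_dt`
    to_push = diff_history(
        frame,
        prepend_until_dt=cutoff,
    )
    assert (to_push['time'] >= 300).all()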
- # await tractor.breakpoint() + backfill_from_shm_index: int, + backfill_from_dt: datetime, - # frame's worth of sample-period-steps, in seconds - frame_size_s = len(array) * step_size_s + sampler_stream: tractor.MsgStream, - to_push = diff_history( - array, - timeframe, - start_dt, - end_dt, - last_tsdb_dt=last_tsdb_dt, - ) + backfill_until_dt: datetime | None = None, + storage: StorageClient | None = None, - log.info(f'Pushing {to_push.size} to shm!') - shm.push(to_push, prepend=True) + write_tsdb: bool = True, - # TODO: *** THIS IS A BUG *** - # we need to only broadcast to subscribers for this fqme.. - # otherwise all fsps get reset on every chart.. - await sampler_stream.send('broadcast_all') + task_status: TaskStatus[tuple] = trio.TASK_STATUS_IGNORED, - # signal that backfilling to tsdb's end datum is complete - bf_done = trio.Event() +) -> int: # let caller unblock and deliver latest history frame - task_status.started(( - start_dt, - end_dt, - bf_done, - )) + # and use to signal that backfilling the shm gap until + # the tsdb end is complete! + bf_done = trio.Event() + task_status.started(bf_done) # based on the sample step size, maybe load a certain amount history - if last_tsdb_dt is None: - if step_size_s not in (1, 60): - raise ValueError( - '`piker` only needs to support 1m and 1s sampling ' - 'but ur api is trying to deliver a longer ' - f'timeframe of {step_size_s} seconds..\n' - 'So yuh.. dun do dat brudder.' - ) + update_start_on_prepend: bool = False + if backfill_until_dt is None: + # TODO: drop this right and just expose the backfill + # limits inside a [storage] section in conf.toml? # when no tsdb "last datum" is provided, we just load # some near-term history. + # periods = { + # 1: {'days': 1}, + # 60: {'days': 14}, + # } + + # do a decently sized backfill and load it into storage. periods = { - 1: {'days': 1}, - 60: {'days': 14}, + 1: {'days': 6}, + 60: {'years': 6}, } + period_duration: int = periods[timeframe] - if tsdb_is_up: - # do a decently sized backfill and load it into storage. - periods = { - 1: {'days': 6}, - 60: {'years': 6}, - } - - period_duration = periods[step_size_s] + update_start_on_prepend = True # NOTE: manually set the "latest" datetime which we intend to # backfill history "until" so as to adhere to the history # settings above when the tsdb is detected as being empty. - last_tsdb_dt = start_dt.subtract(**period_duration) + backfill_until_dt = backfill_from_dt.subtract(**period_duration) + + # TODO: can we drop this? without conc i don't think this + # is necessary any more? # configure async query throttling # rate = config.get('rate', 1) # XXX: legacy from ``trimeter`` code but unsupported now. # erlangs = config.get('erlangs', 1) - # avoid duplicate history frames with a set of datetime frame # starts and associated counts of how many duplicates we see # per time stamp. - starts: Counter[datetime] = Counter() + # starts: Counter[datetime] = Counter() + + # conduct "backward history gap filling" where we push to + # the shm buffer until we have history back until the + # latest entry loaded from the tsdb's table B) + last_start_dt: datetime = backfill_from_dt + next_prepend_index: int = backfill_from_shm_index + + while last_start_dt > backfill_until_dt: - # inline sequential loop where we simply pass the - # last retrieved start dt to the next request as - # it's end dt. 
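# NOTE: hedged sketch, not patch content. The shape of the backward
# "gap filling" loop above: each request ends where the previous
# frame started, walking back in time until the tsdb's latest datum
# (or the configured limit) is reached. ``get_hist`` and
# ``push_frame`` are stand-ins for the provider endpoint and the
# ``shm_push_in_between()`` call.
async def fill_backwards(
    get_hist,
    backfill_from_dt,   # newest datum already pushed to shm
    backfill_until_dt,  # oldest datum wanted
    push_frame,
    timeframe: int = 60,
):
    last_start_dt = backfill_from_dt
    while last_start_dt > backfill_until_dt:
        frame, next_start_dt, _end_dt = await get_hist(
            timeframe,
            end_dt=last_start_dt,
        )
        push_frame(frame)
        last_start_dt = next_start_dt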
- while end_dt > last_tsdb_dt: log.debug( - f'Requesting {step_size_s}s frame ending in {start_dt}' + f'Requesting {timeframe}s frame ending in {last_start_dt}' ) try: - array, next_start_dt, end_dt = await hist( + ( + array, + next_start_dt, + next_end_dt, + ) = await get_hist( timeframe, - end_dt=start_dt, + end_dt=last_start_dt, ) # broker says there never was or is no more history to pull @@ -247,33 +247,36 @@ async def start_backfill( # request loop until the condition is resolved? return - if ( - next_start_dt in starts - and starts[next_start_dt] <= 6 - ): - start_dt = min(starts) - log.warning( - f"{mkt.fqme}: skipping duplicate frame @ {next_start_dt}" - ) - starts[start_dt] += 1 - continue - - elif starts[next_start_dt] > 6: - log.warning( - f'NO-MORE-DATA: backend {mod.name} before {next_start_dt}?' - ) - return - - # only update new start point if not-yet-seen - start_dt = next_start_dt - starts[start_dt] += 1 - - assert array['time'][0] == start_dt.timestamp() - - diff = end_dt - start_dt + # TODO: drop this? see todo above.. + # if ( + # next_start_dt in starts + # and starts[next_start_dt] <= 6 + # ): + # start_dt = min(starts) + # log.warning( + # f"{mkt.fqme}: skipping duplicate frame @ {next_start_dt}" + # ) + # starts[start_dt] += 1 + # await tractor.breakpoint() + # continue + + # elif starts[next_start_dt] > 6: + # log.warning( + # f'NO-MORE-DATA: backend {mod.name} before {next_start_dt}?' + # ) + # return + + # # only update new start point if not-yet-seen + # starts[next_start_dt] += 1 + + assert array['time'][0] == next_start_dt.timestamp() + + diff = last_start_dt - next_start_dt frame_time_diff_s = diff.seconds - expected_frame_size_s = frame_size_s + step_size_s + # frame's worth of sample-period-steps, in seconds + frame_size_s = len(array) * timeframe + expected_frame_size_s = frame_size_s + timeframe if frame_time_diff_s > expected_frame_size_s: # XXX: query result includes a start point prior to our @@ -281,49 +284,86 @@ async def start_backfill( # history gap (eg. market closed period, outage, etc.) # so just report it to console for now. log.warning( - f'History frame ending @ {end_dt} appears to have a gap:\n' + f'History frame ending @ {last_start_dt} appears to have a gap:\n' f'{diff} ~= {frame_time_diff_s} seconds' ) to_push = diff_history( array, - timeframe, - start_dt, - end_dt, - last_tsdb_dt=last_tsdb_dt, + prepend_until_dt=backfill_until_dt, ) ln = len(to_push) if ln: - log.info(f'{ln} bars for {start_dt} -> {end_dt}') + log.info(f'{ln} bars for {next_start_dt} -> {last_start_dt}') else: log.warning( - f'{ln} BARS TO PUSH after diff?!: {start_dt} -> {end_dt}' + '0 BARS TO PUSH after diff!?\n' + f'{next_start_dt} -> {last_start_dt}' ) - # bail gracefully on shm allocation overrun/full condition + # bail gracefully on shm allocation overrun/full + # condition try: - shm.push(to_push, prepend=True) - except ValueError: - log.info( - f'Shm buffer overrun on: {start_dt} -> {end_dt}?' + await shm_push_in_between( + shm, + to_push, + prepend_index=next_prepend_index, + update_start_on_prepend=update_start_on_prepend, + ) + await sampler_stream.send({ + 'broadcast_all': { + 'backfilling': (mkt.fqme, timeframe), + }, + }) + + # decrement next prepend point + next_prepend_index = next_prepend_index - ln + last_start_dt = next_start_dt + + except ValueError as ve: + _ve = ve + log.error( + f'Shm prepend OVERRUN on: {next_start_dt} -> {last_start_dt}?' 
+ ) + + if next_prepend_index < ln: + log.warning( + f'Shm buffer can only hold {next_prepend_index} more rows..\n' + f'Appending those from recent {ln}-sized frame, no more!' + ) + + to_push = to_push[-next_prepend_index + 1:] + await shm_push_in_between( + shm, + to_push, + prepend_index=next_prepend_index, + update_start_on_prepend=update_start_on_prepend, ) + await sampler_stream.send({ + 'broadcast_all': { + 'backfilling': (mkt.fqme, timeframe), + }, + }) + # can't push the entire frame? so # push only the amount that can fit.. break log.info( f'Shm pushed {ln} frame:\n' - f'{start_dt} -> {end_dt}' + f'{next_start_dt} -> {last_start_dt}' ) + # FINALLY, maybe write immediately to the tsdb backend for + # long-term storage. if ( storage is not None and write_tsdb ): log.info( f'Writing {ln} frame to storage:\n' - f'{start_dt} -> {end_dt}' + f'{next_start_dt} -> {last_start_dt}' ) if mkt.dst.atype not in {'crypto', 'crypto_currency'}: @@ -336,68 +376,229 @@ async def start_backfill( else: col_sym_key: str = mkt.get_fqme(delim_char='') + # TODO: implement parquet append!? await storage.write_ohlcv( col_sym_key, - to_push, + shm.array, timeframe, ) + else: + # finally filled gap + log.info( + f'Finished filling gap to tsdb start @ {backfill_until_dt}!' + ) + # conduct tsdb timestamp gap detection and backfill any + # seemingly missing sequence segments.. + # TODO: ideally these never exist but somehow it seems + # sometimes we're writing zero-ed segments on certain + # (teardown) cases? + from ._timeseries import detect_null_time_gap + + gap_indices: tuple | None = detect_null_time_gap(shm) + while gap_indices: + ( + istart, + start, + end, + iend, + ) = gap_indices + + start_dt = from_timestamp(start) + end_dt = from_timestamp(end) + ( + array, + next_start_dt, + next_end_dt, + ) = await get_hist( + timeframe, + start_dt=start_dt, + end_dt=end_dt, + ) - # TODO: can we only trigger this if the respective - # history in "in view"?!? + # XXX TODO: pretty sure if i plot tsla, btcusdt.binance + # and mnq.cme.ib this causes a Qt crash XXDDD + + # make sure we don't overrun the buffer start + len_to_push: int = min(iend, array.size) + to_push: np.ndarray = array[-len_to_push:] + await shm_push_in_between( + shm, + to_push, + prepend_index=iend, + update_start_on_prepend=False, + ) + + # TODO: UI side needs IPC event to update.. + # - make sure the UI actually always handles + # this update! + # - remember that in the display side, only refersh this + # if the respective history is actually "in view". + # loop + await sampler_stream.send({ + 'broadcast_all': { + 'backfilling': (mkt.fqme, timeframe), + }, + }) + gap_indices: tuple | None = detect_null_time_gap(shm) # XXX: extremely important, there can be no checkpoints # in the block above to avoid entering new ``frames`` # values while we're pipelining the current ones to # memory... - await sampler_stream.send('broadcast_all') + # await sampler_stream.send('broadcast_all') # short-circuit (for now) bf_done.set() -async def basic_backfill( - bus: _FeedsBus, - mod: ModuleType, - mkt: MktPair, - shms: dict[int, ShmArray], - sampler_stream: tractor.MsgStream, - feed_is_live: trio.Event, +async def back_load_from_tsdb( + storemod: ModuleType, + storage: StorageClient, -) -> None: + fqme: str, - # do a legacy incremental backfill from the provider. 
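# NOTE: hedged sketch, not patch content. The overrun-trim policy
# used above when a frame no longer fits in the remaining prepend
# space of the shm buffer: keep only the most recent rows that still
# fit (mirrors the ``to_push[-next_prepend_index + 1:]`` slice).
import numpy as np

def trim_to_fit(frame: np.ndarray, rows_left: int) -> np.ndarray:
    if len(frame) > rows_left:
        return frame[-rows_left + 1:]
    return frame

assert len(trim_to_fit(np.arange(10), rows_left=4)) == 3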
- log.info('No TSDB (marketstored) found, doing basic backfill..') + tsdb_history: np.ndarray, - # start history backfill task ``backfill_bars()`` is - # a required backend func this must block until shm is - # filled with first set of ohlc bars - for timeframe, shm in shms.items(): - try: - await bus.nursery.start( - partial( - start_backfill, - mod, - mkt, - shm, - timeframe, - sampler_stream, - feed_is_live, - ) - ) - except DataUnavailable: - # XXX: timeframe not supported for backend - continue + last_tsdb_dt: datetime, + latest_start_dt: datetime, + latest_end_dt: datetime, + + bf_done: trio.Event, + + timeframe: int, + shm: ShmArray, +): + assert len(tsdb_history) + + # sync to backend history task's query/load completion + # if bf_done: + # await bf_done.wait() + + # TODO: eventually it'd be nice to not require a shm array/buffer + # to accomplish this.. maybe we can do some kind of tsdb direct to + # graphics format eventually in a child-actor? + if storemod.name == 'nativedb': + return + + await tractor.breakpoint() + assert shm._first.value == 0 + + array = shm.array + + # if timeframe == 1: + # times = shm.array['time'] + # assert (times[1] - times[0]) == 1 + + if len(array): + shm_last_dt = from_timestamp( + shm.array[0]['time'] + ) + else: + shm_last_dt = None + + if last_tsdb_dt: + assert shm_last_dt >= last_tsdb_dt + + # do diff against start index of last frame of history and only + # fill in an amount of datums from tsdb allows for most recent + # to be loaded into mem *before* tsdb data. + if ( + last_tsdb_dt + and latest_start_dt + ): + backfilled_size_s = ( + latest_start_dt - last_tsdb_dt + ).seconds + # if the shm buffer len is not large enough to contain + # all missing data between the most recent backend-queried frame + # and the most recent dt-index in the db we warn that we only + # want to load a portion of the next tsdb query to fill that + # space. + log.info( + f'{backfilled_size_s} seconds worth of {timeframe}s loaded' + ) + + # Load TSDB history into shm buffer (for display) if there is + # remaining buffer space. + + time_key: str = 'time' + if getattr(storemod, 'ohlc_key_map', False): + keymap: bidict = storemod.ohlc_key_map + time_key: str = keymap.inverse['time'] + + # if ( + # not len(tsdb_history) + # ): + # return + + tsdb_last_frame_start: datetime = last_tsdb_dt + # load as much from storage into shm possible (depends on + # user's shm size settings). + while shm._first.value > 0: + + tsdb_history = await storage.read_ohlcv( + fqme, + timeframe=timeframe, + end=tsdb_last_frame_start, + ) + + # # empty query + # if not len(tsdb_history): + # break + + next_start = tsdb_history[time_key][0] + if next_start >= tsdb_last_frame_start: + # no earlier data detected + break + + else: + tsdb_last_frame_start = next_start + + # TODO: see if there's faster multi-field reads: + # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields + # re-index with a `time` and index field + prepend_start = shm._first.value + + to_push = tsdb_history[-prepend_start:] + shm.push( + to_push, + + # insert the history pre a "days worth" of samples + # to leave some real-time buffer space at the end. + prepend=True, + # update_first=False, + # start=prepend_start, + field_map=storemod.ohlc_key_map, + ) + + log.info(f'Loaded {to_push.shape} datums from storage') + tsdb_last_frame_start = tsdb_history[time_key][0] + + # manually trigger step update to update charts/fsps + # which need an incremental update. 
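# NOTE: hedged sketch, not patch content. How a backend's
# ``ohlc_key_map`` bidict resolves the ``time_key`` above: tsdb
# column name to shm field name one way, ``.inverse`` the other way;
# this exact map ships with the marketstore backend later in this
# patch.
from bidict import bidict

ohlc_key_map = bidict({
    'Epoch': 'time',
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Volume': 'volume',
})
assert ohlc_key_map['Epoch'] == 'time'
assert ohlc_key_map.inverse['time'] == 'Epoch'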
+ # NOTE: the way this works is super duper + # un-intuitive right now: + # - the broadcaster fires a msg to the fsp subsystem. + # - fsp subsys then checks for a sample step diff and + # possibly recomputes prepended history. + # - the fsp then sends back to the parent actor + # (usually a chart showing graphics for said fsp) + # which tells the chart to conduct a manual full + # graphics loop cycle. + # await sampler_stream.send('broadcast_all') async def tsdb_backfill( mod: ModuleType, - marketstore: ModuleType, - bus: _FeedsBus, - storage: Storage, + storemod: ModuleType, + tn: trio.Nursery, + + storage: StorageClient, mkt: MktPair, - shms: dict[int, ShmArray], + shm: ShmArray, + timeframe: float, + sampler_stream: tractor.MsgStream, - feed_is_live: trio.Event, task_status: TaskStatus[ tuple[ShmArray, ShmArray] @@ -405,60 +606,142 @@ async def tsdb_backfill( ) -> None: - # TODO: this should be used verbatim for the pure - # shm backfiller approach below. - dts_per_tf: dict[int, datetime] = {} - fqme: str = mkt.fqme + get_hist: Callable[ + [int, datetime, datetime], + tuple[np.ndarray, str] + ] + config: dict[str, int] + async with mod.open_history_client( + mkt, + ) as (get_hist, config): + log.info(f'{mod} history client returned backfill config: {config}') + + # get latest query's worth of history all the way + # back to what is recorded in the tsdb + try: + array, mr_start_dt, mr_end_dt = await get_hist( + timeframe, + end_dt=None, + ) + + # XXX: timeframe not supported for backend (since + # above exception type), terminate immediately since + # there's no backfilling possible. + except DataUnavailable: + task_status.started() + return + + times: np.ndarray = array['time'] + + # sample period step size in seconds + step_size_s = ( + from_timestamp(times[-1]) + - from_timestamp(times[-2]) + ).seconds + + if step_size_s not in (1, 60): + log.error(f'Last 2 sample period is off!? -> {step_size_s}') + step_size_s = ( + from_timestamp(times[-2]) + - from_timestamp(times[-3]) + ).seconds + + # NOTE: on the first history, most recent history + # frame we PREPEND from the current shm ._last index + # and thus a gap between the earliest datum loaded here + # and the latest loaded from the tsdb may exist! + log.info(f'Pushing {array.size} to shm!') + shm.push( + array, + prepend=True, # append on first frame + ) + backfill_gap_from_shm_index: int = shm._first.value + 1 + + # tell parent task to continue + task_status.started() - # start history anal and load missing new data via backend. - for timeframe, shm in shms.items(): # loads a (large) frame of data from the tsdb depending - # on the db's query size limit. - tsdb_history, first_tsdb_dt, last_tsdb_dt = await storage.load( + # on the db's query size limit; our "nativedb" (using + # parquet) generally can load the entire history into mem + # but if not then below the remaining history can be lazy + # loaded? 
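# NOTE: hedged sketch, not patch content. The sample-period inference
# done above: derive the bar period from adjacent timestamps and fall
# back one sample if the last pair looks off (eg. a partial bar).
from pendulum import from_timestamp

def infer_step_s(times: list[float]) -> int:
    step = (
        from_timestamp(times[-1]) - from_timestamp(times[-2])
    ).seconds
    if step not in (1, 60):
        step = (
            from_timestamp(times[-2]) - from_timestamp(times[-3])
        ).seconds
    return step

assert infer_step_s([0, 60, 120, 180]) == 60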
+ fqme: str = mkt.fqme + tsdb_entry: tuple | None = await storage.load( fqme, timeframe=timeframe, ) - try: + last_tsdb_dt: datetime | None = None + if tsdb_entry: ( - latest_start_dt, - latest_end_dt, - bf_done, - ) = await bus.nursery.start( - partial( - start_backfill, - mod, - mkt, - shm, - timeframe, - sampler_stream, - feed_is_live, - - last_tsdb_dt=last_tsdb_dt, - tsdb_is_up=True, - storage=storage, - ) - ) - except DataUnavailable: - # XXX: timeframe not supported for backend - dts_per_tf[timeframe] = ( tsdb_history, + first_tsdb_dt, last_tsdb_dt, - None, - None, - None, + ) = tsdb_entry + + # calc the index from which the tsdb data should be + # prepended, presuming there is a gap between the + # latest frame (loaded/read above) and the latest + # sample loaded from the tsdb. + backfill_diff: Duration = mr_start_dt - last_tsdb_dt + offset_s: float = backfill_diff.in_seconds() + offset_samples: int = round(offset_s / timeframe) + + # TODO: see if there's faster multi-field reads: + # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields + # re-index with a `time` and index field + prepend_start = shm._first.value - offset_samples + 1 + + # tsdb history is so far in the past we can't fit it in + # shm buffer space so simply don't load it! + if prepend_start > 0: + to_push = tsdb_history[-prepend_start:] + shm.push( + to_push, + + # insert the history pre a "days worth" of samples + # to leave some real-time buffer space at the end. + prepend=True, + # update_first=False, + start=prepend_start, + field_map=storemod.ohlc_key_map, + ) + + log.info(f'Loaded {to_push.shape} datums from storage') + + # TODO: maybe start history anal and load missing "history + # gaps" via backend.. + + if timeframe not in (1, 60): + raise ValueError( + '`piker` only needs to support 1m and 1s sampling ' + 'but ur api is trying to deliver a longer ' + f'timeframe of {timeframe} seconds..\n' + 'So yuh.. dun do dat brudder.' ) - continue + # if there is a gap to backfill from the first + # history frame until the last datum loaded from the tsdb + # continue that now in the background + bf_done = await tn.start( + partial( + start_backfill, + get_hist, + mod, + mkt, + shm, + timeframe, - # tsdb_history = series.get(timeframe) - dts_per_tf[timeframe] = ( - tsdb_history, - last_tsdb_dt, - latest_start_dt, - latest_end_dt, - bf_done, + backfill_from_shm_index=backfill_gap_from_shm_index, + backfill_from_dt=mr_start_dt, + + sampler_stream=sampler_stream, + + backfill_until_dt=last_tsdb_dt, + storage=storage, + ) ) + # if len(hist_shm.array) < 2: # TODO: there's an edge case here to solve where if the last # frame before market close (at least on ib) was pushed and @@ -469,146 +752,32 @@ async def tsdb_backfill( # the tsdb series and stash that somewhere as meta data on # the shm buffer?.. no se. - # unblock the feed bus management task - # assert len(shms[1].array) - task_status.started() + # backload any further data from tsdb (concurrently per + # timeframe) if not all data was able to be loaded (in memory) + # from the ``StorageClient.load()`` call above. + try: + await trio.sleep_forever() + finally: + return + + # IF we need to continue backloading incrementall from the + # tsdb client.. 
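# NOTE: hedged sketch, not patch content. The prepend-index
# arithmetic above with toy numbers: convert the time gap between the
# provider frame's start and the tsdb's last datum into a sample
# count, then offset back from the shm buffer's current first index.
timeframe_s: int = 60
shm_first_index: int = 100_000     # stand-in for ``shm._first.value``

offset_s: float = 3 * 60 * 60      # say the tsdb ends ~3 hours earlier
offset_samples: int = round(offset_s / timeframe_s)  # 180 bars

prepend_start: int = shm_first_index - offset_samples + 1
assert prepend_start == 99_821

# the tsdb segment is only prepended when it still fits in the
# buffer, ie. ``prepend_start > 0``; otherwise it is skipped.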
+ tn.start_soon( + back_load_from_tsdb, + + storemod, + storage, + fqme, - async def back_load_from_tsdb( - timeframe: int, - shm: ShmArray, - ): - ( tsdb_history, last_tsdb_dt, - latest_start_dt, - latest_end_dt, + mr_start_dt, + mr_end_dt, bf_done, - ) = dts_per_tf[timeframe] - - # sync to backend history task's query/load completion - if bf_done: - await bf_done.wait() - - # TODO: eventually it'd be nice to not require a shm array/buffer - # to accomplish this.. maybe we can do some kind of tsdb direct to - # graphics format eventually in a child-actor? - - # TODO: see if there's faster multi-field reads: - # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields - # re-index with a `time` and index field - prepend_start = shm._first.value - array = shm.array - if len(array): - shm_last_dt = pendulum.from_timestamp(shm.array[0]['time']) - else: - shm_last_dt = None - - if last_tsdb_dt: - assert shm_last_dt >= last_tsdb_dt - - # do diff against start index of last frame of history and only - # fill in an amount of datums from tsdb allows for most recent - # to be loaded into mem *before* tsdb data. - if ( - last_tsdb_dt - and latest_start_dt - ): - backfilled_size_s = ( - latest_start_dt - last_tsdb_dt - ).seconds - # if the shm buffer len is not large enough to contain - # all missing data between the most recent backend-queried frame - # and the most recent dt-index in the db we warn that we only - # want to load a portion of the next tsdb query to fill that - # space. - log.info( - f'{backfilled_size_s} seconds worth of {timeframe}s loaded' - ) - - # Load TSDB history into shm buffer (for display) if there is - # remaining buffer space. - - if ( - len(tsdb_history) - ): - # load the first (smaller) bit of history originally loaded - # above from ``Storage.load()``. - to_push = tsdb_history[-prepend_start:] - shm.push( - to_push, - - # insert the history pre a "days worth" of samples - # to leave some real-time buffer space at the end. - prepend=True, - # update_first=False, - # start=prepend_start, - field_map=marketstore.ohlc_key_map, - ) - - tsdb_last_frame_start = tsdb_history['Epoch'][0] - - if timeframe == 1: - times = shm.array['time'] - assert (times[1] - times[0]) == 1 - - # load as much from storage into shm possible (depends on - # user's shm size settings). - while shm._first.value > 0: - - tsdb_history = await storage.read_ohlcv( - fqme, - timeframe=timeframe, - end=tsdb_last_frame_start, - ) - - # empty query - if not len(tsdb_history): - break - - next_start = tsdb_history['Epoch'][0] - if next_start >= tsdb_last_frame_start: - # no earlier data detected - break - else: - tsdb_last_frame_start = next_start - - prepend_start = shm._first.value - to_push = tsdb_history[-prepend_start:] - # insert the history pre a "days worth" of samples - # to leave some real-time buffer space at the end. - shm.push( - to_push, - prepend=True, - field_map=marketstore.ohlc_key_map, - ) - log.info(f'Loaded {to_push.shape} datums from storage') - - # manually trigger step update to update charts/fsps - # which need an incremental update. - # NOTE: the way this works is super duper - # un-intuitive right now: - # - the broadcaster fires a msg to the fsp subsystem. - # - fsp subsys then checks for a sample step diff and - # possibly recomputes prepended history. - # - the fsp then sends back to the parent actor - # (usually a chart showing graphics for said fsp) - # which tells the chart to conduct a manual full - # graphics loop cycle. 
- await sampler_stream.send('broadcast_all') - - # TODO: write new data to tsdb to be ready to for next read. - - # backload from db (concurrently per timeframe) once backfilling of - # recent dat a loaded from the backend provider (see - # ``bf_done.wait()`` call). - async with trio.open_nursery() as nurse: - for timeframe, shm in shms.items(): - nurse.start_soon( - back_load_from_tsdb, - timeframe, - shm, - ) + timeframe, + shm, + ) async def manage_history( @@ -626,12 +795,26 @@ async def manage_history( ) -> None: ''' Load and manage historical data including the loading of any - available series from `marketstore` as well as conducting real-time - update of both that existing db and the allocated shared memory - buffer. + available series from any connected tsdb as well as conduct + real-time update of both that existing db and the allocated + shared memory buffer. + + Init sequence: + - allocate shm (numpy array) buffers for 60s & 1s sample rates + - configure "zero index" for each buffer: the index where + history will prepended *to* and new live data will be + appened *from*. + - open a ``.storage.StorageClient`` and load any existing tsdb + history as well as (async) start a backfill task which loads + missing (newer) history from the data provider backend: + - tsdb history is loaded first and pushed to shm ASAP. + - the backfill task loads the most recent history before + unblocking its parent task, so that the `ShmArray._last` is + up to date to allow the OHLC sampler to begin writing new + samples as the correct buffer index once the provider feed + engages. ''' - # TODO: is there a way to make each shm file key # actor-tree-discovery-addr unique so we avoid collisions # when doing tests which also allocate shms for certain instruments @@ -648,10 +831,13 @@ async def manage_history( # (maybe) allocate shm array for this broker/symbol which will # be used for fast near-term history capture and processing. hist_shm, opened = maybe_open_shm_array( + size=_default_hist_size, + append_start_index=_hist_buffer_start, + key=f'piker.{service}[{uuid[:16]}].{fqme}.hist', # use any broker defined ohlc dtype: - dtype=getattr(mod, '_ohlc_dtype', base_iohlc_dtype), + dtype=getattr(mod, '_ohlc_dtype', def_iohlcv_fields), # we expect the sub-actor to write readonly=False, @@ -665,14 +851,15 @@ async def manage_history( ) rt_shm, opened = maybe_open_shm_array( + size=_default_rt_size, + append_start_index=_rt_buffer_start, key=f'piker.{service}[{uuid[:16]}].{fqme}.rt', # use any broker defined ohlc dtype: - dtype=getattr(mod, '_ohlc_dtype', base_iohlc_dtype), + dtype=getattr(mod, '_ohlc_dtype', def_iohlcv_fields), # we expect the sub-actor to write readonly=False, - size=3*_secs_in_day, ) # (for now) set the rt (hft) shm array with space to prepend @@ -688,123 +875,93 @@ async def manage_history( "Persistent shm for sym was already open?!" ) - # register 1s and 1m buffers with the global incrementer task - async with open_sample_stream( - period_s=1., - shms_by_period={ - 1.: rt_shm.token, - 60.: hist_shm.token, - }, - - # NOTE: we want to only open a stream for doing broadcasts on - # backfill operations, not receive the sample index-stream - # (since there's no code in this data feed layer that needs to - # consume it). 
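# NOTE: hedged sketch, not patch content. The buffer sizing
# arithmetic behind the new ``size`` / ``append_start_index``
# arguments used above (the constants live at the top of
# ``piker/data/history.py`` in this patch).
_mins_in_day = 60 * 24
_secs_in_day = 60 * _mins_in_day

# hist buffer: ~6 years of 1m bars with the append index pulled back
# one week so freshly sampled 1m bars have room to append.
_default_hist_size = 6 * 365 * _mins_in_day
_hist_buffer_start = _default_hist_size - 7 * _mins_in_day

# rt buffer: 3 days of 1s bars, appending after ~2 days worth.
_days_worth = 3
_default_rt_size = _days_worth * _secs_in_day
_rt_buffer_start = (_days_worth - 1) * _secs_in_day

assert (_default_hist_size, _default_rt_size) == (3_153_600, 259_200)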
- open_index_stream=True, - sub_for_broadcasts=False, - - ) as sample_stream: - - open_history_client = getattr( - mod, - 'open_history_client', - None, - ) - assert open_history_client - - tsdb_is_up: bool = False - try_remote_tsdb: bool = False - - conf, path = config.load('conf', touch_if_dne=True) - net = conf.get('network') - if net: - tsdbconf = net.get('tsdb') - - # lookup backend tsdb module by name and load any user service - # settings for connecting to the tsdb service. - tsdb_backend: str = tsdbconf.pop('backend') - tsdb_host: str = tsdbconf['host'] + open_history_client = getattr( + mod, + 'open_history_client', + None, + ) + assert open_history_client - # TODO: import and load storagemod by name - # mod = get_storagemod(tsdb_backend) - from ..service import marketstore - if tsdb_host == 'localhost': - log.info('Scanning for existing `{tsbd_backend}`') - tsdb_is_up: bool = await check_for_service(f'{tsdb_backend}d') + # TODO: maybe it should be a subpkg of `.data`? + from piker import storage - else: - try_remote_tsdb: bool = True - - if ( - tsdb_is_up - or try_remote_tsdb - and ( - opened - and open_history_client - ) - ): - log.info('Found existing `marketstored`') + async with ( + storage.open_storage_client() as (storemod, client), + trio.open_nursery() as tn, + ): + log.info( + f'Connecting to storage backend `{storemod.name}`:\n' + f'location: {client.address}\n' + f'db cardinality: {client.cardinality}\n' + # TODO: show backend config, eg: + # - network settings + # - storage size with compression + # - number of loaded time series? + ) - async with ( - marketstore.open_storage_client( - **tsdbconf - ) as storage, - ): - # TODO: drop returning the output that we pass in? - await bus.nursery.start( + # NOTE: this call ONLY UNBLOCKS once the latest-most frame + # (i.e. history just before the live feed latest datum) of + # history has been loaded and written to the shm buffer: + # - the backfiller task can write in reverse chronological + # to the shm and tsdb + # - the tsdb data can be loaded immediately and the + # backfiller can do a single append from it's end datum and + # then prepends backward to that from the current time + # step. + tf2mem: dict = { + 1: rt_shm, + 60: hist_shm, + } + async with open_sample_stream( + period_s=1., + shms_by_period={ + 1.: rt_shm.token, + 60.: hist_shm.token, + }, + + # NOTE: we want to only open a stream for doing + # broadcasts on backfill operations, not receive the + # sample index-stream (since there's no code in this + # data feed layer that needs to consume it). + open_index_stream=True, + sub_for_broadcasts=False, + + ) as sample_stream: + # register 1s and 1m buffers with the global incrementer task + log.info(f'Connected to sampler stream: {sample_stream}') + + for timeframe in [60, 1]: + await tn.start( tsdb_backfill, mod, - marketstore, - bus, - storage, + storemod, + tn, + # bus, + client, mkt, - { - 1: rt_shm, - 60: hist_shm, - }, + tf2mem[timeframe], + timeframe, + sample_stream, - feed_is_live, ) - # yield back after client connect with filled shm - task_status.started(( - hist_zero_index, - hist_shm, - rt_zero_index, - rt_shm, - )) - - # indicate to caller that feed can be delivered to - # remote requesting client since we've loaded history - # data that can be used. - some_data_ready.set() - - # history retreival loop depending on user interaction - # and thus a small RPC-prot for remotely controllinlg - # what data is loaded for viewing. 
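# NOTE: hedged sketch, not patch content. The broadcast payload the
# backfill tasks started above send over this sampler stream; the
# consumer side handling shown here is illustrative only.
msg: dict = {
    'broadcast_all': {
        'backfilling': ('btcusdt.binance', 60),
    },
}
info: dict = msg['broadcast_all']
if 'backfilling' in info:
    fqme, timeframe = info['backfilling']
    # eg. a chart/fsp consumer would refresh its prepended history
    print(f'history updated for {fqme} @ {timeframe}s')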
- await trio.sleep_forever() - - # load less history if no tsdb can be found - elif ( - not tsdb_is_up - and opened - ): - await basic_backfill( - bus, - mod, - mkt, - { - 1: rt_shm, - 60: hist_shm, - }, - sample_stream, - feed_is_live, - ) + # indicate to caller that feed can be delivered to + # remote requesting client since we've loaded history + # data that can be used. + some_data_ready.set() + + # wait for a live feed before starting the sampler. + await feed_is_live.wait() + + # yield back after client connect with filled shm task_status.started(( hist_zero_index, hist_shm, rt_zero_index, rt_shm, )) - some_data_ready.set() + + # history retreival loop depending on user interaction + # and thus a small RPC-prot for remotely controllinlg + # what data is loaded for viewing. await trio.sleep_forever() diff --git a/piker/fsp/_api.py b/piker/fsp/_api.py index 11d1e7dca..92f8f2711 100644 --- a/piker/fsp/_api.py +++ b/piker/fsp/_api.py @@ -177,6 +177,7 @@ def fsp( def maybe_mk_fsp_shm( sym: str, target: Fsp, + size: int, readonly: bool = True, ) -> (str, ShmArray, bool): @@ -185,7 +186,8 @@ def maybe_mk_fsp_shm( exists, otherwise load the shm already existing for that token. ''' - assert isinstance(sym, str), '`sym` should be file-name-friendly `str`' + if not isinstance(sym, str): + raise ValueError('`sym: str` should be file-name-friendly') # TODO: load output types from `Fsp` # - should `index` be a required internal field? @@ -204,7 +206,7 @@ def maybe_mk_fsp_shm( shm, opened = maybe_open_shm_array( key, - # TODO: create entry for each time frame + size=size, dtype=fsp_dtype, readonly=True, ) diff --git a/piker/service/marketstore.py b/piker/service/marketstore.py index ac0ad0a4f..c9f494201 100644 --- a/piker/service/marketstore.py +++ b/piker/service/marketstore.py @@ -1,5 +1,5 @@ # piker: trading gear for hackers -# Copyright (C) Tyler Goodlet (in stewardship for piker0) +# Copyright (C) Tyler Goodlet (in stewardship for pikers) # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -25,11 +25,9 @@ ''' from __future__ import annotations from contextlib import asynccontextmanager as acm -from datetime import datetime from pprint import pformat from typing import ( Any, - Union, TYPE_CHECKING, ) import time @@ -37,31 +35,34 @@ from pathlib import Path from bidict import bidict -from msgspec.msgpack import encode, decode +from msgspec.msgpack import ( + encode, + decode, +) # import pyqtgraph as pg import numpy as np import tractor from trio_websocket import open_websocket_url -from anyio_marketstore import ( +from anyio_marketstore import ( # noqa open_marketstore_client, MarketstoreClient, Params, ) import pendulum -import purerpc - -if TYPE_CHECKING: - import docker - from ._ahab import DockerContainer +# TODO: import this for specific error set expected by mkts client +# import purerpc +from ..data.feed import maybe_open_feed +from . import Services from ._util import ( log, # sub-sys logger get_console_log, ) -from . import Services -from ..data.feed import maybe_open_feed -from .._profile import Profiler -from .. 
import config + +if TYPE_CHECKING: + import docker + from ._ahab import DockerContainer + # ahabd-supervisor and container level config @@ -326,16 +327,6 @@ async def start_ahab_daemon( ] -ohlc_key_map = bidict({ - 'Epoch': 'time', - 'Open': 'open', - 'High': 'high', - 'Low': 'low', - 'Close': 'close', - 'Volume': 'volume', -}) - - def mk_tbk(keys: tuple[str, str, str]) -> str: ''' Generate a marketstore table key from a tuple. @@ -387,24 +378,6 @@ def quote_to_marketstore_structarray( return np.array([tuple(array_input)], dtype=_quote_dt) -@acm -async def get_client( - host: str | None, - port: int | None, - -) -> MarketstoreClient: - ''' - Load a ``anyio_marketstore`` grpc client connected - to an existing ``marketstore`` server. - - ''' - async with open_marketstore_client( - host or 'localhost', - port or _config['grpc_listen_port'], - ) as client: - yield client - - class MarketStoreError(Exception): "Generic marketstore client error" @@ -432,375 +405,6 @@ class MarketStoreError(Exception): }) -class Storage: - ''' - High level storage api for both real-time and historical ingest. - - ''' - def __init__( - self, - client: MarketstoreClient, - - ) -> None: - # TODO: eventually this should be an api/interface type that - # ensures we can support multiple tsdb backends. - self.client = client - - # series' cache from tsdb reads - self._arrays: dict[str, np.ndarray] = {} - - async def list_keys(self) -> list[str]: - return await self.client.list_symbols() - - async def search_keys(self, pattern: str) -> list[str]: - ''' - Search for time series key in the storage backend. - - ''' - ... - - async def write_ticks(self, ticks: list) -> None: - ... - - async def load( - self, - fqme: str, - timeframe: int, - - ) -> tuple[ - np.ndarray, # timeframe sampled array-series - datetime | None, # first dt - datetime | None, # last dt - ]: - - first_tsdb_dt, last_tsdb_dt = None, None - hist = await self.read_ohlcv( - fqme, - # on first load we don't need to pull the max - # history per request size worth. 
- limit=3000, - timeframe=timeframe, - ) - log.info(f'Loaded tsdb history {hist}') - - if len(hist): - times = hist['Epoch'] - first, last = times[0], times[-1] - first_tsdb_dt, last_tsdb_dt = map( - pendulum.from_timestamp, [first, last] - ) - - return ( - hist, # array-data - first_tsdb_dt, # start of query-frame - last_tsdb_dt, # most recent - ) - - async def read_ohlcv( - self, - fqme: str, - timeframe: int | str, - end: int | None = None, - limit: int = int(800e3), - - ) -> np.ndarray: - - client = self.client - syms = await client.list_symbols() - - if fqme not in syms: - return {} - - # use the provided timeframe or 1s by default - tfstr = tf_in_1s.get(timeframe, tf_in_1s[1]) - - params = Params( - symbols=fqme, - timeframe=tfstr, - attrgroup='OHLCV', - end=end, - # limit_from_start=True, - - # TODO: figure the max limit here given the - # ``purepc`` msg size limit of purerpc: 33554432 - limit=limit, - ) - - for i in range(3): - try: - result = await client.query(params) - break - except purerpc.grpclib.exceptions.UnknownError as err: - if 'snappy' in err.args: - await tractor.breakpoint() - - # indicate there is no history for this timeframe - log.exception( - f'Unknown mkts QUERY error: {params}\n' - f'{err.args}' - ) - else: - return {} - - # TODO: it turns out column access on recarrays is actually slower: - # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist - # it might make sense to make these structured arrays? - data_set = result.by_symbols()[fqme] - array = data_set.array - - # XXX: ensure sample rate is as expected - time = data_set.array['Epoch'] - if len(time) > 1: - time_step = time[-1] - time[-2] - ts = tf_in_1s.inverse[data_set.timeframe] - - if time_step != ts: - log.warning( - f'MKTS BUG: wrong timeframe loaded: {time_step}' - 'YOUR DATABASE LIKELY CONTAINS BAD DATA FROM AN OLD BUG' - f'WIPING HISTORY FOR {ts}s' - ) - await self.delete_ts(fqme, timeframe) - - # try reading again.. - return await self.read_ohlcv( - fqme, - timeframe, - end, - limit, - ) - - return array - - async def delete_ts( - self, - key: str, - timeframe: Union[int, str | None] = None, - fmt: str = 'OHLCV', - - ) -> bool: - - client = self.client - syms = await client.list_symbols() - if key not in syms: - await tractor.breakpoint() - raise KeyError(f'`{key}` table key not found in\n{syms}?') - - tbk = mk_tbk(( - key, - tf_in_1s.get(timeframe, tf_in_1s[60]), - fmt, - )) - return await client.destroy(tbk=tbk) - - async def write_ohlcv( - self, - fqme: str, - ohlcv: np.ndarray, - timeframe: int, - append_and_duplicate: bool = True, - limit: int = int(800e3), - - ) -> None: - # build mkts schema compat array for writing - mkts_dt = np.dtype(_ohlcv_dt) - mkts_array = np.zeros( - len(ohlcv), - dtype=mkts_dt, - ) - # copy from shm array (yes it's this easy): - # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays - mkts_array[:] = ohlcv[[ - 'time', - 'open', - 'high', - 'low', - 'close', - 'volume', - ]] - - m, r = divmod(len(mkts_array), limit) - - tfkey = tf_in_1s[timeframe] - for i in range(m, 1): - to_push = mkts_array[i-1:i*limit] - - # write to db - resp = await self.client.write( - to_push, - tbk=f'{fqme}/{tfkey}/OHLCV', - - # NOTE: will will append duplicates - # for the same timestamp-index. - # TODO: pre-deduplicate? 
- isvariablelength=append_and_duplicate, - ) - - log.info( - f'Wrote {mkts_array.size} datums to tsdb\n' - ) - - for resp in resp.responses: - err = resp.error - if err: - raise MarketStoreError(err) - - if r: - to_push = mkts_array[m*limit:] - - # write to db - resp = await self.client.write( - to_push, - tbk=f'{fqme}/{tfkey}/OHLCV', - - # NOTE: will will append duplicates - # for the same timestamp-index. - # TODO: pre deduplicate? - isvariablelength=append_and_duplicate, - ) - - log.info( - f'Wrote {mkts_array.size} datums to tsdb\n' - ) - - for resp in resp.responses: - err = resp.error - if err: - raise MarketStoreError(err) - - # XXX: currently the only way to do this is through the CLI: - - # sudo ./marketstore connect --dir ~/.config/piker/data - # >> \show mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15 - # and this seems to block and use up mem.. - # >> \trim mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15 - - # relevant source code for this is here: - # https://github.com/alpacahq/marketstore/blob/master/cmd/connect/session/trim.go#L14 - # def delete_range(self, start_dt, end_dt) -> None: - # ... - - -@acm -async def open_storage_client( - host: str, - grpc_port: int, - -) -> tuple[Storage, dict[str, np.ndarray]]: - ''' - Load a series by key and deliver in ``numpy`` struct array format. - - ''' - async with ( - # eventually a storage backend endpoint - get_client( - host=host, - port=grpc_port, - ) as client, - ): - # slap on our wrapper api - yield Storage(client) - - -@acm -async def open_tsdb_client( - fqme: str, -) -> Storage: - - # TODO: real-time dedicated task for ensuring - # history consistency between the tsdb, shm and real-time feed.. - - # update sequence design notes: - - # - load existing highest frequency data from mkts - # * how do we want to offer this to the UI? - # - lazy loading? - # - try to load it all and expect graphics caching/diffing - # to hide extra bits that aren't in view? - - # - compute the diff between latest data from broker and shm - # * use sql api in mkts to determine where the backend should - # start querying for data? - # * append any diff with new shm length - # * determine missing (gapped) history by scanning - # * how far back do we look? - - # - begin rt update ingest and aggregation - # * could start by always writing ticks to mkts instead of - # worrying about a shm queue for now. 
- # * we have a short list of shm queues worth groking: - # - https://github.com/pikers/piker/issues/107 - # * the original data feed arch blurb: - # - https://github.com/pikers/piker/issues/98 - # - profiler = Profiler( - disabled=True, # not pg_profile_enabled(), - delayed=False, - ) - - # load any user service settings for connecting to - rootconf, path = config.load( - 'conf', - touch_if_dne=True, - ) - tsdbconf = rootconf['network'].get('tsdb') - # backend = tsdbconf.pop('backend') - async with ( - open_storage_client( - **tsdbconf, - ) as storage, - - maybe_open_feed( - [fqme], - start_stream=False, - - ) as feed, - ): - profiler(f'opened feed for {fqme}') - - # to_append = feed.hist_shm.array - # to_prepend = None - - if fqme: - flume = feed.flumes[fqme] - symbol = flume.mkt - if symbol: - fqme = symbol.fqme - - # diff db history with shm and only write the missing portions - # ohlcv = flume.hist_shm.array - - # TODO: use pg profiler - # for secs in (1, 60): - # tsdb_array = await storage.read_ohlcv( - # fqme, - # timeframe=timeframe, - # ) - # # hist diffing: - # # these aren't currently used but can be referenced from - # # within the embedded ipython shell below. - # to_append = ohlcv[ohlcv['time'] > ts['Epoch'][-1]] - # to_prepend = ohlcv[ohlcv['time'] < ts['Epoch'][0]] - - # profiler('Finished db arrays diffs') - - _ = await storage.client.list_symbols() - # log.info(f'Existing tsdb symbol set:\n{pformat(syms)}') - # profiler(f'listed symbols {syms}') - yield storage - - # for array in [to_append, to_prepend]: - # if array is None: - # continue - - # log.info( - # f'Writing datums {array.size} -> to tsdb from shm\n' - # ) - # await storage.write_ohlcv(fqme, array) - - # profiler('Finished db writes') - - async def ingest_quote_stream( symbols: list[str], brokername: str, @@ -812,6 +416,7 @@ async def ingest_quote_stream( Ingest a broker quote stream into a ``marketstore`` tsdb. ''' + from piker.storage.marketstore import get_client async with ( maybe_open_feed(brokername, symbols, loglevel=loglevel) as feed, get_client() as ms_client, @@ -963,5 +568,3 @@ async def recv() -> dict[str, Any]: if quotes: yield quotes - - diff --git a/piker/storage/__init__.py b/piker/storage/__init__.py new file mode 100644 index 000000000..465d3e284 --- /dev/null +++ b/piker/storage/__init__.py @@ -0,0 +1,307 @@ +# piker: trading gear for hackers +# Copyright (C) Tyler Goodlet (in stewardship for pikers) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +(time-series) database middle ware layer. + +- APIs for read, write, delete, replicate over multiple + db systems. +- backend agnostic tick msg ingest machinery. +- broadcast systems for fan out of real-time ingested + data to live consumers. +- test harness utilities for data-processing verification. 
+ +''' +from abc import abstractmethod +from contextlib import asynccontextmanager as acm +from functools import partial +from importlib import import_module +from datetime import datetime +from types import ModuleType +from typing import ( + # Callable, + # Awaitable, + # Any, + # AsyncIterator, + Protocol, + # Generic, + # TypeVar, +) + +import numpy as np + + +from .. import config +from ..service import ( + check_for_service, +) +from ..log import ( + get_logger, + get_console_log, +) +subsys: str = 'piker.storage' + +log = get_logger(subsys) +get_console_log = partial( + get_console_log, + name=subsys, +) + + +__tsdbs__: list[str] = [ + 'nativedb', + # 'marketstore', +] + + +class StorageClient( + Protocol, +): + ''' + Api description that all storage backends must implement + in order to suffice the historical data mgmt layer. + + ''' + name: str + + @abstractmethod + async def list_keys(self) -> list[str]: + ... + + @abstractmethod + def search_keys(self) -> list[str]: + ... + + # @abstractmethod + # async def write_ticks( + # self, + # ticks: list, + # ) -> ReceiveType: + # ... + + # ``trio.abc.AsyncResource`` methods + @abstractmethod + async def load( + self, + fqme: str, + timeframe: int, + + ) -> tuple[ + np.ndarray, # timeframe sampled array-series + datetime | None, # first dt + datetime | None, # last dt + ]: + ... + + @abstractmethod + async def delete_ts( + self, + key: str, + timeframe: int | str | None = None, + fmt: str = 'OHLCV', + + ) -> bool: + ... + + @abstractmethod + async def read_ohlcv( + self, + fqme: str, + timeframe: int | str, + end: int | None = None, + limit: int = int(800e3), + + ) -> np.ndarray: + ... + + async def write_ohlcv( + self, + fqme: str, + ohlcv: np.ndarray, + timeframe: int, + append_and_duplicate: bool = True, + limit: int = int(800e3), + + ) -> None: + ... + + +class StorageConnectionError(ConnectionError): + ''' + Can't connect to the desired tsdb subsys/service. + + ''' + +def get_storagemod(name: str) -> ModuleType: + mod: ModuleType = import_module( + '.' + name, + 'piker.storage', + ) + + # we only allow monkeying because it's for internal keying + mod.name = mod.__name__.split('.')[-1] + return mod + + +@acm +async def open_storage_client( + backend: str | None = None, + +) -> tuple[ModuleType, StorageClient]: + ''' + Load the ``StorageClient`` for named backend. + + ''' + def_backend: str = 'nativedb' + tsdb_host: str = 'localhost' + + # load root config and any tsdb user defined settings + conf, path = config.load('conf', touch_if_dne=True) + + # TODO: maybe not under a "network" section.. since + # no more chitty mkts.. + tsdbconf: dict = {} + service_section = conf.get('service') + if ( + not backend + and service_section + ): + tsdbconf = service_section.get('tsdb') + + # lookup backend tsdb module by name and load any user service + # settings for connecting to the tsdb service. 
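# NOTE: hedged sketch, not patch content. The ``conf.toml`` shape
# this loader expects, written as the dict ``config.load()`` would
# hand back; values are illustrative and the default 'nativedb'
# backend needs no section at all.
conf_example: dict = {
    'service': {
        'tsdb': {
            'backend': 'marketstore',
            'host': 'localhost',
            'grpc_port': 5995,
        },
    },
}
tsdbconf_example: dict = conf_example['service']['tsdb']
assert tsdbconf_example.pop('backend') == 'marketstore'
assert tsdbconf_example['host'] == 'localhost'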
+ backend: str = tsdbconf.pop('backend') + tsdb_host: str = tsdbconf['host'] + + if backend is None: + backend: str = def_backend + + # import and load storagemod by name + mod: ModuleType = get_storagemod(backend) + get_client = mod.get_client + + log.info('Scanning for existing `{tsbd_backend}`') + if backend != def_backend: + tsdb_is_up: bool = await check_for_service(f'{backend}d') + if ( + tsdb_host == 'localhost' + or tsdb_is_up + ): + log.info(f'Connecting to local: {backend}@{tsdbconf}') + else: + log.info(f'Attempting to connect to remote: {backend}@{tsdbconf}') + else: + log.info(f'Connecting to default storage: {backend}@{tsdbconf}') + + async with ( + get_client(**tsdbconf) as client, + ): + # slap on our wrapper api + yield mod, client + + +# NOTE: pretty sure right now this is only being +# called by a CLI entrypoint? +@acm +async def open_tsdb_client( + fqme: str, +) -> StorageClient: + + # TODO: real-time dedicated task for ensuring + # history consistency between the tsdb, shm and real-time feed.. + + # update sequence design notes: + + # - load existing highest frequency data from mkts + # * how do we want to offer this to the UI? + # - lazy loading? + # - try to load it all and expect graphics caching/diffing + # to hide extra bits that aren't in view? + + # - compute the diff between latest data from broker and shm + # * use sql api in mkts to determine where the backend should + # start querying for data? + # * append any diff with new shm length + # * determine missing (gapped) history by scanning + # * how far back do we look? + + # - begin rt update ingest and aggregation + # * could start by always writing ticks to mkts instead of + # worrying about a shm queue for now. + # * we have a short list of shm queues worth groking: + # - https://github.com/pikers/piker/issues/107 + # * the original data feed arch blurb: + # - https://github.com/pikers/piker/issues/98 + # + from .._profile import Profiler + profiler = Profiler( + disabled=True, # not pg_profile_enabled(), + delayed=False, + ) + from ..data.feed import maybe_open_feed + + async with ( + open_storage_client() as (_, storage), + + maybe_open_feed( + [fqme], + start_stream=False, + + ) as feed, + ): + profiler(f'opened feed for {fqme}') + + # to_append = feed.hist_shm.array + # to_prepend = None + + if fqme: + flume = feed.flumes[fqme] + symbol = flume.mkt + if symbol: + fqme = symbol.fqme + + # diff db history with shm and only write the missing portions + # ohlcv = flume.hist_shm.array + + # TODO: use pg profiler + # for secs in (1, 60): + # tsdb_array = await storage.read_ohlcv( + # fqme, + # timeframe=timeframe, + # ) + # # hist diffing: + # # these aren't currently used but can be referenced from + # # within the embedded ipython shell below. 
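# NOTE: hedged sketch, not patch content. The minimal surface a new
# ``piker.storage`` backend module could expose in order to satisfy
# the ``StorageClient`` protocol above plus the ``get_client()`` hook
# which ``open_storage_client()`` resolves; the 'echodb' backend here
# is purely hypothetical.
from contextlib import asynccontextmanager as acm
from datetime import datetime
import numpy as np


class EchoStorageClient:
    name: str = 'echodb'
    address: str = 'mem://local'

    async def list_keys(self) -> list[str]:
        return []

    def search_keys(self) -> list[str]:
        return []

    async def load(
        self,
        fqme: str,
        timeframe: int,
    ) -> tuple[np.ndarray, datetime | None, datetime | None]:
        return np.empty(0), None, None

    async def read_ohlcv(
        self,
        fqme: str,
        timeframe: int | str,
        end: int | None = None,
        limit: int = int(800e3),
    ) -> np.ndarray:
        return np.empty(0)

    async def write_ohlcv(self, fqme, ohlcv, timeframe, **kwargs) -> None:
        return None

    async def delete_ts(self, key, timeframe=None, fmt='OHLCV') -> bool:
        return True


@acm
async def get_client(**conf):
    # what the loader above expects each backend module to provide
    yield EchoStorageClient()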
+ # to_append = ohlcv[ohlcv['time'] > ts['Epoch'][-1]] + # to_prepend = ohlcv[ohlcv['time'] < ts['Epoch'][0]] + + # profiler('Finished db arrays diffs') + + _ = await storage.client.list_symbols() + # log.info(f'Existing tsdb symbol set:\n{pformat(syms)}') + # profiler(f'listed symbols {syms}') + yield storage + + # for array in [to_append, to_prepend]: + # if array is None: + # continue + + # log.info( + # f'Writing datums {array.size} -> to tsdb from shm\n' + # ) + # await storage.write_ohlcv(fqme, array) + + # profiler('Finished db writes') diff --git a/piker/storage/cli.py b/piker/storage/cli.py new file mode 100644 index 000000000..73cf737ee --- /dev/null +++ b/piker/storage/cli.py @@ -0,0 +1,334 @@ +# piker: trading gear for hackers +# Copyright (C) 2018-present Tyler Goodlet (in stewardship of piker0) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +""" +Storage middle-ware CLIs. + +""" +from __future__ import annotations +from pathlib import Path +import time +from typing import Generator +# from typing import TYPE_CHECKING + +import polars as pl +import numpy as np +import tractor +# import pendulum +from rich.console import Console +import trio +# from rich.markdown import Markdown +import typer + +from piker.service import open_piker_runtime +from piker.cli import cli +from piker.config import get_conf_dir +from piker.data import ( + maybe_open_shm_array, + def_iohlcv_fields, + ShmArray, +) +from piker.data.history import ( + _default_hist_size, + _default_rt_size, +) +from . import ( + log, +) +from . import ( + __tsdbs__, + open_storage_client, +) + + +store = typer.Typer() + + +@store.command() +def ls( + backends: list[str] = typer.Argument( + default=None, + help='Storage backends to query, default is all.' + ), +): + from rich.table import Table + + if not backends: + backends: list[str] = __tsdbs__ + + console = Console() + + async def query_all(): + nonlocal backends + + async with ( + open_piker_runtime( + 'tsdb_storage', + enable_modules=['piker.service._ahab'], + ), + ): + for i, backend in enumerate(backends): + table = Table() + try: + async with open_storage_client(backend=backend) as ( + mod, + client, + ): + table.add_column(f'{mod.name}@{client.address}') + keys: list[str] = await client.list_keys() + for key in keys: + table.add_row(key) + + console.print(table) + except Exception: + log.error(f'Unable to connect to storage engine: `{backend}`') + + trio.run(query_all) + + +@store.command() +def delete( + symbols: list[str], + + backend: str = typer.Option( + default=None, + help='Storage backend to update' + ), + # TODO: expose this as flagged multi-option? + timeframes: list[int] = [1, 60], +): + ''' + Delete a storage backend's time series for (table) keys provided as + ``symbols``. + + ''' + from . 
import open_storage_client + + async def main(symbols: list[str]): + async with ( + open_piker_runtime( + 'tsdb_storage', + enable_modules=['piker.service._ahab'] + ), + open_storage_client(backend) as (_, client), + trio.open_nursery() as n, + ): + # spawn queries as tasks for max conc! + for fqme in symbols: + for tf in timeframes: + n.start_soon( + client.delete_ts, + fqme, + tf, + ) + + trio.run(main, symbols) + + +@store.command() +def anal( + fqme: str, + period: int = 60, + +) -> np.ndarray: + + async def main(): + async with ( + open_piker_runtime( + 'tsdb_polars_anal', + # enable_modules=['piker.service._ahab'] + ), + open_storage_client() as (mod, client), + ): + syms: list[str] = await client.list_keys() + print(f'{len(syms)} FOUND for {mod.name}') + + ( + history, + first_dt, + last_dt, + ) = await client.load( + fqme, + period, + ) + assert first_dt < last_dt + + src_df = await client.as_df(fqme, period) + from piker.data import _timeseries as tsmod + df = tsmod.with_dts(src_df) + gaps: pl.DataFrame = tsmod.detect_time_gaps(df) + + if gaps: + print(f'Gaps found:\n{gaps}') + + # TODO: something better with tab completion.. + # is there something more minimal but nearly as + # functional as ipython? + await tractor.breakpoint() + + trio.run(main) + + +def iter_dfs_from_shms(fqme: str) -> Generator[ + tuple[Path, ShmArray, pl.DataFrame], + None, + None, +]: + # shm buffer size table based on known sample rates + sizes: dict[str, int] = { + 'hist': _default_hist_size, + 'rt': _default_rt_size, + } + + # load all detected shm buffer files which have the + # passed FQME pattern in the file name. + shmfiles: list[Path] = [] + shmdir = Path('/dev/shm/') + + for shmfile in shmdir.glob(f'*{fqme}*'): + filename: str = shmfile.name + + # skip index files + if ( + '_first' in filename + or '_last' in filename + ): + continue + + assert shmfile.is_file() + log.debug(f'Found matching shm buffer file: {filename}') + shmfiles.append(shmfile) + + for shmfile in shmfiles: + + # lookup array buffer size based on file suffix + # being either .rt or .hist + size: int = sizes[shmfile.name.rsplit('.')[-1]] + + # attach to any shm buffer, load array into polars df, + # write to local parquet file. + shm, opened = maybe_open_shm_array( + key=shmfile.name, + size=size, + dtype=def_iohlcv_fields, + readonly=True, + ) + assert not opened + ohlcv = shm.array + + start = time.time() + + # XXX: thanks to this SO answer for this conversion tip: + # https://stackoverflow.com/a/72054819 + df = pl.DataFrame({ + field_name: ohlcv[field_name] + for field_name in ohlcv.dtype.fields + }) + delay: float = round( + time.time() - start, + ndigits=6, + ) + log.info( + f'numpy -> polars conversion took {delay} secs\n' + f'polars df: {df}' + ) + + yield ( + shmfile, + shm, + df, + ) + + +@store.command() +def ldshm( + fqme: str, + + write_parquet: bool = False, + +) -> None: + ''' + Linux ONLY: load any fqme file name matching shm buffer from + /dev/shm/ into an OHLCV numpy array and polars DataFrame, + optionally write to .parquet file. 
+ + ''' + async def main(): + async with ( + open_piker_runtime( + 'polars_boi', + enable_modules=['piker.data._sharedmem'], + ), + ): + + df: pl.DataFrame | None = None + for shmfile, shm, df in iter_dfs_from_shms(fqme): + + # compute ohlc properties for naming + times: np.ndarray = shm.array['time'] + secs: float = times[-1] - times[-2] + if secs < 1.: + breakpoint() + raise ValueError( + f'Something is wrong with time period for {shm}:\n{times}' + ) + + # TODO: maybe only optionally enter this depending + # on some CLI flags and/or gap detection? + await tractor.breakpoint() + + # write to parquet file? + if write_parquet: + timeframe: str = f'{secs}s' + + datadir: Path = get_conf_dir() / 'nativedb' + if not datadir.is_dir(): + datadir.mkdir() + + path: Path = datadir / f'{fqme}.{timeframe}.parquet' + + # write to fs + start = time.time() + df.write_parquet(path) + delay: float = round( + time.time() - start, + ndigits=6, + ) + log.info( + f'parquet write took {delay} secs\n' + f'file path: {path}' + ) + + # read back from fs + start = time.time() + read_df: pl.DataFrame = pl.read_parquet(path) + delay: float = round( + time.time() - start, + ndigits=6, + ) + print( + f'parquet read took {delay} secs\n' + f'polars df: {read_df}' + ) + + if df is None: + log.error(f'No matching shm buffers for {fqme} ?') + + trio.run(main) + + +typer_click_object = typer.main.get_command(store) +cli.add_command(typer_click_object, 'store') diff --git a/piker/storage/marketstore/__init__.py b/piker/storage/marketstore/__init__.py new file mode 100644 index 000000000..2f0a79703 --- /dev/null +++ b/piker/storage/marketstore/__init__.py @@ -0,0 +1,385 @@ +# piker: trading gear for hackers +# Copyright (C) Tyler Goodlet (in stewardship for pikers) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +marketstore tsdb backend: +https://github.com/alpacahq/marketstore + + +We wrote an async gGRPC client: +https://github.com/pikers/anyio-marketstore + +which is normally preferred minus the discovered issues +in https://github.com/pikers/piker/issues/443 + +Which is the main reason for us moving away from this +platform.. + +''' +from __future__ import annotations +from contextlib import asynccontextmanager as acm +from datetime import datetime +# from pprint import pformat +from typing import ( + Union, +) + +from bidict import bidict +import tractor +import numpy as np +from anyio_marketstore import ( + Params, +) +import pendulum +# import purerpc + +from piker.service.marketstore import ( + MarketstoreClient, + tf_in_1s, + mk_tbk, + _ohlcv_dt, + MarketStoreError, +) +from anyio_marketstore import ( # noqa + open_marketstore_client, + MarketstoreClient, + Params, +) +from piker.log import get_logger +# from .._profile import Profiler + + +log = get_logger(__name__) + + +class MktsStorageClient: + ''' + High level storage api for both real-time and historical ingest. 
+ + ''' + name: str = 'marketstore' + + def __init__( + self, + client: MarketstoreClient, + config: dict, + + ) -> None: + # TODO: eventually this should be an api/interface type that + # ensures we can support multiple tsdb backends. + self.client = client + self._config = config + + # series' cache from tsdb reads + self._arrays: dict[str, np.ndarray] = {} + + @property + def address(self) -> str: + conf = self._config + return f'grpc://{conf["host"]}:{conf["port"]}' + + async def list_keys(self) -> list[str]: + return await self.client.list_symbols() + + async def search_keys(self, pattern: str) -> list[str]: + ''' + Search for time series key in the storage backend. + + ''' + ... + + async def write_ticks(self, ticks: list) -> None: + ... + + async def load( + self, + fqme: str, + timeframe: int, + + ) -> tuple[ + np.ndarray, # timeframe sampled array-series + datetime | None, # first dt + datetime | None, # last dt + ]: + first_tsdb_dt, last_tsdb_dt = None, None + hist = await self.read_ohlcv( + fqme, + # on first load we don't need to pull the max + # history per request size worth. + limit=3000, + timeframe=timeframe, + ) + log.info(f'Loaded tsdb history {hist}') + + if len(hist): + # breakpoint() + times: np.ndarray = hist['Epoch'] + + first, last = times[0], times[-1] + first_tsdb_dt, last_tsdb_dt = map( + pendulum.from_timestamp, + [first, last] + ) + + return ( + hist, # array-data + first_tsdb_dt, # start of query-frame + last_tsdb_dt, # most recent + ) + + async def read_ohlcv( + self, + fqme: str, + timeframe: int | str, + end: float | None = None, # epoch or none + limit: int = int(200e3), + + ) -> np.ndarray: + + client = self.client + syms = await client.list_symbols() + if fqme not in syms: + return {} + + # ensure end time is in correct int format! + if ( + end + and not isinstance(end, float) + ): + end = int(float(end)) + # breakpoint() + + # use the provided timeframe or 1s by default + tfstr = tf_in_1s.get(timeframe, tf_in_1s[1]) + + import pymarketstore as pymkts + sync_client = pymkts.Client() + param = pymkts.Params( + symbols=fqme, + timeframe=tfstr, + attrgroup='OHLCV', + end=end, + + limit=limit, + # limit_from_start=True, + ) + try: + reply = sync_client.query(param) + except Exception as err: + if 'no files returned from query parse: None' in err.args: + return [] + + raise + + data_set: pymkts.results.DataSet = reply.first() + array: np.ndarray = data_set.array + + # params = Params( + # symbols=fqme, + # timeframe=tfstr, + # attrgroup='OHLCV', + # end=end, + # # limit_from_start=True, + + # # TODO: figure the max limit here given the + # # ``purepc`` msg size limit of purerpc: 33554432 + # limit=limit, + # ) + + # for i in range(3): + # try: + # result = await client.query(params) + # break + # except purerpc.grpclib.exceptions.UnknownError as err: + # if 'snappy' in err.args: + # await tractor.breakpoint() + + # # indicate there is no history for this timeframe + # log.exception( + # f'Unknown mkts QUERY error: {params}\n' + # f'{err.args}' + # ) + # else: + # return {} + + # # TODO: it turns out column access on recarrays is actually slower: + # # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist + # # it might make sense to make these structured arrays? 
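# NOTE: the sync `pymarketstore` workaround used by `read_ohlcv()` above,
# as a standalone query; this assumes a marketstore instance is running
# locally on its default port and that the (hypothetical) table key below
# actually exists in its db:
import pymarketstore as pymkts

sync_client = pymkts.Client()  # defaults to http://localhost:5993/rpc
param = pymkts.Params(
    symbols='btcusdt.binance',  # hypothetical key
    timeframe='1Min',
    attrgroup='OHLCV',
    limit=100,
)
reply = sync_client.query(param)
data_set = reply.first()
array = data_set.array  # numpy struct-array with an 'Epoch' epoch-time field
print(array['Epoch'][:3] if len(array) else 'no rows returned')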
+ # data_set = result.by_symbols()[fqme] + # array = data_set.array + + # XXX: ensure sample rate is as expected + time = data_set.array['Epoch'] + if len(time) > 1: + time_step = time[-1] - time[-2] + ts = tf_in_1s.inverse[data_set.timeframe] + + if time_step != ts: + log.warning( + f'MKTS BUG: wrong timeframe loaded: {time_step}\n' + 'YOUR DATABASE LIKELY CONTAINS BAD DATA FROM AN OLD BUG ' + f'WIPING HISTORY FOR {ts}s' + ) + await tractor.breakpoint() + # await self.delete_ts(fqme, timeframe) + + # try reading again.. + # return await self.read_ohlcv( + # fqme, + # timeframe, + # end, + # limit, + # ) + + return array + + async def delete_ts( + self, + key: str, + timeframe: Union[int, str | None] = None, + fmt: str = 'OHLCV', + + ) -> bool: + + client = self.client + # syms = await client.list_symbols() + # if key not in syms: + # raise KeyError(f'`{key}` table key not found in\n{syms}?') + + tbk = mk_tbk(( + key, + tf_in_1s.get(timeframe, tf_in_1s[60]), + fmt, + )) + return await client.destroy(tbk=tbk) + + async def write_ohlcv( + self, + fqme: str, + ohlcv: np.ndarray, + timeframe: int, + append_and_duplicate: bool = True, + limit: int = int(800e3), + + ) -> None: + # build mkts schema compat array for writing + mkts_dt = np.dtype(_ohlcv_dt) + mkts_array = np.zeros( + len(ohlcv), + dtype=mkts_dt, + ) + # copy from shm array (yes it's this easy): + # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays + mkts_array[:] = ohlcv[[ + 'time', + 'open', + 'high', + 'low', + 'close', + 'volume', + ]] + + m, r = divmod(len(mkts_array), limit) + + tfkey = tf_in_1s[timeframe] + for i in range(m, 1): + to_push = mkts_array[i-1:i*limit] + + # write to db + resp = await self.client.write( + to_push, + tbk=f'{fqme}/{tfkey}/OHLCV', + + # NOTE: will will append duplicates + # for the same timestamp-index. + # TODO: pre-deduplicate? + isvariablelength=append_and_duplicate, + ) + + log.info( + f'Wrote {mkts_array.size} datums to tsdb\n' + ) + + for resp in resp.responses: + err = resp.error + if err: + raise MarketStoreError(err) + + if r: + to_push = mkts_array[m*limit:] + + # write to db + resp = await self.client.write( + to_push, + tbk=f'{fqme}/{tfkey}/OHLCV', + + # NOTE: will will append duplicates + # for the same timestamp-index. + # TODO: pre deduplicate? + isvariablelength=append_and_duplicate, + ) + + log.info( + f'Wrote {mkts_array.size} datums to tsdb\n' + ) + + for resp in resp.responses: + err = resp.error + if err: + raise MarketStoreError(err) + + # XXX: currently the only way to do this is through the CLI: + + # sudo ./marketstore connect --dir ~/.config/piker/data + # >> \show mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15 + # and this seems to block and use up mem.. + # >> \trim mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15 + + # relevant source code for this is here: + # https://github.com/alpacahq/marketstore/blob/master/cmd/connect/session/trim.go#L14 + # def delete_range(self, start_dt, end_dt) -> None: + # ... + + +ohlc_key_map = bidict({ + 'Epoch': 'time', + 'Open': 'open', + 'High': 'high', + 'Low': 'low', + 'Close': 'close', + 'Volume': 'volume', +}) + + +@acm +async def get_client( + grpc_port: int = 5995, # required + host: str = 'localhost', + +) -> MarketstoreClient: + ''' + Load a ``anyio_marketstore`` grpc client connected + to an existing ``marketstore`` server. 
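# NOTE: the `divmod()` chunking in `write_ohlcv()` above is meant to split
# a large struct-array into `limit`-sized segments plus a remainder (to
# stay under the grpc msg size cap); just that splitting logic, standalone:
import numpy as np

def iter_chunks(arr: np.ndarray, limit: int):
    m, r = divmod(len(arr), limit)
    for i in range(m):
        yield arr[i * limit:(i + 1) * limit]
    if r:  # trailing partial chunk
        yield arr[m * limit:]

a = np.arange(10)
assert [len(chunk) for chunk in iter_chunks(a, 4)] == [4, 4, 2]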
+ + ''' + async with open_marketstore_client( + host or 'localhost', + grpc_port, + ) as client: + yield MktsStorageClient( + client, + config={'host': host, 'port': grpc_port}, + ) diff --git a/piker/storage/marketstore/_ingest.py b/piker/storage/marketstore/_ingest.py new file mode 100644 index 000000000..7056399bc --- /dev/null +++ b/piker/storage/marketstore/_ingest.py @@ -0,0 +1,177 @@ +# piker: trading gear for hackers +# Copyright (C) 2018-present Tyler Goodlet (in stewardship of piker0) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Legacy marketstore ingest and streaming related clis. + +''' +# from .. import watchlists as wl +# from ..service.marketstore import ( + # get_client, + # stream_quotes, + # ingest_quote_stream, + # _url, + # _tick_tbk_ids, + # mk_tbk, +# ) + +# @cli.command() +# @click.option( +# '--url', +# default='ws://localhost:5993/ws', +# help='HTTP URL of marketstore instance' +# ) +# @click.argument('names', nargs=-1) +# @click.pass_obj +# def ms_stream( +# config: dict, +# names: list[str], +# url: str, +# ) -> None: +# ''' +# Connect to a marketstore time bucket stream for (a set of) symbols(s) +# and print to console. + +# ''' +# async def main(): +# # async for quote in stream_quotes(symbols=names): +# # log.info(f"Received quote:\n{quote}") +# ... + +# trio.run(main) + + +# @cli.command() +# @click.option( +# '--url', +# default=_url, +# help='HTTP URL of marketstore instance' +# ) +# @click.argument('names', nargs=-1) +# @click.pass_obj +# def ms_destroy(config: dict, names: list[str], url: str) -> None: +# """Destroy symbol entries in the local marketstore instance. +# """ +# async def main(): +# nonlocal names +# async with get_client(url) as client: +# +# if not names: +# names = await client.list_symbols() +# +# # default is to wipe db entirely. +# answer = input( +# "This will entirely wipe you local marketstore db @ " +# f"{url} of the following symbols:\n {pformat(names)}" +# "\n\nDelete [N/y]?\n") +# +# if answer == 'y': +# for sym in names: +# # tbk = _tick_tbk.format(sym) +# tbk = tuple(sym, *_tick_tbk_ids) +# print(f"Destroying {tbk}..") +# await client.destroy(mk_tbk(tbk)) +# else: +# print("Nothing deleted.") +# +# tractor.run(main) + + +# @cli.command() +# @click.option( +# '--tsdb_host', +# default='localhost' +# ) +# @click.option( +# '--tsdb_port', +# default=5993 +# ) +# @click.argument('symbols', nargs=-1) +# @click.pass_obj +# def storesh( +# config, +# tl, +# host, +# port, +# symbols: list[str], +# ): +# ''' +# Start an IPython shell ready to query the local marketstore db. 
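# NOTE: rough usage of the grpc-backed `get_client()` defined above;
# assumes a marketstore server is up on localhost:5995 and that the
# module import path matches this file's location:
import trio

async def show_keys():
    from piker.storage.marketstore import get_client

    async with get_client(grpc_port=5995) as client:
        print(await client.list_keys())

# trio.run(show_keys)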
+ +# ''' +# from piker.storage import open_tsdb_client +# from piker.service import open_piker_runtime + +# async def main(): +# nonlocal symbols + +# async with open_piker_runtime( +# 'storesh', +# enable_modules=['piker.service._ahab'], +# ): +# symbol = symbols[0] + +# async with open_tsdb_client(symbol): +# # TODO: ask if user wants to write history for detected +# # available shm buffers? +# from tractor.trionics import ipython_embed +# await ipython_embed() + +# trio.run(main) + + +# @cli.command() +# @click.option('--test-file', '-t', help='Test quote stream file') +# @click.option('--tl', is_flag=True, help='Enable tractor logging') +# @click.argument('name', nargs=1, required=True) +# @click.pass_obj +# def ingest(config, name, test_file, tl): +# ''' +# Ingest real-time broker quotes and ticks to a marketstore instance. + +# ''' +# # global opts +# loglevel = config['loglevel'] +# tractorloglevel = config['tractorloglevel'] +# # log = config['log'] + +# watchlist_from_file = wl.ensure_watchlists(config['wl_path']) +# watchlists = wl.merge_watchlist(watchlist_from_file, wl._builtins) +# symbols = watchlists[name] + +# grouped_syms = {} +# for sym in symbols: +# symbol, _, provider = sym.rpartition('.') +# if provider not in grouped_syms: +# grouped_syms[provider] = [] + +# grouped_syms[provider].append(symbol) + +# async def entry_point(): +# async with tractor.open_nursery() as n: +# for provider, symbols in grouped_syms.items(): +# await n.run_in_actor( +# ingest_quote_stream, +# name='ingest_marketstore', +# symbols=symbols, +# brokername=provider, +# tries=1, +# actorloglevel=loglevel, +# loglevel=tractorloglevel +# ) + +# tractor.run(entry_point) + diff --git a/piker/storage/nativedb.py b/piker/storage/nativedb.py new file mode 100644 index 000000000..274bf0399 --- /dev/null +++ b/piker/storage/nativedb.py @@ -0,0 +1,402 @@ +# piker: trading gear for hackers +# Copyright (C) Tyler Goodlet (in stewardship for pikers) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +`nativedb`: a lulzy Apache-parquet file manager (that some might + call a poor man's tsdb). + +AKA a `piker`-native file-system native "time series database" +without needing an extra process and no standard TSDB features, YET! + +''' +# TODO: like there's soo much.. +# - better name like "parkdb" or "nativedb" (lel)? bundle this lib with +# others to make full system: +# - tractor for failover and reliablity? +# - borg for replication and sync? +# +# - use `fastparquet` for appends: +# https://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.write +# (presuming it's actually faster then overwrites and +# makes sense in terms of impl?) +# +# - use `polars` support for lazy scanning, processing and schema +# validation? +# - https://pola-rs.github.io/polars-book/user-guide/io/parquet/#scan +# - https://pola-rs.github.io/polars-book/user-guide/concepts/lazy-vs-eager/ +# - consider delta writes for appends? 
+# - https://github.com/pola-rs/polars/blob/main/py-polars/polars/dataframe/frame.py#L3232 +# - consider multi-file appends with appropriate time-range naming? +# - https://pola-rs.github.io/polars-book/user-guide/io/multiple/ +# +# - use `borg` for replication? +# - https://borgbackup.readthedocs.io/en/stable/quickstart.html#remote-repositories +# - https://github.com/borgbackup/borg +# - https://borgbackup.readthedocs.io/en/stable/faq.html#usage-limitations +# - https://github.com/borgbackup/community +# - https://github.com/spslater/borgapi +# - https://nixos.wiki/wiki/ZFS +from __future__ import annotations +from contextlib import asynccontextmanager as acm +from datetime import datetime +from pathlib import Path +import time + +# from bidict import bidict +# import tractor +import numpy as np +import polars as pl +from pendulum import ( + from_timestamp, +) + +from piker import config +from piker.data import def_iohlcv_fields +from piker.data import ShmArray +from piker.log import get_logger +# from .._profile import Profiler + + +log = get_logger('storage.nativedb') + + +# NOTE: thanks to this SO answer for the below conversion routines +# to go from numpy struct-arrays to polars dataframes and back: +# https://stackoverflow.com/a/72054819 +def np2pl(array: np.ndarray) -> pl.DataFrame: + return pl.DataFrame({ + field_name: array[field_name] + for field_name in array.dtype.fields + }) + + +def pl2np( + df: pl.DataFrame, + dtype: np.dtype, + +) -> np.ndarray: + + # Create numpy struct array of the correct size and dtype + # and loop through df columns to fill in array fields. + array = np.empty( + df.height, + dtype, + ) + for field, col in zip( + dtype.fields, + df.columns, + ): + array[field] = df.get_column(col).to_numpy() + + return array + + +def detect_period(shm: ShmArray) -> float: + ''' + Attempt to detect the series time step sampling period + in seconds. + + ''' + # TODO: detect sample rate helper? + # calc ohlc sample period for naming + ohlcv: np.ndarray = shm.array + times: np.ndarray = ohlcv['time'] + period: float = times[-1] - times[-2] + if period == 0: + # maybe just last sample is borked? + period: float = times[-2] - times[-3] + + return period + + +def mk_ohlcv_shm_keyed_filepath( + fqme: str, + period: float, # ow known as the "timeframe" + datadir: Path, + +) -> str: + + if period < 1.: + raise ValueError('Sample period should be >= 1.!?') + + period_s: str = f'{period}s' + path: Path = datadir / f'{fqme}.ohlcv{period_s}.parquet' + return path + + +def unpack_fqme_from_parquet_filepath(path: Path) -> str: + + filename: str = str(path.name) + fqme, fmt_descr, suffix = filename.split('.') + assert suffix == 'parquet' + return fqme + + +ohlc_key_map = None + + +class NativeStorageClient: + ''' + High level storage api for OHLCV time series stored in + a (modern) filesystem as apache parquet files B) + + Part of a grander scheme to use arrow and parquet as our main + lowlevel data framework: https://arrow.apache.org/faq/. + + ''' + name: str = 'nativedb' + + def __init__( + self, + datadir: Path, + + ) -> None: + self._datadir = datadir + self._index: dict[str, dict] = {} + + # series' cache from tsdb reads + self._dfs: dict[str, dict[str, pl.DataFrame]] = {} + + @property + def address(self) -> str: + return self._datadir.as_uri() + + @property + def cardinality(self) -> int: + return len(self._index) + + # @property + # def compression(self) -> str: + # ... 
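# NOTE: the module TODO above mentions `polars` lazy scanning of parquet;
# a minimal example of what that could look like against a file written
# by this backend (the column names match the OHLCV schema used here,
# the tmp path is just for illustration):
import tempfile
from pathlib import Path
import polars as pl

path = Path(tempfile.gettempdir()) / 'btcusdt.binance.ohlcv60s.parquet'
pl.DataFrame({
    'time': [60., 120., 180.],
    'close': [10., 11., 12.],
    'volume': [1., 2., 3.],
}).write_parquet(path)

lazy = (
    pl.scan_parquet(path)
    .filter(pl.col('time') >= 120)
    .select(['time', 'close'])
)
df: pl.DataFrame = lazy.collect()  # nothing is read until collection
assert df.height == 2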
+ + async def list_keys(self) -> list[str]: + return list(self._index) + + def index_files(self): + for path in self._datadir.iterdir(): + if path.name in {'borked', 'expired',}: + continue + + key: str = path.name.rstrip('.parquet') + fqme, _, descr = key.rpartition('.') + prefix, _, suffix = descr.partition('ohlcv') + period: int = int(suffix.strip('s')) + + # cache description data + self._index[fqme] = { + 'path': path, + 'period': period, + } + + return self._index + + + # async def search_keys(self, pattern: str) -> list[str]: + # ''' + # Search for time series key in the storage backend. + + # ''' + # ... + + # async def write_ticks(self, ticks: list) -> None: + # ... + + async def load( + self, + fqme: str, + timeframe: int, + + ) -> tuple[ + np.ndarray, # timeframe sampled array-series + datetime | None, # first dt + datetime | None, # last dt + ] | None: + try: + array: np.ndarray = await self.read_ohlcv( + fqme, + timeframe, + ) + except FileNotFoundError: + return None + + times = array['time'] + return ( + array, + from_timestamp(times[0]), + from_timestamp(times[-1]), + ) + + def mk_path( + self, + fqme: str, + period: float, + ) -> Path: + return mk_ohlcv_shm_keyed_filepath( + fqme=fqme, + period=period, + datadir=self._datadir, + ) + + async def read_ohlcv( + self, + fqme: str, + timeframe: int | str, + end: float | None = None, # epoch or none + # limit: int = int(200e3), + + ) -> np.ndarray: + path: Path = self.mk_path(fqme, period=int(timeframe)) + df: pl.DataFrame = pl.read_parquet(path) + self._dfs.setdefault(timeframe, {})[fqme] = df + + # TODO: filter by end and limit inputs + # times: pl.Series = df['time'] + array: np.ndarray = pl2np( + df, + dtype=np.dtype(def_iohlcv_fields), + ) + return array + + async def as_df( + self, + fqme: str, + period: int = 60, + + ) -> pl.DataFrame: + try: + return self._dfs[period][fqme] + except KeyError: + await self.read_ohlcv(fqme, period) + return self._dfs[period][fqme] + + def _write_ohlcv( + self, + fqme: str, + ohlcv: np.ndarray | pl.DataFrame, + timeframe: int, + + ) -> Path: + ''' + Sync version of the public interface meth, since we don't + currently actually need or support an async impl. + + ''' + path: Path = mk_ohlcv_shm_keyed_filepath( + fqme=fqme, + period=timeframe, + datadir=self._datadir, + ) + if isinstance(ohlcv, np.ndarray): + df: pl.DataFrame = np2pl(ohlcv) + else: + df = ohlcv + + # TODO: in terms of managing the ultra long term data + # - use a proper profiler to measure all this IO and + # roundtripping! + # - try out ``fastparquet``'s append writing: + # https://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.write + start = time.time() + df.write_parquet(path) + delay: float = round( + time.time() - start, + ndigits=6, + ) + print( + f'parquet write took {delay} secs\n' + f'file path: {path}' + ) + return path + + + async def write_ohlcv( + self, + fqme: str, + ohlcv: np.ndarray, + timeframe: int, + + ) -> Path: + ''' + Write input ohlcv time series for fqme and sampling period + to (local) disk. + + ''' + return self._write_ohlcv( + fqme, + ohlcv, + timeframe, + ) + + async def delete_ts( + self, + key: str, + timeframe: int | None = None, + + ) -> bool: + path: Path = mk_ohlcv_shm_keyed_filepath( + fqme=key, + period=timeframe, + datadir=self._datadir, + ) + if path.is_file(): + path.unlink() + log.warning(f'Deleting parquet entry:\n{path}') + else: + log.error(f'No path exists:\n{path}') + + return path + + # TODO: allow wiping and refetching a segment of the OHLCV timeseries + # data. 
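# NOTE: rough usage of the new storage client api from user code,
# mirroring the `anal` cmd shown earlier; assumes `open_storage_client`
# is importable from `piker.storage` (as in the cli module above) and
# that wrapping in the piker runtime is desired even though `nativedb`
# itself is just local files:
import trio

async def list_series():
    from piker.service import open_piker_runtime
    from piker.storage import open_storage_client

    async with (
        open_piker_runtime('storage_demo'),
        open_storage_client() as (mod, client),
    ):
        print(f'backend: {mod.name} -> {client.address}')
        for key in await client.list_keys():
            print(key)

# trio.run(list_series)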
+ # def clear_range( + # self, + # key: str, + # start_dt: datetime, + # end_dt: datetime, + # timeframe: int | None = None, + # ) -> pl.DataFrame: + # ''' + # Clear and re-fetch a range of datums for the OHLCV time series. + + # Useful for series editing from a chart B) + + # ''' + # ... + + +@acm +async def get_client( + + # TODO: eventually support something something apache arrow + # transport over ssh something..? + # host: str | None = None, + + **kwargs, + +) -> NativeStorageClient: + ''' + Load a ``anyio_marketstore`` grpc client connected + to an existing ``marketstore`` server. + + ''' + datadir: Path = config.get_conf_dir() / 'nativedb' + if not datadir.is_dir(): + log.info(f'Creating `nativedb` director: {datadir}') + datadir.mkdir() + + client = NativeStorageClient(datadir) + client.index_files() + yield client diff --git a/piker/ui/_app.py b/piker/ui/_app.py index ee4faf570..644e7567d 100644 --- a/piker/ui/_app.py +++ b/piker/ui/_app.py @@ -185,7 +185,7 @@ def _main( func=_async_main, args=( syms, - {mod.name: mod for mod in brokermods}, + brokermods, piker_loglevel, ), main_widget_type=GodWidget, diff --git a/piker/ui/_axes.py b/piker/ui/_axes.py index 470df3f5e..7ed3d5cbb 100644 --- a/piker/ui/_axes.py +++ b/piker/ui/_axes.py @@ -215,13 +215,17 @@ def set_title( # self.setLabel(title) # self.showLabel() - label = self.title = Label( - view=view or self.linkedView(), - fmt_str=title, - color=color or self.text_color, - parent=self, - # update_on_range_change=False, - ) + label: Label | None = self.title + if label is None: + label = self.title = Label( + view=view or self.linkedView(), + fmt_str=title, + color=color or self.text_color, + parent=self, + # update_on_range_change=False, + ) + else: + label.fmt_str: str = title def below_axis() -> QPointF: return QPointF( diff --git a/piker/ui/_chart.py b/piker/ui/_chart.py index 21ef0bcb7..1b410164b 100644 --- a/piker/ui/_chart.py +++ b/piker/ui/_chart.py @@ -1121,8 +1121,23 @@ def overlay_plotitem( # add axis title # TODO: do we want this API to still work? # raxis = pi.getAxis('right') - axis = self.pi_overlay.get_axis(pi, axis_side) - axis.set_title(axis_title or name, view=pi.getViewBox()) + overlay: PlotItemOverlay = self.pi_overlay + + # Whenever overlays exist always add a y-axis label to the + # main axis as well! + for name, axis_info in self.plotItem.axes.items(): + axis = axis_info['item'] + if isinstance(axis, PriceAxis): + axis.set_title(self.linked.mkt.pair()) + + axis: PriceAxis = overlay.get_axis( + pi, + axis_side, + ) + axis.set_title( + axis_title or name, + view=pi.getViewBox(), + ) return pi @@ -1213,11 +1228,13 @@ def draw_curve( if add_sticky: - if pi is not self.plotItem: + main_pi: pgo.PlotItem = self.plotItem + if pi is not main_pi: # overlay = self.pi_overlay # assert pi in overlay.overlays overlay = self.pi_overlay assert pi in overlay.overlays + assert main_pi is overlay.root_plotitem axis = overlay.get_axis( pi, add_sticky, diff --git a/piker/ui/_cursor.py b/piker/ui/_cursor.py index 0a2c82b1b..f69f503a5 100644 --- a/piker/ui/_cursor.py +++ b/piker/ui/_cursor.py @@ -215,8 +215,8 @@ def update_from_ohlc( "H:{}
" "L:{}
" "C:{}
" - "V:{}
" - "wap:{}".format( + "V:{}
".format( + # "wap:{}".format( *array[ix][ [ 'time', @@ -225,7 +225,7 @@ def update_from_ohlc( 'low', 'close', 'volume', - 'bar_wap', + # 'bar_wap', ] ], # name=name, diff --git a/piker/ui/_dataviz.py b/piker/ui/_dataviz.py index a24c7d5c3..9da45f448 100644 --- a/piker/ui/_dataviz.py +++ b/piker/ui/_dataviz.py @@ -49,7 +49,7 @@ OHLCBarsAsCurveFmtr, # OHLC converted to line StepCurveFmtr, # "step" curve (like for vlm) ) -from ..data._pathops import ( +from ..data._timeseries import ( slice_from_time, ) from ._ohlc import ( @@ -371,8 +371,8 @@ def index_step( # the source data. if self._index_step is None: - index = self.shm.array[self.index_field] - isample = index[:16] + index: np.ndarray = self.shm.array[self.index_field] + isample: np.ndarray = index[-16:] mxdiff: None | float = None for step in np.diff(isample): @@ -752,6 +752,7 @@ def update_graphics( profiler: Profiler | None = None, do_append: bool = True, + force_redraw: bool = False, **kwargs, @@ -796,7 +797,7 @@ def update_graphics( graphics, ) - should_redraw: bool = False + should_redraw: bool = force_redraw or False ds_allowed: bool = True # guard for m4 activation # TODO: probably specialize ``Renderer`` types instead of @@ -906,6 +907,11 @@ def update_graphics( should_ds=should_ds, showing_src_data=showing_src_data, + # XXX: reallocate entire underlying "format graphics array" + # whenever the caller insists, such as on history + # backfills. + force_reformat=force_redraw, + do_append=do_append, ) @@ -925,6 +931,7 @@ def update_graphics( reset_cache = False if ( reset_cache + or should_redraw ): # assign output paths to graphicis obj but # after a coords-cache reset. diff --git a/piker/ui/_display.py b/piker/ui/_display.py index c747eb311..1884d018b 100644 --- a/piker/ui/_display.py +++ b/piker/ui/_display.py @@ -33,7 +33,6 @@ import trio import pyqtgraph as pg # import pendulum - from msgspec import field # from .. import brokers @@ -51,6 +50,7 @@ ) from ..data._sampling import ( _tick_groups, + _auction_ticks, open_sample_stream, ) from ._axes import YAxisLabel @@ -204,15 +204,17 @@ class DisplayState(Struct): vlm_chart: ChartPlotWidget | None = None vlm_sticky: YAxisLabel | None = None - wap_in_history: bool = False async def increment_history_view( + # min_istream: tractor.MsgStream, ds: DisplayState, ): - hist_chart = ds.hist_chart - hist_viz = ds.hist_viz + hist_chart: ChartPlotWidget = ds.hist_chart + hist_viz: Viz = ds.hist_viz + viz: Viz = ds.viz assert 'hist' in hist_viz.shm.token['shm_name'] + name: str = hist_viz.name # TODO: seems this is more reliable at keeping the slow # chart incremented in view more correctly? @@ -221,8 +223,14 @@ async def increment_history_view( # wakeups/ctx switches verus logic checks (as normal) # - we need increment logic that only does the view shift # call when the uppx permits/needs it - async with open_sample_stream(1.) as istream: - async for msg in istream: + + # draw everything from scratch on first entry! + for curve_name, hist_viz in hist_chart._vizs.items(): + log.info(f'Forcing hard redraw -> {curve_name}') + hist_viz.update_graphics(force_redraw=True) + + async with open_sample_stream(1.) 
as min_istream: + async for msg in min_istream: profiler = Profiler( msg=f'History chart cycle for: `{ds.fqme}`', @@ -232,12 +240,28 @@ async def increment_history_view( # ms_threshold=4, ) - # l3 = ds.viz.shm.array[-3:] - # print( - # f'fast step for {ds.flume.mkt.fqme}:\n' - # f'{list(l3["time"])}\n' - # f'{l3}\n' - # ) + # NOTE: when a backfill msg is broadcast from the + # history mgmt layer, we match against the equivalent + # `Viz` and "hard re-render" (i.e. re-allocate the + # in-mem xy-array formats managed in + # `.data._formatters) its curve graphics to fill + # on-chart gaps. + # TODO: specifically emit/handle range tuples? + # - samplerd could emit the actual update range via + # tuple and then we only enter the below block if that + # range is detected as in-view? + if ( + (bf_wut := msg.get('backfilling', False)) + ): + viz_name, timeframe = bf_wut + if viz_name == name: + log.info(f'Forcing hard redraw -> {name}@{timeframe}') + match timeframe: + case 60: + hist_viz.update_graphics(force_redraw=True) + case 1: + viz.update_graphics(force_redraw=True) + # check if slow chart needs an x-domain shift and/or # y-range resize. ( @@ -272,7 +296,7 @@ async def increment_history_view( hist_chart.increment_view(datums=append_diff) profiler('hist tread view') - profiler.finish() + profiler.finish() async def graphics_update_loop( @@ -280,8 +304,9 @@ async def graphics_update_loop( nurse: trio.Nursery, godwidget: GodWidget, feed: Feed, + # min_istream: tractor.MsgStream, + pis: dict[str, list[pgo.PlotItem, pgo.PlotItem]] = {}, - wap_in_history: bool = False, vlm_charts: dict[str, ChartPlotWidget] = {}, ) -> None: @@ -429,11 +454,13 @@ async def graphics_update_loop( nurse.start_soon( increment_history_view, + # min_istream, ds, ) + await trio.sleep(0) if ds.hist_vars['i_last'] < ds.hist_vars['i_last_append']: - breakpoint() + await tractor.breakpoint() # main real-time quotes update loop stream: tractor.MsgStream @@ -459,7 +486,6 @@ async def graphics_update_loop( for fqme, quote in quotes.items(): ds = dss[fqme] ds.quotes = quote - rt_pi, hist_pi = pis[fqme] # chart isn't active/shown so skip render cycle and @@ -490,19 +516,24 @@ def graphics_update_cycle( ds: DisplayState, quote: dict, - wap_in_history: bool = False, trigger_all: bool = False, # flag used by prepend history updates prepend_update_index: int | None = None, + # NOTE: this has to be manually turned on in code (or by + # caller) to get profiling since by default we never want the + # overhead! + debug_n_trace: bool = False, + ) -> None: - profiler = Profiler( - msg=f'Graphics loop cycle for: `{ds.fqme}`', - disabled=not pg_profile_enabled(), - ms_threshold=ms_slower_then, - delayed=True, - # ms_threshold=4, - ) + if debug_n_trace: + profiler = Profiler( + msg=f'Graphics loop cycle for: `{ds.fqme}`', + disabled=not pg_profile_enabled(), + ms_threshold=ms_slower_then, + delayed=True, + # ms_threshold=4, + ) # TODO: SPEEDing this all up.. # - optimize this whole graphics stack with ``numba`` hopefully @@ -534,7 +565,8 @@ def graphics_update_cycle( do_rt_update, should_tread, ) = main_viz.incr_info(ds=ds) - profiler('`.incr_info()`') + if debug_n_trace: + profiler('`.incr_info()`') # TODO: we should only run mxmn when we know # an update is due via ``do_px_step`` above. @@ -572,7 +604,8 @@ def graphics_update_cycle( # since .interact_graphics_cycle() also calls it? # I guess we can add a guard in there? 
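# NOTE: the 'backfilling' broadcast handling added above, reduced to
# a standalone function; the msg shape ('<viz name>', <timeframe>) is
# taken from the code above, the returned strings are illustrative only:
def handle_backfill_msg(
    msg: dict,
    name: str,
) -> str | None:
    if (bf_wut := msg.get('backfilling', False)):
        viz_name, timeframe = bf_wut
        if viz_name == name:
            match timeframe:
                case 60:
                    return 'hard-redraw-history-chart'
                case 1:
                    return 'hard-redraw-rt-chart'
    return None

assert handle_backfill_msg(
    {'backfilling': ('mnq.cme.ib', 60)},  # hypothetical fqme
    'mnq.cme.ib',
) == 'hard-redraw-history-chart'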
_, i_read_range, _ = main_viz.update_graphics() - profiler('`Viz.update_graphics()` call') + if debug_n_trace: + profiler('`Viz.update_graphics()` call') # don't real-time "shift" the curve to the # left unless we get one of the following: @@ -587,7 +620,8 @@ def graphics_update_cycle( # if vlm_chart: # vlm_chart.increment_view(datums=append_diff) - profiler('view incremented') + if debug_n_trace: + profiler('view incremented') # NOTE: do this **after** the tread to ensure we take the yrange # from the most current view x-domain. @@ -599,16 +633,25 @@ def graphics_update_cycle( i_read_range, main_viz, ds.vlm_viz, - profiler, + profiler if debug_n_trace else None, ) - profiler(f'{fqme} `multi_maxmin()` call') + if debug_n_trace: + profiler(f'{fqme} `multi_maxmin()` call') # iterate frames of ticks-by-type such that we only update graphics # using the last update per type where possible. ticks_by_type = quote.get('tbt', {}) for typ, ticks in ticks_by_type.items(): + if typ not in _auction_ticks: + if debug_n_trace: + log.warning( + 'Skipping non-auction-native `{typ}` ticks:\n' + f'{ticks}\n' + ) + continue + # NOTE: ticks are `.append()`-ed to the `ticks_by_type: dict` by the # `._sampling.uniform_rate_send()` loop tick = ticks[-1] # get most recent value @@ -628,16 +671,18 @@ def graphics_update_cycle( if ( price < mn ): + if debug_n_trace: + log.info(f'{this_viz.name} new MN from TICK {mn} -> {price}') mn = price yrange_margin = 0.16 - # # print(f'{this_viz.name} new MN from TICK {mn}') if ( price > mx ): + if debug_n_trace: + log.info(f'{this_viz.name} new MX from TICK {mx} -> {price}') mx = price yrange_margin = 0.16 - # # print(f'{this_viz.name} new MX from TICK {mx}') # mx = max(price, mx) # mn = min(price, mn) @@ -655,10 +700,6 @@ def graphics_update_cycle( ds.last_price_sticky.update_from_data(*end_ic) ds.hist_last_price_sticky.update_from_data(*end_ic) - # update vwap overlay line - # if wap_in_history: - # chart.get_viz('bar_wap').update_graphics() - # update OHLC chart last bars # TODO: fix the only last uppx stuff.... main_viz.draw_last() # only_last_uppx=True) @@ -699,7 +740,8 @@ def graphics_update_cycle( ): l1.bid_label.update_fields({'level': price, 'size': size}) - profiler('L1 labels updates') + if debug_n_trace: + profiler('L1 labels updates') # Y-autoranging: adjust y-axis limits based on state tracking # of previous "last" L1 values which are in view. @@ -717,9 +759,14 @@ def graphics_update_cycle( # complain about out-of-range outliers which can show up # in certain annoying feeds (like ib).. 
if ( - abs(mx_diff) > .25 * lmx - or - abs(mn_diff) > .25 * lmn + lmx + and lmn + and ( + abs(mx_diff) > .25 * lmx + or + abs(mn_diff) > .25 * lmn + ) + and debug_n_trace ): log.error( f'WTF MN/MX IS WAY OFF:\n' @@ -730,6 +777,9 @@ def graphics_update_cycle( f'mx_diff: {mx_diff}\n' f'mn_diff: {mn_diff}\n' ) + chart.pause_all_feeds() + breakpoint() + chart.resume_all_feeds() # TODO: track local liv maxmin without doing a recompute all the # time..plus, just generally the user is more likely to be @@ -772,7 +822,8 @@ def graphics_update_cycle( }, } ) - profiler('main vb y-autorange') + if debug_n_trace: + profiler('main vb y-autorange') # SLOW CHART y-auto-range resize casd # (NOTE: still is still inside the y-range @@ -800,7 +851,8 @@ def graphics_update_cycle( # f'datetime: {dt}\n' # ) - # profiler('hist `Viz.incr_info()`') + # if debug_n_trace: + # profiler('hist `Viz.incr_info()`') # hist_chart = ds.hist_chart # if ( @@ -856,8 +908,8 @@ def graphics_update_cycle( # `draw_last_datum()` .. only_last_uppx=True, ) - - profiler('overlays updates') + if debug_n_trace: + profiler('overlays updates') # volume chart logic.. # TODO: can we unify this with the above loop? @@ -905,7 +957,8 @@ def graphics_update_cycle( # connected to update accompanying overlay # graphics.. ) - profiler('`main_vlm_viz.update_graphics()`') + if debug_n_trace: + profiler('`main_vlm_viz.update_graphics()`') if ( mx_vlm_in_view @@ -928,7 +981,8 @@ def graphics_update_cycle( }, }, ) - profiler('`vlm_chart.view.interact_graphics_cycle()`') + if debug_n_trace: + profiler('`vlm_chart.view.interact_graphics_cycle()`') # update all downstream FSPs for curve_name, viz in vlm_vizs.items(): @@ -948,7 +1002,8 @@ def graphics_update_cycle( curve_name, array_key=curve_name, ) - profiler(f'vlm `Viz[{viz.name}].update_graphics()`') + if debug_n_trace: + profiler(f'vlm `Viz[{viz.name}].update_graphics()`') # is this even doing anything? # (pretty sure it's the real-time @@ -960,9 +1015,10 @@ def graphics_update_cycle( # do_linked_charts=False, # do_overlay_scaling=False, # ) - profiler( - f'Viz[{viz.name}].plot.vb.interact_graphics_cycle()`' - ) + if debug_n_trace: + profiler( + f'Viz[{viz.name}].plot.vb.interact_graphics_cycle()`' + ) # even if we're downsampled bigly # draw the last datum in the final @@ -976,11 +1032,14 @@ def graphics_update_cycle( # always update the last datum-element # graphic for all vizs viz.draw_last(array_key=curve_name) - profiler(f'vlm `Viz[{viz.name}].draw_last()`') + if debug_n_trace: + profiler(f'vlm `Viz[{viz.name}].draw_last()`') - profiler('vlm Viz all updates complete') + if debug_n_trace: + profiler('vlm Viz all updates complete') - profiler.finish() + if debug_n_trace: + profiler.finish() async def link_views_with_region( @@ -1214,12 +1273,15 @@ async def display_symbol_data( ) feed: Feed - async with open_feed( - fqmes, - loglevel=loglevel, - tick_throttle=cycles_per_feed, - - ) as feed: + async with ( + # open_sample_stream(1.) as min_istream, + open_feed( + fqmes, + loglevel=loglevel, + tick_throttle=cycles_per_feed, + + ) as feed, + ): # use expanded contract symbols passed back from feed layer. fqmes = list(feed.flumes.keys()) @@ -1289,7 +1351,7 @@ async def display_symbol_data( hist_ohlcv: ShmArray = flume.hist_shm mkt: MktPair = flume.mkt - fqme = mkt.fqme + fqme: str = mkt.fqme hist_chart = hist_linked.plot_ohlc_main( mkt, @@ -1356,21 +1418,6 @@ async def display_symbol_data( loglevel, ) - # XXX: FOR SOME REASON THIS IS CAUSING HANGZ!?! 
- # plot historical vwap if available - wap_in_history = False - # if ( - # brokermod._show_wap_in_history - # and 'bar_wap' in bars.dtype.fields - # ): - # wap_in_history = True - # rt_chart.draw_curve( - # name='bar_wap', - # shm=ohlcv, - # color='default_light', - # add_label=False, - # ) - godwidget.resize_all() await trio.sleep(0) @@ -1386,7 +1433,7 @@ async def display_symbol_data( hist_pi = hist_chart.overlay_plotitem( name=fqme, - axis_title=fqme, + axis_title=flume.mkt.pair(), ) hist_viz = hist_chart.draw_curve( @@ -1416,7 +1463,7 @@ async def display_symbol_data( rt_pi = rt_chart.overlay_plotitem( name=fqme, - axis_title=fqme, + axis_title=flume.mkt.pair(), ) rt_viz = rt_chart.draw_curve( @@ -1491,8 +1538,9 @@ async def display_symbol_data( ln, godwidget, feed, + # min_istream, + pis, - wap_in_history, vlm_charts, ) diff --git a/piker/ui/_fsp.py b/piker/ui/_fsp.py index b4aa2b106..5202ea975 100644 --- a/piker/ui/_fsp.py +++ b/piker/ui/_fsp.py @@ -30,14 +30,14 @@ ) import msgspec -import tractor import pyqtgraph as pg +import tractor +from tractor.trionics import maybe_open_context import trio from trio_typing import TaskStatus from piker.data.types import Struct from ._axes import PriceAxis -from .._cacheables import maybe_open_context from ..calc import humanize from ..data._sharedmem import ( ShmArray, @@ -377,7 +377,7 @@ def __init__( # TODO: make this a `.src_flume` and add # a `dst_flume`? # (=> but then wouldn't this be the most basic `Viz`?) - self.flume = flume + self.flume: Flume = flume def rr_next_portal(self) -> tractor.Portal: name, portal = next(self._rr_next_actor) @@ -479,9 +479,15 @@ async def start_engine_task( fqme: str = src_mkt.get_fqme(delim_char='') # allocate an output shm array + + # NOTE: rn we assume the HFT 1s period chart + # is always used! 
+ src_shm: ShmArray = self.flume._rt_shm + key, dst_shm, opened = maybe_mk_fsp_shm( fqme, target=target, + size=src_shm._token.size, readonly=True, ) diff --git a/piker/ui/_position.py b/piker/ui/_position.py index a2e6c19ea..0cf181369 100644 --- a/piker/ui/_position.py +++ b/piker/ui/_position.py @@ -294,7 +294,10 @@ def apply_setting( f'limit must > then current pp: {dsize}' ) # reset position size value - alloc.currency_limit = dsize + alloc.currency_limit = round( + dsize, + ndigits=3, + ) return False alloc.currency_limit = value diff --git a/piker/ui/_render.py b/piker/ui/_render.py index fb41b696b..2a442e987 100644 --- a/piker/ui/_render.py +++ b/piker/ui/_render.py @@ -136,6 +136,7 @@ def render( do_append: bool = True, use_fpath: bool = True, + force_reformat: bool = False, # only render datums "in view" of the ``ChartView`` use_vr: bool = True, @@ -174,6 +175,7 @@ def render( profiler, slice_to_inview=use_vr, + force_full_realloc=force_reformat, ) # no history in view case @@ -222,7 +224,10 @@ def render( or should_redraw ): # print(f"{self.viz.name} -> REDRAWING BRUH") - if new_sample_rate and showing_src_data: + if ( + new_sample_rate + and showing_src_data + ): log.info(f'DE-downsampling -> {array_key}') self._in_ds = False diff --git a/piker/ui/order_mode.py b/piker/ui/order_mode.py index 2cd22610c..0b3a18970 100644 --- a/piker/ui/order_mode.py +++ b/piker/ui/order_mode.py @@ -31,6 +31,7 @@ ) import uuid +from bidict import bidict import tractor import trio from PyQt5.QtCore import Qt @@ -601,50 +602,65 @@ def on_cancel( ) def cancel_orders_under_cursor(self) -> list[str]: - return self.cancel_orders_from_lines( - self.lines.lines_under_cursor() - ) - - def cancel_all_orders(self) -> list[str]: - ''' - Cancel all orders for the current chart. - - ''' - return self.cancel_orders_from_lines( - self.lines.all_lines() + return self.cancel_orders( + self.oids_from_lines( + self.lines.lines_under_cursor() + ) ) - def cancel_orders_from_lines( + def oids_from_lines( self, lines: list[LevelLine], - ) -> list[str]: + ) -> list[Dialog]: - ids: list = [] - if lines: - key = self.multistatus.open_status( - f'cancelling {len(lines)} orders', - final_msg=f'cancelled {len(lines)} orders', - group_key=True - ) + oids: set[str] = set() + for line in lines: + dialog: Dialog = getattr(line, 'dialog', None) + oid: str = dialog.uuid + if ( + dialog + and oid not in oids + ): + oids.add(oid) + + return oids - # cancel all active orders and triggers - for line in lines: - dialog = getattr(line, 'dialog', None) + def cancel_orders( + self, + oids: list[str], - if dialog: - oid = dialog.uuid + ) -> None: + ''' + Cancel all orders from a list of order ids: `oids`. - cancel_status_close = self.multistatus.open_status( - f'cancelling order {oid}', - group_key=key, - ) - dialog.last_status_close = cancel_status_close + ''' + key = self.multistatus.open_status( + f'cancelling {len(oids)} orders', + final_msg=f'cancelled orders:\n{oids}', + group_key=True + ) + for oid in oids: + dialog: Dialog = self.dialogs[oid] + self.client.cancel_nowait(uuid=oid) + cancel_status_close = self.multistatus.open_status( + f'cancelling order {oid}', + group_key=key, + ) + dialog.last_status_close = cancel_status_close - ids.append(oid) - self.client.cancel_nowait(uuid=oid) + def cancel_all_orders(self) -> None: + ''' + Cancel all unique orders / executions by extracting unique + order ids from all order lines and then submitting cancel + requests for each dialog. 
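# NOTE: the oid de-duplication idea behind `oids_from_lines()` above as
# a tiny standalone helper; `Line`/`Dialog` here are stand-in types, not
# piker's real ui/ems classes:
from dataclasses import dataclass

@dataclass
class Dialog:
    uuid: str

@dataclass
class Line:
    dialog: Dialog | None = None

def oids_from_lines(lines: list[Line]) -> set[str]:
    oids: set[str] = set()
    for line in lines:
        dialog = line.dialog
        if dialog and dialog.uuid not in oids:
            oids.add(dialog.uuid)
    return oids

lines = [Line(Dialog('a')), Line(Dialog('a')), Line(), Line(Dialog('b'))]
assert oids_from_lines(lines) == {'a', 'b'}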
- return ids + ''' + return self.cancel_orders( + self.oids_from_lines( + self.lines.all_lines() + ) + ) def load_unknown_dialog_from_msg( self, @@ -750,7 +766,7 @@ async def open_order_mode( trackers: dict[str, PositionTracker] = {} # load account names from ``brokers.toml`` - accounts_def = config.load_accounts( + accounts_def: bidict[str, str | None] = config.load_accounts( providers=[mkt.broker], ) @@ -1127,16 +1143,21 @@ async def process_trade_msg( case Status(resp='fill'): # handle out-of-piker fills reporting? - order: Order = client._sent_orders.get(oid) - if not order: + order: Order | None + if not (order := client._sent_orders.get(oid)): + + # set it from last known request msg log.warning(f'order {oid} is unknown') order = msg.req - action = order.action - details = msg.brokerd_msg + # XXX TODO: have seen order be a dict here!? + # that should never happen tho? + action: str = order.action + details: dict = msg.brokerd_msg - # TODO: put the actual exchange timestamp? - # TODO: some kinda progress system? + # TODO: state tracking: + # - put the actual exchange timestamp? + # - some kinda progress system? # NOTE: currently the ``kraken`` openOrders sub # doesn't deliver their engine timestamp as part of diff --git a/piker/ui/view_mode.py b/piker/ui/view_mode.py index ecb62557a..d785c67a2 100644 --- a/piker/ui/view_mode.py +++ b/piker/ui/view_mode.py @@ -19,6 +19,7 @@ ''' from __future__ import annotations +from operator import itemgetter from typing import ( Any, Literal, @@ -30,7 +31,7 @@ import pyqtgraph as pg from ..data.types import Struct -from ..data._pathops import slice_from_time +from ..data._timeseries import slice_from_time from ..log import get_logger from .._profile import Profiler @@ -197,15 +198,17 @@ def overlay_viewlists( ) -> None: ''' - Calculate and apply y-domain (axis y-range) multi-curve overlay adjustments - a set of ``plots`` based on the requested ``method``. + Calculate and apply y-domain (axis y-range) multi-curve overlay + adjustments a set of ``plots`` based on the requested + ``method``. ''' chart_name: str chart: ChartPlotWidget + for chart_name, chart in plots.items(): - overlay_viz_items = chart._vizs.items() + overlay_viz_items: dict = chart._vizs # Common `PlotItem` maxmin table; presumes that some path # graphics (and thus their backing data sets) are in the @@ -271,6 +274,7 @@ def overlay_viewlists( # determine auto-ranging input for `._set_yrange()`. # this is primarly used for our so called "log-linearized # multi-plot" overlay technique. + # vizs_by_disp: list[tuple[float, Viz]] = [] overlay_table: dict[ float, tuple[ @@ -288,7 +292,7 @@ def overlay_viewlists( ] = {} # multi-curve overlay processing stage - for name, viz in overlay_viz_items: + for name, viz in overlay_viz_items.items(): out = _maybe_calc_yrange( viz, @@ -354,9 +358,9 @@ def overlay_viewlists( start_t = row_start['time'] # returns scalars - r_up = (ymx - y_ref) / y_ref - r_down = (ymn - y_ref) / y_ref - disp = r_up - r_down + r_up: float = (ymx - y_ref) / y_ref + r_down: float = (ymn - y_ref) / y_ref + disp: float = r_up - r_down msg = ( f'Viz[{viz.name}][{key}]: @{chart_name}\n' @@ -489,7 +493,15 @@ def overlay_viewlists( # register curves by a "full" dispersion metric for # later sort order in the overlay (technique # ) application loop below. - overlay_table[disp] = ( + pair: tuple[float, Viz] = (disp, viz) + + # time series are so similar they have same + # dispersion with `float` precision.. 
+ if entry := overlay_table.get(pair): + raise RuntimeError('Duplicate entry!? -> {entry}') + + # vizs_by_disp.append(pair) + overlay_table[pair] = ( viz.plot.vb, viz, y_ref, @@ -548,7 +560,7 @@ def overlay_viewlists( r_up_mx: float r_dn_mn: float - mx_disp = max(overlay_table) + mx_pair: tuple = max(overlay_table, key=itemgetter(0)) if debug_print: # print overlay table in descending dispersion order @@ -564,11 +576,11 @@ def overlay_viewlists( ) if method == 'loglin_ref_to_curve': - mx_entry = overlay_table.pop(mx_disp) + mx_entry = overlay_table.pop(mx_pair) else: - # TODO: for pin to first-in-view we need to no pop this from the + # TODO: for pin to first-in-view we need to NOT pop this from the # table, but can we simplify below code even more? - mx_entry = overlay_table[mx_disp] + mx_entry = overlay_table[mx_pair] ( mx_view, # viewbox @@ -599,7 +611,11 @@ def overlay_viewlists( tuple[Viz, float, float, float, float] ] = {} - for full_disp in reversed(overlay_table): + for pair in sorted( + overlay_table, + key=itemgetter(0), + reverse=True, + ): ( view, viz, @@ -610,7 +626,7 @@ def overlay_viewlists( minor_in_view, r_up, r_dn, - ) = overlay_table[full_disp] + ) = overlay_table[pair] key = 'open' if viz.is_ohlc else viz.name xref = minor_in_view[0]['time'] @@ -839,7 +855,7 @@ def overlay_viewlists( print( 'SCALING PHASE' + '-'*100 + '\n\n' '_________MAJOR INFO___________\n' - f'SIGMA MAJOR C: {mx_viz.name} -> {mx_disp}\n' + f'SIGMA MAJOR C: {mx_viz.name} -> {mx_pair[0]}\n' f'UP MAJOR C: {upt.viz.name} with disp: {upt.rng}\n' f'DOWN MAJOR C: {dnt.viz.name} with disp: {dnt.rng}\n' f'xref: {mx_xref}\n' diff --git a/setup.py b/setup.py index c63622b25..cb5d7df8a 100755 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ 'piker = piker.cli:cli', 'pikerd = piker.cli:pikerd', 'ledger = piker.accounting.cli:ledger', + # 'store = piker.storage.cli:store', ] }, install_requires=[ @@ -78,6 +79,7 @@ 'cython', 'numpy', 'numba', + 'polars', # dataframes # UI 'PyQt5',
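# NOTE: the dispersion metric and (disp, viz)-keyed ordering used in
# `overlay_viewlists()` above, shown standalone; the y-values are made
# up and each "viz" is just a name string here:
from operator import itemgetter

def dispersion(y_ref: float, ymn: float, ymx: float) -> float:
    r_up = (ymx - y_ref) / y_ref
    r_down = (ymn - y_ref) / y_ref
    return r_up - r_down

table: dict[tuple[float, str], str] = {}
for name, (y_ref, ymn, ymx) in {
    'btcusdt.binance': (100., 90., 120.),   # disp = 0.3
    'ethusdt.binance': (100., 98., 103.),   # disp = 0.05
}.items():
    table[(dispersion(y_ref, ymn, ymx), name)] = name

# the "major" curve has the largest dispersion; remaining curves are
# iterated in descending dispersion order, as in the loop above.
mx_pair = max(table, key=itemgetter(0))
assert mx_pair[1] == 'btcusdt.binance'
ordered = sorted(table, key=itemgetter(0), reverse=True)
assert [pair[1] for pair in ordered] == ['btcusdt.binance', 'ethusdt.binance']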