From 29211b200df34c2cd996eaf4487b1da759a6fa77 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 9 Mar 2023 15:30:18 -0500 Subject: [PATCH 01/85] Start `piker.storage` subsys: cross-(ts)db middlewares The plan is to offer multiple tsdb and other storage backends (for a variety of use cases) and expose them similarly to how we do for broker and data providers B) --- piker/data/feed.py | 2 +- piker/service/marketstore.py | 400 ++------------------------------ piker/storage/__init__.py | 430 +++++++++++++++++++++++++++++++++++ 3 files changed, 446 insertions(+), 386 deletions(-) create mode 100644 piker/storage/__init__.py diff --git a/piker/data/feed.py b/piker/data/feed.py index 1714cf193..91793440a 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -718,7 +718,7 @@ async def install_brokerd_search( async with portal.open_context( brokermod.open_symbol_search - ) as (ctx, cache): + ) as (ctx, _): # shield here since we expect the search rpc to be # cancellable by the user as they see fit. diff --git a/piker/service/marketstore.py b/piker/service/marketstore.py index ac0ad0a4f..93656ab37 100644 --- a/piker/service/marketstore.py +++ b/piker/service/marketstore.py @@ -1,5 +1,5 @@ # piker: trading gear for hackers -# Copyright (C) Tyler Goodlet (in stewardship for piker0) +# Copyright (C) Tyler Goodlet (in stewardship for pikers) # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -25,11 +25,9 @@ ''' from __future__ import annotations from contextlib import asynccontextmanager as acm -from datetime import datetime from pprint import pformat from typing import ( Any, - Union, TYPE_CHECKING, ) import time @@ -37,31 +35,34 @@ from pathlib import Path from bidict import bidict -from msgspec.msgpack import encode, decode +from msgspec.msgpack import ( + encode, + decode, +) # import pyqtgraph as pg import numpy as np import tractor from trio_websocket import open_websocket_url -from anyio_marketstore import ( +from anyio_marketstore import ( # noqa open_marketstore_client, MarketstoreClient, Params, ) import pendulum -import purerpc - -if TYPE_CHECKING: - import docker - from ._ahab import DockerContainer +# TODO: import this for specific error set expected by mkts client +# import purerpc +from ..data.feed import maybe_open_feed +from . import Services from ._util import ( log, # sub-sys logger get_console_log, ) -from . import Services -from ..data.feed import maybe_open_feed -from .._profile import Profiler -from .. import config + +if TYPE_CHECKING: + import docker + from ._ahab import DockerContainer + # ahabd-supervisor and container level config @@ -432,375 +433,6 @@ class MarketStoreError(Exception): }) -class Storage: - ''' - High level storage api for both real-time and historical ingest. - - ''' - def __init__( - self, - client: MarketstoreClient, - - ) -> None: - # TODO: eventually this should be an api/interface type that - # ensures we can support multiple tsdb backends. - self.client = client - - # series' cache from tsdb reads - self._arrays: dict[str, np.ndarray] = {} - - async def list_keys(self) -> list[str]: - return await self.client.list_symbols() - - async def search_keys(self, pattern: str) -> list[str]: - ''' - Search for time series key in the storage backend. - - ''' - ... - - async def write_ticks(self, ticks: list) -> None: - ... 
- - async def load( - self, - fqme: str, - timeframe: int, - - ) -> tuple[ - np.ndarray, # timeframe sampled array-series - datetime | None, # first dt - datetime | None, # last dt - ]: - - first_tsdb_dt, last_tsdb_dt = None, None - hist = await self.read_ohlcv( - fqme, - # on first load we don't need to pull the max - # history per request size worth. - limit=3000, - timeframe=timeframe, - ) - log.info(f'Loaded tsdb history {hist}') - - if len(hist): - times = hist['Epoch'] - first, last = times[0], times[-1] - first_tsdb_dt, last_tsdb_dt = map( - pendulum.from_timestamp, [first, last] - ) - - return ( - hist, # array-data - first_tsdb_dt, # start of query-frame - last_tsdb_dt, # most recent - ) - - async def read_ohlcv( - self, - fqme: str, - timeframe: int | str, - end: int | None = None, - limit: int = int(800e3), - - ) -> np.ndarray: - - client = self.client - syms = await client.list_symbols() - - if fqme not in syms: - return {} - - # use the provided timeframe or 1s by default - tfstr = tf_in_1s.get(timeframe, tf_in_1s[1]) - - params = Params( - symbols=fqme, - timeframe=tfstr, - attrgroup='OHLCV', - end=end, - # limit_from_start=True, - - # TODO: figure the max limit here given the - # ``purepc`` msg size limit of purerpc: 33554432 - limit=limit, - ) - - for i in range(3): - try: - result = await client.query(params) - break - except purerpc.grpclib.exceptions.UnknownError as err: - if 'snappy' in err.args: - await tractor.breakpoint() - - # indicate there is no history for this timeframe - log.exception( - f'Unknown mkts QUERY error: {params}\n' - f'{err.args}' - ) - else: - return {} - - # TODO: it turns out column access on recarrays is actually slower: - # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist - # it might make sense to make these structured arrays? - data_set = result.by_symbols()[fqme] - array = data_set.array - - # XXX: ensure sample rate is as expected - time = data_set.array['Epoch'] - if len(time) > 1: - time_step = time[-1] - time[-2] - ts = tf_in_1s.inverse[data_set.timeframe] - - if time_step != ts: - log.warning( - f'MKTS BUG: wrong timeframe loaded: {time_step}' - 'YOUR DATABASE LIKELY CONTAINS BAD DATA FROM AN OLD BUG' - f'WIPING HISTORY FOR {ts}s' - ) - await self.delete_ts(fqme, timeframe) - - # try reading again.. 
- return await self.read_ohlcv( - fqme, - timeframe, - end, - limit, - ) - - return array - - async def delete_ts( - self, - key: str, - timeframe: Union[int, str | None] = None, - fmt: str = 'OHLCV', - - ) -> bool: - - client = self.client - syms = await client.list_symbols() - if key not in syms: - await tractor.breakpoint() - raise KeyError(f'`{key}` table key not found in\n{syms}?') - - tbk = mk_tbk(( - key, - tf_in_1s.get(timeframe, tf_in_1s[60]), - fmt, - )) - return await client.destroy(tbk=tbk) - - async def write_ohlcv( - self, - fqme: str, - ohlcv: np.ndarray, - timeframe: int, - append_and_duplicate: bool = True, - limit: int = int(800e3), - - ) -> None: - # build mkts schema compat array for writing - mkts_dt = np.dtype(_ohlcv_dt) - mkts_array = np.zeros( - len(ohlcv), - dtype=mkts_dt, - ) - # copy from shm array (yes it's this easy): - # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays - mkts_array[:] = ohlcv[[ - 'time', - 'open', - 'high', - 'low', - 'close', - 'volume', - ]] - - m, r = divmod(len(mkts_array), limit) - - tfkey = tf_in_1s[timeframe] - for i in range(m, 1): - to_push = mkts_array[i-1:i*limit] - - # write to db - resp = await self.client.write( - to_push, - tbk=f'{fqme}/{tfkey}/OHLCV', - - # NOTE: will will append duplicates - # for the same timestamp-index. - # TODO: pre-deduplicate? - isvariablelength=append_and_duplicate, - ) - - log.info( - f'Wrote {mkts_array.size} datums to tsdb\n' - ) - - for resp in resp.responses: - err = resp.error - if err: - raise MarketStoreError(err) - - if r: - to_push = mkts_array[m*limit:] - - # write to db - resp = await self.client.write( - to_push, - tbk=f'{fqme}/{tfkey}/OHLCV', - - # NOTE: will will append duplicates - # for the same timestamp-index. - # TODO: pre deduplicate? - isvariablelength=append_and_duplicate, - ) - - log.info( - f'Wrote {mkts_array.size} datums to tsdb\n' - ) - - for resp in resp.responses: - err = resp.error - if err: - raise MarketStoreError(err) - - # XXX: currently the only way to do this is through the CLI: - - # sudo ./marketstore connect --dir ~/.config/piker/data - # >> \show mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15 - # and this seems to block and use up mem.. - # >> \trim mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15 - - # relevant source code for this is here: - # https://github.com/alpacahq/marketstore/blob/master/cmd/connect/session/trim.go#L14 - # def delete_range(self, start_dt, end_dt) -> None: - # ... - - -@acm -async def open_storage_client( - host: str, - grpc_port: int, - -) -> tuple[Storage, dict[str, np.ndarray]]: - ''' - Load a series by key and deliver in ``numpy`` struct array format. - - ''' - async with ( - # eventually a storage backend endpoint - get_client( - host=host, - port=grpc_port, - ) as client, - ): - # slap on our wrapper api - yield Storage(client) - - -@acm -async def open_tsdb_client( - fqme: str, -) -> Storage: - - # TODO: real-time dedicated task for ensuring - # history consistency between the tsdb, shm and real-time feed.. - - # update sequence design notes: - - # - load existing highest frequency data from mkts - # * how do we want to offer this to the UI? - # - lazy loading? - # - try to load it all and expect graphics caching/diffing - # to hide extra bits that aren't in view? - - # - compute the diff between latest data from broker and shm - # * use sql api in mkts to determine where the backend should - # start querying for data? 
- # * append any diff with new shm length - # * determine missing (gapped) history by scanning - # * how far back do we look? - - # - begin rt update ingest and aggregation - # * could start by always writing ticks to mkts instead of - # worrying about a shm queue for now. - # * we have a short list of shm queues worth groking: - # - https://github.com/pikers/piker/issues/107 - # * the original data feed arch blurb: - # - https://github.com/pikers/piker/issues/98 - # - profiler = Profiler( - disabled=True, # not pg_profile_enabled(), - delayed=False, - ) - - # load any user service settings for connecting to - rootconf, path = config.load( - 'conf', - touch_if_dne=True, - ) - tsdbconf = rootconf['network'].get('tsdb') - # backend = tsdbconf.pop('backend') - async with ( - open_storage_client( - **tsdbconf, - ) as storage, - - maybe_open_feed( - [fqme], - start_stream=False, - - ) as feed, - ): - profiler(f'opened feed for {fqme}') - - # to_append = feed.hist_shm.array - # to_prepend = None - - if fqme: - flume = feed.flumes[fqme] - symbol = flume.mkt - if symbol: - fqme = symbol.fqme - - # diff db history with shm and only write the missing portions - # ohlcv = flume.hist_shm.array - - # TODO: use pg profiler - # for secs in (1, 60): - # tsdb_array = await storage.read_ohlcv( - # fqme, - # timeframe=timeframe, - # ) - # # hist diffing: - # # these aren't currently used but can be referenced from - # # within the embedded ipython shell below. - # to_append = ohlcv[ohlcv['time'] > ts['Epoch'][-1]] - # to_prepend = ohlcv[ohlcv['time'] < ts['Epoch'][0]] - - # profiler('Finished db arrays diffs') - - _ = await storage.client.list_symbols() - # log.info(f'Existing tsdb symbol set:\n{pformat(syms)}') - # profiler(f'listed symbols {syms}') - yield storage - - # for array in [to_append, to_prepend]: - # if array is None: - # continue - - # log.info( - # f'Writing datums {array.size} -> to tsdb from shm\n' - # ) - # await storage.write_ohlcv(fqme, array) - - # profiler('Finished db writes') - - async def ingest_quote_stream( symbols: list[str], brokername: str, @@ -963,5 +595,3 @@ async def recv() -> dict[str, Any]: if quotes: yield quotes - - diff --git a/piker/storage/__init__.py b/piker/storage/__init__.py new file mode 100644 index 000000000..8169f4ef9 --- /dev/null +++ b/piker/storage/__init__.py @@ -0,0 +1,430 @@ +# piker: trading gear for hackers +# Copyright (C) Tyler Goodlet (in stewardship for pikers) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +(time-series) database middle ware layer. + +- APIs for read, write, delete, replicate over multiple + db systems. +- backend agnostic tick msg ingest machinery. +- broadcast systems for fan out of real-time ingested + data to live consumers. +- test harness utilities for data-processing verification. 
+ +''' +from __future__ import annotations +from contextlib import asynccontextmanager as acm +from datetime import datetime +# from pprint import pformat +from typing import ( + Union, +) + +import tractor +import numpy as np +from anyio_marketstore import ( + Params, +) +import pendulum +import purerpc + +from . import config +from ..service.marketstore import ( + MarketstoreClient, + tf_in_1s, + mk_tbk, + _ohlcv_dt, + MarketStoreError, +) +from ..data.feed import maybe_open_feed +from ..log import get_logger +from .._profile import Profiler + + +log = get_logger(__name__) + + +class Storage: + ''' + High level storage api for both real-time and historical ingest. + + ''' + def __init__( + self, + client: MarketstoreClient, + + ) -> None: + # TODO: eventually this should be an api/interface type that + # ensures we can support multiple tsdb backends. + self.client = client + + # series' cache from tsdb reads + self._arrays: dict[str, np.ndarray] = {} + + async def list_keys(self) -> list[str]: + return await self.client.list_symbols() + + async def search_keys(self, pattern: str) -> list[str]: + ''' + Search for time series key in the storage backend. + + ''' + ... + + async def write_ticks(self, ticks: list) -> None: + ... + + async def load( + self, + fqme: str, + timeframe: int, + + ) -> tuple[ + np.ndarray, # timeframe sampled array-series + datetime | None, # first dt + datetime | None, # last dt + ]: + + first_tsdb_dt, last_tsdb_dt = None, None + hist = await self.read_ohlcv( + fqme, + # on first load we don't need to pull the max + # history per request size worth. + limit=3000, + timeframe=timeframe, + ) + log.info(f'Loaded tsdb history {hist}') + + if len(hist): + times = hist['Epoch'] + first, last = times[0], times[-1] + first_tsdb_dt, last_tsdb_dt = map( + pendulum.from_timestamp, [first, last] + ) + + return ( + hist, # array-data + first_tsdb_dt, # start of query-frame + last_tsdb_dt, # most recent + ) + + async def read_ohlcv( + self, + fqme: str, + timeframe: int | str, + end: int | None = None, + limit: int = int(800e3), + + ) -> np.ndarray: + + client = self.client + syms = await client.list_symbols() + + if fqme not in syms: + return {} + + # use the provided timeframe or 1s by default + tfstr = tf_in_1s.get(timeframe, tf_in_1s[1]) + + params = Params( + symbols=fqme, + timeframe=tfstr, + attrgroup='OHLCV', + end=end, + # limit_from_start=True, + + # TODO: figure the max limit here given the + # ``purepc`` msg size limit of purerpc: 33554432 + limit=limit, + ) + + for i in range(3): + try: + result = await client.query(params) + break + except purerpc.grpclib.exceptions.UnknownError as err: + if 'snappy' in err.args: + await tractor.breakpoint() + + # indicate there is no history for this timeframe + log.exception( + f'Unknown mkts QUERY error: {params}\n' + f'{err.args}' + ) + else: + return {} + + # TODO: it turns out column access on recarrays is actually slower: + # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist + # it might make sense to make these structured arrays? 
+ data_set = result.by_symbols()[fqme] + array = data_set.array + + # XXX: ensure sample rate is as expected + time = data_set.array['Epoch'] + if len(time) > 1: + time_step = time[-1] - time[-2] + ts = tf_in_1s.inverse[data_set.timeframe] + + if time_step != ts: + log.warning( + f'MKTS BUG: wrong timeframe loaded: {time_step}' + 'YOUR DATABASE LIKELY CONTAINS BAD DATA FROM AN OLD BUG' + f'WIPING HISTORY FOR {ts}s' + ) + await self.delete_ts(fqme, timeframe) + + # try reading again.. + return await self.read_ohlcv( + fqme, + timeframe, + end, + limit, + ) + + return array + + async def delete_ts( + self, + key: str, + timeframe: Union[int, str | None] = None, + fmt: str = 'OHLCV', + + ) -> bool: + + client = self.client + syms = await client.list_symbols() + if key not in syms: + await tractor.breakpoint() + raise KeyError(f'`{key}` table key not found in\n{syms}?') + + tbk = mk_tbk(( + key, + tf_in_1s.get(timeframe, tf_in_1s[60]), + fmt, + )) + return await client.destroy(tbk=tbk) + + async def write_ohlcv( + self, + fqme: str, + ohlcv: np.ndarray, + timeframe: int, + append_and_duplicate: bool = True, + limit: int = int(800e3), + + ) -> None: + # build mkts schema compat array for writing + mkts_dt = np.dtype(_ohlcv_dt) + mkts_array = np.zeros( + len(ohlcv), + dtype=mkts_dt, + ) + # copy from shm array (yes it's this easy): + # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays + mkts_array[:] = ohlcv[[ + 'time', + 'open', + 'high', + 'low', + 'close', + 'volume', + ]] + + m, r = divmod(len(mkts_array), limit) + + tfkey = tf_in_1s[timeframe] + for i in range(m, 1): + to_push = mkts_array[i-1:i*limit] + + # write to db + resp = await self.client.write( + to_push, + tbk=f'{fqme}/{tfkey}/OHLCV', + + # NOTE: will will append duplicates + # for the same timestamp-index. + # TODO: pre-deduplicate? + isvariablelength=append_and_duplicate, + ) + + log.info( + f'Wrote {mkts_array.size} datums to tsdb\n' + ) + + for resp in resp.responses: + err = resp.error + if err: + raise MarketStoreError(err) + + if r: + to_push = mkts_array[m*limit:] + + # write to db + resp = await self.client.write( + to_push, + tbk=f'{fqme}/{tfkey}/OHLCV', + + # NOTE: will will append duplicates + # for the same timestamp-index. + # TODO: pre deduplicate? + isvariablelength=append_and_duplicate, + ) + + log.info( + f'Wrote {mkts_array.size} datums to tsdb\n' + ) + + for resp in resp.responses: + err = resp.error + if err: + raise MarketStoreError(err) + + # XXX: currently the only way to do this is through the CLI: + + # sudo ./marketstore connect --dir ~/.config/piker/data + # >> \show mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15 + # and this seems to block and use up mem.. + # >> \trim mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15 + + # relevant source code for this is here: + # https://github.com/alpacahq/marketstore/blob/master/cmd/connect/session/trim.go#L14 + # def delete_range(self, start_dt, end_dt) -> None: + # ... + + +@acm +async def open_storage_client( + host: str, + grpc_port: int, + +) -> tuple[Storage, dict[str, np.ndarray]]: + ''' + Load a series by key and deliver in ``numpy`` struct array format. + + ''' + from piker.service.marketstore import get_client + + async with ( + # eventually a storage backend endpoint + get_client( + host=host, + port=grpc_port, + ) as client, + ): + # slap on our wrapper api + yield Storage(client) + + +# NOTE: pretty sure right now this is only being +# called by a CLI entrypoint? 
+@acm +async def open_tsdb_client( + fqme: str, +) -> Storage: + + # TODO: real-time dedicated task for ensuring + # history consistency between the tsdb, shm and real-time feed.. + + # update sequence design notes: + + # - load existing highest frequency data from mkts + # * how do we want to offer this to the UI? + # - lazy loading? + # - try to load it all and expect graphics caching/diffing + # to hide extra bits that aren't in view? + + # - compute the diff between latest data from broker and shm + # * use sql api in mkts to determine where the backend should + # start querying for data? + # * append any diff with new shm length + # * determine missing (gapped) history by scanning + # * how far back do we look? + + # - begin rt update ingest and aggregation + # * could start by always writing ticks to mkts instead of + # worrying about a shm queue for now. + # * we have a short list of shm queues worth groking: + # - https://github.com/pikers/piker/issues/107 + # * the original data feed arch blurb: + # - https://github.com/pikers/piker/issues/98 + # + profiler = Profiler( + disabled=True, # not pg_profile_enabled(), + delayed=False, + ) + + # load any user service settings for connecting to + rootconf, path = config.load( + 'conf', + touch_if_dne=True, + ) + tsdbconf = rootconf['network'].get('tsdb') + # backend = tsdbconf.pop('backend') + async with ( + open_storage_client( + **tsdbconf, + ) as storage, + + maybe_open_feed( + [fqme], + start_stream=False, + + ) as feed, + ): + profiler(f'opened feed for {fqme}') + + # to_append = feed.hist_shm.array + # to_prepend = None + + if fqme: + flume = feed.flumes[fqme] + symbol = flume.mkt + if symbol: + fqme = symbol.fqme + + # diff db history with shm and only write the missing portions + # ohlcv = flume.hist_shm.array + + # TODO: use pg profiler + # for secs in (1, 60): + # tsdb_array = await storage.read_ohlcv( + # fqme, + # timeframe=timeframe, + # ) + # # hist diffing: + # # these aren't currently used but can be referenced from + # # within the embedded ipython shell below. + # to_append = ohlcv[ohlcv['time'] > ts['Epoch'][-1]] + # to_prepend = ohlcv[ohlcv['time'] < ts['Epoch'][0]] + + # profiler('Finished db arrays diffs') + + _ = await storage.client.list_symbols() + # log.info(f'Existing tsdb symbol set:\n{pformat(syms)}') + # profiler(f'listed symbols {syms}') + yield storage + + # for array in [to_append, to_prepend]: + # if array is None: + # continue + + # log.info( + # f'Writing datums {array.size} -> to tsdb from shm\n' + # ) + # await storage.write_ohlcv(fqme, array) + + # profiler('Finished db writes') From 7ab97fb21dc1ad1d9fe73fa4c321222f97977d5d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 29 May 2023 13:52:55 -0400 Subject: [PATCH 02/85] Add marketstore client as storage-backend module To kick off our (tsdb) storage backends this adds our first implementing a new `Storage(Protocol)` client interface. Going foward, the top level `.storage` pkg-module will now expose backend agnostic APIs and helpers whilst specific backend implementations will adhere to that middle-ware layer. Deats: - add `.storage.marketstore.Storage` as the first client implementation, moving all needed (import) dependencies out from `.service.marketstore` as well as `.ohlc_key_map` and `get_client()`. 
- move root `conf.toml` loading from `.data.history` into `.storage.__init__.open_storage_client()` which now takes in a `name: str` and does all the work of loading the correct backend module, its config, and determining if a service-instance can be contacted and a client loaded; in the case where this fails we raise a new `StorageConnectionError`. - add a new `.storage.get_storagemod()` just like we have for brokers. - make `open_storage_client()` also return the backend module such that the history-data layer can make backend specific calls as needed (eg. ohlc_key_map). - fall back to a basic non-tsdb backfill when `open_storage_client()` raises the new connection error. --- piker/data/cli.py | 4 +- piker/data/history.py | 79 ++------ piker/service/marketstore.py | 29 +-- piker/storage/__init__.py | 370 +++++++++++------------------------ piker/storage/marketstore.py | 342 ++++++++++++++++++++++++++++++++ 5 files changed, 480 insertions(+), 344 deletions(-) create mode 100644 piker/storage/marketstore.py diff --git a/piker/data/cli.py b/piker/data/cli.py index 59db1037e..f855717b0 100644 --- a/piker/data/cli.py +++ b/piker/data/cli.py @@ -121,7 +121,7 @@ def storesh( Start an IPython shell ready to query the local marketstore db. ''' - from piker.data.marketstore import open_tsdb_client + from piker.storage import open_tsdb_client from piker.service import open_piker_runtime async def main(): @@ -171,7 +171,7 @@ def storage( Start an IPython shell ready to query the local marketstore db. ''' - from piker.service.marketstore import open_tsdb_client + from piker.storage import open_tsdb_client from piker.service import open_piker_runtime async def main(): diff --git a/piker/data/history.py b/piker/data/history.py index ebfe8c65d..a29d2ab93 100644 --- a/piker/data/history.py +++ b/piker/data/history.py @@ -28,7 +28,6 @@ from types import ModuleType from typing import ( Callable, - Optional, TYPE_CHECKING, ) @@ -38,16 +37,12 @@ import pendulum import numpy as np -from .. import config from ..accounting import ( MktPair, ) from ._util import ( log, ) -from ..service import ( - check_for_service, -) from ._sharedmem import ( maybe_open_shm_array, ShmArray, @@ -91,8 +86,8 @@ async def start_backfill( sampler_stream: tractor.MsgStream, feed_is_live: trio.Event, - last_tsdb_dt: Optional[datetime] = None, - storage: Optional[Storage] = None, + last_tsdb_dt: datetime | None = None, + storage: Storage | None = None, write_tsdb: bool = True, tsdb_is_up: bool = False, @@ -391,7 +386,7 @@ async def basic_backfill( async def tsdb_backfill( mod: ModuleType, - marketstore: ModuleType, + storemod: ModuleType, bus: _FeedsBus, storage: Storage, mkt: MktPair, @@ -542,7 +537,7 @@ async def back_load_from_tsdb( prepend=True, # update_first=False, # start=prepend_start, - field_map=marketstore.ohlc_key_map, + field_map=storemod.ohlc_key_map, ) tsdb_last_frame_start = tsdb_history['Epoch'][0] @@ -580,7 +575,7 @@ async def back_load_from_tsdb( shm.push( to_push, prepend=True, - field_map=marketstore.ohlc_key_map, + field_map=storemod.ohlc_key_map, ) log.info(f'Loaded {to_push.shape} datums from storage') @@ -626,12 +621,11 @@ async def manage_history( ) -> None: ''' Load and manage historical data including the loading of any - available series from `marketstore` as well as conducting real-time - update of both that existing db and the allocated shared memory - buffer. 
+ available series from any connected tsdb as well as conduct + real-time update of both that existing db and the allocated shared + memory buffer. ''' - # TODO: is there a way to make each shm file key # actor-tree-discovery-addr unique so we avoid collisions # when doing tests which also allocate shms for certain instruments @@ -711,52 +705,17 @@ async def manage_history( None, ) assert open_history_client - - tsdb_is_up: bool = False - try_remote_tsdb: bool = False - - conf, path = config.load('conf', touch_if_dne=True) - net = conf.get('network') - if net: - tsdbconf = net.get('tsdb') - - # lookup backend tsdb module by name and load any user service - # settings for connecting to the tsdb service. - tsdb_backend: str = tsdbconf.pop('backend') - tsdb_host: str = tsdbconf['host'] - - # TODO: import and load storagemod by name - # mod = get_storagemod(tsdb_backend) - from ..service import marketstore - if tsdb_host == 'localhost': - log.info('Scanning for existing `{tsbd_backend}`') - tsdb_is_up: bool = await check_for_service(f'{tsdb_backend}d') - - else: - try_remote_tsdb: bool = True - - if ( - tsdb_is_up - or try_remote_tsdb - and ( - opened - and open_history_client - ) - ): - log.info('Found existing `marketstored`') - - async with ( - marketstore.open_storage_client( - **tsdbconf - ) as storage, - ): + from .. import storage + try: + async with storage.open_storage_client() as (storemod, client): + log.info(f'Found existing `{storemod.name}`') # TODO: drop returning the output that we pass in? await bus.nursery.start( tsdb_backfill, mod, - marketstore, + storemod, bus, - storage, + client, mkt, { 1: rt_shm, @@ -784,11 +743,11 @@ async def manage_history( # what data is loaded for viewing. await trio.sleep_forever() - # load less history if no tsdb can be found - elif ( - not tsdb_is_up - and opened - ): + except storage.StorageConnectionError: + log.exception( + "Can't connect to tsdb backend!?\n" + 'Starting basic backfille to shm..' + ) await basic_backfill( bus, mod, diff --git a/piker/service/marketstore.py b/piker/service/marketstore.py index 93656ab37..c9f494201 100644 --- a/piker/service/marketstore.py +++ b/piker/service/marketstore.py @@ -327,16 +327,6 @@ async def start_ahab_daemon( ] -ohlc_key_map = bidict({ - 'Epoch': 'time', - 'Open': 'open', - 'High': 'high', - 'Low': 'low', - 'Close': 'close', - 'Volume': 'volume', -}) - - def mk_tbk(keys: tuple[str, str, str]) -> str: ''' Generate a marketstore table key from a tuple. @@ -388,24 +378,6 @@ def quote_to_marketstore_structarray( return np.array([tuple(array_input)], dtype=_quote_dt) -@acm -async def get_client( - host: str | None, - port: int | None, - -) -> MarketstoreClient: - ''' - Load a ``anyio_marketstore`` grpc client connected - to an existing ``marketstore`` server. - - ''' - async with open_marketstore_client( - host or 'localhost', - port or _config['grpc_listen_port'], - ) as client: - yield client - - class MarketStoreError(Exception): "Generic marketstore client error" @@ -444,6 +416,7 @@ async def ingest_quote_stream( Ingest a broker quote stream into a ``marketstore`` tsdb. ''' + from piker.storage.marketstore import get_client async with ( maybe_open_feed(brokername, symbols, loglevel=loglevel) as feed, get_client() as ms_client, diff --git a/piker/storage/__init__.py b/piker/storage/__init__.py index 8169f4ef9..3aeefc377 100644 --- a/piker/storage/__init__.py +++ b/piker/storage/__init__.py @@ -25,68 +25,67 @@ - test harness utilities for data-processing verification. 
''' -from __future__ import annotations +from abc import abstractmethod from contextlib import asynccontextmanager as acm +from functools import partial +from importlib import import_module from datetime import datetime -# from pprint import pformat +from types import ModuleType from typing import ( - Union, + # Callable, + # Awaitable, + # Any, + # AsyncIterator, + Protocol, + # Generic, + # TypeVar, ) -import tractor import numpy as np -from anyio_marketstore import ( - Params, + + +from .. import config +from ..service import ( + check_for_service, ) -import pendulum -import purerpc - -from . import config -from ..service.marketstore import ( - MarketstoreClient, - tf_in_1s, - mk_tbk, - _ohlcv_dt, - MarketStoreError, +from ..log import ( + get_logger, + get_console_log, ) -from ..data.feed import maybe_open_feed -from ..log import get_logger -from .._profile import Profiler - +subsys: str = 'piker.storage' -log = get_logger(__name__) +log = get_logger(subsys) +get_console_log = partial( + get_console_log, + name=subsys, +) -class Storage: +class Storage( + Protocol, +): ''' - High level storage api for both real-time and historical ingest. + Api description that all storage backends must implement + in order to suffice the historical data mgmt layer. ''' - def __init__( - self, - client: MarketstoreClient, - - ) -> None: - # TODO: eventually this should be an api/interface type that - # ensures we can support multiple tsdb backends. - self.client = client - - # series' cache from tsdb reads - self._arrays: dict[str, np.ndarray] = {} - + @abstractmethod async def list_keys(self) -> list[str]: - return await self.client.list_symbols() - - async def search_keys(self, pattern: str) -> list[str]: - ''' - Search for time series key in the storage backend. - - ''' ... - async def write_ticks(self, ticks: list) -> None: + @abstractmethod + def search_keys(self) -> list[str]: ... + # @abstractmethod + # async def write_ticks( + # self, + # ticks: list, + # ) -> ReceiveType: + # ... + + # ``trio.abc.AsyncResource`` methods + @abstractmethod async def load( self, fqme: str, @@ -97,30 +96,19 @@ async def load( datetime | None, # first dt datetime | None, # last dt ]: + ... + + @abstractmethod + async def delete_ts( + self, + key: str, + timeframe: int | str | None = None, + fmt: str = 'OHLCV', - first_tsdb_dt, last_tsdb_dt = None, None - hist = await self.read_ohlcv( - fqme, - # on first load we don't need to pull the max - # history per request size worth. - limit=3000, - timeframe=timeframe, - ) - log.info(f'Loaded tsdb history {hist}') - - if len(hist): - times = hist['Epoch'] - first, last = times[0], times[-1] - first_tsdb_dt, last_tsdb_dt = map( - pendulum.from_timestamp, [first, last] - ) - - return ( - hist, # array-data - first_tsdb_dt, # start of query-frame - last_tsdb_dt, # most recent - ) + ) -> bool: + ... 
+ @abstractmethod async def read_ohlcv( self, fqme: str, @@ -129,94 +117,7 @@ async def read_ohlcv( limit: int = int(800e3), ) -> np.ndarray: - - client = self.client - syms = await client.list_symbols() - - if fqme not in syms: - return {} - - # use the provided timeframe or 1s by default - tfstr = tf_in_1s.get(timeframe, tf_in_1s[1]) - - params = Params( - symbols=fqme, - timeframe=tfstr, - attrgroup='OHLCV', - end=end, - # limit_from_start=True, - - # TODO: figure the max limit here given the - # ``purepc`` msg size limit of purerpc: 33554432 - limit=limit, - ) - - for i in range(3): - try: - result = await client.query(params) - break - except purerpc.grpclib.exceptions.UnknownError as err: - if 'snappy' in err.args: - await tractor.breakpoint() - - # indicate there is no history for this timeframe - log.exception( - f'Unknown mkts QUERY error: {params}\n' - f'{err.args}' - ) - else: - return {} - - # TODO: it turns out column access on recarrays is actually slower: - # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist - # it might make sense to make these structured arrays? - data_set = result.by_symbols()[fqme] - array = data_set.array - - # XXX: ensure sample rate is as expected - time = data_set.array['Epoch'] - if len(time) > 1: - time_step = time[-1] - time[-2] - ts = tf_in_1s.inverse[data_set.timeframe] - - if time_step != ts: - log.warning( - f'MKTS BUG: wrong timeframe loaded: {time_step}' - 'YOUR DATABASE LIKELY CONTAINS BAD DATA FROM AN OLD BUG' - f'WIPING HISTORY FOR {ts}s' - ) - await self.delete_ts(fqme, timeframe) - - # try reading again.. - return await self.read_ohlcv( - fqme, - timeframe, - end, - limit, - ) - - return array - - async def delete_ts( - self, - key: str, - timeframe: Union[int, str | None] = None, - fmt: str = 'OHLCV', - - ) -> bool: - - client = self.client - syms = await client.list_symbols() - if key not in syms: - await tractor.breakpoint() - raise KeyError(f'`{key}` table key not found in\n{syms}?') - - tbk = mk_tbk(( - key, - tf_in_1s.get(timeframe, tf_in_1s[60]), - fmt, - )) - return await client.destroy(tbk=tbk) + ... async def write_ohlcv( self, @@ -227,106 +128,74 @@ async def write_ohlcv( limit: int = int(800e3), ) -> None: - # build mkts schema compat array for writing - mkts_dt = np.dtype(_ohlcv_dt) - mkts_array = np.zeros( - len(ohlcv), - dtype=mkts_dt, - ) - # copy from shm array (yes it's this easy): - # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays - mkts_array[:] = ohlcv[[ - 'time', - 'open', - 'high', - 'low', - 'close', - 'volume', - ]] - - m, r = divmod(len(mkts_array), limit) - - tfkey = tf_in_1s[timeframe] - for i in range(m, 1): - to_push = mkts_array[i-1:i*limit] - - # write to db - resp = await self.client.write( - to_push, - tbk=f'{fqme}/{tfkey}/OHLCV', - - # NOTE: will will append duplicates - # for the same timestamp-index. - # TODO: pre-deduplicate? - isvariablelength=append_and_duplicate, - ) - - log.info( - f'Wrote {mkts_array.size} datums to tsdb\n' - ) - - for resp in resp.responses: - err = resp.error - if err: - raise MarketStoreError(err) - - if r: - to_push = mkts_array[m*limit:] - - # write to db - resp = await self.client.write( - to_push, - tbk=f'{fqme}/{tfkey}/OHLCV', - - # NOTE: will will append duplicates - # for the same timestamp-index. - # TODO: pre deduplicate? 
- isvariablelength=append_and_duplicate, - ) - - log.info( - f'Wrote {mkts_array.size} datums to tsdb\n' - ) - - for resp in resp.responses: - err = resp.error - if err: - raise MarketStoreError(err) - - # XXX: currently the only way to do this is through the CLI: - - # sudo ./marketstore connect --dir ~/.config/piker/data - # >> \show mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15 - # and this seems to block and use up mem.. - # >> \trim mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15 - - # relevant source code for this is here: - # https://github.com/alpacahq/marketstore/blob/master/cmd/connect/session/trim.go#L14 - # def delete_range(self, start_dt, end_dt) -> None: - # ... + ... + + +class StorageConnectionError(ConnectionError): + ''' + Can't connect to the desired tsdb subsys/service. + + ''' + +def get_storagemod(name: str) -> ModuleType: + mod: ModuleType = import_module( + '.' + name, + 'piker.storage', + ) + + # we only allow monkeying because it's for internal keying + mod.name = mod.__name__.split('.')[-1] + return mod @acm async def open_storage_client( - host: str, - grpc_port: int, + name: str | None = None, -) -> tuple[Storage, dict[str, np.ndarray]]: +) -> tuple[ModuleType, Storage]: ''' - Load a series by key and deliver in ``numpy`` struct array format. + Load the ``Storage`` client for named backend. ''' - from piker.service.marketstore import get_client - - async with ( - # eventually a storage backend endpoint - get_client( - host=host, - port=grpc_port, - ) as client, + # load root config for tsdb + conf, path = config.load('conf', touch_if_dne=True) + net = conf.get('network') + if net: + tsdbconf = net.get('tsdb') + + # lookup backend tsdb module by name and load any user service + # settings for connecting to the tsdb service. + name: str = tsdbconf.pop('backend') + tsdb_host: str = tsdbconf['host'] + + if name is None: + raise RuntimeError('No tsdb backend has been set!?') + + # import and load storagemod by name + mod: ModuleType = get_storagemod(name) + get_client = mod.get_client + + log.info('Scanning for existing `{tsbd_backend}`') + tsdb_is_up: bool = await check_for_service(f'{name}d') + if ( + tsdb_host == 'localhost' + or tsdb_is_up ): - # slap on our wrapper api - yield Storage(client) + log.info(f'Connecting to local {name}@{tsdbconf}') + else: + log.info(f'Attempting to connect to remote {name}@{tsdbconf}') + + try: + async with ( + get_client(**tsdbconf) as client, + ): + # slap on our wrapper api + yield mod, client + + except Exception as err: + raise StorageConnectionError( + f'No connection to {name}' + ) from err # NOTE: pretty sure right now this is only being @@ -362,22 +231,15 @@ async def open_tsdb_client( # * the original data feed arch blurb: # - https://github.com/pikers/piker/issues/98 # + from .._profile import Profiler profiler = Profiler( disabled=True, # not pg_profile_enabled(), delayed=False, ) + from ..data.feed import maybe_open_feed - # load any user service settings for connecting to - rootconf, path = config.load( - 'conf', - touch_if_dne=True, - ) - tsdbconf = rootconf['network'].get('tsdb') - # backend = tsdbconf.pop('backend') async with ( - open_storage_client( - **tsdbconf, - ) as storage, + open_storage_client() as (_, storage), maybe_open_feed( [fqme], diff --git a/piker/storage/marketstore.py b/piker/storage/marketstore.py new file mode 100644 index 000000000..9aad2230a --- /dev/null +++ b/piker/storage/marketstore.py @@ -0,0 +1,342 @@ +# piker: trading gear for hackers +# Copyright (C) Tyler Goodlet (in stewardship for 
pikers) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +marketstore tsdb backend: +https://github.com/alpacahq/marketstore + + +We wrote an async gGRPC client: +https://github.com/pikers/anyio-marketstore + +which is normally preferred minus the discovered issues +in https://github.com/pikers/piker/issues/443 + +Which is the main reason for us moving away from this +platform.. + +''' +from __future__ import annotations +from contextlib import asynccontextmanager as acm +from datetime import datetime +# from pprint import pformat +from typing import ( + Union, +) + +from bidict import bidict +import tractor +import numpy as np +from anyio_marketstore import ( + Params, +) +import pendulum +import purerpc + +from ..service.marketstore import ( + MarketstoreClient, + tf_in_1s, + mk_tbk, + _ohlcv_dt, + MarketStoreError, +) +from anyio_marketstore import ( # noqa + open_marketstore_client, + MarketstoreClient, + Params, +) +from ..log import get_logger +# from .._profile import Profiler + + +log = get_logger(__name__) + + +class Storage: + ''' + High level storage api for both real-time and historical ingest. + + ''' + def __init__( + self, + client: MarketstoreClient, + + ) -> None: + # TODO: eventually this should be an api/interface type that + # ensures we can support multiple tsdb backends. + self.client = client + + # series' cache from tsdb reads + self._arrays: dict[str, np.ndarray] = {} + + async def list_keys(self) -> list[str]: + return await self.client.list_symbols() + + async def search_keys(self, pattern: str) -> list[str]: + ''' + Search for time series key in the storage backend. + + ''' + ... + + async def write_ticks(self, ticks: list) -> None: + ... + + async def load( + self, + fqme: str, + timeframe: int, + + ) -> tuple[ + np.ndarray, # timeframe sampled array-series + datetime | None, # first dt + datetime | None, # last dt + ]: + + first_tsdb_dt, last_tsdb_dt = None, None + hist = await self.read_ohlcv( + fqme, + # on first load we don't need to pull the max + # history per request size worth. 
+ limit=3000, + timeframe=timeframe, + ) + log.info(f'Loaded tsdb history {hist}') + + if len(hist): + times = hist['Epoch'] + first, last = times[0], times[-1] + first_tsdb_dt, last_tsdb_dt = map( + pendulum.from_timestamp, [first, last] + ) + + return ( + hist, # array-data + first_tsdb_dt, # start of query-frame + last_tsdb_dt, # most recent + ) + + async def read_ohlcv( + self, + fqme: str, + timeframe: int | str, + end: int | None = None, + limit: int = int(800e3), + + ) -> np.ndarray: + + client = self.client + syms = await client.list_symbols() + + if fqme not in syms: + return {} + + # use the provided timeframe or 1s by default + tfstr = tf_in_1s.get(timeframe, tf_in_1s[1]) + + params = Params( + symbols=fqme, + timeframe=tfstr, + attrgroup='OHLCV', + end=end, + # limit_from_start=True, + + # TODO: figure the max limit here given the + # ``purepc`` msg size limit of purerpc: 33554432 + limit=limit, + ) + + for i in range(3): + try: + result = await client.query(params) + break + except purerpc.grpclib.exceptions.UnknownError as err: + if 'snappy' in err.args: + await tractor.breakpoint() + + # indicate there is no history for this timeframe + log.exception( + f'Unknown mkts QUERY error: {params}\n' + f'{err.args}' + ) + else: + return {} + + # TODO: it turns out column access on recarrays is actually slower: + # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist + # it might make sense to make these structured arrays? + data_set = result.by_symbols()[fqme] + array = data_set.array + + # XXX: ensure sample rate is as expected + time = data_set.array['Epoch'] + if len(time) > 1: + time_step = time[-1] - time[-2] + ts = tf_in_1s.inverse[data_set.timeframe] + + if time_step != ts: + log.warning( + f'MKTS BUG: wrong timeframe loaded: {time_step}' + 'YOUR DATABASE LIKELY CONTAINS BAD DATA FROM AN OLD BUG' + f'WIPING HISTORY FOR {ts}s' + ) + await self.delete_ts(fqme, timeframe) + + # try reading again.. + return await self.read_ohlcv( + fqme, + timeframe, + end, + limit, + ) + + return array + + async def delete_ts( + self, + key: str, + timeframe: Union[int, str | None] = None, + fmt: str = 'OHLCV', + + ) -> bool: + + client = self.client + syms = await client.list_symbols() + if key not in syms: + await tractor.breakpoint() + raise KeyError(f'`{key}` table key not found in\n{syms}?') + + tbk = mk_tbk(( + key, + tf_in_1s.get(timeframe, tf_in_1s[60]), + fmt, + )) + return await client.destroy(tbk=tbk) + + async def write_ohlcv( + self, + fqme: str, + ohlcv: np.ndarray, + timeframe: int, + append_and_duplicate: bool = True, + limit: int = int(800e3), + + ) -> None: + # build mkts schema compat array for writing + mkts_dt = np.dtype(_ohlcv_dt) + mkts_array = np.zeros( + len(ohlcv), + dtype=mkts_dt, + ) + # copy from shm array (yes it's this easy): + # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays + mkts_array[:] = ohlcv[[ + 'time', + 'open', + 'high', + 'low', + 'close', + 'volume', + ]] + + m, r = divmod(len(mkts_array), limit) + + tfkey = tf_in_1s[timeframe] + for i in range(m, 1): + to_push = mkts_array[i-1:i*limit] + + # write to db + resp = await self.client.write( + to_push, + tbk=f'{fqme}/{tfkey}/OHLCV', + + # NOTE: will will append duplicates + # for the same timestamp-index. + # TODO: pre-deduplicate? 
+ isvariablelength=append_and_duplicate, + ) + + log.info( + f'Wrote {mkts_array.size} datums to tsdb\n' + ) + + for resp in resp.responses: + err = resp.error + if err: + raise MarketStoreError(err) + + if r: + to_push = mkts_array[m*limit:] + + # write to db + resp = await self.client.write( + to_push, + tbk=f'{fqme}/{tfkey}/OHLCV', + + # NOTE: will will append duplicates + # for the same timestamp-index. + # TODO: pre deduplicate? + isvariablelength=append_and_duplicate, + ) + + log.info( + f'Wrote {mkts_array.size} datums to tsdb\n' + ) + + for resp in resp.responses: + err = resp.error + if err: + raise MarketStoreError(err) + + # XXX: currently the only way to do this is through the CLI: + + # sudo ./marketstore connect --dir ~/.config/piker/data + # >> \show mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15 + # and this seems to block and use up mem.. + # >> \trim mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15 + + # relevant source code for this is here: + # https://github.com/alpacahq/marketstore/blob/master/cmd/connect/session/trim.go#L14 + # def delete_range(self, start_dt, end_dt) -> None: + # ... + + +ohlc_key_map = bidict({ + 'Epoch': 'time', + 'Open': 'open', + 'High': 'high', + 'Low': 'low', + 'Close': 'close', + 'Volume': 'volume', +}) + + +@acm +async def get_client( + grpc_port: int, # required + host: str | None, + +) -> MarketstoreClient: + ''' + Load a ``anyio_marketstore`` grpc client connected + to an existing ``marketstore`` server. + + ''' + async with open_marketstore_client( + host or 'localhost', + grpc_port, + ) as client: + yield Storage(client) From 1ec9b0565f41878952d69907370a6e6bd468e362 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 29 May 2023 14:05:20 -0400 Subject: [PATCH 03/85] Move `.data.cli` to `.storage.cli` --- piker/{data => storage}/cli.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename piker/{data => storage}/cli.py (100%) diff --git a/piker/data/cli.py b/piker/storage/cli.py similarity index 100% rename from piker/data/cli.py rename to piker/storage/cli.py From cb774e5a5de7ce6e2f4d1f270ac58a11e6efe6ba Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 29 May 2023 17:41:40 -0400 Subject: [PATCH 04/85] Re-implement `piker store` CLI with `typer` Turns out you can mix and match `click` with `typer` so this moves what was the `.data.cli` stuff into `storage.cli` and uses the integration api to make it all work B) New subcmd: `piker store` - add `piker store ls` which lists all fqme keyed time-series from backend. - add `store delete` to remove any such key->time-series. - now uses a nursery for multi-timeframe concurrency B) Mask out all the old `marketstore` specific subcmds for now (streaming, ingest, storesh, etc..) in anticipation of moving them into a subpkg-module and make sure to import the sub-cmd module in our top level cli package. Other `.storage` api tweaks: - drop the reraising with custom error (for now). - rename `Storage` -> `StorageClient` (or should it be API?). 
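For quick reference, the click <-> typer bridge this patch relies on boils down
to converting the `typer.Typer()` sub-app into a plain `click` command and
grafting it onto the existing `click` root group. A minimal, self-contained
sketch of that pattern follows; the `cli` group and `store` app names mirror
the diff below, while the `ls` stub body is illustrative only:

```python
import click
import typer

cli = click.Group('piker')   # stand-in for the existing click root group
store = typer.Typer()        # the new typer sub-app for `piker store ...`

@store.command()
def ls(backends: list[str] = typer.Argument(default=None)):
    '''List time-series keys per storage backend.'''
    ...

# convert the typer app into a click command and mount it as `piker store`
cli.add_command(typer.main.get_command(store), 'store')
```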
--- piker/cli/__init__.py | 9 +- piker/storage/__init__.py | 41 ++-- piker/storage/cli.py | 388 ++++++++++++++++++++--------------- piker/storage/marketstore.py | 13 +- 4 files changed, 266 insertions(+), 185 deletions(-) diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index 706101357..a51fab3a2 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -154,6 +154,8 @@ def cli( assert os.path.isdir(configdir), f"`{configdir}` is not a valid path" config._override_config_dir(configdir) + # TODO: for typer see + # https://typer.tiangolo.com/tutorial/commands/context/ ctx.ensure_object(dict) if not brokers: @@ -227,12 +229,15 @@ async def list_services(): def _load_clis() -> None: from ..service import marketstore # noqa - from ..service import elastic - from ..data import cli # noqa + from ..service import elastic # noqa from ..brokers import cli # noqa from ..ui import cli # noqa from ..watchlists import cli # noqa + # typer implemented + from ..storage import cli # noqa + from ..accounting import cli # noqa + # load downstream cli modules _load_clis() diff --git a/piker/storage/__init__.py b/piker/storage/__init__.py index 3aeefc377..21e258a68 100644 --- a/piker/storage/__init__.py +++ b/piker/storage/__init__.py @@ -61,7 +61,12 @@ ) -class Storage( +__tsdbs__: list[str] = [ + 'marketstore', +] + + +class StorageClient( Protocol, ): ''' @@ -69,6 +74,8 @@ class Storage( in order to suffice the historical data mgmt layer. ''' + name: str + @abstractmethod async def list_keys(self) -> list[str]: ... @@ -131,7 +138,7 @@ async def write_ohlcv( ... -class StorageConnectionError(ConnectionError): +class StorageConnectionError(ConnectionError): ''' Can't connect to the desired tsdb subsys/service. @@ -152,12 +159,14 @@ def get_storagemod(name: str) -> ModuleType: async def open_storage_client( name: str | None = None, -) -> tuple[ModuleType, Storage]: +) -> tuple[ModuleType, StorageClient]: ''' - Load the ``Storage`` client for named backend. + Load the ``StorageClient`` for named backend. ''' - # load root config for tsdb + tsdb_host: str = 'localhost' + + # load root config and any tsdb user defined settings conf, path = config.load('conf', touch_if_dne=True) net = conf.get('network') if net: @@ -185,17 +194,17 @@ async def open_storage_client( else: log.info(f'Attempting to connect to remote {name}@{tsdbconf}') - try: - async with ( - get_client(**tsdbconf) as client, - ): - # slap on our wrapper api - yield mod, client + # try: + async with ( + get_client(**tsdbconf) as client, + ): + # slap on our wrapper api + yield mod, client - except Exception as err: - raise StorageConnectionError( - f'No connection to {name}' - ) from err + # except Exception as err: + # raise StorageConnectionError( + # f'No connection to {name}' + # ) from err # NOTE: pretty sure right now this is only being @@ -203,7 +212,7 @@ async def open_storage_client( @acm async def open_tsdb_client( fqme: str, -) -> Storage: +) -> StorageClient: # TODO: real-time dedicated task for ensuring # history consistency between the tsdb, shm and real-time feed.. diff --git a/piker/storage/cli.py b/piker/storage/cli.py index f855717b0..d2148109b 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -18,9 +18,14 @@ marketstore cli. 
""" +from __future__ import annotations +from typing import TYPE_CHECKING +# import tractor import trio -import tractor -import click +# import click +from rich.console import Console +# from rich.markdown import Markdown +import typer from ..service.marketstore import ( # get_client, @@ -32,35 +37,40 @@ ) from ..cli import cli from .. import watchlists as wl -from ._util import ( +from . import ( log, ) -@cli.command() -@click.option( - '--url', - default='ws://localhost:5993/ws', - help='HTTP URL of marketstore instance' -) -@click.argument('names', nargs=-1) -@click.pass_obj -def ms_stream( - config: dict, - names: list[str], - url: str, -) -> None: - ''' - Connect to a marketstore time bucket stream for (a set of) symbols(s) - and print to console. +if TYPE_CHECKING: + from . import Storage - ''' - async def main(): - # async for quote in stream_quotes(symbols=names): - # log.info(f"Received quote:\n{quote}") - ... +store = typer.Typer() - trio.run(main) +# @cli.command() +# @click.option( +# '--url', +# default='ws://localhost:5993/ws', +# help='HTTP URL of marketstore instance' +# ) +# @click.argument('names', nargs=-1) +# @click.pass_obj +# def ms_stream( +# config: dict, +# names: list[str], +# url: str, +# ) -> None: +# ''' +# Connect to a marketstore time bucket stream for (a set of) symbols(s) +# and print to console. + +# ''' +# async def main(): +# # async for quote in stream_quotes(symbols=names): +# # log.info(f"Received quote:\n{quote}") +# ... + +# trio.run(main) # @cli.command() @@ -99,157 +109,213 @@ async def main(): # tractor.run(main) -@cli.command() -@click.option( - '--tsdb_host', - default='localhost' -) -@click.option( - '--tsdb_port', - default=5993 -) -@click.argument('symbols', nargs=-1) -@click.pass_obj -def storesh( - config, - tl, - host, - port, - symbols: list[str], -): - ''' - Start an IPython shell ready to query the local marketstore db. +# @cli.command() +# @click.option( +# '--tsdb_host', +# default='localhost' +# ) +# @click.option( +# '--tsdb_port', +# default=5993 +# ) +# @click.argument('symbols', nargs=-1) +# @click.pass_obj +# def storesh( +# config, +# tl, +# host, +# port, +# symbols: list[str], +# ): +# ''' +# Start an IPython shell ready to query the local marketstore db. + +# ''' +# from piker.storage import open_tsdb_client +# from piker.service import open_piker_runtime - ''' - from piker.storage import open_tsdb_client - from piker.service import open_piker_runtime +# async def main(): +# nonlocal symbols + +# async with open_piker_runtime( +# 'storesh', +# enable_modules=['piker.service._ahab'], +# ): +# symbol = symbols[0] + +# async with open_tsdb_client(symbol): +# # TODO: ask if user wants to write history for detected +# # available shm buffers? +# from tractor.trionics import ipython_embed +# await ipython_embed() - async def main(): - nonlocal symbols +# trio.run(main) - async with open_piker_runtime( - 'storesh', - enable_modules=['piker.service._ahab'], + +@store.command() +def ls( + backends: list[str] = typer.Argument( + default=None, + help='Storage backends to query, default is all.' + ), +): + from piker.service import open_piker_runtime + from . 
import ( + __tsdbs__, + open_storage_client, + ) + from rich.table import Table + + if not backends: + backends: list[str] = __tsdbs__ + + table = Table(title=f'Table keys for backends {backends}:') + console = Console() + + async def query_all(): + nonlocal backends + + async with ( + open_piker_runtime( + 'tsdb_storage', + enable_modules=['piker.service._ahab'], + ), ): - symbol = symbols[0] + for backend in backends: + async with open_storage_client(name=backend) as ( + mod, + client, + ): + table.add_column(f'{mod.name} fqmes') + keys: list[str] = await client.list_keys() + for key in keys: + table.add_row(key) - async with open_tsdb_client(symbol): - # TODO: ask if user wants to write history for detected - # available shm buffers? - from tractor.trionics import ipython_embed - await ipython_embed() + console.print(table) - trio.run(main) + trio.run(query_all) -@cli.command() -@click.option( - '--host', - default='localhost' -) -@click.option( - '--port', - default=5993 -) -@click.option( - '--delete', - '-d', - is_flag=True, - help='Delete history (1 Min) for symbol(s)', -) -@click.argument('symbols', nargs=-1) -@click.pass_obj -def storage( - config, - host, - port, +async def del_ts_by_timeframe( + client: Storage, + fqme: str, + timeframe: int, + +) -> None: + + resp = await client.delete_ts(fqme, timeframe) + + # TODO: encapsulate per backend errors.. + # - MEGA LOL, apparently the symbols don't + # flush out until you refresh something or other + # (maybe the WALFILE)... #lelandorlulzone, classic + # alpaca(Rtm) design here .. + # well, if we ever can make this work we + # probably want to dogsplain the real reason + # for the delete errurz..llululu + # if fqme not in syms: + # log.error(f'Pair {fqme} dne in DB') + msgish = resp.ListFields()[0][1] + if 'error' in str(msgish): + log.error( + f'Deletion error:\n' + f'backend: {client.name}\n' + f'fqme: {fqme}\n' + f'timeframe: {timeframe}s\n' + f'Error msg:\n\n{msgish}\n', + ) + + +@store.command() +def delete( symbols: list[str], - delete: bool, + backend: str = typer.Option( + default=None, + help='Storage backend to update' + ), + + # delete: bool = typer.Option(False, '-d'), + # host: str = typer.Option( + # 'localhost', + # '-h', + # ), + # port: int = typer.Option('5993', '-p'), ): ''' - Start an IPython shell ready to query the local marketstore db. + Delete a storage backend's time series for (table) keys provided as + ``symbols``. ''' - from piker.storage import open_tsdb_client from piker.service import open_piker_runtime + from . import open_storage_client + + async def main(symbols: list[str]): + async with ( + open_piker_runtime( + 'tsdb_storage', + enable_modules=['piker.service._ahab'] + ), + open_storage_client(name=backend) as (_, storage), + trio.open_nursery() as n, + ): + # spawn queries as tasks for max conc! + for fqme in symbols: + for tf in [1, 60]: + n.start_soon( + del_ts_by_timeframe, + storage, + fqme, + tf, + ) - async def main(): - nonlocal symbols + trio.run(main, symbols) - async with open_piker_runtime( - 'tsdb_storage', - enable_modules=['piker.service._ahab'], - ): - symbol = symbols[0] - async with open_tsdb_client(symbol) as storage: - if delete: - for fqme in symbols: - syms = await storage.client.list_symbols() - - resp60s = await storage.delete_ts(fqme, 60) - - msgish = resp60s.ListFields()[0][1] - if 'error' in str(msgish): - - # TODO: MEGA LOL, apparently the symbols don't - # flush out until you refresh something or other - # (maybe the WALFILE)... 
#lelandorlulzone, classic - # alpaca(Rtm) design here .. - # well, if we ever can make this work we - # probably want to dogsplain the real reason - # for the delete errurz..llululu - if fqme not in syms: - log.error(f'Pair {fqme} dne in DB') - - log.error(f'Deletion error: {fqme}\n{msgish}') - - resp1s = await storage.delete_ts(fqme, 1) - msgish = resp1s.ListFields()[0][1] - if 'error' in str(msgish): - log.error(f'Deletion error: {fqme}\n{msgish}') - - trio.run(main) - - -@cli.command() -@click.option('--test-file', '-t', help='Test quote stream file') -@click.option('--tl', is_flag=True, help='Enable tractor logging') -@click.argument('name', nargs=1, required=True) -@click.pass_obj -def ingest(config, name, test_file, tl): - ''' - Ingest real-time broker quotes and ticks to a marketstore instance. - ''' - # global opts - loglevel = config['loglevel'] - tractorloglevel = config['tractorloglevel'] - # log = config['log'] - - watchlist_from_file = wl.ensure_watchlists(config['wl_path']) - watchlists = wl.merge_watchlist(watchlist_from_file, wl._builtins) - symbols = watchlists[name] - - grouped_syms = {} - for sym in symbols: - symbol, _, provider = sym.rpartition('.') - if provider not in grouped_syms: - grouped_syms[provider] = [] - - grouped_syms[provider].append(symbol) - - async def entry_point(): - async with tractor.open_nursery() as n: - for provider, symbols in grouped_syms.items(): - await n.run_in_actor( - ingest_quote_stream, - name='ingest_marketstore', - symbols=symbols, - brokername=provider, - tries=1, - actorloglevel=loglevel, - loglevel=tractorloglevel - ) - - tractor.run(entry_point) +typer_click_object = typer.main.get_command(store) +cli.add_command(typer_click_object, 'store') + +# @cli.command() +# @click.option('--test-file', '-t', help='Test quote stream file') +# @click.option('--tl', is_flag=True, help='Enable tractor logging') +# @click.argument('name', nargs=1, required=True) +# @click.pass_obj +# def ingest(config, name, test_file, tl): +# ''' +# Ingest real-time broker quotes and ticks to a marketstore instance. + +# ''' +# # global opts +# loglevel = config['loglevel'] +# tractorloglevel = config['tractorloglevel'] +# # log = config['log'] + +# watchlist_from_file = wl.ensure_watchlists(config['wl_path']) +# watchlists = wl.merge_watchlist(watchlist_from_file, wl._builtins) +# symbols = watchlists[name] + +# grouped_syms = {} +# for sym in symbols: +# symbol, _, provider = sym.rpartition('.') +# if provider not in grouped_syms: +# grouped_syms[provider] = [] + +# grouped_syms[provider].append(symbol) + +# async def entry_point(): +# async with tractor.open_nursery() as n: +# for provider, symbols in grouped_syms.items(): +# await n.run_in_actor( +# ingest_quote_stream, +# name='ingest_marketstore', +# symbols=symbols, +# brokername=provider, +# tries=1, +# actorloglevel=loglevel, +# loglevel=tractorloglevel +# ) + +# tractor.run(entry_point) + +# if __name__ == "__main__": +# store() # this is called from ``>> ledger `` diff --git a/piker/storage/marketstore.py b/piker/storage/marketstore.py index 9aad2230a..d1a3d67f9 100644 --- a/piker/storage/marketstore.py +++ b/piker/storage/marketstore.py @@ -65,11 +65,13 @@ log = get_logger(__name__) -class Storage: +class MktsStorageClient: ''' High level storage api for both real-time and historical ingest. 
''' + name: str = 'marketstore' + def __init__( self, client: MarketstoreClient, @@ -214,10 +216,9 @@ async def delete_ts( ) -> bool: client = self.client - syms = await client.list_symbols() - if key not in syms: - await tractor.breakpoint() - raise KeyError(f'`{key}` table key not found in\n{syms}?') + # syms = await client.list_symbols() + # if key not in syms: + # raise KeyError(f'`{key}` table key not found in\n{syms}?') tbk = mk_tbk(( key, @@ -339,4 +340,4 @@ async def get_client( host or 'localhost', grpc_port, ) as client: - yield Storage(client) + yield MktsStorageClient(client) From 33c464524ba7e694001f8acac89fc2b7d97ba10d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 29 May 2023 17:48:03 -0400 Subject: [PATCH 05/85] Lower the paper engine order-cancel latency --- piker/clearing/_paper_engine.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/piker/clearing/_paper_engine.py b/piker/clearing/_paper_engine.py index 44171dbc0..fd998e97d 100644 --- a/piker/clearing/_paper_engine.py +++ b/piker/clearing/_paper_engine.py @@ -124,7 +124,7 @@ async def submit_limit( # for dark orders since we want the dark_executed # to trigger first thus creating a lookup entry # in the broker trades event processing loop - await trio.sleep(0.05) + await trio.sleep(0.01) if ( action == 'sell' @@ -191,7 +191,7 @@ async def submit_cancel( self._sells[symbol].pop(oid, None) # TODO: net latency model - await trio.sleep(0.05) + await trio.sleep(0.01) msg = BrokerdStatus( status='canceled', @@ -224,7 +224,7 @@ async def fake_fill( ''' # TODO: net latency model - await trio.sleep(0.05) + await trio.sleep(0.01) fill_time_ns = time.time_ns() fill_time_s = time.time() From e83de2906f92b99f80cba78075bff0121ec0a9a4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 29 May 2023 20:11:57 -0400 Subject: [PATCH 06/85] Relegate old marketstore cli eps to masked module --- piker/storage/cli.py | 164 +--------------- .../__init__.py} | 0 piker/storage/marketstore/_ingest.py | 177 ++++++++++++++++++ 3 files changed, 179 insertions(+), 162 deletions(-) rename piker/storage/{marketstore.py => marketstore/__init__.py} (100%) create mode 100644 piker/storage/marketstore/_ingest.py diff --git a/piker/storage/cli.py b/piker/storage/cli.py index d2148109b..352db2cd5 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -15,141 +15,26 @@ # along with this program. If not, see . """ -marketstore cli. +Storage middle-ware CLIs. """ from __future__ import annotations from typing import TYPE_CHECKING -# import tractor import trio -# import click from rich.console import Console # from rich.markdown import Markdown import typer -from ..service.marketstore import ( - # get_client, - # stream_quotes, - ingest_quote_stream, - # _url, - # _tick_tbk_ids, - # mk_tbk, -) from ..cli import cli -from .. import watchlists as wl from . import ( log, ) - if TYPE_CHECKING: from . import Storage -store = typer.Typer() - -# @cli.command() -# @click.option( -# '--url', -# default='ws://localhost:5993/ws', -# help='HTTP URL of marketstore instance' -# ) -# @click.argument('names', nargs=-1) -# @click.pass_obj -# def ms_stream( -# config: dict, -# names: list[str], -# url: str, -# ) -> None: -# ''' -# Connect to a marketstore time bucket stream for (a set of) symbols(s) -# and print to console. - -# ''' -# async def main(): -# # async for quote in stream_quotes(symbols=names): -# # log.info(f"Received quote:\n{quote}") -# ... 
- -# trio.run(main) - - -# @cli.command() -# @click.option( -# '--url', -# default=_url, -# help='HTTP URL of marketstore instance' -# ) -# @click.argument('names', nargs=-1) -# @click.pass_obj -# def ms_destroy(config: dict, names: list[str], url: str) -> None: -# """Destroy symbol entries in the local marketstore instance. -# """ -# async def main(): -# nonlocal names -# async with get_client(url) as client: -# -# if not names: -# names = await client.list_symbols() -# -# # default is to wipe db entirely. -# answer = input( -# "This will entirely wipe you local marketstore db @ " -# f"{url} of the following symbols:\n {pformat(names)}" -# "\n\nDelete [N/y]?\n") -# -# if answer == 'y': -# for sym in names: -# # tbk = _tick_tbk.format(sym) -# tbk = tuple(sym, *_tick_tbk_ids) -# print(f"Destroying {tbk}..") -# await client.destroy(mk_tbk(tbk)) -# else: -# print("Nothing deleted.") -# -# tractor.run(main) - - -# @cli.command() -# @click.option( -# '--tsdb_host', -# default='localhost' -# ) -# @click.option( -# '--tsdb_port', -# default=5993 -# ) -# @click.argument('symbols', nargs=-1) -# @click.pass_obj -# def storesh( -# config, -# tl, -# host, -# port, -# symbols: list[str], -# ): -# ''' -# Start an IPython shell ready to query the local marketstore db. - -# ''' -# from piker.storage import open_tsdb_client -# from piker.service import open_piker_runtime - -# async def main(): -# nonlocal symbols -# async with open_piker_runtime( -# 'storesh', -# enable_modules=['piker.service._ahab'], -# ): -# symbol = symbols[0] - -# async with open_tsdb_client(symbol): -# # TODO: ask if user wants to write history for detected -# # available shm buffers? -# from tractor.trionics import ipython_embed -# await ipython_embed() - -# trio.run(main) +store = typer.Typer() @store.command() @@ -274,48 +159,3 @@ async def main(symbols: list[str]): typer_click_object = typer.main.get_command(store) cli.add_command(typer_click_object, 'store') - -# @cli.command() -# @click.option('--test-file', '-t', help='Test quote stream file') -# @click.option('--tl', is_flag=True, help='Enable tractor logging') -# @click.argument('name', nargs=1, required=True) -# @click.pass_obj -# def ingest(config, name, test_file, tl): -# ''' -# Ingest real-time broker quotes and ticks to a marketstore instance. 
- -# ''' -# # global opts -# loglevel = config['loglevel'] -# tractorloglevel = config['tractorloglevel'] -# # log = config['log'] - -# watchlist_from_file = wl.ensure_watchlists(config['wl_path']) -# watchlists = wl.merge_watchlist(watchlist_from_file, wl._builtins) -# symbols = watchlists[name] - -# grouped_syms = {} -# for sym in symbols: -# symbol, _, provider = sym.rpartition('.') -# if provider not in grouped_syms: -# grouped_syms[provider] = [] - -# grouped_syms[provider].append(symbol) - -# async def entry_point(): -# async with tractor.open_nursery() as n: -# for provider, symbols in grouped_syms.items(): -# await n.run_in_actor( -# ingest_quote_stream, -# name='ingest_marketstore', -# symbols=symbols, -# brokername=provider, -# tries=1, -# actorloglevel=loglevel, -# loglevel=tractorloglevel -# ) - -# tractor.run(entry_point) - -# if __name__ == "__main__": -# store() # this is called from ``>> ledger `` diff --git a/piker/storage/marketstore.py b/piker/storage/marketstore/__init__.py similarity index 100% rename from piker/storage/marketstore.py rename to piker/storage/marketstore/__init__.py diff --git a/piker/storage/marketstore/_ingest.py b/piker/storage/marketstore/_ingest.py new file mode 100644 index 000000000..7056399bc --- /dev/null +++ b/piker/storage/marketstore/_ingest.py @@ -0,0 +1,177 @@ +# piker: trading gear for hackers +# Copyright (C) 2018-present Tyler Goodlet (in stewardship of piker0) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Legacy marketstore ingest and streaming related clis. + +''' +# from .. import watchlists as wl +# from ..service.marketstore import ( + # get_client, + # stream_quotes, + # ingest_quote_stream, + # _url, + # _tick_tbk_ids, + # mk_tbk, +# ) + +# @cli.command() +# @click.option( +# '--url', +# default='ws://localhost:5993/ws', +# help='HTTP URL of marketstore instance' +# ) +# @click.argument('names', nargs=-1) +# @click.pass_obj +# def ms_stream( +# config: dict, +# names: list[str], +# url: str, +# ) -> None: +# ''' +# Connect to a marketstore time bucket stream for (a set of) symbols(s) +# and print to console. + +# ''' +# async def main(): +# # async for quote in stream_quotes(symbols=names): +# # log.info(f"Received quote:\n{quote}") +# ... + +# trio.run(main) + + +# @cli.command() +# @click.option( +# '--url', +# default=_url, +# help='HTTP URL of marketstore instance' +# ) +# @click.argument('names', nargs=-1) +# @click.pass_obj +# def ms_destroy(config: dict, names: list[str], url: str) -> None: +# """Destroy symbol entries in the local marketstore instance. +# """ +# async def main(): +# nonlocal names +# async with get_client(url) as client: +# +# if not names: +# names = await client.list_symbols() +# +# # default is to wipe db entirely. 
+# answer = input( +# "This will entirely wipe you local marketstore db @ " +# f"{url} of the following symbols:\n {pformat(names)}" +# "\n\nDelete [N/y]?\n") +# +# if answer == 'y': +# for sym in names: +# # tbk = _tick_tbk.format(sym) +# tbk = tuple(sym, *_tick_tbk_ids) +# print(f"Destroying {tbk}..") +# await client.destroy(mk_tbk(tbk)) +# else: +# print("Nothing deleted.") +# +# tractor.run(main) + + +# @cli.command() +# @click.option( +# '--tsdb_host', +# default='localhost' +# ) +# @click.option( +# '--tsdb_port', +# default=5993 +# ) +# @click.argument('symbols', nargs=-1) +# @click.pass_obj +# def storesh( +# config, +# tl, +# host, +# port, +# symbols: list[str], +# ): +# ''' +# Start an IPython shell ready to query the local marketstore db. + +# ''' +# from piker.storage import open_tsdb_client +# from piker.service import open_piker_runtime + +# async def main(): +# nonlocal symbols + +# async with open_piker_runtime( +# 'storesh', +# enable_modules=['piker.service._ahab'], +# ): +# symbol = symbols[0] + +# async with open_tsdb_client(symbol): +# # TODO: ask if user wants to write history for detected +# # available shm buffers? +# from tractor.trionics import ipython_embed +# await ipython_embed() + +# trio.run(main) + + +# @cli.command() +# @click.option('--test-file', '-t', help='Test quote stream file') +# @click.option('--tl', is_flag=True, help='Enable tractor logging') +# @click.argument('name', nargs=1, required=True) +# @click.pass_obj +# def ingest(config, name, test_file, tl): +# ''' +# Ingest real-time broker quotes and ticks to a marketstore instance. + +# ''' +# # global opts +# loglevel = config['loglevel'] +# tractorloglevel = config['tractorloglevel'] +# # log = config['log'] + +# watchlist_from_file = wl.ensure_watchlists(config['wl_path']) +# watchlists = wl.merge_watchlist(watchlist_from_file, wl._builtins) +# symbols = watchlists[name] + +# grouped_syms = {} +# for sym in symbols: +# symbol, _, provider = sym.rpartition('.') +# if provider not in grouped_syms: +# grouped_syms[provider] = [] + +# grouped_syms[provider].append(symbol) + +# async def entry_point(): +# async with tractor.open_nursery() as n: +# for provider, symbols in grouped_syms.items(): +# await n.run_in_actor( +# ingest_quote_stream, +# name='ingest_marketstore', +# symbols=symbols, +# brokername=provider, +# tries=1, +# actorloglevel=loglevel, +# loglevel=tractorloglevel +# ) + +# tractor.run(entry_point) + From 8ccb8b07444f4d81d943842a24d0cd7886c04712 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 31 May 2023 12:11:14 -0400 Subject: [PATCH 07/85] kucoin: drop shm-array `numpy` dtype def, our default is the same --- piker/brokers/kucoin.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/piker/brokers/kucoin.py b/piker/brokers/kucoin.py index 8cf06300e..998ee3ed9 100755 --- a/piker/brokers/kucoin.py +++ b/piker/brokers/kucoin.py @@ -71,17 +71,6 @@ log = get_logger(__name__) -_ohlc_dtype = [ - ('index', int), - ('time', int), - ('open', float), - ('high', float), - ('low', float), - ('close', float), - ('volume', float), - ('bar_wap', float), # will be zeroed by sampler if not filled -] - class KucoinMktPair(Struct, frozen=True): ''' From e82538ededd879e1e4312b2a64e63e94c2e56184 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 31 May 2023 12:11:52 -0400 Subject: [PATCH 08/85] .data: export ohlc dtypes at top level --- piker/data/__init__.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/piker/data/__init__.py b/piker/data/__init__.py 
index ba6af4caf..681bd3fe7 100644 --- a/piker/data/__init__.py +++ b/piker/data/__init__.py @@ -22,9 +22,6 @@ sharing live streams over a network. """ -import tractor -import trio - from ._util import ( get_console_log, ) @@ -36,6 +33,10 @@ get_shm_token, ShmArray, ) +from ._source import ( + base_iohlc_dtype, + base_ohlc_dtype, +) from .feed import ( open_feed, ) @@ -49,4 +50,6 @@ 'attach_shm_array', 'open_shm_array', 'get_shm_token', + 'base_iohlc_dtype', + 'base_ohlc_dtype', ] From 848577488e6098919dba1304560a7a6af1e403e6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 31 May 2023 17:50:56 -0400 Subject: [PATCH 09/85] Add public config dir getter --- piker/config.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/piker/config.py b/piker/config.py index e2c63ea49..97989fe2f 100644 --- a/piker/config.py +++ b/piker/config.py @@ -187,6 +187,15 @@ def _conf_fn_w_ext( return f'{name}.toml' +def get_conf_dir() -> Path: + ''' + Return the user configuration directory ``Path`` + on the local filesystem. + + ''' + return _config_dir + + def get_conf_path( conf_name: str = 'brokers', From bf21d2e3292fc3d736cf56654327a25e7782241e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 31 May 2023 17:53:15 -0400 Subject: [PATCH 10/85] Rename default OHLCV `np.dtype` descriptions Use `def_iohlcv_fields` for a name and instead of copying and inserting the index field pop it for the non-index version. Drop creating `np.dtype()` instances since `numpy`'s apis accept both input forms so this is simpler on our end. --- piker/data/__init__.py | 11 ++++------- piker/data/_sharedmem.py | 5 ++--- piker/data/_source.py | 34 +++++++++++++++++++++++++--------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/piker/data/__init__.py b/piker/data/__init__.py index 681bd3fe7..cd0a11833 100644 --- a/piker/data/__init__.py +++ b/piker/data/__init__.py @@ -22,9 +22,6 @@ sharing live streams over a network. """ -from ._util import ( - get_console_log, -) from ._normalize import iterticks from ._sharedmem import ( maybe_open_shm_array, @@ -34,8 +31,8 @@ ShmArray, ) from ._source import ( - base_iohlc_dtype, - base_ohlc_dtype, + def_iohlcv_fields, + def_ohlcv_fields, ) from .feed import ( open_feed, @@ -50,6 +47,6 @@ 'attach_shm_array', 'open_shm_array', 'get_shm_token', - 'base_iohlc_dtype', - 'base_ohlc_dtype', + 'def_iohlcv_fields', + 'def_ohlcv_fields', ] diff --git a/piker/data/_sharedmem.py b/piker/data/_sharedmem.py index 2ed1c8922..41dd00718 100644 --- a/piker/data/_sharedmem.py +++ b/piker/data/_sharedmem.py @@ -33,7 +33,7 @@ import tractor from ._util import log -from ._source import base_iohlc_dtype +from ._source import def_iohlcv_fields from .types import Struct @@ -168,7 +168,7 @@ def _make_token( to access a shared array. ''' - dtype = base_iohlc_dtype if dtype is None else dtype + dtype = def_iohlcv_fields if dtype is None else dtype return _Token( shm_name=key, shm_first_index_name=key + "_first", @@ -258,7 +258,6 @@ def array(self) -> np.ndarray: # to load an empty array.. 
if len(a) == 0 and self._post_init: raise RuntimeError('Empty array race condition hit!?') - # breakpoint() return a diff --git a/piker/data/_source.py b/piker/data/_source.py index d1d8be023..7f739d7d0 100644 --- a/piker/data/_source.py +++ b/piker/data/_source.py @@ -23,26 +23,42 @@ import numpy as np -ohlc_fields = [ - ('time', float), +def_iohlcv_fields: list[tuple[str, type]] = [ + + # YES WE KNOW, this isn't needed in polars but we use it for doing + # ring-buffer like pre/append ops our our `ShmArray` real-time + # numpy-array buffering system such that there is a master index + # that can be used for index-arithmetic when write data to the + # "middle" of the array. See the ``tractor.ipc.shm`` pkg for more + # details. + ('index', int), + + # presume int for epoch stamps since it's most common + # and makes the most sense to avoid float rounding issues. + # TODO: if we want higher reso we should use the new + # ``time.time_ns()`` in python 3.10+ + ('time', int), ('open', float), ('high', float), ('low', float), ('close', float), ('volume', float), - ('bar_wap', float), -] -ohlc_with_index = ohlc_fields.copy() -ohlc_with_index.insert(0, ('index', int)) + # TODO: can we elim this from default field set to save on mem? + # i think only kraken really uses this in terms of what we get from + # their ohlc history API? + ('bar_wap', float), # shouldn't be default right? +] -# our minimum structured array layout for ohlc data -base_iohlc_dtype = np.dtype(ohlc_with_index) -base_ohlc_dtype = np.dtype(ohlc_fields) +# remove index field +def_ohlcv_fields: list[tuple[str, type]] = def_iohlcv_fields.copy() +def_ohlcv_fields.pop(0) +assert (len(def_iohlcv_fields) - len(def_ohlcv_fields)) == 1 # TODO: for now need to construct this manually for readonly arrays, see # https://github.com/numba/numba/issues/4511 # from numba import from_dtype +# base_ohlc_dtype = np.dtype(def_ohlc_fields) # numba_ohlc_dtype = from_dtype(base_ohlc_dtype) # map time frame "keys" to seconds values From af64152640fcd4f69879c6469e83d8924c380646 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 31 May 2023 17:56:32 -0400 Subject: [PATCH 11/85] .data.history: update to new naming -> `._source.def_iohlcv_fields` -> `.storage.StorageClient` --- piker/data/history.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/piker/data/history.py b/piker/data/history.py index a29d2ab93..4a0ab29bb 100644 --- a/piker/data/history.py +++ b/piker/data/history.py @@ -48,7 +48,7 @@ ShmArray, _secs_in_day, ) -from ._source import base_iohlc_dtype +from ._source import def_iohlcv_fields from ._sampling import ( open_sample_stream, ) @@ -57,7 +57,7 @@ ) if TYPE_CHECKING: - from ..service.marketstore import Storage + from ..service.marketstore import StorageClient from .feed import _FeedsBus @@ -87,7 +87,7 @@ async def start_backfill( feed_is_live: trio.Event, last_tsdb_dt: datetime | None = None, - storage: Storage | None = None, + storage: StorageClient | None = None, write_tsdb: bool = True, tsdb_is_up: bool = False, @@ -177,6 +177,7 @@ async def start_backfill( # based on the sample step size, maybe load a certain amount history if last_tsdb_dt is None: + if step_size_s not in (1, 60): raise ValueError( '`piker` only needs to support 1m and 1s sampling ' @@ -388,7 +389,7 @@ async def tsdb_backfill( mod: ModuleType, storemod: ModuleType, bus: _FeedsBus, - storage: Storage, + storage: StorageClient, mkt: MktPair, shms: dict[int, ShmArray], sampler_stream: tractor.MsgStream, @@ -406,6 +407,7 @@ async def 
tsdb_backfill( fqme: str = mkt.fqme # start history anal and load missing new data via backend. + timeframe: int for timeframe, shm in shms.items(): # loads a (large) frame of data from the tsdb depending # on the db's query size limit. @@ -527,7 +529,7 @@ async def back_load_from_tsdb( len(tsdb_history) ): # load the first (smaller) bit of history originally loaded - # above from ``Storage.load()``. + # above from ``StorageClient.load()``. to_push = tsdb_history[-prepend_start:] shm.push( to_push, @@ -645,7 +647,7 @@ async def manage_history( key=f'piker.{service}[{uuid[:16]}].{fqme}.hist', # use any broker defined ohlc dtype: - dtype=getattr(mod, '_ohlc_dtype', base_iohlc_dtype), + dtype=getattr(mod, '_ohlc_dtype', def_iohlcv_fields), # we expect the sub-actor to write readonly=False, @@ -662,7 +664,7 @@ async def manage_history( key=f'piker.{service}[{uuid[:16]}].{fqme}.rt', # use any broker defined ohlc dtype: - dtype=getattr(mod, '_ohlc_dtype', base_iohlc_dtype), + dtype=getattr(mod, '_ohlc_dtype', def_iohlcv_fields), # we expect the sub-actor to write readonly=False, From 9859f601ca26138461761bdef9a06bd5976cf4a5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 31 May 2023 18:07:00 -0400 Subject: [PATCH 12/85] Invert data provider's OHLCV field defs Turns out the reason we were originally making the `time: float` column in our ohlcv arrays was bc that's what **only** ib uses XD (and/or :facepalm:) Instead we changed the default field type to be an `int` (which is also more correct to avoid `float` rounding/precision discrepancies) and thus **do not need to override it** in all other (crypto) backends (except `ib`). Now we only do the customization (via `._ohlc_dtype`) to `float` only for `ib` for now (though pretty sure we can also not do that eventually as well..)! --- piker/brokers/binance.py | 33 +++++++--------- piker/brokers/deribit/api.py | 68 +++++++++++++------------------- piker/brokers/deribit/feed.py | 1 - piker/brokers/ib/api.py | 26 +++++++++++- piker/brokers/kraken/__init__.py | 7 +--- piker/brokers/kraken/api.py | 31 +++++---------- piker/brokers/kraken/broker.py | 2 +- piker/brokers/kraken/feed.py | 2 +- 8 files changed, 79 insertions(+), 91 deletions(-) diff --git a/piker/brokers/binance.py b/piker/brokers/binance.py index 48b28d6f6..6242d0ba9 100644 --- a/piker/brokers/binance.py +++ b/piker/brokers/binance.py @@ -58,9 +58,10 @@ log, get_console_log, ) -from ..data.types import Struct -from ..data.validate import FeedInit -from ..data._web_bs import ( +from piker.data.types import Struct +from piker.data.validate import FeedInit +from piker.data import def_iohlcv_fields +from piker.data._web_bs import ( open_autorecon_ws, NoBsWs, ) @@ -70,30 +71,21 @@ # Broker specific ohlc schema (rest) -_ohlc_dtype = [ - ('index', int), - ('time', int), - ('open', float), - ('high', float), - ('low', float), - ('close', float), - ('volume', float), - ('bar_wap', float), # will be zeroed by sampler if not filled - - # XXX: some additional fields are defined in the docs: - # https://binance-docs.github.io/apidocs/spot/en/#kline-candlestick-data +# XXX TODO? some additional fields are defined in the docs: +# https://binance-docs.github.io/apidocs/spot/en/#kline-candlestick-data +# _ohlc_dtype = [ # ('close_time', int), # ('quote_vol', float), # ('num_trades', int), # ('buy_base_vol', float), # ('buy_quote_vol', float), # ('ignore', float), -] +# ] # UI components allow this to be declared such that additional # (historical) fields can be exposed. 
-ohlc_dtype = np.dtype(_ohlc_dtype) +# ohlc_dtype = np.dtype(_ohlc_dtype) _show_wap_in_history = False @@ -330,7 +322,7 @@ async def bars( bar.typecast() row = [] - for j, (name, ftype) in enumerate(_ohlc_dtype[1:]): + for j, (name, ftype) in enumerate(def_iohlcv_fields[1:]): # TODO: maybe we should go nanoseconds on all # history time stamps? @@ -343,7 +335,10 @@ async def bars( new_bars.append((i,) + tuple(row)) - array = np.array(new_bars, dtype=_ohlc_dtype) if as_np else bars + array = np.array( + new_bars, + dtype=def_iohlcv_fields, + ) if as_np else bars return array diff --git a/piker/brokers/deribit/api.py b/piker/brokers/deribit/api.py index 4159b18a7..93d4c498f 100644 --- a/piker/brokers/deribit/api.py +++ b/piker/brokers/deribit/api.py @@ -18,43 +18,33 @@ Deribit backend. ''' -import json -import time import asyncio - -from contextlib import asynccontextmanager as acm, AsyncExitStack -from functools import partial +from contextlib import ( + asynccontextmanager as acm, +) from datetime import datetime -from typing import Any, Optional, Iterable, Callable +from functools import partial +import time +from typing import ( + Any, + Optional, + Callable, +) import pendulum -import asks import trio -from trio_typing import Nursery, TaskStatus +from trio_typing import TaskStatus from fuzzywuzzy import process as fuzzy import numpy as np - -from piker.data.types import Struct -from piker.data._web_bs import ( - NoBsWs, - open_autorecon_ws, - open_jsonrpc_session -) - -from .._util import resproc - -from piker import config -from piker.log import get_logger - from tractor.trionics import ( broadcast_receiver, - BroadcastReceiver, maybe_open_context ) from tractor import to_asyncio - +# XXX WOOPS XD +# yeah you'll need to install it since it was removed in #489 by +# accident; well i thought we had removed all usage.. 
from cryptofeed import FeedHandler - from cryptofeed.defines import ( DERIBIT, L1_BOOK, TRADES, @@ -62,6 +52,17 @@ ) from cryptofeed.symbols import Symbol +from piker.data.types import Struct +from piker.data import def_iohlcv_fields +from piker.data._web_bs import ( + open_jsonrpc_session +) + + +from piker import config +from piker.log import get_logger + + log = get_logger(__name__) @@ -75,26 +76,13 @@ _testnet_ws_url = 'wss://test.deribit.com/ws/api/v2' -# Broker specific ohlc schema (rest) -_ohlc_dtype = [ - ('index', int), - ('time', int), - ('open', float), - ('high', float), - ('low', float), - ('close', float), - ('volume', float), - ('bar_wap', float), # will be zeroed by sampler if not filled -] - - class JSONRPCResult(Struct): jsonrpc: str = '2.0' id: int result: Optional[dict] = None error: Optional[dict] = None - usIn: int - usOut: int + usIn: int + usOut: int usDiff: int testnet: bool @@ -405,7 +393,7 @@ async def bars( new_bars.append((i,) + tuple(row)) - array = np.array(new_bars, dtype=_ohlc_dtype) if as_np else klines + array = np.array(new_bars, dtype=def_iohlcv_fields) if as_np else klines return array async def last_trades( diff --git a/piker/brokers/deribit/feed.py b/piker/brokers/deribit/feed.py index a94204020..04357ef84 100644 --- a/piker/brokers/deribit/feed.py +++ b/piker/brokers/deribit/feed.py @@ -39,7 +39,6 @@ ) from cryptofeed import FeedHandler - from cryptofeed.defines import ( DERIBIT, L1_BOOK, TRADES, OPTION, CALL, PUT ) diff --git a/piker/brokers/ib/api.py b/piker/brokers/ib/api.py index 8636ddd26..8fc8c6519 100644 --- a/piker/brokers/ib/api.py +++ b/piker/brokers/ib/api.py @@ -73,12 +73,34 @@ from ib_insync.client import Client as ib_Client import numpy as np +# TODO: in hindsight, probably all imports should be +# non-relative for backends so that non-builting backends +# can be easily modelled after this style B) from piker import config from piker.brokers._util import ( log, get_logger, ) -from piker.data._source import base_ohlc_dtype + +# Broker specific ohlc schema which includes a vwap field +_ohlc_dtype: list[tuple[str, type]] = [ + ('index', int), + + # NOTE XXX: only part that's diff + # from our default fields where + # time is normally an int. + # TODO: can we just cast to this + # at np.ndarray load time? + ('time', float), + + ('open', float), + ('high', float), + ('low', float), + ('close', float), + ('volume', float), + ('count', int), + ('bar_wap', float), # Wait do we need this? +] _time_units = { @@ -295,7 +317,7 @@ def bars_to_np(bars: list) -> np.ndarray: nparr = np.array( np_ready, - dtype=base_ohlc_dtype, + dtype=_ohlc_dtype, ) assert nparr['time'][0] == bars[0].date.timestamp() assert nparr['time'][-1] == bars[-1].date.timestamp() diff --git a/piker/brokers/kraken/__init__.py b/piker/brokers/kraken/__init__.py index cd04c950f..0589981b7 100644 --- a/piker/brokers/kraken/__init__.py +++ b/piker/brokers/kraken/__init__.py @@ -25,11 +25,6 @@ wrapping around ``ib_insync``. 
''' - -from piker.log import get_logger - -log = get_logger(__name__) - from .api import ( get_client, ) @@ -44,8 +39,10 @@ norm_trade_records, ) + __all__ = [ 'get_client', + 'get_mkt_info', 'trades_dialogue', 'open_history_client', 'open_symbol_search', diff --git a/piker/brokers/kraken/api.py b/piker/brokers/kraken/api.py index 1ebdb7597..de2be68c1 100644 --- a/piker/brokers/kraken/api.py +++ b/piker/brokers/kraken/api.py @@ -41,6 +41,7 @@ from piker import config from piker.data.types import Struct +from piker.data import def_iohlcv_fields from piker.accounting._mktinfo import ( Asset, digits_to_dec, @@ -52,29 +53,15 @@ DataThrottle, ) from piker.accounting import Transaction -from . import log +from piker.log import get_logger + +log = get_logger('piker.brokers.kraken') # // _url = 'https://api.kraken.com/0' - - -# Broker specific ohlc schema which includes a vwap field -_ohlc_dtype = [ - ('index', int), - ('time', int), - ('open', float), - ('high', float), - ('low', float), - ('close', float), - ('volume', float), - ('count', int), - ('bar_wap', float), -] - -# UI components allow this to be declared such that additional -# (historical) fields can be exposed. -ohlc_dtype = np.dtype(_ohlc_dtype) - +# TODO: this is the only backend providing this right? +# in which case we should drop it from the defaults and +# instead make a custom fields descr in this module! _show_wap_in_history = True _symbol_info_translation: dict[str, str] = { 'tick_decimals': 'pair_decimals', @@ -622,11 +609,11 @@ async def bars( new_bars.append( (i,) + tuple( ftype(bar[j]) for j, (name, ftype) in enumerate( - _ohlc_dtype[1:] + def_iohlcv_fields[1:] ) ) ) - array = np.array(new_bars, dtype=_ohlc_dtype) if as_np else bars + array = np.array(new_bars, dtype=def_iohlcv_fields) if as_np else bars return array except KeyError: errmsg = json['error'][0] diff --git a/piker/brokers/kraken/broker.py b/piker/brokers/kraken/broker.py index 28f5d026c..96ab77ae2 100644 --- a/piker/brokers/kraken/broker.py +++ b/piker/brokers/kraken/broker.py @@ -63,8 +63,8 @@ BrokerdPosition, BrokerdStatus, ) -from . import log from .api import ( + log, Client, BrokerError, get_client, diff --git a/piker/brokers/kraken/feed.py b/piker/brokers/kraken/feed.py index 526590fe0..02b2866af 100644 --- a/piker/brokers/kraken/feed.py +++ b/piker/brokers/kraken/feed.py @@ -54,8 +54,8 @@ from piker.data.types import Struct from piker.data.validate import FeedInit from piker.data._web_bs import open_autorecon_ws, NoBsWs -from . import log from .api import ( + log, Client, Pair, ) From 7d1cc47db9ced572b40b2be912f7e3cf9a66a40e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 31 May 2023 18:28:14 -0400 Subject: [PATCH 13/85] ROFL, even using `pymarketstore`'s json-RPC it's borked.. Turns out trying to switch to the old sync client and going back to using the old json-RPC API (after having had to patch the upstream repo to not import gRPC machinery to avoid crashes..) I'm basically getting the exact same issues. New tinkering results does possibly tell some new stuff: - the EOF error seems to indeed be due to trying fetch records which haven't been written (properly) - like asking for a `end=` that is earlier then the earliest record. - the "snappy input corrupt" error seems to have something to do with the `Params.end` field not being an `int` and/or the int precision not being chosen correctly? 
- toying with this a bunch manually shows that the internals of the client (particularly `.build_query()` stuff) is parsing/calcing the `Epoch` and `Nanoseconds` values out incorrectly.. which is likely part of the problem. - we also changed `anyio_marketstore.MarketStoreclient.build_query()` logic when removing `pandas` a while back, which also seems to be part of the problem on the async side, however reverting those changes also didn't fix the issue entirely; likely something else more subtle going on (maybe with the write vs. read `Epoch` field type we pass?). Despite all this malarky, we're already underway more or less obsoleting this whole thing with a much less complex approach of using apache parquet files and modern filesystem tools to get a more flexible and numerics-native dataframe-oriented tsdb B) --- piker/storage/marketstore/__init__.py | 118 ++++++++++++++++---------- 1 file changed, 75 insertions(+), 43 deletions(-) diff --git a/piker/storage/marketstore/__init__.py b/piker/storage/marketstore/__init__.py index d1a3d67f9..d435fb66a 100644 --- a/piker/storage/marketstore/__init__.py +++ b/piker/storage/marketstore/__init__.py @@ -46,7 +46,7 @@ import pendulum import purerpc -from ..service.marketstore import ( +from piker.service.marketstore import ( MarketstoreClient, tf_in_1s, mk_tbk, @@ -58,7 +58,7 @@ MarketstoreClient, Params, ) -from ..log import get_logger +from piker.log import get_logger # from .._profile import Profiler @@ -107,7 +107,6 @@ async def load( datetime | None, # first dt datetime | None, # last dt ]: - first_tsdb_dt, last_tsdb_dt = None, None hist = await self.read_ohlcv( fqme, @@ -119,10 +118,13 @@ async def load( log.info(f'Loaded tsdb history {hist}') if len(hist): - times = hist['Epoch'] + # breakpoint() + times: np.ndarray = hist['Epoch'] + first, last = times[0], times[-1] first_tsdb_dt, last_tsdb_dt = map( - pendulum.from_timestamp, [first, last] + pendulum.from_timestamp, + [first, last] ) return ( @@ -135,53 +137,82 @@ async def read_ohlcv( self, fqme: str, timeframe: int | str, - end: int | None = None, - limit: int = int(800e3), + end: float | None = None, # epoch or none + limit: int = int(200e3), ) -> np.ndarray: client = self.client syms = await client.list_symbols() - if fqme not in syms: return {} + # ensure end time is in correct int format! + if ( + end + and not isinstance(end, float) + ): + end = int(float(end)) + # breakpoint() + # use the provided timeframe or 1s by default tfstr = tf_in_1s.get(timeframe, tf_in_1s[1]) - params = Params( + import pymarketstore as pymkts + sync_client = pymkts.Client() + param = pymkts.Params( symbols=fqme, timeframe=tfstr, attrgroup='OHLCV', end=end, - # limit_from_start=True, - # TODO: figure the max limit here given the - # ``purepc`` msg size limit of purerpc: 33554432 limit=limit, + # limit_from_start=True, ) - - for i in range(3): - try: - result = await client.query(params) - break - except purerpc.grpclib.exceptions.UnknownError as err: - if 'snappy' in err.args: - await tractor.breakpoint() - - # indicate there is no history for this timeframe - log.exception( - f'Unknown mkts QUERY error: {params}\n' - f'{err.args}' - ) - else: - return {} - - # TODO: it turns out column access on recarrays is actually slower: - # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist - # it might make sense to make these structured arrays? 
- data_set = result.by_symbols()[fqme] - array = data_set.array + try: + reply = sync_client.query(param) + except Exception as err: + if 'no files returned from query parse: None' in err.args: + return [] + + raise + + data_set: pymkts.results.DataSet = reply.first() + array: np.ndarray = data_set.array + + # params = Params( + # symbols=fqme, + # timeframe=tfstr, + # attrgroup='OHLCV', + # end=end, + # # limit_from_start=True, + + # # TODO: figure the max limit here given the + # # ``purepc`` msg size limit of purerpc: 33554432 + # limit=limit, + # ) + + # for i in range(3): + # try: + # result = await client.query(params) + # break + # except purerpc.grpclib.exceptions.UnknownError as err: + # if 'snappy' in err.args: + # await tractor.breakpoint() + + # # indicate there is no history for this timeframe + # log.exception( + # f'Unknown mkts QUERY error: {params}\n' + # f'{err.args}' + # ) + # else: + # return {} + + # # TODO: it turns out column access on recarrays is actually slower: + # # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist + # # it might make sense to make these structured arrays? + # data_set = result.by_symbols()[fqme] + # array = data_set.array # XXX: ensure sample rate is as expected time = data_set.array['Epoch'] @@ -191,19 +222,20 @@ async def read_ohlcv( if time_step != ts: log.warning( - f'MKTS BUG: wrong timeframe loaded: {time_step}' - 'YOUR DATABASE LIKELY CONTAINS BAD DATA FROM AN OLD BUG' + f'MKTS BUG: wrong timeframe loaded: {time_step}\n' + 'YOUR DATABASE LIKELY CONTAINS BAD DATA FROM AN OLD BUG ' f'WIPING HISTORY FOR {ts}s' ) - await self.delete_ts(fqme, timeframe) + await tractor.breakpoint() + # await self.delete_ts(fqme, timeframe) # try reading again.. - return await self.read_ohlcv( - fqme, - timeframe, - end, - limit, - ) + # return await self.read_ohlcv( + # fqme, + # timeframe, + # end, + # limit, + # ) return array From 94733c4a0bd77b2d509dad60f0f0c87cf9ae0804 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 31 May 2023 18:39:41 -0400 Subject: [PATCH 14/85] A PoC tsdb prototype: `parqdb` using `polars` Turns out just (over)writing `.parquet` files with >= 1M datums is like less then a second, and we can likely speed up appends using `fastparquet` (usage coming soon). Includes: - a new `clone` CLI subcmd to test this all out by ad-hoc copy of (literally hardcoded to a daemon-actor specific shm allocation X) an existing `/dev/shm/` and push to `.parquet` file. - code to convert from our `ShmArray.array: np.ndarray` -> `polars.DataFrame` (thanks SO). - timing checks around the file IO and np -> polars conversion. - a `read` subcmd which i was using to test the sync `pymarketstore` client against our async one to see if the issues from https://github.com/pikers/piker/issues/443 were resolved, but nope! --- piker/storage/cli.py | 193 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 189 insertions(+), 4 deletions(-) diff --git a/piker/storage/cli.py b/piker/storage/cli.py index 352db2cd5..3afb696a4 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -19,13 +19,18 @@ """ from __future__ import annotations +from pathlib import Path from typing import TYPE_CHECKING -import trio + +import numpy as np +import pendulum from rich.console import Console +import trio # from rich.markdown import Markdown import typer -from ..cli import cli +from piker.service import open_piker_runtime +from piker.cli import cli from . 
import ( log, ) @@ -44,7 +49,7 @@ def ls( help='Storage backends to query, default is all.' ), ): - from piker.service import open_piker_runtime + # from piker.service import open_piker_runtime from . import ( __tsdbs__, open_storage_client, @@ -132,7 +137,6 @@ def delete( ``symbols``. ''' - from piker.service import open_piker_runtime from . import open_storage_client async def main(symbols: list[str]): @@ -157,5 +161,186 @@ async def main(symbols: list[str]): trio.run(main, symbols) +@store.command() +def read( + fqme: str, + + limit: int = int(800e3), + client_type: str = 'async', + +) -> np.ndarray: + + end: int | None = None + + if client_type == 'sync': + import pymarketstore as pymkts + cli = pymkts.Client() + + + while end != 0: + param = pymkts.Params( + fqme, + '1Min', + 'OHLCV', + limit=limit, + # limit_from_start=True, + end=end, + ) + if end is not None: + breakpoint() + reply = cli.query(param) + ds: pymkts.results.DataSet = reply.first() + array: np.ndarray = ds.array + + print(f'loaded {len(array)}-len array:\n{array}') + + times = array['Epoch'] + end: float = float(times[0]) + dt = pendulum.from_timestamp(end) + # end: str = dt.isoformat('T') + breakpoint() + print( + f'trying to load next {limit} datums frame starting @ {dt}' + ) + else: + from anyio_marketstore import ( # noqa + open_marketstore_client, + MarketstoreClient, + Params, + ) + async def main(): + + end: int | None = None + + async with open_marketstore_client( + 'localhost', + 5995, + ) as client: + + while end != 0: + params = Params( + symbols=fqme, + # timeframe=tfstr, + timeframe='1Min', + attrgroup='OHLCV', + end=end, + # limit_from_start=True, + + # TODO: figure the max limit here given the + # ``purepc`` msg size limit of purerpc: 33554432 + limit=limit, + ) + + if end is not None: + breakpoint() + result = await client.query(params) + data_set = result.by_symbols()[fqme] + array = data_set.array + times = array['Epoch'] + end: float = float(times[0]) + dt = pendulum.from_timestamp(end) + breakpoint() + print( + f'trying to load next {limit} datums frame starting @ {dt}' + ) + + trio.run(main) + + +@store.command() +def clone( + fqme: str, +) -> None: + import time + from piker.config import get_conf_dir + from piker.data import ( + maybe_open_shm_array, + def_iohlcv_fields, + ) + import polars as pl + + # open existing shm buffer for kucoin backend + key: str = 'piker.brokerd[d07c9bb7-b720-41].tlosusdt.kucoin.hist' + shmpath: Path = Path('/dev/shm') / key + assert shmpath.is_file() + + async def main(): + async with ( + open_piker_runtime( + 'polars_boi', + enable_modules=['piker.data._sharedmem'], + ), + ): + # attach to any shm buffer, load array into polars df, + # write to local parquet file. 
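+            # NOTE: the `key` above is hardcoded to an
+            # already-allocated daemon-actor buffer, so we only
+            # ever *attach* to it here (never allocate), hence the
+            # `assert not opened` just below.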
+ shm, opened = maybe_open_shm_array( + key=key, + dtype=def_iohlcv_fields, + ) + assert not opened + ohlcv = shm.array + + start = time.time() + + # XXX: thanks to this SO answer for this conversion tip: + # https://stackoverflow.com/a/72054819 + df = pl.DataFrame({ + field_name: ohlcv[field_name] + for field_name in ohlcv.dtype.fields + }) + delay: float = round( + time.time() - start, + ndigits=6, + ) + print( + f'numpy -> polars conversion took {delay} secs\n' + f'polars df: {df}' + ) + + # compute ohlc properties for naming + times: np.ndarray = ohlcv['time'] + secs: float = times[-1] - times[-2] + if secs < 1.: + breakpoint() + raise ValueError( + f'Something is wrong with time period for {shm}:\n{ohlcv}' + ) + + timeframe: str = f'{secs}s' + + # write to parquet file + datadir: Path = get_conf_dir() / 'parqdb' + if not datadir.is_dir(): + datadir.mkdir() + + path: Path = datadir / f'{fqme}.{timeframe}.parquet' + + # write to fs + start = time.time() + df.write_parquet(path) + delay: float = round( + time.time() - start, + ndigits=6, + ) + print( + f'parquet write took {delay} secs\n' + f'file path: {path}' + ) + + # read back from fs + start = time.time() + read_df: pl.DataFrame = pl.read_parquet(path) + delay: float = round( + time.time() - start, + ndigits=6, + ) + print( + f'parquet read took {delay} secs\n' + f'polars df: {read_df}' + ) + + trio.run(main) + + typer_click_object = typer.main.get_command(store) cli.add_command(typer_click_object, 'store') From 8de92179daff95d3538644fb52cda845136ec837 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 1 Jun 2023 11:49:02 -0400 Subject: [PATCH 15/85] kucoin: fix missing default fields def import --- piker/brokers/kucoin.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/piker/brokers/kucoin.py b/piker/brokers/kucoin.py index 998ee3ed9..2ab4a785c 100755 --- a/piker/brokers/kucoin.py +++ b/piker/brokers/kucoin.py @@ -55,14 +55,15 @@ digits_to_dec, MktPair, ) -from piker.data.validate import FeedInit from piker import config from piker._cacheables import ( open_cached_client, async_lifo_cache, ) from piker.log import get_logger +from piker.data.validate import FeedInit from piker.data.types import Struct +from piker.data import def_iohlcv_fields from piker.data._web_bs import ( open_autorecon_ws, NoBsWs, @@ -524,7 +525,7 @@ async def _get_bars( ) array = np.array( - new_bars, dtype=_ohlc_dtype) if as_np else bars + new_bars, dtype=def_iohlcv_fields) if as_np else bars return array From 7b4f4bf804d2364d31be3527cfe95d6a587aca90 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 2 Jun 2023 12:02:49 -0400 Subject: [PATCH 16/85] First draft `.storage.nativedb.` using parquet files After much frustration with a particular tsdb (cough) this instead implements a new native-file (and apache tech based) backend which stores time series in parquet files (for now) using the `polars` apis (since we plan to use that lib as well for processing). Note this code is currently **very** rough and in draft mode. Details: - add conversion routines for going from `polars.DataFrame` to `numpy.ndarray` and back. - lay out a simple file-name as series key symbology: `fqme..parquet`, though probably it will evolve. - implement the entire `StorageClient` interface as it stands. - adjust `storage.cli` cmds to instead expect to use this new backend, which means it's a complete mess XD Main benefits/motivation: - wayy faster load times with no "datums to load limit" required. 
- smaller space footprint and we haven't even touched compression settings yet! - wayyy more compatible with other systems which can lever the apache ecosystem. - gives us finer grained control over the filesystem usage so we can choose to swap out stuff like the replication system or networking access. --- piker/storage/__init__.py | 5 +- piker/storage/cli.py | 184 ++++++++------- piker/storage/marketstore/__init__.py | 2 +- piker/storage/nativedb.py | 309 ++++++++++++++++++++++++++ setup.py | 2 + 5 files changed, 419 insertions(+), 83 deletions(-) create mode 100644 piker/storage/nativedb.py diff --git a/piker/storage/__init__.py b/piker/storage/__init__.py index 21e258a68..cca77c694 100644 --- a/piker/storage/__init__.py +++ b/piker/storage/__init__.py @@ -157,7 +157,7 @@ def get_storagemod(name: str) -> ModuleType: @acm async def open_storage_client( - name: str | None = None, + name: str = 'nativedb', ) -> tuple[ModuleType, StorageClient]: ''' @@ -168,6 +168,9 @@ async def open_storage_client( # load root config and any tsdb user defined settings conf, path = config.load('conf', touch_if_dne=True) + + # TODO: maybe not under a "network" section.. since + # no more chitty mkts.. net = conf.get('network') if net: tsdbconf = net.get('tsdb') diff --git a/piker/storage/cli.py b/piker/storage/cli.py index 3afb696a4..11d2b4901 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -93,7 +93,8 @@ async def del_ts_by_timeframe( ) -> None: - resp = await client.delete_ts(fqme, timeframe) + path: Path = await client.delete_ts(fqme, timeframe) + log.info(f'Deleted {path}') # TODO: encapsulate per backend errors.. # - MEGA LOL, apparently the symbols don't @@ -105,15 +106,15 @@ async def del_ts_by_timeframe( # for the delete errurz..llululu # if fqme not in syms: # log.error(f'Pair {fqme} dne in DB') - msgish = resp.ListFields()[0][1] - if 'error' in str(msgish): - log.error( - f'Deletion error:\n' - f'backend: {client.name}\n' - f'fqme: {fqme}\n' - f'timeframe: {timeframe}s\n' - f'Error msg:\n\n{msgish}\n', - ) + # msgish = resp.ListFields()[0][1] + # if 'error' in str(msgish): + # log.error( + # f'Deletion error:\n' + # f'backend: {client.name}\n' + # f'fqme: {fqme}\n' + # f'timeframe: {timeframe}s\n' + # f'Error msg:\n\n{msgish}\n', + # ) @store.command() @@ -166,85 +167,106 @@ def read( fqme: str, limit: int = int(800e3), - client_type: str = 'async', + # client_type: str = 'async', ) -> np.ndarray: - end: int | None = None + # end: int | None = None + # import tractor + from .nativedb import get_client - if client_type == 'sync': - import pymarketstore as pymkts - cli = pymkts.Client() - - - while end != 0: - param = pymkts.Params( + async def main(): + async with get_client() as client: + syms: list[str] = await client.list_keys() + + ( + history, + first_dt, + last_dt, + ) = await client.load( fqme, - '1Min', - 'OHLCV', - limit=limit, - # limit_from_start=True, - end=end, + 60, ) - if end is not None: - breakpoint() - reply = cli.query(param) - ds: pymkts.results.DataSet = reply.first() - array: np.ndarray = ds.array - - print(f'loaded {len(array)}-len array:\n{array}') - - times = array['Epoch'] - end: float = float(times[0]) - dt = pendulum.from_timestamp(end) - # end: str = dt.isoformat('T') + assert first_dt < last_dt + print(f'{fqme} SIZE -> {history.size}') breakpoint() - print( - f'trying to load next {limit} datums frame starting @ {dt}' - ) - else: - from anyio_marketstore import ( # noqa - open_marketstore_client, - MarketstoreClient, - Params, - ) - async def main(): - - 
end: int | None = None - - async with open_marketstore_client( - 'localhost', - 5995, - ) as client: - - while end != 0: - params = Params( - symbols=fqme, - # timeframe=tfstr, - timeframe='1Min', - attrgroup='OHLCV', - end=end, - # limit_from_start=True, - - # TODO: figure the max limit here given the - # ``purepc`` msg size limit of purerpc: 33554432 - limit=limit, - ) + # await tractor.breakpoint() - if end is not None: - breakpoint() - result = await client.query(params) - data_set = result.by_symbols()[fqme] - array = data_set.array - times = array['Epoch'] - end: float = float(times[0]) - dt = pendulum.from_timestamp(end) - breakpoint() - print( - f'trying to load next {limit} datums frame starting @ {dt}' - ) + trio.run(main) - trio.run(main) + # if client_type == 'sync': + # import pymarketstore as pymkts + # cli = pymkts.Client() + + + # while end != 0: + # param = pymkts.Params( + # fqme, + # '1Min', + # 'OHLCV', + # limit=limit, + # # limit_from_start=True, + # end=end, + # ) + # if end is not None: + # breakpoint() + # reply = cli.query(param) + # ds: pymkts.results.DataSet = reply.first() + # array: np.ndarray = ds.array + + # print(f'loaded {len(array)}-len array:\n{array}') + + # times = array['Epoch'] + # end: float = float(times[0]) + # dt = pendulum.from_timestamp(end) + # # end: str = dt.isoformat('T') + # breakpoint() + # print( + # f'trying to load next {limit} datums frame starting @ {dt}' + # ) + # else: + # from anyio_marketstore import ( # noqa + # open_marketstore_client, + # MarketstoreClient, + # Params, + # ) + # async def main(): + + # end: int | None = None + + # async with open_marketstore_client( + # 'localhost', + # 5995, + # ) as client: + + # while end != 0: + # params = Params( + # symbols=fqme, + # # timeframe=tfstr, + # timeframe='1Min', + # attrgroup='OHLCV', + # end=end, + # # limit_from_start=True, + + # # TODO: figure the max limit here given the + # # ``purepc`` msg size limit of purerpc: 33554432 + # limit=limit, + # ) + + # if end is not None: + # breakpoint() + # result = await client.query(params) + # data_set = result.by_symbols()[fqme] + # array = data_set.array + # times = array['Epoch'] + # end: float = float(times[0]) + # dt = pendulum.from_timestamp(end) + # breakpoint() + # print( + # f'trying to load next {limit} datums frame starting @ {dt}' + # ) + + # trio.run(main) @store.command() @@ -260,7 +282,7 @@ def clone( import polars as pl # open existing shm buffer for kucoin backend - key: str = 'piker.brokerd[d07c9bb7-b720-41].tlosusdt.kucoin.hist' + key: str = 'piker.brokerd[a9e7a4fe-39ae-44].btcusdt.binance.hist' shmpath: Path = Path('/dev/shm') / key assert shmpath.is_file() diff --git a/piker/storage/marketstore/__init__.py b/piker/storage/marketstore/__init__.py index d435fb66a..416ef7ebb 100644 --- a/piker/storage/marketstore/__init__.py +++ b/piker/storage/marketstore/__init__.py @@ -44,7 +44,7 @@ Params, ) import pendulum -import purerpc +# import purerpc from piker.service.marketstore import ( MarketstoreClient, diff --git a/piker/storage/nativedb.py b/piker/storage/nativedb.py new file mode 100644 index 000000000..8b151111e --- /dev/null +++ b/piker/storage/nativedb.py @@ -0,0 +1,309 @@ +# piker: trading gear for hackers +# Copyright (C) Tyler Goodlet (in stewardship for pikers) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later 
version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +`nativedb`: a lulzy Apache-parquet file manager (that some might + call a poor man's tsdb). + +AKA a `piker`-native file-system native "time series database" +without needing an extra process and no standard TSDB features, YET! + +''' +# TODO: like there's soo much.. +# - better name like "parkdb" or "nativedb" (lel)? bundle this lib with +# others to make full system: +# - tractor for failover and reliablity? +# - borg for replication and sync? +# +# - use `fastparquet` for appends: +# https://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.write +# (presuming it's actually faster then overwrites and +# makes sense in terms of impl?) +# +# - use `polars` support for lazy scanning, processing and schema +# validation? +# - https://pola-rs.github.io/polars-book/user-guide/io/parquet/#scan +# - https://pola-rs.github.io/polars-book/user-guide/concepts/lazy-vs-eager/ +# - consider delta writes for appends? +# - https://github.com/pola-rs/polars/blob/main/py-polars/polars/dataframe/frame.py#L3232 +# - consider multi-file appends with appropriate time-range naming? +# - https://pola-rs.github.io/polars-book/user-guide/io/multiple/ +# +# - use `borg` for replication? +# - https://borgbackup.readthedocs.io/en/stable/quickstart.html#remote-repositories +# - https://github.com/borgbackup/borg +# - https://borgbackup.readthedocs.io/en/stable/faq.html#usage-limitations +# - https://github.com/borgbackup/community +# - https://github.com/spslater/borgapi +# - https://nixos.wiki/wiki/ZFS +from __future__ import annotations +from contextlib import asynccontextmanager as acm +from datetime import datetime +from pathlib import Path +import time + +# from bidict import bidict +# import tractor +import numpy as np +import polars as pl +from pendulum import ( + from_timestamp, +) + +from piker import config +from piker.data import def_iohlcv_fields +# from piker.data import ShmArray +from piker.log import get_logger +# from .._profile import Profiler + + +log = get_logger('storage.nativedb') + + +# NOTE: thanks to this SO answer for the below conversion routines +# to go from numpy struct-arrays to polars dataframes and back: +# https://stackoverflow.com/a/72054819 +def np2pl(array: np.ndarray) -> pl.DataFrame: + return pl.DataFrame({ + field_name: array[field_name] + for field_name in array.dtype.fields + }) + + +def pl2np( + df: pl.DataFrame, + dtype: np.dtype, +) -> np.ndarray: + + # Create numpy struct array of the correct size and dtype + # and loop through df columns to fill in array fields. 
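+    # NOTE: the positional `zip()` below assumes `df.columns` is
+    # ordered the same as the struct dtype's fields, which holds
+    # when the frame was produced by `np2pl()` above.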
+ array = np.empty( + df.height, + dtype, + ) + for field, col in zip( + dtype.fields, + df.columns, + ): + array[field] = df.get_column(col).to_numpy() + + return array + + +def mk_ohlcv_shm_keyed_filepath( + fqme: str, + period: float, # ow known as the "timeframe" + # shm: ShmArray, + datadir: Path, + +) -> str: + + # calc ohlc sample period for naming + # ohlcv: np.ndarray = shm.array + # times: np.ndarray = ohlcv['time'] + # period: float = times[-1] - times[-2] + if period < 1.: + raise ValueError('Sample period should be >= 1.!?') + + period_s: str = f'{period}s' + path: Path = datadir / f'{fqme}.ohlcv{period_s}.parquet' + return path + + +ohlc_key_map = None + + +class NativeStorageClient: + ''' + High level storage api for OHLCV time series stored in + a (modern) filesystem as apache parquet files B) + + Part of a grander scheme to use arrow and parquet as our main + lowlevel data framework: https://arrow.apache.org/faq/. + + ''' + name: str = 'nativedb' + + def __init__( + self, + datadir: Path, + + ) -> None: + self._datadir = datadir + self._index: dict[str, dict] = {} + + # series' cache from tsdb reads + self._dfs: dict[str, pl.DataFrame] = {} + + @property + def address(self) -> str: + return self._datadir.as_uri() + + @property + def cardinality(self) -> int: + return len(self._index) + + # @property + # def compression(self) -> str: + # ... + + async def list_keys(self) -> list[str]: + return list(self._index) + + def index_files(self): + for path in self._datadir.iterdir(): + if 'borked' in path.name: + continue + + key: str = path.name.rstrip('.parquet') + fqme, _, descr = key.rpartition('.') + prefix, _, suffix = descr.partition('ohlcv') + period: int = int(suffix.strip('s')) + + # cache description data + self._index[fqme] = { + 'path': path, + 'period': period, + } + + return self._index + + + # async def search_keys(self, pattern: str) -> list[str]: + # ''' + # Search for time series key in the storage backend. + + # ''' + # ... + + # async def write_ticks(self, ticks: list) -> None: + # ... 
+ + async def load( + self, + fqme: str, + timeframe: int, + + ) -> tuple[ + np.ndarray, # timeframe sampled array-series + datetime | None, # first dt + datetime | None, # last dt + ] | None: + try: + array: np.ndarray = await self.read_ohlcv( + fqme, + timeframe, + ) + except FileNotFoundError: + return None + + times = array['time'] + return ( + array, + from_timestamp(times[0]), + from_timestamp(times[-1]), + ) + + async def read_ohlcv( + self, + fqme: str, + timeframe: int | str, + end: float | None = None, # epoch or none + # limit: int = int(200e3), + + ) -> np.ndarray: + path: Path = mk_ohlcv_shm_keyed_filepath( + fqme=fqme, + period=timeframe, + datadir=self._datadir, + ) + df: pl.DataFrame = pl.read_parquet(path) + + # TODO: filter by end and limit inputs + # times: pl.Series = df['time'] + + return pl2np( + df, + dtype=np.dtype(def_iohlcv_fields), + ) + + async def write_ohlcv( + self, + fqme: str, + ohlcv: np.ndarray, + timeframe: int, + # limit: int = int(800e3), + + ) -> Path: + + path: Path = mk_ohlcv_shm_keyed_filepath( + fqme=fqme, + period=timeframe, + datadir=self._datadir, + ) + df: pl.DataFrame = np2pl(ohlcv) + + # TODO: use a proper profiler + start = time.time() + df.write_parquet(path) + delay: float = round( + time.time() - start, + ndigits=6, + ) + print( + f'parquet write took {delay} secs\n' + f'file path: {path}' + ) + return path + + async def delete_ts( + self, + key: str, + timeframe: int | None = None, + + ) -> bool: + path: Path = mk_ohlcv_shm_keyed_filepath( + fqme=key, + period=timeframe, + datadir=self._datadir, + ) + path.unlink() + return path + + +@acm +async def get_client( + + # TODO: eventually support something something apache arrow + # transport over ssh something..? + # host: str | None = None, + + **kwargs, + +) -> NativeStorageClient: + ''' + Load a ``anyio_marketstore`` grpc client connected + to an existing ``marketstore`` server. + + ''' + datadir: Path = config.get_conf_dir() / 'nativedb' + if not datadir.is_dir(): + log.info(f'Creating `nativedb` director: {datadir}') + datadir.mkdir() + + client = NativeStorageClient(datadir) + client.index_files() + yield client diff --git a/setup.py b/setup.py index c63622b25..cb5d7df8a 100755 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ 'piker = piker.cli:cli', 'pikerd = piker.cli:pikerd', 'ledger = piker.accounting.cli:ledger', + # 'store = piker.storage.cli:store', ] }, install_requires=[ @@ -78,6 +79,7 @@ 'cython', 'numpy', 'numba', + 'polars', # dataframes # UI 'PyQt5', From 0ba3c798d7ab4bb36bdce28450971bf4187dba17 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 2 Jun 2023 12:15:33 -0400 Subject: [PATCH 17/85] Drop `bar_wap` from default ohlc field set Turns out no backend (including kraken) requires it and really this kinda of measure should be implemented and recorded from our fsp layer instead of (hackily) sometimes expecting it to be in "source data". 
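For reference, a minimal sketch of how such a wap-style measure might be
derived downstream (fsp-style) from plain OHLCV source arrays instead of
being carried in the source schema; the `approx_vwap` helper name and the
typical-price approximation are illustrative assumptions, not part of this
patch::

    import numpy as np

    def approx_vwap(ohlcv: np.ndarray) -> np.ndarray:
        # approximate each bar's clearing price as (high + low + close) / 3
        typical = (ohlcv['high'] + ohlcv['low'] + ohlcv['close']) / 3
        vol = ohlcv['volume']
        # cumulative volume-weighted average price over the series;
        # assumes the leading bars have non-zero volume.
        return np.cumsum(typical * vol) / np.cumsum(vol)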
--- piker/brokers/ib/api.py | 15 +++++++++------ piker/brokers/kucoin.py | 3 +-- piker/data/_sampling.py | 5 +++-- piker/data/_source.py | 2 +- piker/ui/_cursor.py | 6 +++--- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/piker/brokers/ib/api.py b/piker/brokers/ib/api.py index 8fc8c6519..a5069e95d 100644 --- a/piker/brokers/ib/api.py +++ b/piker/brokers/ib/api.py @@ -82,10 +82,7 @@ get_logger, ) -# Broker specific ohlc schema which includes a vwap field -_ohlc_dtype: list[tuple[str, type]] = [ - ('index', int), - +_bar_load_dtype: list[tuple[str, type]] = [ # NOTE XXX: only part that's diff # from our default fields where # time is normally an int. @@ -99,9 +96,15 @@ ('close', float), ('volume', float), ('count', int), - ('bar_wap', float), # Wait do we need this? ] +# Broker specific ohlc schema which includes a vwap field +_ohlc_dtype: list[tuple[str, type]] = _bar_load_dtype.copy() +_ohlc_dtype.insert( + 0, + ('index', int), +) + _time_units = { 's': ' sec', @@ -317,7 +320,7 @@ def bars_to_np(bars: list) -> np.ndarray: nparr = np.array( np_ready, - dtype=_ohlc_dtype, + dtype=_bar_load_dtype, ) assert nparr['time'][0] == bars[0].date.timestamp() assert nparr['time'][-1] == bars[-1].date.timestamp() diff --git a/piker/brokers/kucoin.py b/piker/brokers/kucoin.py index 2ab4a785c..e3ef599e4 100755 --- a/piker/brokers/kucoin.py +++ b/piker/brokers/kucoin.py @@ -455,7 +455,6 @@ async def _get_bars( ('low', float), ('close', float), ('volume', float), - ('bar_wap', float), # will be zeroed by sampler if not filled ] ''' @@ -520,7 +519,7 @@ async def _get_bars( # volume float(bar[5]), # bar_wap - 0.0, + # 0.0, ) ) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index 20bf9b493..af8216c39 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -505,6 +505,7 @@ async def open_sample_stream( }, ) as (ctx, first) ): + assert len(first) > 1 async with ( ctx.open_stream() as istream, @@ -591,14 +592,14 @@ async def sample_and_broadcast( 'high', 'low', 'close', - 'bar_wap', # can be optionally provided + # 'bar_wap', # can be optionally provided 'volume', ]][-1] = ( o, max(high, last), min(low, last), last, - quote.get('bar_wap', 0), + # quote.get('bar_wap', 0), volume, ) diff --git a/piker/data/_source.py b/piker/data/_source.py index 7f739d7d0..fc22d6f4e 100644 --- a/piker/data/_source.py +++ b/piker/data/_source.py @@ -47,7 +47,7 @@ # TODO: can we elim this from default field set to save on mem? # i think only kraken really uses this in terms of what we get from # their ohlc history API? - ('bar_wap', float), # shouldn't be default right? + # ('bar_wap', float), # shouldn't be default right? ] # remove index field diff --git a/piker/ui/_cursor.py b/piker/ui/_cursor.py index 0a2c82b1b..f69f503a5 100644 --- a/piker/ui/_cursor.py +++ b/piker/ui/_cursor.py @@ -215,8 +215,8 @@ def update_from_ohlc( "H:{}
" "L:{}
" "C:{}
" - "V:{}
" - "wap:{}".format( + "V:{}
".format( + # "wap:{}".format( *array[ix][ [ 'time', @@ -225,7 +225,7 @@ def update_from_ohlc( 'low', 'close', 'volume', - 'bar_wap', + # 'bar_wap', ] ], # name=name, From c52e889fe5f834e31e306c124aed7c9dad622ee6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 2 Jun 2023 12:17:31 -0400 Subject: [PATCH 18/85] First draft history loading rework It was a concurrency-hack mess somewhat due to all sorts of limitations imposed by marketstore (query size limits, strange datetime/timestamp errors, slow table loads for large queries..) and we can drastically simplify. There's still some issues with getting new backfills (not yet in storage) correctly prepended: there's sometimes little gaps due to shm races when reading history indexing vs. when the live-feed startup finishes. We generally need tests for all this and likely a better rework of the feed layer's init such that we're showing history in chart afap instead of waiting on backfills or the live feed to come up. Much more to come B) --- piker/data/feed.py | 5 +- piker/data/history.py | 595 ++++++++++++++++++++++++------------------ 2 files changed, 339 insertions(+), 261 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 91793440a..0cfdb8487 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -340,7 +340,7 @@ async def allocate_persistent_feed( # yield back control to starting nursery once we receive either # some history or a real-time quote. - log.info(f'waiting on history to load: {fqme}') + log.info(f'loading OHLCV history: {fqme}') await some_data_ready.wait() flume = Flume( @@ -370,7 +370,8 @@ async def allocate_persistent_feed( mkt.bs_fqme: flume, }) - # signal the ``open_feed_bus()`` caller task to continue + # signal the ``open_feed_bus()`` caller task to continue since + # we now have (some) history pushed to the shm buffer. task_status.started(init) if not start_stream: diff --git a/piker/data/history.py b/piker/data/history.py index 4a0ab29bb..aef3a15fe 100644 --- a/piker/data/history.py +++ b/piker/data/history.py @@ -57,6 +57,7 @@ ) if TYPE_CHECKING: + from bidict import bidict from ..service.marketstore import StorageClient from .feed import _FeedsBus @@ -83,13 +84,13 @@ async def start_backfill( mkt: MktPair, shm: ShmArray, timeframe: float, - sampler_stream: tractor.MsgStream, + # sampler_stream: tractor.MsgStream, feed_is_live: trio.Event, last_tsdb_dt: datetime | None = None, storage: StorageClient | None = None, write_tsdb: bool = True, - tsdb_is_up: bool = False, + tsdb_is_up: bool = True, task_status: TaskStatus[tuple] = trio.TASK_STATUS_IGNORED, @@ -120,6 +121,13 @@ async def start_backfill( - pendulum.from_timestamp(times[-2]) ).seconds + if step_size_s not in (1, 60): + log.error(f'Last 2 sample period is off!? -> {step_size_s}') + step_size_s = ( + pendulum.from_timestamp(times[-2]) + - pendulum.from_timestamp(times[-3]) + ).seconds + # if the market is open (aka we have a live feed) but the # history sample step index seems off we report the surrounding # data and drop into a bp. this case shouldn't really ever @@ -158,12 +166,15 @@ async def start_backfill( ) log.info(f'Pushing {to_push.size} to shm!') - shm.push(to_push, prepend=True) + shm.push( + to_push, + # prepend=True, + ) # TODO: *** THIS IS A BUG *** # we need to only broadcast to subscribers for this fqme.. # otherwise all fsps get reset on every chart.. 
- await sampler_stream.send('broadcast_all') + # await sampler_stream.send('broadcast_all') # signal that backfilling to tsdb's end datum is complete bf_done = trio.Event() @@ -297,9 +308,13 @@ async def start_backfill( f'{ln} BARS TO PUSH after diff?!: {start_dt} -> {end_dt}' ) - # bail gracefully on shm allocation overrun/full condition + # bail gracefully on shm allocation overrun/full + # condition try: - shm.push(to_push, prepend=True) + shm.push( + to_push, + prepend=True, + ) except ValueError: log.info( f'Shm buffer overrun on: {start_dt} -> {end_dt}?' @@ -316,6 +331,7 @@ async def start_backfill( if ( storage is not None and write_tsdb + # and False ): log.info( f'Writing {ln} frame to storage:\n' @@ -334,7 +350,7 @@ async def start_backfill( await storage.write_ohlcv( col_sym_key, - to_push, + shm.array, timeframe, ) @@ -345,44 +361,165 @@ async def start_backfill( # in the block above to avoid entering new ``frames`` # values while we're pipelining the current ones to # memory... - await sampler_stream.send('broadcast_all') + # await sampler_stream.send('broadcast_all') # short-circuit (for now) bf_done.set() -async def basic_backfill( - bus: _FeedsBus, - mod: ModuleType, - mkt: MktPair, - shms: dict[int, ShmArray], - sampler_stream: tractor.MsgStream, - feed_is_live: trio.Event, +def push_tsdb_history_to_shm( + storemod: ModuleType, + shm: ShmArray, + tsdb_history: np.ndarray, + time_field_key: str, +) -> datetime: + + # TODO: see if there's faster multi-field reads: + # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields + # re-index with a `time` and index field + prepend_start = shm._first.value + to_push = tsdb_history[-prepend_start:] + shm.push( + to_push, + + # insert the history pre a "days worth" of samples + # to leave some real-time buffer space at the end. + prepend=True, + # update_first=False, + # start=prepend_start, + field_map=storemod.ohlc_key_map, + ) -) -> None: + log.info(f'Loaded {to_push.shape} datums from storage') + tsdb_last_frame_start = tsdb_history[time_field_key][0] + return pendulum.from_timestamp(tsdb_last_frame_start) - # do a legacy incremental backfill from the provider. - log.info('No TSDB (marketstored) found, doing basic backfill..') - # start history backfill task ``backfill_bars()`` is - # a required backend func this must block until shm is - # filled with first set of ohlc bars - for timeframe, shm in shms.items(): - try: - await bus.nursery.start( - partial( - start_backfill, - mod, - mkt, - shm, - timeframe, - sampler_stream, - feed_is_live, - ) - ) - except DataUnavailable: - # XXX: timeframe not supported for backend - continue +async def back_load_from_tsdb( + storemod: ModuleType, + storage: StorageClient, + + fqme: str, + # dts_per_tf: dict[int, datetime], + + tsdb_history: np.ndarray, + + last_tsdb_dt: datetime, + latest_start_dt: datetime, + latest_end_dt: datetime, + + bf_done: trio.Event, + + timeframe: int, + shm: ShmArray, +): + assert len(tsdb_history) + + # sync to backend history task's query/load completion + # if bf_done: + # await bf_done.wait() + + # TODO: eventually it'd be nice to not require a shm array/buffer + # to accomplish this.. maybe we can do some kind of tsdb direct to + # graphics format eventually in a child-actor? 
+ if storemod.name == 'nativedb': + return + + await tractor.breakpoint() + assert shm._first.value == 0 + + array = shm.array + + # if timeframe == 1: + # times = shm.array['time'] + # assert (times[1] - times[0]) == 1 + + if len(array): + shm_last_dt = pendulum.from_timestamp( + shm.array[0]['time'] + ) + else: + shm_last_dt = None + + if last_tsdb_dt: + assert shm_last_dt >= last_tsdb_dt + + # do diff against start index of last frame of history and only + # fill in an amount of datums from tsdb allows for most recent + # to be loaded into mem *before* tsdb data. + if ( + last_tsdb_dt + and latest_start_dt + ): + backfilled_size_s = ( + latest_start_dt - last_tsdb_dt + ).seconds + # if the shm buffer len is not large enough to contain + # all missing data between the most recent backend-queried frame + # and the most recent dt-index in the db we warn that we only + # want to load a portion of the next tsdb query to fill that + # space. + log.info( + f'{backfilled_size_s} seconds worth of {timeframe}s loaded' + ) + + # Load TSDB history into shm buffer (for display) if there is + # remaining buffer space. + + time_key: str = 'time' + if getattr(storemod, 'ohlc_key_map', False): + keymap: bidict = storemod.ohlc_key_map + time_key: str = keymap.inverse['time'] + + # if ( + # not len(tsdb_history) + # ): + # return + + tsdb_last_frame_start: datetime = last_tsdb_dt + # load as much from storage into shm possible (depends on + # user's shm size settings). + while shm._first.value > 0: + + tsdb_history = await storage.read_ohlcv( + fqme, + timeframe=timeframe, + end=tsdb_last_frame_start, + ) + + # # empty query + # if not len(tsdb_history): + # break + + next_start = tsdb_history[time_key][0] + if next_start >= tsdb_last_frame_start: + # no earlier data detected + break + + else: + tsdb_last_frame_start = next_start + + tsdb_last_frame_start: datetime = push_tsdb_history_to_shm( + storemod, + shm, + tsdb_history, + time_key, + ) + + # manually trigger step update to update charts/fsps + # which need an incremental update. + # NOTE: the way this works is super duper + # un-intuitive right now: + # - the broadcaster fires a msg to the fsp subsystem. + # - fsp subsys then checks for a sample step diff and + # possibly recomputes prepended history. + # - the fsp then sends back to the parent actor + # (usually a chart showing graphics for said fsp) + # which tells the chart to conduct a manual full + # graphics loop cycle. + # await sampler_stream.send('broadcast_all') + + # TODO: write new data to tsdb to be ready to for next read. async def tsdb_backfill( @@ -392,7 +529,7 @@ async def tsdb_backfill( storage: StorageClient, mkt: MktPair, shms: dict[int, ShmArray], - sampler_stream: tractor.MsgStream, + # sampler_stream: tractor.MsgStream, feed_is_live: trio.Event, task_status: TaskStatus[ @@ -406,16 +543,42 @@ async def tsdb_backfill( dts_per_tf: dict[int, datetime] = {} fqme: str = mkt.fqme + time_key: str = 'time' + if getattr(storemod, 'ohlc_key_map', False): + keymap: bidict = storemod.ohlc_key_map + time_key: str = keymap.inverse['time'] + # start history anal and load missing new data via backend. - timeframe: int + last_tsdb_dt: datetime | None = None + timeframe: int # OHLC sample period for timeframe, shm in shms.items(): + # loads a (large) frame of data from the tsdb depending - # on the db's query size limit. 
- tsdb_history, first_tsdb_dt, last_tsdb_dt = await storage.load( + # on the db's query size limit; our "nativedb" (using + # parquet) generally can load the entire history into mem + # but if not then below the remaining history can be lazy + # loaded? + tsdb_entry: tuple | None = await storage.load( fqme, timeframe=timeframe, ) + if tsdb_entry: + ( + tsdb_history, + first_tsdb_dt, + last_tsdb_dt, + ) = tsdb_entry + + tsdb_last_frame_start: datetime = push_tsdb_history_to_shm( + storemod, + shm, + tsdb_history, + time_key, + ) + assert tsdb_last_frame_start == first_tsdb_dt + + # begin backfiller task ASAP try: ( latest_start_dt, @@ -428,7 +591,7 @@ async def tsdb_backfill( mkt, shm, timeframe, - sampler_stream, + # sampler_stream, feed_is_live, last_tsdb_dt=last_tsdb_dt, @@ -436,25 +599,20 @@ async def tsdb_backfill( storage=storage, ) ) + if tsdb_entry: + dts_per_tf[timeframe] = ( + tsdb_history, + last_tsdb_dt, + latest_start_dt, + latest_end_dt, + bf_done, + ) except DataUnavailable: - # XXX: timeframe not supported for backend - dts_per_tf[timeframe] = ( - tsdb_history, - last_tsdb_dt, - None, - None, - None, - ) + # XXX: timeframe not supported for backend (since + # above exception type), so skip and move on to next. continue # tsdb_history = series.get(timeframe) - dts_per_tf[timeframe] = ( - tsdb_history, - last_tsdb_dt, - latest_start_dt, - latest_end_dt, - bf_done, - ) # if len(hist_shm.array) < 2: # TODO: there's an edge case here to solve where if the last @@ -470,143 +628,49 @@ async def tsdb_backfill( # assert len(shms[1].array) task_status.started() - async def back_load_from_tsdb( - timeframe: int, - shm: ShmArray, - ): - ( - tsdb_history, - last_tsdb_dt, - latest_start_dt, - latest_end_dt, - bf_done, - ) = dts_per_tf[timeframe] - - # sync to backend history task's query/load completion - if bf_done: - await bf_done.wait() - - # TODO: eventually it'd be nice to not require a shm array/buffer - # to accomplish this.. maybe we can do some kind of tsdb direct to - # graphics format eventually in a child-actor? - - # TODO: see if there's faster multi-field reads: - # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields - # re-index with a `time` and index field - prepend_start = shm._first.value - array = shm.array - if len(array): - shm_last_dt = pendulum.from_timestamp(shm.array[0]['time']) - else: - shm_last_dt = None - - if last_tsdb_dt: - assert shm_last_dt >= last_tsdb_dt - - # do diff against start index of last frame of history and only - # fill in an amount of datums from tsdb allows for most recent - # to be loaded into mem *before* tsdb data. - if ( - last_tsdb_dt - and latest_start_dt - ): - backfilled_size_s = ( - latest_start_dt - last_tsdb_dt - ).seconds - # if the shm buffer len is not large enough to contain - # all missing data between the most recent backend-queried frame - # and the most recent dt-index in the db we warn that we only - # want to load a portion of the next tsdb query to fill that - # space. - log.info( - f'{backfilled_size_s} seconds worth of {timeframe}s loaded' - ) - - # Load TSDB history into shm buffer (for display) if there is - # remaining buffer space. - - if ( - len(tsdb_history) - ): - # load the first (smaller) bit of history originally loaded - # above from ``StorageClient.load()``. - to_push = tsdb_history[-prepend_start:] - shm.push( - to_push, - - # insert the history pre a "days worth" of samples - # to leave some real-time buffer space at the end. 
- prepend=True, - # update_first=False, - # start=prepend_start, - field_map=storemod.ohlc_key_map, - ) - - tsdb_last_frame_start = tsdb_history['Epoch'][0] + # backload any further data from tsdb (concurrently per + # timeframe) if not all data was able to be loaded (in memory) + # from the ``StorageClient.load()`` call above. + async with trio.open_nursery() as nurse: + for timeframe, shm in shms.items(): - if timeframe == 1: - times = shm.array['time'] - assert (times[1] - times[0]) == 1 + entry = dts_per_tf.get(timeframe) + if not entry: + continue - # load as much from storage into shm possible (depends on - # user's shm size settings). - while shm._first.value > 0: + ( + tsdb_history, + last_tsdb_dt, + latest_start_dt, + latest_end_dt, + bf_done, + ) = entry - tsdb_history = await storage.read_ohlcv( - fqme, - timeframe=timeframe, - end=tsdb_last_frame_start, - ) + if not tsdb_history.size: + continue - # empty query - if not len(tsdb_history): - break + nurse.start_soon( + back_load_from_tsdb, - next_start = tsdb_history['Epoch'][0] - if next_start >= tsdb_last_frame_start: - # no earlier data detected - break - else: - tsdb_last_frame_start = next_start + storemod, + storage, + fqme, - prepend_start = shm._first.value - to_push = tsdb_history[-prepend_start:] + tsdb_history, + last_tsdb_dt, + latest_start_dt, + latest_end_dt, + bf_done, - # insert the history pre a "days worth" of samples - # to leave some real-time buffer space at the end. - shm.push( - to_push, - prepend=True, - field_map=storemod.ohlc_key_map, - ) - log.info(f'Loaded {to_push.shape} datums from storage') - - # manually trigger step update to update charts/fsps - # which need an incremental update. - # NOTE: the way this works is super duper - # un-intuitive right now: - # - the broadcaster fires a msg to the fsp subsystem. - # - fsp subsys then checks for a sample step diff and - # possibly recomputes prepended history. - # - the fsp then sends back to the parent actor - # (usually a chart showing graphics for said fsp) - # which tells the chart to conduct a manual full - # graphics loop cycle. - await sampler_stream.send('broadcast_all') - - # TODO: write new data to tsdb to be ready to for next read. - - # backload from db (concurrently per timeframe) once backfilling of - # recent dat a loaded from the backend provider (see - # ``bf_done.wait()`` call). - async with trio.open_nursery() as nurse: - for timeframe, shm in shms.items(): - nurse.start_soon( - back_load_from_tsdb, timeframe, shm, ) + # try: + # await trio.sleep_forever() + # finally: + # write_ohlcv + async def manage_history( mod: ModuleType, @@ -624,8 +688,23 @@ async def manage_history( ''' Load and manage historical data including the loading of any available series from any connected tsdb as well as conduct - real-time update of both that existing db and the allocated shared - memory buffer. + real-time update of both that existing db and the allocated + shared memory buffer. + + Init sequence: + - allocate shm (numpy array) buffers for 60s & 1s sample rates + - configure "zero index" for each buffer: the index where + history will prepended *to* and new live data will be + appened *from*. + - open a ``.storage.StorageClient`` and load any existing tsdb + history as well as (async) start a backfill task which loads + missing (newer) history from the data provider backend: + - tsdb history is loaded first and pushed to shm ASAP. 
+ - the backfill task loads the most recent history before + unblocking its parent task, so that the `ShmArray._last` is + up to date to allow the OHLC sampler to begin writing new + samples as the correct buffer index once the provider feed + engages. ''' # TODO: is there a way to make each shm file key @@ -684,88 +763,86 @@ async def manage_history( "Persistent shm for sym was already open?!" ) - # register 1s and 1m buffers with the global incrementer task - async with open_sample_stream( - period_s=1., - shms_by_period={ - 1.: rt_shm.token, - 60.: hist_shm.token, - }, - - # NOTE: we want to only open a stream for doing broadcasts on - # backfill operations, not receive the sample index-stream - # (since there's no code in this data feed layer that needs to - # consume it). - open_index_stream=True, - sub_for_broadcasts=False, - - ) as sample_stream: + open_history_client = getattr( + mod, + 'open_history_client', + None, + ) + assert open_history_client + + # TODO: maybe it should be a subpkg of `.data`? + from piker import storage + + async with storage.open_storage_client() as (storemod, client): + log.info( + f'Connecting to storage backend `{storemod.name}`:\n' + f'location: {client.address}\n' + f'db cardinality: {client.cardinality}\n' + # TODO: show backend config, eg: + # - network settings + # - storage size with compression + # - number of loaded time series? + ) - open_history_client = getattr( + # NOTE: this call ONLY UNBLOCKS once the latest-most frame + # (i.e. history just before the live feed latest datum) of + # history has been loaded and written to the shm buffer: + # - the backfiller task can write in reverse chronological + # to the shm and tsdb + # - the tsdb data can be loaded immediately and the + # backfiller can do a single append from it's end datum and + # then prepends backward to that from the current time + # step. + await bus.nursery.start( + tsdb_backfill, mod, - 'open_history_client', - None, + storemod, + bus, + client, + mkt, + { + 1: rt_shm, + 60: hist_shm, + }, + # sample_stream, + feed_is_live, ) - assert open_history_client - from .. import storage - try: - async with storage.open_storage_client() as (storemod, client): - log.info(f'Found existing `{storemod.name}`') - # TODO: drop returning the output that we pass in? - await bus.nursery.start( - tsdb_backfill, - mod, - storemod, - bus, - client, - mkt, - { - 1: rt_shm, - 60: hist_shm, - }, - sample_stream, - feed_is_live, - ) - # yield back after client connect with filled shm - task_status.started(( - hist_zero_index, - hist_shm, - rt_zero_index, - rt_shm, - )) - - # indicate to caller that feed can be delivered to - # remote requesting client since we've loaded history - # data that can be used. - some_data_ready.set() - - # history retreival loop depending on user interaction - # and thus a small RPC-prot for remotely controllinlg - # what data is loaded for viewing. - await trio.sleep_forever() - - except storage.StorageConnectionError: - log.exception( - "Can't connect to tsdb backend!?\n" - 'Starting basic backfille to shm..' - ) - await basic_backfill( - bus, - mod, - mkt, - { - 1: rt_shm, - 60: hist_shm, - }, - sample_stream, - feed_is_live, - ) + # indicate to caller that feed can be delivered to + # remote requesting client since we've loaded history + # data that can be used. + some_data_ready.set() + + # wait for a live feed before starting the sampler. 
+ await feed_is_live.wait() + + # register 1s and 1m buffers with the global incrementer task + async with open_sample_stream( + period_s=1., + shms_by_period={ + 1.: rt_shm.token, + 60.: hist_shm.token, + }, + + # NOTE: we want to only open a stream for doing + # broadcasts on backfill operations, not receive the + # sample index-stream (since there's no code in this + # data feed layer that needs to consume it). + open_index_stream=True, + sub_for_broadcasts=False, + + ) as sample_stream: + log.info(f'Connected to sampler stream: {sample_stream}') + + # yield back after client connect with filled shm task_status.started(( hist_zero_index, hist_shm, rt_zero_index, rt_shm, )) - some_data_ready.set() + + # history retreival loop depending on user interaction + # and thus a small RPC-prot for remotely controllinlg + # what data is loaded for viewing. await trio.sleep_forever() From c020ab76becef4816b1ab267c5254ea7fa6b586a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 2 Jun 2023 13:20:28 -0400 Subject: [PATCH 19/85] Clean out marketstore specifics - drop buncha cruft from `store ls` cmd and make it work for multi-backend fqme listing. - including adding an `.address` to the mkts client which shows the grpc socketaddr details. - change defauls to new `'nativedb'. - drop 'marketstore' from built-in backend list (for now) --- piker/storage/__init__.py | 47 ++++++------ piker/storage/cli.py | 101 ++++---------------------- piker/storage/marketstore/__init__.py | 16 +++- 3 files changed, 53 insertions(+), 111 deletions(-) diff --git a/piker/storage/__init__.py b/piker/storage/__init__.py index cca77c694..465d3e284 100644 --- a/piker/storage/__init__.py +++ b/piker/storage/__init__.py @@ -62,7 +62,8 @@ __tsdbs__: list[str] = [ - 'marketstore', + 'nativedb', + # 'marketstore', ] @@ -157,13 +158,14 @@ def get_storagemod(name: str) -> ModuleType: @acm async def open_storage_client( - name: str = 'nativedb', + backend: str | None = None, ) -> tuple[ModuleType, StorageClient]: ''' Load the ``StorageClient`` for named backend. ''' + def_backend: str = 'nativedb' tsdb_host: str = 'localhost' # load root config and any tsdb user defined settings @@ -171,44 +173,45 @@ async def open_storage_client( # TODO: maybe not under a "network" section.. since # no more chitty mkts.. - net = conf.get('network') - if net: - tsdbconf = net.get('tsdb') + tsdbconf: dict = {} + service_section = conf.get('service') + if ( + not backend + and service_section + ): + tsdbconf = service_section.get('tsdb') # lookup backend tsdb module by name and load any user service # settings for connecting to the tsdb service. 
- name: str = tsdbconf.pop('backend') + backend: str = tsdbconf.pop('backend') tsdb_host: str = tsdbconf['host'] - if name is None: - raise RuntimeError('No tsdb backend has been set!?') + if backend is None: + backend: str = def_backend # import and load storagemod by name - mod: ModuleType = get_storagemod(name) + mod: ModuleType = get_storagemod(backend) get_client = mod.get_client log.info('Scanning for existing `{tsbd_backend}`') - tsdb_is_up: bool = await check_for_service(f'{name}d') - if ( - tsdb_host == 'localhost' - or tsdb_is_up - ): - log.info(f'Connecting to local {name}@{tsdbconf}') + if backend != def_backend: + tsdb_is_up: bool = await check_for_service(f'{backend}d') + if ( + tsdb_host == 'localhost' + or tsdb_is_up + ): + log.info(f'Connecting to local: {backend}@{tsdbconf}') + else: + log.info(f'Attempting to connect to remote: {backend}@{tsdbconf}') else: - log.info(f'Attempting to connect to remote {name}@{tsdbconf}') + log.info(f'Connecting to default storage: {backend}@{tsdbconf}') - # try: async with ( get_client(**tsdbconf) as client, ): # slap on our wrapper api yield mod, client - # except Exception as err: - # raise StorageConnectionError( - # f'No connection to {name}' - # ) from err - # NOTE: pretty sure right now this is only being # called by a CLI entrypoint? diff --git a/piker/storage/cli.py b/piker/storage/cli.py index 11d2b4901..9ca170c86 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -59,7 +59,6 @@ def ls( if not backends: backends: list[str] = __tsdbs__ - table = Table(title=f'Table keys for backends {backends}:') console = Console() async def query_all(): @@ -71,17 +70,21 @@ async def query_all(): enable_modules=['piker.service._ahab'], ), ): - for backend in backends: - async with open_storage_client(name=backend) as ( - mod, - client, - ): - table.add_column(f'{mod.name} fqmes') - keys: list[str] = await client.list_keys() - for key in keys: - table.add_row(key) - - console.print(table) + for i, backend in enumerate(backends): + table = Table() + try: + async with open_storage_client(backend=backend) as ( + mod, + client, + ): + table.add_column(f'{mod.name}@{client.address}') + keys: list[str] = await client.list_keys() + for key in keys: + table.add_row(key) + + console.print(table) + except Exception: + log.error(f'Unable to connect to storage engine: `{backend}`') trio.run(query_all) @@ -194,80 +197,6 @@ async def main(): trio.run(main) - # if client_type == 'sync': - # import pymarketstore as pymkts - # cli = pymkts.Client() - - - # while end != 0: - # param = pymkts.Params( - # fqme, - # '1Min', - # 'OHLCV', - # limit=limit, - # # limit_from_start=True, - # end=end, - # ) - # if end is not None: - # breakpoint() - # reply = cli.query(param) - # ds: pymkts.results.DataSet = reply.first() - # array: np.ndarray = ds.array - - # print(f'loaded {len(array)}-len array:\n{array}') - - # times = array['Epoch'] - # end: float = float(times[0]) - # dt = pendulum.from_timestamp(end) - # # end: str = dt.isoformat('T') - # breakpoint() - # print( - # f'trying to load next {limit} datums frame starting @ {dt}' - # ) - # else: - # from anyio_marketstore import ( # noqa - # open_marketstore_client, - # MarketstoreClient, - # Params, - # ) - # async def main(): - - # end: int | None = None - - # async with open_marketstore_client( - # 'localhost', - # 5995, - # ) as client: - - # while end != 0: - # params = Params( - # symbols=fqme, - # # timeframe=tfstr, - # timeframe='1Min', - # attrgroup='OHLCV', - # end=end, - # # limit_from_start=True, - 
- # # TODO: figure the max limit here given the - # # ``purepc`` msg size limit of purerpc: 33554432 - # limit=limit, - # ) - - # if end is not None: - # breakpoint() - # result = await client.query(params) - # data_set = result.by_symbols()[fqme] - # array = data_set.array - # times = array['Epoch'] - # end: float = float(times[0]) - # dt = pendulum.from_timestamp(end) - # breakpoint() - # print( - # f'trying to load next {limit} datums frame starting @ {dt}' - # ) - - # trio.run(main) - @store.command() def clone( diff --git a/piker/storage/marketstore/__init__.py b/piker/storage/marketstore/__init__.py index 416ef7ebb..2f0a79703 100644 --- a/piker/storage/marketstore/__init__.py +++ b/piker/storage/marketstore/__init__.py @@ -75,15 +75,22 @@ class MktsStorageClient: def __init__( self, client: MarketstoreClient, + config: dict, ) -> None: # TODO: eventually this should be an api/interface type that # ensures we can support multiple tsdb backends. self.client = client + self._config = config # series' cache from tsdb reads self._arrays: dict[str, np.ndarray] = {} + @property + def address(self) -> str: + conf = self._config + return f'grpc://{conf["host"]}:{conf["port"]}' + async def list_keys(self) -> list[str]: return await self.client.list_symbols() @@ -359,8 +366,8 @@ async def write_ohlcv( @acm async def get_client( - grpc_port: int, # required - host: str | None, + grpc_port: int = 5995, # required + host: str = 'localhost', ) -> MarketstoreClient: ''' @@ -372,4 +379,7 @@ async def get_client( host or 'localhost', grpc_port, ) as client: - yield MktsStorageClient(client) + yield MktsStorageClient( + client, + config={'host': host, 'port': grpc_port}, + ) From d2accdac9bfc1f25d3f067515b217c8bd1e9e452 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 2 Jun 2023 14:22:42 -0400 Subject: [PATCH 20/85] Drop remaining mkts nonsense from `store delete` --- piker/storage/cli.py | 47 +++++---------------------------------- piker/storage/nativedb.py | 7 +++++- 2 files changed, 11 insertions(+), 43 deletions(-) diff --git a/piker/storage/cli.py b/piker/storage/cli.py index 9ca170c86..5d7119e2f 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -89,37 +89,6 @@ async def query_all(): trio.run(query_all) -async def del_ts_by_timeframe( - client: Storage, - fqme: str, - timeframe: int, - -) -> None: - - path: Path = await client.delete_ts(fqme, timeframe) - log.info(f'Deleted {path}') - - # TODO: encapsulate per backend errors.. - # - MEGA LOL, apparently the symbols don't - # flush out until you refresh something or other - # (maybe the WALFILE)... #lelandorlulzone, classic - # alpaca(Rtm) design here .. - # well, if we ever can make this work we - # probably want to dogsplain the real reason - # for the delete errurz..llululu - # if fqme not in syms: - # log.error(f'Pair {fqme} dne in DB') - # msgish = resp.ListFields()[0][1] - # if 'error' in str(msgish): - # log.error( - # f'Deletion error:\n' - # f'backend: {client.name}\n' - # f'fqme: {fqme}\n' - # f'timeframe: {timeframe}s\n' - # f'Error msg:\n\n{msgish}\n', - # ) - - @store.command() def delete( symbols: list[str], @@ -128,13 +97,8 @@ def delete( default=None, help='Storage backend to update' ), - - # delete: bool = typer.Option(False, '-d'), - # host: str = typer.Option( - # 'localhost', - # '-h', - # ), - # port: int = typer.Option('5993', '-p'), + # TODO: expose this as flagged multi-option? 
+ timeframes: list[int] = [1, 60], ): ''' Delete a storage backend's time series for (table) keys provided as @@ -149,15 +113,14 @@ async def main(symbols: list[str]): 'tsdb_storage', enable_modules=['piker.service._ahab'] ), - open_storage_client(name=backend) as (_, storage), + open_storage_client(backend) as (_, client), trio.open_nursery() as n, ): # spawn queries as tasks for max conc! for fqme in symbols: - for tf in [1, 60]: + for tf in timeframes: n.start_soon( - del_ts_by_timeframe, - storage, + client.delete_ts, fqme, tf, ) diff --git a/piker/storage/nativedb.py b/piker/storage/nativedb.py index 8b151111e..1a4c5e123 100644 --- a/piker/storage/nativedb.py +++ b/piker/storage/nativedb.py @@ -280,7 +280,12 @@ async def delete_ts( period=timeframe, datadir=self._datadir, ) - path.unlink() + if path.is_file(): + path.unlink() + log.warning(f'Deleting parquet entry:\n{path}') + else: + log.warning(f'No path exists:\n{path}') + return path From 106ebe94bf25ede716a3bae211ae560290241e96 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 2 Jun 2023 15:35:22 -0400 Subject: [PATCH 21/85] Drop marketstore and tina install from readme, add polars and apache! --- README.rst | 135 ++++++++++--------------------------------- docs/tina_readme.rst | 91 +++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 104 deletions(-) create mode 100644 docs/tina_readme.rst diff --git a/README.rst b/README.rst index e0612e28e..031da9e77 100644 --- a/README.rst +++ b/README.rst @@ -13,13 +13,14 @@ computational trading targeted at `hardcore Linux users `_ . we use as much bleeding edge tech as possible including (but not limited to): - latest python for glue_ -- trio_ for `structured concurrency`_ -- tractor_ for distributed, multi-core, real-time streaming -- marketstore_ for historical and real-time tick data persistence and sharing -- techtonicdb_ for L2 book storage +- trio_ & tractor_ for our distributed, multi-core, real-time streaming + `structured concurrency`_ runtime B) - Qt_ for pristine high performance UIs - pyqtgraph_ for real-time charting -- ``numpy`` and ``numba`` for `fast numerics`_ +- ``polars`` ``numpy`` and ``numba`` for `fast numerics`_ +- `apache arrow and parquet`_ for time series history management + persistence and sharing +- (prototyped) techtonicdb_ for L2 book storage .. |travis| image:: https://img.shields.io/travis/pikers/piker/master.svg :target: https://travis-ci.org/pikers/piker @@ -31,6 +32,7 @@ we use as much bleeding edge tech as possible including (but not limited to): .. _Qt: https://www.qt.io/ .. _pyqtgraph: https://github.com/pyqtgraph/pyqtgraph .. _glue: https://numpy.org/doc/stable/user/c-info.python-as-glue.html#using-python-as-glue +.. _apache arrow and parquet: https://arrow.apache.org/faq/ .. _fast numerics: https://zerowithdot.com/python-numpy-and-pandas-performance/ .. _comp_trader: https://jfaleiro.wordpress.com/2019/10/09/computational-trader/ @@ -58,23 +60,20 @@ building the best looking, most reliable, keyboard friendly trading platform is the dream; join the cause. -install -******* -``piker`` is currently under heavy pre-alpha development and as such -should be cloned from this repo and hacked on directly. +sane install with `poetry` +************************** +TODO! -for a development install:: - git clone git@github.com:pikers/piker.git - cd piker - virtualenv env - source ./env/bin/activate - pip install -r requirements.txt -e . 
+rigorous install on ``nixos`` using ``poetry2nix`` +************************************************** +TODO! -install for nixos -***************** -for users of `NixOS` we offer a development shell envoirment that can be -loaded with:: + +hacky install on nixos +********************** +`NixOS` is our core devs' distro of choice for which we offer +a stringently defined development shell envoirment that can be loaded with:: nix-shell develop.nix @@ -85,91 +84,19 @@ run:: once after loading the shell -install for tinas -***************** -for windows peeps you can start by installing all the prerequisite software: - -- install git with all default settings - https://git-scm.com/download/win -- install anaconda all default settings - https://www.anaconda.com/products/individual -- install microsoft build tools (check the box for Desktop development for C++, you might be able to uncheck some optional downloads) - https://visualstudio.microsoft.com/visual-cpp-build-tools/ -- install visual studio code default settings - https://code.visualstudio.com/download - - -then, `crack a conda shell`_ and run the following commands:: - - mkdir code # create code directory - cd code # change directory to code - git clone https://github.com/pikers/piker.git # downloads piker installation package from github - cd piker # change directory to piker - - conda create -n pikonda # creates conda environment named pikonda - conda activate pikonda # activates pikonda - - conda install -c conda-forge python-levenshtein # in case it is not already installed - conda install pip # may already be installed - pip # will show if pip is installed - - pip install -e . -r requirements.txt # install piker in editable mode - -test Piker to see if it is working:: - - piker -b binance chart btcusdt.binance # formatting for loading a chart - piker -b kraken -b binance chart xbtusdt.kraken - piker -b kraken -b binance -b ib chart qqq.nasdaq.ib - piker -b ib chart tsla.nasdaq.ib - -potential error:: - - FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\user\\AppData\\Roaming\\piker\\brokers.toml' - -solution: - -- navigate to file directory above (may be different on your machine, location should be listed in the error code) -- copy and paste file from 'C:\\Users\\user\\code\\data/brokers.toml' or create a blank file using notepad at the location above - -Visual Studio Code setup: - -- now that piker is installed we can set up vscode as the default terminal for running piker and editing the code -- open Visual Studio Code -- file --> Add Folder to Workspace --> C:\Users\user\code\piker (adds piker directory where all piker files are located) -- file --> Save Workspace As --> save it wherever you want and call it whatever you want, this is going to be your default workspace for running and editing piker code -- ctrl + shift + p --> start typing Python: Select Interpetter --> when the option comes up select it --> Select at the workspace level --> select the one that shows ('pikonda') -- change the default terminal to cmd.exe instead of powershell (default) -- now when you create a new terminal VScode should automatically activate you conda env so that piker can be run as the first command after a new terminal is created - -also, try out fancyzones as part of powertoyz for a decent tiling windows manager to manage all the cool new software you are going to be running. - -.. _conda installed: https:// -.. _C++ build toolz: https:// -.. _crack a conda shell: https:// -.. _vscode: https:// - -.. 
link to the tina guide -.. _setup a coolio tiled wm console: https:// - -provider support -**************** -for live data feeds the in-progress set of supported brokers is: - -- IB_ via ``ib_insync``, also see our `container docs`_ -- binance_ and kraken_ for crypto over their public websocket API -- questrade_ (ish) which comes with effectively free L1 - -coming soon... - -- webull_ via the reverse engineered public API -- yahoo via yliveticker_ - -if you want your broker supported and they have an API let us know. - -.. _IB: https://interactivebrokers.github.io/tws-api/index.html -.. _container docs: https://github.com/pikers/piker/tree/master/dockering/ib -.. _questrade: https://www.questrade.com/api/documentation -.. _kraken: https://www.kraken.com/features/api#public-market-data -.. _binance: https://github.com/pikers/piker/pull/182 -.. _webull: https://github.com/tedchou12/webull -.. _yliveticker: https://github.com/yahoofinancelive/yliveticker -.. _coinbase: https://docs.pro.coinbase.com/#websocket-feed + +install wild-west style via `pip` +********************************* +``piker`` is currently under heavy pre-alpha development and as such +should be cloned from this repo and hacked on directly. + +for a development install:: + + git clone git@github.com:pikers/piker.git + cd piker + virtualenv env + source ./env/bin/activate + pip install -r requirements.txt -e . check out our charts diff --git a/docs/tina_readme.rst b/docs/tina_readme.rst new file mode 100644 index 000000000..b9e60dcee --- /dev/null +++ b/docs/tina_readme.rst @@ -0,0 +1,91 @@ +### NOTE this is likely out of date given it was written some +(years) time ago by a user that has since not really partaken in +contributing since. + +install for tinas +***************** +for windows peeps you can start by installing all the prerequisite software: + +- install git with all default settings - https://git-scm.com/download/win +- install anaconda all default settings - https://www.anaconda.com/products/individual +- install microsoft build tools (check the box for Desktop development for C++, you might be able to uncheck some optional downloads) - https://visualstudio.microsoft.com/visual-cpp-build-tools/ +- install visual studio code default settings - https://code.visualstudio.com/download + + +then, `crack a conda shell`_ and run the following commands:: + + mkdir code # create code directory + cd code # change directory to code + git clone https://github.com/pikers/piker.git # downloads piker installation package from github + cd piker # change directory to piker + + conda create -n pikonda # creates conda environment named pikonda + conda activate pikonda # activates pikonda + + conda install -c conda-forge python-levenshtein # in case it is not already installed + conda install pip # may already be installed + pip # will show if pip is installed + + pip install -e . 
-r requirements.txt # install piker in editable mode + +test Piker to see if it is working:: + + piker -b binance chart btcusdt.binance # formatting for loading a chart + piker -b kraken -b binance chart xbtusdt.kraken + piker -b kraken -b binance -b ib chart qqq.nasdaq.ib + piker -b ib chart tsla.nasdaq.ib + +potential error:: + + FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\user\\AppData\\Roaming\\piker\\brokers.toml' + +solution: + +- navigate to file directory above (may be different on your machine, location should be listed in the error code) +- copy and paste file from 'C:\\Users\\user\\code\\data/brokers.toml' or create a blank file using notepad at the location above + +Visual Studio Code setup: + +- now that piker is installed we can set up vscode as the default terminal for running piker and editing the code +- open Visual Studio Code +- file --> Add Folder to Workspace --> C:\Users\user\code\piker (adds piker directory where all piker files are located) +- file --> Save Workspace As --> save it wherever you want and call it whatever you want, this is going to be your default workspace for running and editing piker code +- ctrl + shift + p --> start typing Python: Select Interpetter --> when the option comes up select it --> Select at the workspace level --> select the one that shows ('pikonda') +- change the default terminal to cmd.exe instead of powershell (default) +- now when you create a new terminal VScode should automatically activate you conda env so that piker can be run as the first command after a new terminal is created + +also, try out fancyzones as part of powertoyz for a decent tiling windows manager to manage all the cool new software you are going to be running. + +.. _conda installed: https:// +.. _C++ build toolz: https:// +.. _crack a conda shell: https:// +.. _vscode: https:// + +.. link to the tina guide +.. _setup a coolio tiled wm console: https:// + +provider support +**************** +for live data feeds the in-progress set of supported brokers is: + +- IB_ via ``ib_insync``, also see our `container docs`_ +- binance_ and kraken_ for crypto over their public websocket API +- questrade_ (ish) which comes with effectively free L1 + +coming soon... + +- webull_ via the reverse engineered public API +- yahoo via yliveticker_ + +if you want your broker supported and they have an API let us know. + +.. _IB: https://interactivebrokers.github.io/tws-api/index.html +.. _container docs: https://github.com/pikers/piker/tree/master/dockering/ib +.. _questrade: https://www.questrade.com/api/documentation +.. _kraken: https://www.kraken.com/features/api#public-market-data +.. _binance: https://github.com/pikers/piker/pull/182 +.. _webull: https://github.com/tedchou12/webull +.. _yliveticker: https://github.com/yahoofinancelive/yliveticker +.. _coinbase: https://docs.pro.coinbase.com/#websocket-feed + + From d027ad5a4f1b26b94dff8a443b0ad72ffca7568e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 2 Jun 2023 16:59:37 -0400 Subject: [PATCH 22/85] Whenever there is overlays, set a title on main chart price-y axis! 
--- piker/ui/_axes.py | 18 +++++++++++------- piker/ui/_chart.py | 23 ++++++++++++++++++++--- piker/ui/_display.py | 6 +++--- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/piker/ui/_axes.py b/piker/ui/_axes.py index 470df3f5e..7ed3d5cbb 100644 --- a/piker/ui/_axes.py +++ b/piker/ui/_axes.py @@ -215,13 +215,17 @@ def set_title( # self.setLabel(title) # self.showLabel() - label = self.title = Label( - view=view or self.linkedView(), - fmt_str=title, - color=color or self.text_color, - parent=self, - # update_on_range_change=False, - ) + label: Label | None = self.title + if label is None: + label = self.title = Label( + view=view or self.linkedView(), + fmt_str=title, + color=color or self.text_color, + parent=self, + # update_on_range_change=False, + ) + else: + label.fmt_str: str = title def below_axis() -> QPointF: return QPointF( diff --git a/piker/ui/_chart.py b/piker/ui/_chart.py index 21ef0bcb7..1b410164b 100644 --- a/piker/ui/_chart.py +++ b/piker/ui/_chart.py @@ -1121,8 +1121,23 @@ def overlay_plotitem( # add axis title # TODO: do we want this API to still work? # raxis = pi.getAxis('right') - axis = self.pi_overlay.get_axis(pi, axis_side) - axis.set_title(axis_title or name, view=pi.getViewBox()) + overlay: PlotItemOverlay = self.pi_overlay + + # Whenever overlays exist always add a y-axis label to the + # main axis as well! + for name, axis_info in self.plotItem.axes.items(): + axis = axis_info['item'] + if isinstance(axis, PriceAxis): + axis.set_title(self.linked.mkt.pair()) + + axis: PriceAxis = overlay.get_axis( + pi, + axis_side, + ) + axis.set_title( + axis_title or name, + view=pi.getViewBox(), + ) return pi @@ -1213,11 +1228,13 @@ def draw_curve( if add_sticky: - if pi is not self.plotItem: + main_pi: pgo.PlotItem = self.plotItem + if pi is not main_pi: # overlay = self.pi_overlay # assert pi in overlay.overlays overlay = self.pi_overlay assert pi in overlay.overlays + assert main_pi is overlay.root_plotitem axis = overlay.get_axis( pi, add_sticky, diff --git a/piker/ui/_display.py b/piker/ui/_display.py index c747eb311..7b1728153 100644 --- a/piker/ui/_display.py +++ b/piker/ui/_display.py @@ -1289,7 +1289,7 @@ async def display_symbol_data( hist_ohlcv: ShmArray = flume.hist_shm mkt: MktPair = flume.mkt - fqme = mkt.fqme + fqme: str = mkt.fqme hist_chart = hist_linked.plot_ohlc_main( mkt, @@ -1386,7 +1386,7 @@ async def display_symbol_data( hist_pi = hist_chart.overlay_plotitem( name=fqme, - axis_title=fqme, + axis_title=flume.mkt.pair(), ) hist_viz = hist_chart.draw_curve( @@ -1416,7 +1416,7 @@ async def display_symbol_data( rt_pi = rt_chart.overlay_plotitem( name=fqme, - axis_title=fqme, + axis_title=flume.mkt.pair(), ) rt_viz = rt_chart.draw_curve( From 9fd412f6313e1e2b2fb65317e209aeea7f0aeee2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 6 Jun 2023 13:00:25 -0400 Subject: [PATCH 23/85] Add basic time-sampling gap detection via `polars` For OHLCV time series we normally presume a uniform sampling period (1s or 60s by default) and it's handy to have tools to ensure a series is gapless or contains expected gaps based on (legacy) market hours. For this we leverage `polars`: - add `.nativedb.with_dts()` a datetime-from-epoch-time-column frame "column-expander" which inserts datetime-casted, epoch-diff and dt-diff columns. - add `.nativedb.detect_time_gaps()` which filters to any larger then expected sampling period rows. - wrap the above (for now) in a `piker store anal` (analysis) cmd which atm always enters a breakpoint for tinkering. 
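For reference, a minimal usage sketch of the new gap-detection helpers; it
assumes the `with_dts()`/`detect_time_gaps()` signatures added in the
`nativedb` diff below and a purely hypothetical parquet file path::

    import polars as pl
    from piker.storage import nativedb

    # load a (hypothetical) 60s OHLCV parquet file previously written
    # by the `NativeStorageClient` and scan it for sampling-period gaps.
    df: pl.DataFrame = pl.read_parquet('btcusdt.binance.ohlcv60s.parquet')
    gaps: pl.DataFrame = nativedb.detect_time_gaps(df, expect_period=60)

    if not gaps.is_empty():
        # each remaining row's epoch-diff vs. the prior sample
        # exceeds the expected 60s step.
        print(gaps.select(['dt_prev', 'dt', 'dt_diff']))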
Supporting storage client additions: - add a `detect_period()` helper for extracting expected OHLC time step. - add new `NativedbStorageClient` methods and attrs to provide for the above: - `.mk_path()` to **only** deliver a parquet-file path for use in other methods. - `._dfs` to house cached `pl.DataFrame`s loaded from `.parquet` files. - `.as_df()` which loads cached frames or loads them from disk and then caches (for next use). - `_write_ohlcv()` a private-sync version of the public equivalent meth since we don't currently have any actual async file IO underneath; add a flag for whether to return as a `numpy.ndarray`. --- piker/storage/cli.py | 38 ++++----- piker/storage/nativedb.py | 161 +++++++++++++++++++++++++++++++++----- 2 files changed, 162 insertions(+), 37 deletions(-) diff --git a/piker/storage/cli.py b/piker/storage/cli.py index 5d7119e2f..ae7393e43 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -20,10 +20,11 @@ """ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING +# from typing import TYPE_CHECKING +import polars as pl import numpy as np -import pendulum +# import pendulum from rich.console import Console import trio # from rich.markdown import Markdown @@ -34,9 +35,10 @@ from . import ( log, ) - -if TYPE_CHECKING: - from . import Storage +from . import ( + __tsdbs__, + open_storage_client, +) store = typer.Typer() @@ -49,11 +51,6 @@ def ls( help='Storage backends to query, default is all.' ), ): - # from piker.service import open_piker_runtime - from . import ( - __tsdbs__, - open_storage_client, - ) from rich.table import Table if not backends: @@ -129,21 +126,18 @@ async def main(symbols: list[str]): @store.command() -def read( +def anal( fqme: str, - - limit: int = int(800e3), - # client_type: str = 'async', + period: int = 60, ) -> np.ndarray: - # end: int | None = None # import tractor - from .nativedb import get_client async def main(): - async with get_client() as client: + async with open_storage_client() as (mod, client): syms: list[str] = await client.list_keys() + print(f'{len(syms)} FOUND for {mod.name}') ( history, @@ -151,10 +145,16 @@ async def main(): last_dt, ) = await client.load( fqme, - 60, + period, ) assert first_dt < last_dt - print(f'{fqme} SIZE -> {history.size}') + + src_df = await client.as_df(fqme, period) + df = mod.with_dts(src_df) + gaps: pl.DataFrame = mod.detect_time_gaps(df) + if gaps.is_empty(): + breakpoint() + breakpoint() # await tractor.breakpoint() diff --git a/piker/storage/nativedb.py b/piker/storage/nativedb.py index 1a4c5e123..9e4e848d6 100644 --- a/piker/storage/nativedb.py +++ b/piker/storage/nativedb.py @@ -65,7 +65,7 @@ from piker import config from piker.data import def_iohlcv_fields -# from piker.data import ShmArray +from piker.data import ShmArray from piker.log import get_logger # from .._profile import Profiler @@ -86,6 +86,7 @@ def np2pl(array: np.ndarray) -> pl.DataFrame: def pl2np( df: pl.DataFrame, dtype: np.dtype, + ) -> np.ndarray: # Create numpy struct array of the correct size and dtype @@ -103,18 +104,31 @@ def pl2np( return array +def detect_period(shm: ShmArray) -> float: + ''' + Attempt to detect the series time step sampling period + in seconds. + + ''' + # TODO: detect sample rate helper? + # calc ohlc sample period for naming + ohlcv: np.ndarray = shm.array + times: np.ndarray = ohlcv['time'] + period: float = times[-1] - times[-2] + if period == 0: + # maybe just last sample is borked? 
+ period: float = times[-2] - times[-3] + + return period + + def mk_ohlcv_shm_keyed_filepath( fqme: str, period: float, # ow known as the "timeframe" - # shm: ShmArray, datadir: Path, ) -> str: - # calc ohlc sample period for naming - # ohlcv: np.ndarray = shm.array - # times: np.ndarray = ohlcv['time'] - # period: float = times[-1] - times[-2] if period < 1.: raise ValueError('Sample period should be >= 1.!?') @@ -146,7 +160,7 @@ def __init__( self._index: dict[str, dict] = {} # series' cache from tsdb reads - self._dfs: dict[str, pl.DataFrame] = {} + self._dfs: dict[str, dict[str, pl.DataFrame]] = {} @property def address(self) -> str: @@ -217,6 +231,17 @@ async def load( from_timestamp(times[-1]), ) + def mk_path( + self, + fqme: str, + period: float, + ) -> Path: + return mk_ohlcv_shm_keyed_filepath( + fqme=fqme, + period=period, + datadir=self._datadir, + ) + async def read_ohlcv( self, fqme: str, @@ -225,36 +250,51 @@ async def read_ohlcv( # limit: int = int(200e3), ) -> np.ndarray: - path: Path = mk_ohlcv_shm_keyed_filepath( - fqme=fqme, - period=timeframe, - datadir=self._datadir, - ) + path: Path = self.mk_path(fqme, period=int(timeframe)) df: pl.DataFrame = pl.read_parquet(path) + self._dfs.setdefault(timeframe, {})[fqme] = df # TODO: filter by end and limit inputs # times: pl.Series = df['time'] - - return pl2np( + array: np.ndarray = pl2np( df, dtype=np.dtype(def_iohlcv_fields), ) + return array - async def write_ohlcv( + async def as_df( self, fqme: str, - ohlcv: np.ndarray, + period: int = 60, + + ) -> pl.DataFrame: + try: + return self._dfs[period][fqme] + except KeyError: + await self.read_ohlcv(fqme, period) + return self._dfs[period][fqme] + + def _write_ohlcv( + self, + fqme: str, + ohlcv: np.ndarray | pl.DataFrame, timeframe: int, - # limit: int = int(800e3), ) -> Path: + ''' + Sync version of the public interface meth, since we don't + currently actually need or support an async impl. + ''' path: Path = mk_ohlcv_shm_keyed_filepath( fqme=fqme, period=timeframe, datadir=self._datadir, ) - df: pl.DataFrame = np2pl(ohlcv) + if isinstance(ohlcv, np.ndarray): + df: pl.DataFrame = np2pl(ohlcv) + else: + df = ohlcv # TODO: use a proper profiler start = time.time() @@ -269,6 +309,25 @@ async def write_ohlcv( ) return path + + async def write_ohlcv( + self, + fqme: str, + ohlcv: np.ndarray, + timeframe: int, + + ) -> Path: + ''' + Write input ohlcv time series for fqme and sampling period + to (local) disk. + + ''' + return self._write_ohlcv( + fqme, + ohlcv, + timeframe, + ) + async def delete_ts( self, key: str, @@ -312,3 +371,69 @@ async def get_client( client = NativeStorageClient(datadir) client.index_files() yield client + + +def with_dts( + df: pl.DataFrame, + time_col: str = 'time', +) -> pl.DataFrame: + ''' + Insert datetime (casted) columns to a (presumably) OHLC sampled + time series with an epoch-time column keyed by ``time_col``. + + ''' + return df.with_columns([ + pl.col(time_col).shift(1).suffix('_prev'), + pl.col(time_col).diff().alias('s_diff'), + pl.from_epoch(pl.col(time_col)).alias('dt'), + ]).with_columns([ + pl.from_epoch(pl.col(f'{time_col}_prev')).alias('dt_prev'), + ]).with_columns( + (pl.col('dt') - pl.col('dt_prev')).alias('dt_diff'), + ) + + +def detect_time_gaps( + df: pl.DataFrame, + expect_period: float = 60, + time_col: str = 'time', + +) -> pl.DataFrame: + ''' + Filter to OHLC datums which contain sample step gaps. + + For eg. legacy markets which have venue close gaps and/or + actual missing data segments. 
+ + ''' + return with_dts(df).filter(pl.col('s_diff') > expect_period) + + +def detect_price_gaps( + df: pl.DataFrame, + gt_multiplier: float = 2., + price_fields: list[str] = ['high', 'low'], + +) -> pl.DataFrame: + ''' + Detect gaps in clearing price over an OHLC series. + + 2 types of gaps generally exist; up gaps and down gaps: + + - UP gap: when any next sample's lo price is strictly greater + then the current sample's hi price. + + - DOWN gap: when any next sample's hi price is strictly + less then the current samples lo price. + + ''' + # return df.filter( + # pl.col('high') - ) > expect_period, + # ).select([ + # pl.dt.datetime(pl.col(time_col).shift(1)).suffix('_previous'), + # pl.all(), + # ]).select([ + # pl.all(), + # (pl.col(time_col) - pl.col(f'{time_col}_previous')).alias('diff'), + # ]) + ... From 1e683a4b910ea1d31f062088e25d10e5a861c272 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 6 Jun 2023 13:13:26 -0400 Subject: [PATCH 24/85] Another guard around sampling subscriber popped race.. --- piker/data/_sampling.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index af8216c39..a8dba30bc 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -366,7 +366,12 @@ async def register_with_sampler( sub_for_broadcasts and subs ): - subs.remove(stream) + try: + subs.remove(stream) + except KeyError: + log.warning( + f'{stream._ctx.chan.uid} sub already removed!?' + ) else: # if no shms are passed in we just wait until cancelled # by caller. From 4f4860cfb0c823054140b612578b3986400dc9bb Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 6 Jun 2023 13:19:32 -0400 Subject: [PATCH 25/85] Update shm.push() type sig style --- piker/data/_sharedmem.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/piker/data/_sharedmem.py b/piker/data/_sharedmem.py index 41dd00718..3366621b2 100644 --- a/piker/data/_sharedmem.py +++ b/piker/data/_sharedmem.py @@ -322,7 +322,7 @@ def push( field_map: Optional[dict[str, str]] = None, prepend: bool = False, update_first: bool = True, - start: Optional[int] = None, + start: int | None = None, ) -> int: ''' @@ -364,7 +364,11 @@ def push( # tries to access ``.array`` (which due to the index # overlap will be empty). Pretty sure we've fixed it now # but leaving this here as a reminder. - if prepend and update_first and length: + if ( + prepend + and update_first + and length + ): assert index < self._first.value if ( From 6dc3ed8d6a79af4ed9aadc85eb0cc2b8439037b5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 6 Jun 2023 17:22:50 -0400 Subject: [PATCH 26/85] Expose a `force_reformat: bool` up through graphics stack --- piker/data/_formatters.py | 6 +++++- piker/ui/_dataviz.py | 9 ++++++++- piker/ui/_render.py | 7 ++++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/piker/data/_formatters.py b/piker/data/_formatters.py index ef892c7a6..4fbe3151c 100644 --- a/piker/data/_formatters.py +++ b/piker/data/_formatters.py @@ -222,6 +222,7 @@ def format_to_1d( profiler: Profiler, slice_to_inview: bool = True, + force_full_realloc: bool = False, ) -> tuple[ np.ndarray, @@ -248,7 +249,10 @@ def format_to_1d( # we first need to allocate xy data arrays # from the source data. 
- if self.y_nd is None: + if ( + self.y_nd is None + or force_full_realloc + ): self.xy_nd_start = shm._first.value self.xy_nd_stop = shm._last.value self.x_nd, self.y_nd = self.allocate_xy_nd( diff --git a/piker/ui/_dataviz.py b/piker/ui/_dataviz.py index a24c7d5c3..57a477d12 100644 --- a/piker/ui/_dataviz.py +++ b/piker/ui/_dataviz.py @@ -752,6 +752,7 @@ def update_graphics( profiler: Profiler | None = None, do_append: bool = True, + force_redraw: bool = False, **kwargs, @@ -796,7 +797,7 @@ def update_graphics( graphics, ) - should_redraw: bool = False + should_redraw: bool = force_redraw or False ds_allowed: bool = True # guard for m4 activation # TODO: probably specialize ``Renderer`` types instead of @@ -906,6 +907,11 @@ def update_graphics( should_ds=should_ds, showing_src_data=showing_src_data, + # XXX: reallocate entire underlying "format graphics array" + # whenever the caller insists, such as on history + # backfills. + force_reformat=force_redraw, + do_append=do_append, ) @@ -925,6 +931,7 @@ def update_graphics( reset_cache = False if ( reset_cache + or should_redraw ): # assign output paths to graphicis obj but # after a coords-cache reset. diff --git a/piker/ui/_render.py b/piker/ui/_render.py index fb41b696b..2a442e987 100644 --- a/piker/ui/_render.py +++ b/piker/ui/_render.py @@ -136,6 +136,7 @@ def render( do_append: bool = True, use_fpath: bool = True, + force_reformat: bool = False, # only render datums "in view" of the ``ChartView`` use_vr: bool = True, @@ -174,6 +175,7 @@ def render( profiler, slice_to_inview=use_vr, + force_full_realloc=force_reformat, ) # no history in view case @@ -222,7 +224,10 @@ def render( or should_redraw ): # print(f"{self.viz.name} -> REDRAWING BRUH") - if new_sample_rate and showing_src_data: + if ( + new_sample_rate + and showing_src_data + ): log.info(f'DE-downsampling -> {array_key}') self._in_ds = False From f1252983e41c135b3a232ab608b504de408fe9a2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 6 Jun 2023 17:24:21 -0400 Subject: [PATCH 27/85] kucoin: support start and end dt based bars queries --- piker/brokers/kucoin.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/piker/brokers/kucoin.py b/piker/brokers/kucoin.py index e3ef599e4..eebbc5f7c 100755 --- a/piker/brokers/kucoin.py +++ b/piker/brokers/kucoin.py @@ -460,12 +460,22 @@ async def _get_bars( ''' # Generate generic end and start time if values not passed # Currently gives us 12hrs of data - if end_dt is None: + if ( + end_dt is None + and start_dt is None + ): end_dt = pendulum.now('UTC').add(minutes=1) + start_dt = end_dt.start_of('minute').subtract(minutes=limit) + + if ( + start_dt + and end_dt is None + ): + # just set end to limit's worth in future + end_dt = start_dt.start_of('minute').add(minutes=limit) - if start_dt is None: - start_dt = end_dt.start_of( - 'minute').subtract(minutes=limit) + else: + start_dt = end_dt.start_of('minute').subtract(minutes=limit) start_dt = int(start_dt.timestamp()) end_dt = int(end_dt.timestamp()) From 7a5c43d01ae72b4a15f108153cf4cafc0efd6f2d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 6 Jun 2023 17:29:03 -0400 Subject: [PATCH 28/85] Support injecting a `info: dict` to `Sampler.broadcast_all()` calls --- piker/data/_sampling.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index a8dba30bc..98a7603ff 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -230,6 
+230,7 @@ async def broadcast( self, period_s: float, time_stamp: float | None = None, + info: dict | None = None, ) -> None: ''' @@ -258,10 +259,14 @@ async def broadcast( try: for stream in (subs - sent): try: - await stream.send({ + msg = { 'index': time_stamp or last_ts, 'period': period_s, - }) + } + if info: + msg.update(info) + + await stream.send(msg) sent.add(stream) except ( @@ -287,9 +292,15 @@ async def broadcast( ) @classmethod - async def broadcast_all(self) -> None: + async def broadcast_all( + self, + info: dict | None = None, + ) -> None: for period_s in self.subscribers: - await self.broadcast(period_s) + await self.broadcast( + period_s, + info=info, + ) @tractor.context @@ -359,8 +370,10 @@ async def register_with_sampler( # except broadcast requests from the subscriber async for msg in stream: - if msg == 'broadcast_all': - await Sampler.broadcast_all() + if 'broadcast_all' in msg: + await Sampler.broadcast_all( + info=msg['broadcast_all'], + ) finally: if ( sub_for_broadcasts @@ -468,6 +481,8 @@ async def open_sample_stream( cache_key: str | None = None, allow_new_sampler: bool = True, + ensure_is_active: bool = False, + ) -> AsyncIterator[dict[str, float]]: ''' Subscribe to OHLC sampling "step" events: when the time aggregation @@ -510,12 +525,18 @@ async def open_sample_stream( }, ) as (ctx, first) ): - assert len(first) > 1 + if ensure_is_active: + assert len(first) > 1 + async with ( ctx.open_stream() as istream, - # TODO: we don't need this task-bcasting right? - # istream.subscribe() as istream, + # TODO: we DO need this task-bcasting so that + # for eg. the history chart update loop eventually + # receceives all backfilling event msgs such that + # the underlying graphics format arrays are + # re-allocated until all history is loaded! + istream.subscribe() as istream, ): yield istream From 0dcfcea6ee2ff8762f2b6abae322df982b25ce34 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 6 Jun 2023 23:59:59 -0400 Subject: [PATCH 29/85] Finally get partial backfills after tsdb load workinnn It took a little while (and a lot of commenting out of old no longer needed code) but, this gets tsdb (from parquet file) loading *before* final backfilling from the most recent history frame until the most recent tsdb time stamp! More or less all the convoluted concurrency shit we had for coping with `marketstore` IPC junk is no longer needed, particularly all the query size limits and accompanying load loops.. The recent frame loading technique/order *has* now changed though since we'd like to show charts asap once tsdb history loads. The new load sequence is as follows: - load mr (most recent) frame from backend. - load existing history (one shot) from the "tsdb" aka parquet files with `polars`. - backfill the gap part from the mr frame back to the tsdb start incrementally by making (hacky) `ShmArray.push(start=)` calls and *not* updating the `._first.value` while doing it XD Dirtier deatz: - make `tsdb_backfill()` run per timeframe in a separate task. - drop all the loop through timeframes and insert `dts_per_tf` crap. - only spawn a subtask for the `start_backfill()` call which in turn only does the gap backfilling as mentioned above. - mask out all the code related to being limited to certain query sizes (over gRPC) as was restricted by marketstore.. not gonna go through what all of that was since it's probably getting deleted in a follow up commit. - buncha off-by-one tweaks to do with backfilling the gap from mr frame to tsdb start.. 
mostly tinkered it to get it all right but seems to be working correctly B) - still use the `broadcast_all()` msg stuff when doing the gap backfill though don't have it really working yet on the UI side (since previously we were relying on the shm first/last values.. so this will be "coming soon" :) --- piker/data/history.py | 756 ++++++++++++++++++++++++++++-------------- 1 file changed, 506 insertions(+), 250 deletions(-) diff --git a/piker/data/history.py b/piker/data/history.py index aef3a15fe..51e19c5ae 100644 --- a/piker/data/history.py +++ b/piker/data/history.py @@ -24,7 +24,7 @@ ) from datetime import datetime from functools import partial -import time +# import time from types import ModuleType from typing import ( Callable, @@ -34,7 +34,10 @@ import trio from trio_typing import TaskStatus import tractor -import pendulum +from pendulum import ( + Duration, + from_timestamp, +) import numpy as np from ..accounting import ( @@ -64,112 +67,170 @@ def diff_history( array: np.ndarray, - timeframe: int, - start_dt: datetime, - end_dt: datetime, - last_tsdb_dt: datetime | None = None + # timeframe: int, + # start_dt: datetime, + # end_dt: datetime, + + append_until_dt: datetime | None = None, + prepend_until_dt: datetime | None = None, ) -> np.ndarray: # no diffing with tsdb dt index possible.. - if last_tsdb_dt is None: + if ( + prepend_until_dt is None + and append_until_dt is None + ): return array - time = array['time'] - return array[time > last_tsdb_dt.timestamp()] + times = array['time'] + + if append_until_dt: + return array[times < append_until_dt.timestamp()] + else: + return array[times >= prepend_until_dt.timestamp()] + + +# async def open_history_mngr( +# mod: ModuleType, +# mkt: MktPair, +# # shm: ShmArray, +# # timeframes: list[float] = [60, 1], +# timeframes: float, + +# ) -> Callable[ +# [int, datetime, datetime], +# tuple[np.ndarray, str] +# ]: +# ''' +# Open a "history manager" for the backend data provider, +# get the latest "frames worth" of ohlcv history, +# push the history to shm and deliver +# the start datum's datetime value so that further history loading +# can be done until synchronized with the tsdb loaded time series. + +# ''' +# hist: Callable[ +# [int, datetime, datetime], +# tuple[np.ndarray, str] +# ] +# config: dict[str, int] + +# async with mod.open_history_client( +# mkt, +# ) as (hist, config): +# log.info(f'{mod} history client returned backfill config: {config}') + +# # get latest query's worth of history all the way +# # back to what is recorded in the tsdb +# array, mr_start_dt, mr_end_dt = await hist( +# timeframe, +# end_dt=None, +# ) +# times: np.ndarray = array['time'] + +# # sample period step size in seconds +# step_size_s = ( +# from_timestamp(times[-1]) +# - from_timestamp(times[-2]) +# ).seconds + +# if step_size_s not in (1, 60): +# log.error(f'Last 2 sample period is off!? -> {step_size_s}') +# step_size_s = ( +# from_timestamp(times[-2]) +# - from_timestamp(times[-3]) +# ).seconds + +# # NOTE: on the first history, most recent history +# # frame we PREPEND from the current shm ._last index +# # and thus a gap between the earliest datum loaded here +# # and the latest loaded from the tsdb may exist! +# log.info(f'Pushing {to_push.size} to shm!') +# shm.push( +# to_push, +# prepend=True, +# # start= +# ) + + +# # if the market is open (aka we have a live feed) but the +# # history sample step index seems off we report the surrounding +# # data and drop into a bp. 
this case shouldn't really ever +# # happen if we're doing history retrieval correctly. +# # if ( +# # step_size_s == 60 +# # and feed_is_live.is_set() +# # ): +# # inow = round(time.time()) +# # diff = inow - times[-1] +# # if abs(diff) > 60: +# # surr = array[-6:] +# # diff_in_mins = round(diff/60., ndigits=2) +# # log.warning( +# # f'STEP ERROR `{mkt.fqme}` for period {step_size_s}s:\n' +# # f'Off by `{diff}` seconds (or `{diff_in_mins}` mins)\n' +# # 'Surrounding 6 time stamps:\n' +# # f'{list(surr["time"])}\n' +# # 'Here is surrounding 6 samples:\n' +# # f'{surr}\nn' +# # ) + +# # uncomment this for a hacker who wants to investigate +# # this case manually.. +# # await tractor.breakpoint() + +# # frame's worth of sample-period-steps, in seconds +# # frame_size_s = len(array) * step_size_s + +# to_push = array +# # to_push = diff_history( +# # array, +# # # timeframe, +# # # mr_start_dt, +# # # mr_end_dt, + +# # # backfill scenario for "most recent" frame +# # prepend_until_dt=last_tsdb_dt, +# # ) + +# # NOTE: on the first history, most recent history +# # frame we PREPEND from the current shm ._last index +# # and thus a gap between the earliest datum loaded here +# # and the latest loaded from the tsdb may exist! +# log.info(f'Pushing {to_push.size} to shm!') +# shm.push( +# to_push, +# prepend=True, +# # start= +# ) +# # TODO: should we wrap this in a "history frame" type or +# # something? +# yield hist, mr_start_dt, mr_end_dt async def start_backfill( + get_hist, mod: ModuleType, mkt: MktPair, shm: ShmArray, timeframe: float, - # sampler_stream: tractor.MsgStream, - feed_is_live: trio.Event, - last_tsdb_dt: datetime | None = None, - storage: StorageClient | None = None, - write_tsdb: bool = True, - tsdb_is_up: bool = True, + backfill_from_shm_index: int, + backfill_from_dt: datetime, - task_status: TaskStatus[tuple] = trio.TASK_STATUS_IGNORED, + sampler_stream: tractor.MsgStream, -) -> int: - hist: Callable[ - [int, datetime, datetime], - tuple[np.ndarray, str] - ] - config: dict[str, int] - - async with mod.open_history_client( - mkt, - ) as (hist, config): - log.info(f'{mod} history client returned backfill config: {config}') - - # get latest query's worth of history all the way - # back to what is recorded in the tsdb - array, start_dt, end_dt = await hist( - timeframe, - end_dt=None, - ) - times = array['time'] - - # sample period step size in seconds - step_size_s = ( - pendulum.from_timestamp(times[-1]) - - pendulum.from_timestamp(times[-2]) - ).seconds - - if step_size_s not in (1, 60): - log.error(f'Last 2 sample period is off!? -> {step_size_s}') - step_size_s = ( - pendulum.from_timestamp(times[-2]) - - pendulum.from_timestamp(times[-3]) - ).seconds - - # if the market is open (aka we have a live feed) but the - # history sample step index seems off we report the surrounding - # data and drop into a bp. this case shouldn't really ever - # happen if we're doing history retrieval correctly. - if ( - step_size_s == 60 - and feed_is_live.is_set() - ): - inow = round(time.time()) - diff = inow - times[-1] - if abs(diff) > 60: - surr = array[-6:] - diff_in_mins = round(diff/60., ndigits=2) - log.warning( - f'STEP ERROR `{mkt.fqme}` for period {step_size_s}s:\n' - f'Off by `{diff}` seconds (or `{diff_in_mins}` mins)\n' - 'Surrounding 6 time stamps:\n' - f'{list(surr["time"])}\n' - 'Here is surrounding 6 samples:\n' - f'{surr}\nn' - ) - - # uncomment this for a hacker who wants to investigate - # this case manually.. 
- # await tractor.breakpoint() + backfill_until_dt: datetime | None = None, + storage: StorageClient | None = None, - # frame's worth of sample-period-steps, in seconds - frame_size_s = len(array) * step_size_s + write_tsdb: bool = True, + # tsdb_is_up: bool = True, - to_push = diff_history( - array, - timeframe, - start_dt, - end_dt, - last_tsdb_dt=last_tsdb_dt, - ) + task_status: TaskStatus[tuple] = trio.TASK_STATUS_IGNORED, - log.info(f'Pushing {to_push.size} to shm!') - shm.push( - to_push, - # prepend=True, - ) +) -> int: # TODO: *** THIS IS A BUG *** # we need to only broadcast to subscribers for this fqme.. @@ -180,43 +241,45 @@ async def start_backfill( bf_done = trio.Event() # let caller unblock and deliver latest history frame - task_status.started(( - start_dt, - end_dt, + task_status.started( #( + # mr_start_dt, + # mr_end_dt, bf_done, - )) + )# ) # based on the sample step size, maybe load a certain amount history - if last_tsdb_dt is None: + update_start_on_prepend: bool = False + if backfill_until_dt is None: - if step_size_s not in (1, 60): + if timeframe not in (1, 60): raise ValueError( '`piker` only needs to support 1m and 1s sampling ' 'but ur api is trying to deliver a longer ' - f'timeframe of {step_size_s} seconds..\n' + f'timeframe of {timeframe} seconds..\n' 'So yuh.. dun do dat brudder.' ) # when no tsdb "last datum" is provided, we just load # some near-term history. + # periods = { + # 1: {'days': 1}, + # 60: {'days': 14}, + # } + + # if tsdb_is_up: + # do a decently sized backfill and load it into storage. periods = { - 1: {'days': 1}, - 60: {'days': 14}, + 1: {'days': 6}, + 60: {'years': 6}, } + period_duration: int = periods[timeframe] - if tsdb_is_up: - # do a decently sized backfill and load it into storage. - periods = { - 1: {'days': 6}, - 60: {'years': 6}, - } - - period_duration = periods[step_size_s] + update_start_on_prepend = True # NOTE: manually set the "latest" datetime which we intend to # backfill history "until" so as to adhere to the history # settings above when the tsdb is detected as being empty. - last_tsdb_dt = start_dt.subtract(**period_duration) + backfill_until_dt = backfill_from_dt.subtract(**period_duration) # configure async query throttling # rate = config.get('rate', 1) @@ -228,18 +291,39 @@ async def start_backfill( # per time stamp. starts: Counter[datetime] = Counter() - # inline sequential loop where we simply pass the - # last retrieved start dt to the next request as - # it's end dt. - while end_dt > last_tsdb_dt: + # conduct "backward history filling" since + # no tsdb history yet exists. + + # implemented via a simple inline sequential loop where we + # simply pass the last retrieved start dt to the next + # request as it's end dt. 
+ # while end_dt < backfill_until_dt: + # while ( + # end_dt is None # init case + # or end_dt < mr_start_dt + # ): + + # conduct "forward filling" from the last time step + # loaded from the tsdb until the first step loaded + # just above + end_dt: datetime = backfill_from_dt + # start_dt: datetime = backfill_until_dt + next_prepend_index: int = backfill_from_shm_index + + while end_dt > backfill_until_dt: log.debug( - f'Requesting {step_size_s}s frame ending in {start_dt}' + f'Requesting {timeframe}s frame ending in {end_dt}' ) try: - array, next_start_dt, end_dt = await hist( + ( + array, + next_start_dt, + next_end_dt, + ) = await get_hist( timeframe, - end_dt=start_dt, + end_dt=end_dt, + # start_dt=start_dt, ) # broker says there never was or is no more history to pull @@ -272,15 +356,17 @@ async def start_backfill( return # only update new start point if not-yet-seen - start_dt = next_start_dt + start_dt: datetime = next_start_dt starts[start_dt] += 1 assert array['time'][0] == start_dt.timestamp() diff = end_dt - start_dt frame_time_diff_s = diff.seconds - expected_frame_size_s = frame_size_s + step_size_s + # frame's worth of sample-period-steps, in seconds + frame_size_s = len(array) * timeframe + expected_frame_size_s = frame_size_s + timeframe if frame_time_diff_s > expected_frame_size_s: # XXX: query result includes a start point prior to our @@ -294,10 +380,10 @@ async def start_backfill( to_push = diff_history( array, - timeframe, - start_dt, - end_dt, - last_tsdb_dt=last_tsdb_dt, + # timeframe, + # start_dt, + # end_dt, + prepend_until_dt=backfill_until_dt, ) ln = len(to_push) if ln: @@ -314,11 +400,52 @@ async def start_backfill( shm.push( to_push, prepend=True, + + # XXX: only update the ._first index if no tsdb + # segment was previously prepended by the + # parent task. + update_first=update_start_on_prepend, + + # XXX: only prepend from a manually calculated shm + # index if there was already a tsdb history + # segment prepended (since then the + # ._first.value is going to be wayyy in the + # past!) + start=( + next_prepend_index + if not update_start_on_prepend + else None + ), ) - except ValueError: + await sampler_stream.send({ + 'broadcast_all': { + 'backfilling': True + }, + }) + + # decrement next prepend point + next_prepend_index = next_prepend_index - ln + end_dt = next_start_dt + + # XXX: extremely important, there can be no checkpoints + # in the block above to avoid entering new ``frames`` + # values while we're pipelining the current ones to + # memory... + array = shm.array + zeros = array[array['low'] == 0] + if ( + 0 < zeros.size < 10 + ): + await tractor.breakpoint() + + + except ValueError as ve: + _ve = ve log.info( f'Shm buffer overrun on: {start_dt} -> {end_dt}?' ) + + await tractor.breakpoint() # can't push the entire frame? so # push only the amount that can fit.. break @@ -328,10 +455,12 @@ async def start_backfill( f'{start_dt} -> {end_dt}' ) + # FINALLY, maybe write immediately to the tsdb backend for + # long-term storage. 
if ( storage is not None and write_tsdb - # and False + and False ): log.info( f'Writing {ln} frame to storage:\n' @@ -372,19 +501,22 @@ def push_tsdb_history_to_shm( shm: ShmArray, tsdb_history: np.ndarray, time_field_key: str, + prepend: bool = False, + ) -> datetime: # TODO: see if there's faster multi-field reads: # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields # re-index with a `time` and index field prepend_start = shm._first.value + to_push = tsdb_history[-prepend_start:] shm.push( to_push, # insert the history pre a "days worth" of samples # to leave some real-time buffer space at the end. - prepend=True, + prepend=prepend, # update_first=False, # start=prepend_start, field_map=storemod.ohlc_key_map, @@ -392,7 +524,7 @@ def push_tsdb_history_to_shm( log.info(f'Loaded {to_push.shape} datums from storage') tsdb_last_frame_start = tsdb_history[time_field_key][0] - return pendulum.from_timestamp(tsdb_last_frame_start) + return from_timestamp(tsdb_last_frame_start) async def back_load_from_tsdb( @@ -435,7 +567,7 @@ async def back_load_from_tsdb( # assert (times[1] - times[0]) == 1 if len(array): - shm_last_dt = pendulum.from_timestamp( + shm_last_dt = from_timestamp( shm.array[0]['time'] ) else: @@ -525,12 +657,16 @@ async def back_load_from_tsdb( async def tsdb_backfill( mod: ModuleType, storemod: ModuleType, - bus: _FeedsBus, + # bus: _FeedsBus, + tn: trio.Nursery, storage: StorageClient, mkt: MktPair, - shms: dict[int, ShmArray], - # sampler_stream: tractor.MsgStream, - feed_is_live: trio.Event, + # shms: dict[int, ShmArray], + shm: ShmArray, + timeframe: float, + + sampler_stream: tractor.MsgStream, + # feed_is_live: trio.Event, task_status: TaskStatus[ tuple[ShmArray, ShmArray] @@ -540,18 +676,75 @@ async def tsdb_backfill( # TODO: this should be used verbatim for the pure # shm backfiller approach below. - dts_per_tf: dict[int, datetime] = {} + # dts_per_tf: dict[int, datetime] = {} fqme: str = mkt.fqme - time_key: str = 'time' - if getattr(storemod, 'ohlc_key_map', False): - keymap: bidict = storemod.ohlc_key_map - time_key: str = keymap.inverse['time'] + # time_key: str = 'time' + # if getattr(storemod, 'ohlc_key_map', False): + # keymap: bidict = storemod.ohlc_key_map + # time_key: str = keymap.inverse['time'] + + get_hist: Callable[ + [int, datetime, datetime], + tuple[np.ndarray, str] + ] + config: dict[str, int] + + async with mod.open_history_client( + mkt, + ) as (get_hist, config): + log.info(f'{mod} history client returned backfill config: {config}') + + # get latest query's worth of history all the way + # back to what is recorded in the tsdb + try: + array, mr_start_dt, mr_end_dt = await get_hist( + timeframe, + end_dt=None, + ) - # start history anal and load missing new data via backend. - last_tsdb_dt: datetime | None = None - timeframe: int # OHLC sample period - for timeframe, shm in shms.items(): + # XXX: timeframe not supported for backend (since + # above exception type), terminate immediately since + # there's no backfilling possible. + except DataUnavailable: + task_status.started() + return + + times: np.ndarray = array['time'] + + # sample period step size in seconds + step_size_s = ( + from_timestamp(times[-1]) + - from_timestamp(times[-2]) + ).seconds + + if step_size_s not in (1, 60): + log.error(f'Last 2 sample period is off!? 
-> {step_size_s}') + step_size_s = ( + from_timestamp(times[-2]) + - from_timestamp(times[-3]) + ).seconds + + # NOTE: on the first history, most recent history + # frame we PREPEND from the current shm ._last index + # and thus a gap between the earliest datum loaded here + # and the latest loaded from the tsdb may exist! + log.info(f'Pushing {array.size} to shm!') + shm.push( + array, + prepend=True, # append on first frame + # start= + ) + backfill_gap_from_shm_index: int = shm._first.value + 1 + + # tell parent task to continue + task_status.started() + + # start history anal and load missing new data via backend. + # backfill_until_dt: datetime | None = None + # started_after_tsdb_load: bool = False + + # for timeframe, shm in shms.items(): # loads a (large) frame of data from the tsdb depending # on the db's query size limit; our "nativedb" (using @@ -563,6 +756,7 @@ async def tsdb_backfill( timeframe=timeframe, ) + last_tsdb_dt: datetime | None = None if tsdb_entry: ( tsdb_history, @@ -570,106 +764,160 @@ async def tsdb_backfill( last_tsdb_dt, ) = tsdb_entry - tsdb_last_frame_start: datetime = push_tsdb_history_to_shm( - storemod, + # calc the index from which the tsdb data should be + # prepended, presuming there is a gap between the + # latest frame (loaded/read above) and the latest + # sample loaded from the tsdb. + backfill_diff: Duration = mr_start_dt - last_tsdb_dt + offset_s: float = backfill_diff.in_seconds() + offset_samples: int = round(offset_s / timeframe) + + # TODO: see if there's faster multi-field reads: + # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields + # re-index with a `time` and index field + prepend_start = shm._first.value - offset_samples + 1 + + to_push = tsdb_history[-prepend_start:] + shm.push( + to_push, + + # insert the history pre a "days worth" of samples + # to leave some real-time buffer space at the end. + prepend=True, + # update_first=False, + start=prepend_start, + field_map=storemod.ohlc_key_map, + ) + + log.info(f'Loaded {to_push.shape} datums from storage') + + # tsdb_last_frame_start: datetime = push_tsdb_history_to_shm( + # storemod, + # shm, + # tsdb_history, + # time_key, + # prepend=True, + # ) + # assert tsdb_last_frame_start == first_tsdb_dt + + # unblock the feed bus management task + # assert len(shms[1].array) + # if not started_after_tsdb_load: + # task_status.started() + # started_after_tsdb_load = True + + # begin backfiller task ASAP + # try: + + # if there is a gap to backfill from the first + # history frame until the last datum loaded from the tsdb + # continue that now in the background + # try: + # ( + # latest_start_dt, + # latest_end_dt, + bf_done = await tn.start( + partial( + start_backfill, + get_hist, + mod, + mkt, shm, - tsdb_history, - time_key, + timeframe, + + backfill_from_shm_index=backfill_gap_from_shm_index, + backfill_from_dt=mr_start_dt, + backfill_until_dt=last_tsdb_dt, + sampler_stream=sampler_stream, + + # feed_is_live, + + storage=storage, + # tsdb_is_up=True, ) - assert tsdb_last_frame_start == first_tsdb_dt + ) - # begin backfiller task ASAP + # if tsdb_entry: + # dts_per_tf[timeframe] = ( + # tsdb_history, + # last_tsdb_dt, + # latest_start_dt, + # latest_end_dt, + # bf_done, + # ) + # elif not started_after_tsdb_load: + # task_status.started() + # started_after_tsdb_load = True + + # XXX: timeframe not supported for backend (since + # above exception type), terminate immediately since + # there's no backfilling possible. 
+ # except DataUnavailable: + # return + # continue + + # tsdb_history = series.get(timeframe) + + # if len(hist_shm.array) < 2: + # TODO: there's an edge case here to solve where if the last + # frame before market close (at least on ib) was pushed and + # there was only "1 new" row pushed from the first backfill + # query-iteration, then the sample step sizing calcs will + # break upstream from here since you can't diff on at least + # 2 steps... probably should also add logic to compute from + # the tsdb series and stash that somewhere as meta data on + # the shm buffer?.. no se. + + # backload any further data from tsdb (concurrently per + # timeframe) if not all data was able to be loaded (in memory) + # from the ``StorageClient.load()`` call above. try: - ( - latest_start_dt, - latest_end_dt, - bf_done, - ) = await bus.nursery.start( - partial( - start_backfill, - mod, - mkt, - shm, - timeframe, - # sampler_stream, - feed_is_live, + await trio.sleep_forever() + finally: + return + # write_ohlcv - last_tsdb_dt=last_tsdb_dt, - tsdb_is_up=True, - storage=storage, - ) - ) - if tsdb_entry: - dts_per_tf[timeframe] = ( - tsdb_history, - last_tsdb_dt, - latest_start_dt, - latest_end_dt, - bf_done, - ) - except DataUnavailable: - # XXX: timeframe not supported for backend (since - # above exception type), so skip and move on to next. - continue - - # tsdb_history = series.get(timeframe) - - # if len(hist_shm.array) < 2: - # TODO: there's an edge case here to solve where if the last - # frame before market close (at least on ib) was pushed and - # there was only "1 new" row pushed from the first backfill - # query-iteration, then the sample step sizing calcs will - # break upstream from here since you can't diff on at least - # 2 steps... probably should also add logic to compute from - # the tsdb series and stash that somewhere as meta data on - # the shm buffer?.. no se. - - # unblock the feed bus management task - # assert len(shms[1].array) - task_status.started() - - # backload any further data from tsdb (concurrently per - # timeframe) if not all data was able to be loaded (in memory) - # from the ``StorageClient.load()`` call above. - async with trio.open_nursery() as nurse: - for timeframe, shm in shms.items(): - - entry = dts_per_tf.get(timeframe) - if not entry: - continue + # IF we need to continue backloading incrementall from the + # tsdb client.. + tn.start_soon( + back_load_from_tsdb, - ( - tsdb_history, - last_tsdb_dt, - latest_start_dt, - latest_end_dt, - bf_done, - ) = entry + storemod, + storage, + fqme, - if not tsdb_history.size: - continue + tsdb_history, + last_tsdb_dt, + mr_start_dt, + mr_end_dt, + bf_done, - nurse.start_soon( - back_load_from_tsdb, + timeframe, + shm, + ) + # async with trio.open_nursery() as nurse: + # for timeframe, shm in shms.items(): - storemod, - storage, - fqme, + # entry = dts_per_tf.get(timeframe) + # if not entry: + # continue - tsdb_history, - last_tsdb_dt, - latest_start_dt, - latest_end_dt, - bf_done, + # ( + # tsdb_history, + # last_tsdb_dt, + # latest_start_dt, + # latest_end_dt, + # bf_done, + # ) = entry + + # if not tsdb_history.size: + # continue - timeframe, - shm, - ) - # try: - # await trio.sleep_forever() - # finally: - # write_ohlcv + # try: + # await trio.sleep_forever() + # finally: + # write_ohlcv async def manage_history( @@ -773,7 +1021,10 @@ async def manage_history( # TODO: maybe it should be a subpkg of `.data`? 
from piker import storage - async with storage.open_storage_client() as (storemod, client): + async with ( + storage.open_storage_client() as (storemod, client), + trio.open_nursery() as tn, + ): log.info( f'Connecting to storage backend `{storemod.name}`:\n' f'location: {client.address}\n' @@ -793,30 +1044,10 @@ async def manage_history( # backfiller can do a single append from it's end datum and # then prepends backward to that from the current time # step. - await bus.nursery.start( - tsdb_backfill, - mod, - storemod, - bus, - client, - mkt, - { - 1: rt_shm, - 60: hist_shm, - }, - # sample_stream, - feed_is_live, - ) - - # indicate to caller that feed can be delivered to - # remote requesting client since we've loaded history - # data that can be used. - some_data_ready.set() - - # wait for a live feed before starting the sampler. - await feed_is_live.wait() - - # register 1s and 1m buffers with the global incrementer task + tf2mem: dict = { + 1: rt_shm, + 60: hist_shm, + } async with open_sample_stream( period_s=1., shms_by_period={ @@ -832,8 +1063,33 @@ async def manage_history( sub_for_broadcasts=False, ) as sample_stream: + # register 1s and 1m buffers with the global incrementer task log.info(f'Connected to sampler stream: {sample_stream}') + for timeframe in [60, 1]: + await tn.start( + tsdb_backfill, + mod, + storemod, + tn, + # bus, + client, + mkt, + tf2mem[timeframe], + timeframe, + + sample_stream, + # feed_is_live, + ) + + # indicate to caller that feed can be delivered to + # remote requesting client since we've loaded history + # data that can be used. + some_data_ready.set() + + # wait for a live feed before starting the sampler. + await feed_is_live.wait() + # yield back after client connect with filled shm task_status.started(( hist_zero_index, From 2dbcecdac7062f9d9c59adff6a7efb50b3bd0bfe Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 10:22:53 -0400 Subject: [PATCH 30/85] Generalize time-gap detector to accept unit and threshold --- piker/storage/nativedb.py | 48 +++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/piker/storage/nativedb.py b/piker/storage/nativedb.py index 9e4e848d6..4a6ecf0e8 100644 --- a/piker/storage/nativedb.py +++ b/piker/storage/nativedb.py @@ -54,6 +54,9 @@ from datetime import datetime from pathlib import Path import time +from typing import ( + Literal, +) # from bidict import bidict # import tractor @@ -388,15 +391,38 @@ def with_dts( pl.from_epoch(pl.col(time_col)).alias('dt'), ]).with_columns([ pl.from_epoch(pl.col(f'{time_col}_prev')).alias('dt_prev'), - ]).with_columns( - (pl.col('dt') - pl.col('dt_prev')).alias('dt_diff'), - ) + pl.col('dt').diff().alias('dt_diff'), + ]) #.with_columns( + # pl.col('dt').diff().dt.days().alias('days_dt_diff'), + # ) + + +t_unit: Literal[ + 'days', + 'hours', + 'minutes', + 'seconds', + 'miliseconds', + 'microseconds', + 'nanoseconds', +] def detect_time_gaps( df: pl.DataFrame, - expect_period: float = 60, + time_col: str = 'time', + # epoch sampling step diff + expect_period: float = 60, + + # datetime diff unit and gap value + # crypto mkts + # gap_dt_unit: t_unit = 'minutes', + # gap_thresh: int = 1, + + # legacy stock mkts + gap_dt_unit: t_unit = 'days', + gap_thresh: int = 2, ) -> pl.DataFrame: ''' @@ -406,7 +432,19 @@ def detect_time_gaps( actual missing data segments. 
''' - return with_dts(df).filter(pl.col('s_diff') > expect_period) + dt_gap_col: str = f'{gap_dt_unit}_diff' + return with_dts( + df + ).filter( + pl.col('s_diff').abs() > expect_period + ).with_columns( + getattr( + pl.col('dt_diff').dt, + gap_dt_unit, # NOTE: must be valid ``Expr.dt.`` + )().alias(dt_gap_col) + ).filter( + pl.col(dt_gap_col).abs() > gap_thresh + ) def detect_price_gaps( From 54f8a615fcdada32568b666ebda6525187329e25 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 10:24:42 -0400 Subject: [PATCH 31/85] Use `code.interact()` in anal subcmd for now --- piker/storage/cli.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/piker/storage/cli.py b/piker/storage/cli.py index ae7393e43..b0ec881a6 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -152,11 +152,15 @@ async def main(): src_df = await client.as_df(fqme, period) df = mod.with_dts(src_df) gaps: pl.DataFrame = mod.detect_time_gaps(df) - if gaps.is_empty(): - breakpoint() - breakpoint() - # await tractor.breakpoint() + # TODO: something better with tab completion.. + # is there something more minimal but nearly as + # functional as ipython? + import code + code.interact( + f'df: {df}\ngaps: {gaps}\n', + local=locals() + ) trio.run(main) @@ -173,8 +177,10 @@ def clone( ) import polars as pl + # TODO: actually look up an existing shm buf (set) from + # an fqme and file name parsing.. # open existing shm buffer for kucoin backend - key: str = 'piker.brokerd[a9e7a4fe-39ae-44].btcusdt.binance.hist' + key: str = 'piker.brokerd[3595d316-3c15-46].xmrusdt.kucoin.hist' shmpath: Path = Path('/dev/shm') / key assert shmpath.is_file() From f25248c87188ac37673d748c52f27353c6473cdb Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 11:11:13 -0400 Subject: [PATCH 32/85] Add `.data._timeseries` utility mod Org all the new (time) gap detection routines here and also move in the `slice_from_time()` epoch -> index converter routine from `._pathops` B) --- piker/data/_pathops.py | 157 +------------------ piker/data/_timeseries.py | 309 ++++++++++++++++++++++++++++++++++++++ piker/storage/nativedb.py | 104 ------------- piker/ui/_dataviz.py | 2 +- piker/ui/view_mode.py | 2 +- 5 files changed, 312 insertions(+), 262 deletions(-) create mode 100644 piker/data/_timeseries.py diff --git a/piker/data/_pathops.py b/piker/data/_pathops.py index 48a11f402..a17f289a4 100644 --- a/piker/data/_pathops.py +++ b/piker/data/_pathops.py @@ -1,5 +1,5 @@ # piker: trading gear for hackers -# Copyright (C) 2018-present Tyler Goodlet (in stewardship of piker0) +# Copyright (C) 2018-present Tyler Goodlet (in stewardship of pikers) # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -289,158 +289,3 @@ def ohlc_flatten( num=len(flat), ) return x, flat - - -def slice_from_time( - arr: np.ndarray, - start_t: float, - stop_t: float, - step: float, # sampler period step-diff - -) -> slice: - ''' - Calculate array indices mapped from a time range and return them in - a slice. - - Given an input array with an epoch `'time'` series entry, calculate - the indices which span the time range and return in a slice. Presume - each `'time'` step increment is uniform and when the time stamp - series contains gaps (the uniform presumption is untrue) use - ``np.searchsorted()`` binary search to look up the appropriate - index. 
- - ''' - profiler = Profiler( - msg='slice_from_time()', - disabled=not pg_profile_enabled(), - ms_threshold=ms_slower_then, - ) - - times = arr['time'] - t_first = floor(times[0]) - t_last = ceil(times[-1]) - - # the greatest index we can return which slices to the - # end of the input array. - read_i_max = arr.shape[0] - - # compute (presumed) uniform-time-step index offsets - i_start_t = floor(start_t) - read_i_start = floor(((i_start_t - t_first) // step)) - 1 - - i_stop_t = ceil(stop_t) - - # XXX: edge case -> always set stop index to last in array whenever - # the input stop time is detected to be greater then the equiv time - # stamp at that last entry. - if i_stop_t >= t_last: - read_i_stop = read_i_max - else: - read_i_stop = ceil((i_stop_t - t_first) // step) + 1 - - # always clip outputs to array support - # for read start: - # - never allow a start < the 0 index - # - never allow an end index > the read array len - read_i_start = min( - max(0, read_i_start), - read_i_max - 1, - ) - read_i_stop = max( - 0, - min(read_i_stop, read_i_max), - ) - - # check for larger-then-latest calculated index for given start - # time, in which case we do a binary search for the correct index. - # NOTE: this is usually the result of a time series with time gaps - # where it is expected that each index step maps to a uniform step - # in the time stamp series. - t_iv_start = times[read_i_start] - if ( - t_iv_start > i_start_t - ): - # do a binary search for the best index mapping to ``start_t`` - # given we measured an overshoot using the uniform-time-step - # calculation from above. - - # TODO: once we start caching these per source-array, - # we can just overwrite ``read_i_start`` directly. - new_read_i_start = np.searchsorted( - times, - i_start_t, - side='left', - ) - - # TODO: minimize binary search work as much as possible: - # - cache these remap values which compensate for gaps in the - # uniform time step basis where we calc a later start - # index for the given input ``start_t``. - # - can we shorten the input search sequence by heuristic? 
- # up_to_arith_start = index[:read_i_start] - - if ( - new_read_i_start <= read_i_start - ): - # t_diff = t_iv_start - start_t - # print( - # f"WE'RE CUTTING OUT TIME - STEP:{step}\n" - # f'start_t:{start_t} -> 0index start_t:{t_iv_start}\n' - # f'diff: {t_diff}\n' - # f'REMAPPED START i: {read_i_start} -> {new_read_i_start}\n' - # ) - read_i_start = new_read_i_start - - t_iv_stop = times[read_i_stop - 1] - if ( - t_iv_stop > i_stop_t - ): - # t_diff = stop_t - t_iv_stop - # print( - # f"WE'RE CUTTING OUT TIME - STEP:{step}\n" - # f'calced iv stop:{t_iv_stop} -> stop_t:{stop_t}\n' - # f'diff: {t_diff}\n' - # # f'SHOULD REMAP STOP: {read_i_start} -> {new_read_i_start}\n' - # ) - new_read_i_stop = np.searchsorted( - times[read_i_start:], - # times, - i_stop_t, - side='right', - ) - - if ( - new_read_i_stop <= read_i_stop - ): - read_i_stop = read_i_start + new_read_i_stop + 1 - - # sanity checks for range size - # samples = (i_stop_t - i_start_t) // step - # index_diff = read_i_stop - read_i_start + 1 - # if index_diff > (samples + 3): - # breakpoint() - - # read-relative indexes: gives a slice where `shm.array[read_slc]` - # will be the data spanning the input time range `start_t` -> - # `stop_t` - read_slc = slice( - int(read_i_start), - int(read_i_stop), - ) - - profiler( - 'slicing complete' - # f'{start_t} -> {abs_slc.start} | {read_slc.start}\n' - # f'{stop_t} -> {abs_slc.stop} | {read_slc.stop}\n' - ) - - # NOTE: if caller needs absolute buffer indices they can - # slice the buffer abs index like so: - # index = arr['index'] - # abs_indx = index[read_slc] - # abs_slc = slice( - # int(abs_indx[0]), - # int(abs_indx[-1]), - # ) - - return read_slc diff --git a/piker/data/_timeseries.py b/piker/data/_timeseries.py new file mode 100644 index 000000000..81d380c7a --- /dev/null +++ b/piker/data/_timeseries.py @@ -0,0 +1,309 @@ +# piker: trading gear for hackers +# Copyright (C) 2018-present Tyler Goodlet (in stewardship of pikers) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Financial time series processing utilities usually +pertaining to OHLCV style sampled data. + +Routines are generally implemented in either ``numpy`` or ``polars`` B) + +''' +from __future__ import annotations +from typing import Literal +from math import ( + ceil, + floor, +) + +import numpy as np +import polars as pl + +from ._sharedmem import ShmArray +from .._profile import ( + Profiler, + pg_profile_enabled, + ms_slower_then, +) + + +def slice_from_time( + arr: np.ndarray, + start_t: float, + stop_t: float, + step: float, # sampler period step-diff + +) -> slice: + ''' + Calculate array indices mapped from a time range and return them in + a slice. + + Given an input array with an epoch `'time'` series entry, calculate + the indices which span the time range and return in a slice. 
Presume + each `'time'` step increment is uniform and when the time stamp + series contains gaps (the uniform presumption is untrue) use + ``np.searchsorted()`` binary search to look up the appropriate + index. + + ''' + profiler = Profiler( + msg='slice_from_time()', + disabled=not pg_profile_enabled(), + ms_threshold=ms_slower_then, + ) + + times = arr['time'] + t_first = floor(times[0]) + t_last = ceil(times[-1]) + + # the greatest index we can return which slices to the + # end of the input array. + read_i_max = arr.shape[0] + + # compute (presumed) uniform-time-step index offsets + i_start_t = floor(start_t) + read_i_start = floor(((i_start_t - t_first) // step)) - 1 + + i_stop_t = ceil(stop_t) + + # XXX: edge case -> always set stop index to last in array whenever + # the input stop time is detected to be greater then the equiv time + # stamp at that last entry. + if i_stop_t >= t_last: + read_i_stop = read_i_max + else: + read_i_stop = ceil((i_stop_t - t_first) // step) + 1 + + # always clip outputs to array support + # for read start: + # - never allow a start < the 0 index + # - never allow an end index > the read array len + read_i_start = min( + max(0, read_i_start), + read_i_max - 1, + ) + read_i_stop = max( + 0, + min(read_i_stop, read_i_max), + ) + + # check for larger-then-latest calculated index for given start + # time, in which case we do a binary search for the correct index. + # NOTE: this is usually the result of a time series with time gaps + # where it is expected that each index step maps to a uniform step + # in the time stamp series. + t_iv_start = times[read_i_start] + if ( + t_iv_start > i_start_t + ): + # do a binary search for the best index mapping to ``start_t`` + # given we measured an overshoot using the uniform-time-step + # calculation from above. + + # TODO: once we start caching these per source-array, + # we can just overwrite ``read_i_start`` directly. + new_read_i_start = np.searchsorted( + times, + i_start_t, + side='left', + ) + + # TODO: minimize binary search work as much as possible: + # - cache these remap values which compensate for gaps in the + # uniform time step basis where we calc a later start + # index for the given input ``start_t``. + # - can we shorten the input search sequence by heuristic? 
+ # up_to_arith_start = index[:read_i_start] + + if ( + new_read_i_start <= read_i_start + ): + # t_diff = t_iv_start - start_t + # print( + # f"WE'RE CUTTING OUT TIME - STEP:{step}\n" + # f'start_t:{start_t} -> 0index start_t:{t_iv_start}\n' + # f'diff: {t_diff}\n' + # f'REMAPPED START i: {read_i_start} -> {new_read_i_start}\n' + # ) + read_i_start = new_read_i_start + + t_iv_stop = times[read_i_stop - 1] + if ( + t_iv_stop > i_stop_t + ): + # t_diff = stop_t - t_iv_stop + # print( + # f"WE'RE CUTTING OUT TIME - STEP:{step}\n" + # f'calced iv stop:{t_iv_stop} -> stop_t:{stop_t}\n' + # f'diff: {t_diff}\n' + # # f'SHOULD REMAP STOP: {read_i_start} -> {new_read_i_start}\n' + # ) + new_read_i_stop = np.searchsorted( + times[read_i_start:], + # times, + i_stop_t, + side='right', + ) + + if ( + new_read_i_stop <= read_i_stop + ): + read_i_stop = read_i_start + new_read_i_stop + 1 + + # sanity checks for range size + # samples = (i_stop_t - i_start_t) // step + # index_diff = read_i_stop - read_i_start + 1 + # if index_diff > (samples + 3): + # breakpoint() + + # read-relative indexes: gives a slice where `shm.array[read_slc]` + # will be the data spanning the input time range `start_t` -> + # `stop_t` + read_slc = slice( + int(read_i_start), + int(read_i_stop), + ) + + profiler( + 'slicing complete' + # f'{start_t} -> {abs_slc.start} | {read_slc.start}\n' + # f'{stop_t} -> {abs_slc.stop} | {read_slc.stop}\n' + ) + + # NOTE: if caller needs absolute buffer indices they can + # slice the buffer abs index like so: + # index = arr['index'] + # abs_indx = index[read_slc] + # abs_slc = slice( + # int(abs_indx[0]), + # int(abs_indx[-1]), + # ) + + return read_slc + + +def detect_null_time_gap(shm: ShmArray) -> tuple[float, float] | None: + # detect if there are any zero-epoch stamped rows + zero_pred: np.ndarray = shm.array['time'] == 0 + zero_t: np.ndarray = shm.array[zero_pred] + if zero_t.size: + istart, iend = zero_t['index'][[0, -1]] + start, end = shm._array['time'][ + [istart - 2, iend + 2] + ] + return istart - 2, start, end, iend + 2 + + return None + + +t_unit: Literal[ + 'days', + 'hours', + 'minutes', + 'seconds', + 'miliseconds', + 'microseconds', + 'nanoseconds', +] + + +def with_dts( + df: pl.DataFrame, + time_col: str = 'time', +) -> pl.DataFrame: + ''' + Insert datetime (casted) columns to a (presumably) OHLC sampled + time series with an epoch-time column keyed by ``time_col``. + + ''' + return df.with_columns([ + pl.col(time_col).shift(1).suffix('_prev'), + pl.col(time_col).diff().alias('s_diff'), + pl.from_epoch(pl.col(time_col)).alias('dt'), + ]).with_columns([ + pl.from_epoch(pl.col(f'{time_col}_prev')).alias('dt_prev'), + pl.col('dt').diff().alias('dt_diff'), + ]) #.with_columns( + # pl.col('dt').diff().dt.days().alias('days_dt_diff'), + # ) + + +def detect_time_gaps( + df: pl.DataFrame, + + time_col: str = 'time', + # epoch sampling step diff + expect_period: float = 60, + + # datetime diff unit and gap value + # crypto mkts + # gap_dt_unit: t_unit = 'minutes', + # gap_thresh: int = 1, + + # legacy stock mkts + gap_dt_unit: t_unit = 'days', + gap_thresh: int = 2, + +) -> pl.DataFrame: + ''' + Filter to OHLC datums which contain sample step gaps. + + For eg. legacy markets which have venue close gaps and/or + actual missing data segments. 
+ + ''' + dt_gap_col: str = f'{gap_dt_unit}_diff' + return with_dts( + df + ).filter( + pl.col('s_diff').abs() > expect_period + ).with_columns( + getattr( + pl.col('dt_diff').dt, + gap_dt_unit, # NOTE: must be valid ``Expr.dt.`` + )().alias(dt_gap_col) + ).filter( + pl.col(dt_gap_col).abs() > gap_thresh + ) + + +def detect_price_gaps( + df: pl.DataFrame, + gt_multiplier: float = 2., + price_fields: list[str] = ['high', 'low'], + +) -> pl.DataFrame: + ''' + Detect gaps in clearing price over an OHLC series. + + 2 types of gaps generally exist; up gaps and down gaps: + + - UP gap: when any next sample's lo price is strictly greater + then the current sample's hi price. + + - DOWN gap: when any next sample's hi price is strictly + less then the current samples lo price. + + ''' + # return df.filter( + # pl.col('high') - ) > expect_period, + # ).select([ + # pl.dt.datetime(pl.col(time_col).shift(1)).suffix('_previous'), + # pl.all(), + # ]).select([ + # pl.all(), + # (pl.col(time_col) - pl.col(f'{time_col}_previous')).alias('diff'), + # ]) + ... diff --git a/piker/storage/nativedb.py b/piker/storage/nativedb.py index 4a6ecf0e8..e96856d17 100644 --- a/piker/storage/nativedb.py +++ b/piker/storage/nativedb.py @@ -54,9 +54,6 @@ from datetime import datetime from pathlib import Path import time -from typing import ( - Literal, -) # from bidict import bidict # import tractor @@ -374,104 +371,3 @@ async def get_client( client = NativeStorageClient(datadir) client.index_files() yield client - - -def with_dts( - df: pl.DataFrame, - time_col: str = 'time', -) -> pl.DataFrame: - ''' - Insert datetime (casted) columns to a (presumably) OHLC sampled - time series with an epoch-time column keyed by ``time_col``. - - ''' - return df.with_columns([ - pl.col(time_col).shift(1).suffix('_prev'), - pl.col(time_col).diff().alias('s_diff'), - pl.from_epoch(pl.col(time_col)).alias('dt'), - ]).with_columns([ - pl.from_epoch(pl.col(f'{time_col}_prev')).alias('dt_prev'), - pl.col('dt').diff().alias('dt_diff'), - ]) #.with_columns( - # pl.col('dt').diff().dt.days().alias('days_dt_diff'), - # ) - - -t_unit: Literal[ - 'days', - 'hours', - 'minutes', - 'seconds', - 'miliseconds', - 'microseconds', - 'nanoseconds', -] - - -def detect_time_gaps( - df: pl.DataFrame, - - time_col: str = 'time', - # epoch sampling step diff - expect_period: float = 60, - - # datetime diff unit and gap value - # crypto mkts - # gap_dt_unit: t_unit = 'minutes', - # gap_thresh: int = 1, - - # legacy stock mkts - gap_dt_unit: t_unit = 'days', - gap_thresh: int = 2, - -) -> pl.DataFrame: - ''' - Filter to OHLC datums which contain sample step gaps. - - For eg. legacy markets which have venue close gaps and/or - actual missing data segments. - - ''' - dt_gap_col: str = f'{gap_dt_unit}_diff' - return with_dts( - df - ).filter( - pl.col('s_diff').abs() > expect_period - ).with_columns( - getattr( - pl.col('dt_diff').dt, - gap_dt_unit, # NOTE: must be valid ``Expr.dt.`` - )().alias(dt_gap_col) - ).filter( - pl.col(dt_gap_col).abs() > gap_thresh - ) - - -def detect_price_gaps( - df: pl.DataFrame, - gt_multiplier: float = 2., - price_fields: list[str] = ['high', 'low'], - -) -> pl.DataFrame: - ''' - Detect gaps in clearing price over an OHLC series. - - 2 types of gaps generally exist; up gaps and down gaps: - - - UP gap: when any next sample's lo price is strictly greater - then the current sample's hi price. - - - DOWN gap: when any next sample's hi price is strictly - less then the current samples lo price. 
- - ''' - # return df.filter( - # pl.col('high') - ) > expect_period, - # ).select([ - # pl.dt.datetime(pl.col(time_col).shift(1)).suffix('_previous'), - # pl.all(), - # ]).select([ - # pl.all(), - # (pl.col(time_col) - pl.col(f'{time_col}_previous')).alias('diff'), - # ]) - ... diff --git a/piker/ui/_dataviz.py b/piker/ui/_dataviz.py index 57a477d12..242386fa0 100644 --- a/piker/ui/_dataviz.py +++ b/piker/ui/_dataviz.py @@ -49,7 +49,7 @@ OHLCBarsAsCurveFmtr, # OHLC converted to line StepCurveFmtr, # "step" curve (like for vlm) ) -from ..data._pathops import ( +from ..data._timeseries import ( slice_from_time, ) from ._ohlc import ( diff --git a/piker/ui/view_mode.py b/piker/ui/view_mode.py index ecb62557a..78e58f7af 100644 --- a/piker/ui/view_mode.py +++ b/piker/ui/view_mode.py @@ -30,7 +30,7 @@ import pyqtgraph as pg from ..data.types import Struct -from ..data._pathops import slice_from_time +from ..data._timeseries import slice_from_time from ..log import get_logger from .._profile import Profiler From 8233d12afb7530f219f85334295816a49034f416 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 11:16:19 -0400 Subject: [PATCH 33/85] Detect and fill time gaps in tsdb history For now, just detect and fill in gaps (via fresh backend queries) *in the shm buffer* but eventually i'm pretty sure we can just write these direct to the parquet file as well. Use the new `.data._timeseries.detect_null_time_gap()` to find and fill in the `ShmArray` index range, re-check it and enter a prompt if it didn't totally fill. Also, - do a massive cleanup and removal of all unused/commented code. - drop the duplicate frames tracking, don't think we need it after removing multi-frame concurrent queries. - change backfill loop variable `end_dt` -> `last_start_dt` which is more semantically correct. - fix logic to backfill any missing sub-sequence portion for any frame query that overruns the shm buffer prependable space by detecting the available rows left to insert and only push those. - add a new `shm_push_in_between()` helper to match. --- piker/data/history.py | 587 +++++++++++++++--------------------------- 1 file changed, 211 insertions(+), 376 deletions(-) diff --git a/piker/data/history.py b/piker/data/history.py index 51e19c5ae..eea6e83f4 100644 --- a/piker/data/history.py +++ b/piker/data/history.py @@ -67,10 +67,6 @@ def diff_history( array: np.ndarray, - # timeframe: int, - # start_dt: datetime, - # end_dt: datetime, - append_until_dt: datetime | None = None, prepend_until_dt: datetime | None = None, @@ -90,123 +86,45 @@ def diff_history( else: return array[times >= prepend_until_dt.timestamp()] +async def shm_push_in_between( + shm: ShmArray, + to_push: np.ndarray, + prepend_index: int, + + update_start_on_prepend: bool = False, + +) -> int: + shm.push( + to_push, + prepend=True, + + # XXX: only update the ._first index if no tsdb + # segment was previously prepended by the + # parent task. + update_first=update_start_on_prepend, + + # XXX: only prepend from a manually calculated shm + # index if there was already a tsdb history + # segment prepended (since then the + # ._first.value is going to be wayyy in the + # past!) + start=( + prepend_index + if not update_start_on_prepend + else None + ), + ) + # XXX: extremely important, there can be no checkpoints + # in the block above to avoid entering new ``frames`` + # values while we're pipelining the current ones to + # memory... 
+ array = shm.array + zeros = array[array['low'] == 0] + if ( + 0 < zeros.size < 1000 + ): + tractor.breakpoint() -# async def open_history_mngr( -# mod: ModuleType, -# mkt: MktPair, -# # shm: ShmArray, -# # timeframes: list[float] = [60, 1], -# timeframes: float, - -# ) -> Callable[ -# [int, datetime, datetime], -# tuple[np.ndarray, str] -# ]: -# ''' -# Open a "history manager" for the backend data provider, -# get the latest "frames worth" of ohlcv history, -# push the history to shm and deliver -# the start datum's datetime value so that further history loading -# can be done until synchronized with the tsdb loaded time series. - -# ''' -# hist: Callable[ -# [int, datetime, datetime], -# tuple[np.ndarray, str] -# ] -# config: dict[str, int] - -# async with mod.open_history_client( -# mkt, -# ) as (hist, config): -# log.info(f'{mod} history client returned backfill config: {config}') - -# # get latest query's worth of history all the way -# # back to what is recorded in the tsdb -# array, mr_start_dt, mr_end_dt = await hist( -# timeframe, -# end_dt=None, -# ) -# times: np.ndarray = array['time'] - -# # sample period step size in seconds -# step_size_s = ( -# from_timestamp(times[-1]) -# - from_timestamp(times[-2]) -# ).seconds - -# if step_size_s not in (1, 60): -# log.error(f'Last 2 sample period is off!? -> {step_size_s}') -# step_size_s = ( -# from_timestamp(times[-2]) -# - from_timestamp(times[-3]) -# ).seconds - -# # NOTE: on the first history, most recent history -# # frame we PREPEND from the current shm ._last index -# # and thus a gap between the earliest datum loaded here -# # and the latest loaded from the tsdb may exist! -# log.info(f'Pushing {to_push.size} to shm!') -# shm.push( -# to_push, -# prepend=True, -# # start= -# ) - - -# # if the market is open (aka we have a live feed) but the -# # history sample step index seems off we report the surrounding -# # data and drop into a bp. this case shouldn't really ever -# # happen if we're doing history retrieval correctly. -# # if ( -# # step_size_s == 60 -# # and feed_is_live.is_set() -# # ): -# # inow = round(time.time()) -# # diff = inow - times[-1] -# # if abs(diff) > 60: -# # surr = array[-6:] -# # diff_in_mins = round(diff/60., ndigits=2) -# # log.warning( -# # f'STEP ERROR `{mkt.fqme}` for period {step_size_s}s:\n' -# # f'Off by `{diff}` seconds (or `{diff_in_mins}` mins)\n' -# # 'Surrounding 6 time stamps:\n' -# # f'{list(surr["time"])}\n' -# # 'Here is surrounding 6 samples:\n' -# # f'{surr}\nn' -# # ) - -# # uncomment this for a hacker who wants to investigate -# # this case manually.. -# # await tractor.breakpoint() - -# # frame's worth of sample-period-steps, in seconds -# # frame_size_s = len(array) * step_size_s - -# to_push = array -# # to_push = diff_history( -# # array, -# # # timeframe, -# # # mr_start_dt, -# # # mr_end_dt, - -# # # backfill scenario for "most recent" frame -# # prepend_until_dt=last_tsdb_dt, -# # ) - -# # NOTE: on the first history, most recent history -# # frame we PREPEND from the current shm ._last index -# # and thus a gap between the earliest datum loaded here -# # and the latest loaded from the tsdb may exist! -# log.info(f'Pushing {to_push.size} to shm!') -# shm.push( -# to_push, -# prepend=True, -# # start= -# ) -# # TODO: should we wrap this in a "history frame" type or -# # something? 
-# yield hist, mr_start_dt, mr_end_dt async def start_backfill( @@ -221,44 +139,27 @@ async def start_backfill( sampler_stream: tractor.MsgStream, - backfill_until_dt: datetime | None = None, storage: StorageClient | None = None, write_tsdb: bool = True, - # tsdb_is_up: bool = True, task_status: TaskStatus[tuple] = trio.TASK_STATUS_IGNORED, ) -> int: - # TODO: *** THIS IS A BUG *** - # we need to only broadcast to subscribers for this fqme.. - # otherwise all fsps get reset on every chart.. - # await sampler_stream.send('broadcast_all') - - # signal that backfilling to tsdb's end datum is complete - bf_done = trio.Event() - # let caller unblock and deliver latest history frame - task_status.started( #( - # mr_start_dt, - # mr_end_dt, - bf_done, - )# ) + # and use to signal that backfilling the shm gap until + # the tsdb end is complete! + bf_done = trio.Event() + task_status.started(bf_done) # based on the sample step size, maybe load a certain amount history update_start_on_prepend: bool = False if backfill_until_dt is None: - if timeframe not in (1, 60): - raise ValueError( - '`piker` only needs to support 1m and 1s sampling ' - 'but ur api is trying to deliver a longer ' - f'timeframe of {timeframe} seconds..\n' - 'So yuh.. dun do dat brudder.' - ) - + # TODO: drop this right and just expose the backfill + # limits inside a [storage] section in conf.toml? # when no tsdb "last datum" is provided, we just load # some near-term history. # periods = { @@ -266,7 +167,6 @@ async def start_backfill( # 60: {'days': 14}, # } - # if tsdb_is_up: # do a decently sized backfill and load it into storage. periods = { 1: {'days': 6}, @@ -281,38 +181,33 @@ async def start_backfill( # settings above when the tsdb is detected as being empty. backfill_until_dt = backfill_from_dt.subtract(**period_duration) + + # TODO: can we drop this? without conc i don't think this + # is necessary any more? # configure async query throttling # rate = config.get('rate', 1) # XXX: legacy from ``trimeter`` code but unsupported now. # erlangs = config.get('erlangs', 1) - # avoid duplicate history frames with a set of datetime frame # starts and associated counts of how many duplicates we see # per time stamp. starts: Counter[datetime] = Counter() - # conduct "backward history filling" since - # no tsdb history yet exists. - - # implemented via a simple inline sequential loop where we - # simply pass the last retrieved start dt to the next - # request as it's end dt. 
- # while end_dt < backfill_until_dt: - # while ( - # end_dt is None # init case - # or end_dt < mr_start_dt - # ): - - # conduct "forward filling" from the last time step - # loaded from the tsdb until the first step loaded - # just above - end_dt: datetime = backfill_from_dt - # start_dt: datetime = backfill_until_dt + # conduct "backward history gap filling" where we push to + # the shm buffer until we have history back until the + # latest entry loaded from the tsdb's table B) + last_start_dt: datetime = backfill_from_dt next_prepend_index: int = backfill_from_shm_index - while end_dt > backfill_until_dt: + while last_start_dt > backfill_until_dt: + + # if timeframe == 60: + # await tractor.breakpoint() + # else: + # return + log.debug( - f'Requesting {timeframe}s frame ending in {end_dt}' + f'Requesting {timeframe}s frame ending in {last_start_dt}' ) try: @@ -322,8 +217,7 @@ async def start_backfill( next_end_dt, ) = await get_hist( timeframe, - end_dt=end_dt, - # start_dt=start_dt, + end_dt=last_start_dt, ) # broker says there never was or is no more history to pull @@ -338,22 +232,23 @@ async def start_backfill( # request loop until the condition is resolved? return - if ( - next_start_dt in starts - and starts[next_start_dt] <= 6 - ): - start_dt = min(starts) - log.warning( - f"{mkt.fqme}: skipping duplicate frame @ {next_start_dt}" - ) - starts[start_dt] += 1 - continue - - elif starts[next_start_dt] > 6: - log.warning( - f'NO-MORE-DATA: backend {mod.name} before {next_start_dt}?' - ) - return + # TODO: drop this? see todo above.. + # if ( + # next_start_dt in starts + # and starts[next_start_dt] <= 6 + # ): + # start_dt = min(starts) + # log.warning( + # f"{mkt.fqme}: skipping duplicate frame @ {next_start_dt}" + # ) + # starts[start_dt] += 1 + # continue + + # elif starts[next_start_dt] > 6: + # log.warning( + # f'NO-MORE-DATA: backend {mod.name} before {next_start_dt}?' + # ) + # return # only update new start point if not-yet-seen start_dt: datetime = next_start_dt @@ -361,7 +256,7 @@ async def start_backfill( assert array['time'][0] == start_dt.timestamp() - diff = end_dt - start_dt + diff = last_start_dt - start_dt frame_time_diff_s = diff.seconds # frame's worth of sample-period-steps, in seconds @@ -374,48 +269,31 @@ async def start_backfill( # history gap (eg. market closed period, outage, etc.) # so just report it to console for now. log.warning( - f'History frame ending @ {end_dt} appears to have a gap:\n' + f'History frame ending @ {last_start_dt} appears to have a gap:\n' f'{diff} ~= {frame_time_diff_s} seconds' ) to_push = diff_history( array, - # timeframe, - # start_dt, - # end_dt, prepend_until_dt=backfill_until_dt, ) ln = len(to_push) if ln: - log.info(f'{ln} bars for {start_dt} -> {end_dt}') + log.info(f'{ln} bars for {start_dt} -> {last_start_dt}') else: log.warning( - f'{ln} BARS TO PUSH after diff?!: {start_dt} -> {end_dt}' + f'{ln} BARS TO PUSH after diff?!: {start_dt} -> {last_start_dt}' ) # bail gracefully on shm allocation overrun/full # condition try: - shm.push( + await shm_push_in_between( + shm, to_push, - prepend=True, - - # XXX: only update the ._first index if no tsdb - # segment was previously prepended by the - # parent task. - update_first=update_start_on_prepend, - - # XXX: only prepend from a manually calculated shm - # index if there was already a tsdb history - # segment prepended (since then the - # ._first.value is going to be wayyy in the - # past!) 
- start=( - next_prepend_index - if not update_start_on_prepend - else None - ), + prepend_index=next_prepend_index, + update_start_on_prepend=update_start_on_prepend, ) await sampler_stream.send({ 'broadcast_all': { @@ -425,34 +303,40 @@ async def start_backfill( # decrement next prepend point next_prepend_index = next_prepend_index - ln - end_dt = next_start_dt - - # XXX: extremely important, there can be no checkpoints - # in the block above to avoid entering new ``frames`` - # values while we're pipelining the current ones to - # memory... - array = shm.array - zeros = array[array['low'] == 0] - if ( - 0 < zeros.size < 10 - ): - await tractor.breakpoint() - + last_start_dt = next_start_dt except ValueError as ve: _ve = ve - log.info( - f'Shm buffer overrun on: {start_dt} -> {end_dt}?' + log.error( + f'Shm buffer prepend OVERRUN on: {start_dt} -> {last_start_dt}?' ) - await tractor.breakpoint() + if next_prepend_index < ln: + log.warning( + f'Shm buffer can only hold {next_prepend_index} more rows..\n' + f'Appending those from recent {ln}-sized frame, no more!' + ) + + to_push = to_push[-next_prepend_index + 1:] + await shm_push_in_between( + shm, + to_push, + prepend_index=next_prepend_index, + update_start_on_prepend=update_start_on_prepend, + ) + await sampler_stream.send({ + 'broadcast_all': { + 'backfilling': True + }, + }) + # can't push the entire frame? so # push only the amount that can fit.. break log.info( f'Shm pushed {ln} frame:\n' - f'{start_dt} -> {end_dt}' + f'{start_dt} -> {last_start_dt}' ) # FINALLY, maybe write immediately to the tsdb backend for @@ -460,11 +344,10 @@ async def start_backfill( if ( storage is not None and write_tsdb - and False ): log.info( f'Writing {ln} frame to storage:\n' - f'{start_dt} -> {end_dt}' + f'{start_dt} -> {last_start_dt}' ) if mkt.dst.atype not in {'crypto', 'crypto_currency'}: @@ -477,11 +360,59 @@ async def start_backfill( else: col_sym_key: str = mkt.get_fqme(delim_char='') + # TODO: implement parquet append!? await storage.write_ohlcv( col_sym_key, shm.array, timeframe, ) + else: + # finally filled gap + log.info( + f'Finished filling gap to tsdb start @ {backfill_until_dt}!' + ) + # conduct tsdb timestamp gap detection and backfill any + # seemingly missing portions! + + from ._timeseries import detect_null_time_gap + + indices: tuple | None = detect_null_time_gap(shm) + if indices: + ( + istart, + start, + end, + iend, + ) = indices + ( + array, + next_start_dt, + next_end_dt, + ) = await get_hist( + timeframe, + start_dt=from_timestamp(start), + end_dt=from_timestamp(end), + ) + await shm_push_in_between( + shm, + array, + prepend_index=iend, + update_start_on_prepend=False, + ) + await sampler_stream.send({ + 'broadcast_all': { + 'backfilling': True + }, + }) + indices: tuple | None = detect_null_time_gap(shm) + if indices: + ( + istart, + start, + end, + iend, + ) = indices + await tractor.breakpoint() # TODO: can we only trigger this if the respective # history in "in view"?!? 
@@ -496,43 +427,11 @@ async def start_backfill( bf_done.set() -def push_tsdb_history_to_shm( - storemod: ModuleType, - shm: ShmArray, - tsdb_history: np.ndarray, - time_field_key: str, - prepend: bool = False, - -) -> datetime: - - # TODO: see if there's faster multi-field reads: - # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields - # re-index with a `time` and index field - prepend_start = shm._first.value - - to_push = tsdb_history[-prepend_start:] - shm.push( - to_push, - - # insert the history pre a "days worth" of samples - # to leave some real-time buffer space at the end. - prepend=prepend, - # update_first=False, - # start=prepend_start, - field_map=storemod.ohlc_key_map, - ) - - log.info(f'Loaded {to_push.shape} datums from storage') - tsdb_last_frame_start = tsdb_history[time_field_key][0] - return from_timestamp(tsdb_last_frame_start) - - async def back_load_from_tsdb( storemod: ModuleType, storage: StorageClient, fqme: str, - # dts_per_tf: dict[int, datetime], tsdb_history: np.ndarray, @@ -631,13 +530,26 @@ async def back_load_from_tsdb( else: tsdb_last_frame_start = next_start - tsdb_last_frame_start: datetime = push_tsdb_history_to_shm( - storemod, - shm, - tsdb_history, - time_key, + # TODO: see if there's faster multi-field reads: + # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields + # re-index with a `time` and index field + prepend_start = shm._first.value + + to_push = tsdb_history[-prepend_start:] + shm.push( + to_push, + + # insert the history pre a "days worth" of samples + # to leave some real-time buffer space at the end. + prepend=True, + # update_first=False, + # start=prepend_start, + field_map=storemod.ohlc_key_map, ) + log.info(f'Loaded {to_push.shape} datums from storage') + tsdb_last_frame_start = tsdb_history[time_key][0] + # manually trigger step update to update charts/fsps # which need an incremental update. # NOTE: the way this works is super duper @@ -651,22 +563,18 @@ async def back_load_from_tsdb( # graphics loop cycle. # await sampler_stream.send('broadcast_all') - # TODO: write new data to tsdb to be ready to for next read. - async def tsdb_backfill( mod: ModuleType, storemod: ModuleType, - # bus: _FeedsBus, tn: trio.Nursery, + storage: StorageClient, mkt: MktPair, - # shms: dict[int, ShmArray], shm: ShmArray, timeframe: float, sampler_stream: tractor.MsgStream, - # feed_is_live: trio.Event, task_status: TaskStatus[ tuple[ShmArray, ShmArray] @@ -674,22 +582,11 @@ async def tsdb_backfill( ) -> None: - # TODO: this should be used verbatim for the pure - # shm backfiller approach below. - # dts_per_tf: dict[int, datetime] = {} - fqme: str = mkt.fqme - - # time_key: str = 'time' - # if getattr(storemod, 'ohlc_key_map', False): - # keymap: bidict = storemod.ohlc_key_map - # time_key: str = keymap.inverse['time'] - get_hist: Callable[ [int, datetime, datetime], tuple[np.ndarray, str] ] config: dict[str, int] - async with mod.open_history_client( mkt, ) as (get_hist, config): @@ -733,24 +630,18 @@ async def tsdb_backfill( shm.push( array, prepend=True, # append on first frame - # start= ) backfill_gap_from_shm_index: int = shm._first.value + 1 # tell parent task to continue task_status.started() - # start history anal and load missing new data via backend. 
- # backfill_until_dt: datetime | None = None - # started_after_tsdb_load: bool = False - - # for timeframe, shm in shms.items(): - # loads a (large) frame of data from the tsdb depending # on the db's query size limit; our "nativedb" (using # parquet) generally can load the entire history into mem # but if not then below the remaining history can be lazy # loaded? + fqme: str = mkt.fqme tsdb_entry: tuple | None = await storage.load( fqme, timeframe=timeframe, @@ -777,45 +668,36 @@ async def tsdb_backfill( # re-index with a `time` and index field prepend_start = shm._first.value - offset_samples + 1 - to_push = tsdb_history[-prepend_start:] - shm.push( - to_push, - - # insert the history pre a "days worth" of samples - # to leave some real-time buffer space at the end. - prepend=True, - # update_first=False, - start=prepend_start, - field_map=storemod.ohlc_key_map, - ) - - log.info(f'Loaded {to_push.shape} datums from storage') + # tsdb history is so far in the past we can't fit it in + # shm buffer space so simply don't load it! + if prepend_start > 0: + to_push = tsdb_history[-prepend_start:] + shm.push( + to_push, - # tsdb_last_frame_start: datetime = push_tsdb_history_to_shm( - # storemod, - # shm, - # tsdb_history, - # time_key, - # prepend=True, - # ) - # assert tsdb_last_frame_start == first_tsdb_dt + # insert the history pre a "days worth" of samples + # to leave some real-time buffer space at the end. + prepend=True, + # update_first=False, + start=prepend_start, + field_map=storemod.ohlc_key_map, + ) - # unblock the feed bus management task - # assert len(shms[1].array) - # if not started_after_tsdb_load: - # task_status.started() - # started_after_tsdb_load = True + log.info(f'Loaded {to_push.shape} datums from storage') - # begin backfiller task ASAP - # try: + # TODO: maybe start history anal and load missing "history + # gaps" via backend.. + if timeframe not in (1, 60): + raise ValueError( + '`piker` only needs to support 1m and 1s sampling ' + 'but ur api is trying to deliver a longer ' + f'timeframe of {timeframe} seconds..\n' + 'So yuh.. dun do dat brudder.' + ) # if there is a gap to backfill from the first # history frame until the last datum loaded from the tsdb # continue that now in the background - # try: - # ( - # latest_start_dt, - # latest_end_dt, bf_done = await tn.start( partial( start_backfill, @@ -827,46 +709,24 @@ async def tsdb_backfill( backfill_from_shm_index=backfill_gap_from_shm_index, backfill_from_dt=mr_start_dt, - backfill_until_dt=last_tsdb_dt, - sampler_stream=sampler_stream, - # feed_is_live, + sampler_stream=sampler_stream, + backfill_until_dt=last_tsdb_dt, storage=storage, - # tsdb_is_up=True, ) ) - # if tsdb_entry: - # dts_per_tf[timeframe] = ( - # tsdb_history, - # last_tsdb_dt, - # latest_start_dt, - # latest_end_dt, - # bf_done, - # ) - # elif not started_after_tsdb_load: - # task_status.started() - # started_after_tsdb_load = True - # XXX: timeframe not supported for backend (since - # above exception type), terminate immediately since - # there's no backfilling possible. - # except DataUnavailable: - # return - # continue - - # tsdb_history = series.get(timeframe) - - # if len(hist_shm.array) < 2: - # TODO: there's an edge case here to solve where if the last - # frame before market close (at least on ib) was pushed and - # there was only "1 new" row pushed from the first backfill - # query-iteration, then the sample step sizing calcs will - # break upstream from here since you can't diff on at least - # 2 steps... 
probably should also add logic to compute from - # the tsdb series and stash that somewhere as meta data on - # the shm buffer?.. no se. + # if len(hist_shm.array) < 2: + # TODO: there's an edge case here to solve where if the last + # frame before market close (at least on ib) was pushed and + # there was only "1 new" row pushed from the first backfill + # query-iteration, then the sample step sizing calcs will + # break upstream from here since you can't diff on at least + # 2 steps... probably should also add logic to compute from + # the tsdb series and stash that somewhere as meta data on + # the shm buffer?.. no se. # backload any further data from tsdb (concurrently per # timeframe) if not all data was able to be loaded (in memory) @@ -875,7 +735,6 @@ async def tsdb_backfill( await trio.sleep_forever() finally: return - # write_ohlcv # IF we need to continue backloading incrementall from the # tsdb client.. @@ -895,29 +754,6 @@ async def tsdb_backfill( timeframe, shm, ) - # async with trio.open_nursery() as nurse: - # for timeframe, shm in shms.items(): - - # entry = dts_per_tf.get(timeframe) - # if not entry: - # continue - - # ( - # tsdb_history, - # last_tsdb_dt, - # latest_start_dt, - # latest_end_dt, - # bf_done, - # ) = entry - - # if not tsdb_history.size: - # continue - - - # try: - # await trio.sleep_forever() - # finally: - # write_ohlcv async def manage_history( @@ -1079,7 +915,6 @@ async def manage_history( timeframe, sample_stream, - # feed_is_live, ) # indicate to caller that feed can be delivered to From fda711130554e08780d8ebf67154b497379c8ca8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 11:52:59 -0400 Subject: [PATCH 34/85] Import from new `.data._timeseries` mod for anal --- piker/storage/cli.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/piker/storage/cli.py b/piker/storage/cli.py index b0ec881a6..f7deb92d9 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -150,8 +150,9 @@ async def main(): assert first_dt < last_dt src_df = await client.as_df(fqme, period) - df = mod.with_dts(src_df) - gaps: pl.DataFrame = mod.detect_time_gaps(df) + from piker.data import _timeseries as tsmod + df = tsmod.with_dts(src_df) + gaps: pl.DataFrame = tsmod.detect_time_gaps(df) # TODO: something better with tab completion.. # is there something more minimal but nearly as From 34dd6ffc22d8f62ecce03582bf61d5cb07dae94d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 11:53:37 -0400 Subject: [PATCH 35/85] Add a configurable timeout around backend live feed startup For now make it a larger value but ideally in the long run we can tune it to specific backends and expose it in the config(s). --- piker/data/feed.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 0cfdb8487..775e8fc67 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -226,6 +226,7 @@ async def allocate_persistent_feed( loglevel: str, start_stream: bool = True, + init_timeout: float = 616, task_status: TaskStatus[FeedInit] = trio.TASK_STATUS_IGNORED, @@ -267,22 +268,23 @@ async def allocate_persistent_feed( # TODO: probably make a struct msg type for this as well # since eventually we do want to have more efficient IPC.. 
first_quote: dict[str, Any] - ( - init_msgs, - first_quote, - ) = await bus.nursery.start( - partial( - mod.stream_quotes, - send_chan=send, - feed_is_live=feed_is_live, - - # NOTE / TODO: eventualy we may support providing more then - # one input here such that a datad daemon can multiplex - # multiple live feeds from one task, instead of getting - # a new request (and thus new task) for each subscription. - symbols=[symstr], + with trio.fail_after(init_timeout): + ( + init_msgs, + first_quote, + ) = await bus.nursery.start( + partial( + mod.stream_quotes, + send_chan=send, + feed_is_live=feed_is_live, + + # NOTE / TODO: eventualy we may support providing more then + # one input here such that a datad daemon can multiplex + # multiple live feeds from one task, instead of getting + # a new request (and thus new task) for each subscription. + symbols=[symstr], + ) ) - ) # TODO: this is indexed by symbol for now since we've planned (for # some time) to expect backends to handle single @@ -908,6 +910,7 @@ async def open_feed( for fqme, flume_msg in flumes_msg_dict.items(): flume = Flume.from_msg(flume_msg) + # assert flume.mkt.fqme == fqme feed.flumes[fqme] = flume From a575e67fab810a0a8ca1494065a1f9c61080420c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 18:45:37 -0400 Subject: [PATCH 36/85] Go back to just opening sampler stream inside history update task? --- piker/ui/_display.py | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/piker/ui/_display.py b/piker/ui/_display.py index 7b1728153..775e6e08c 100644 --- a/piker/ui/_display.py +++ b/piker/ui/_display.py @@ -33,7 +33,6 @@ import trio import pyqtgraph as pg # import pendulum - from msgspec import field # from .. import brokers @@ -208,6 +207,7 @@ class DisplayState(Struct): async def increment_history_view( + # min_istream: tractor.MsgStream, ds: DisplayState, ): hist_chart = ds.hist_chart @@ -221,8 +221,16 @@ async def increment_history_view( # wakeups/ctx switches verus logic checks (as normal) # - we need increment logic that only does the view shift # call when the uppx permits/needs it - async with open_sample_stream(1.) as istream: - async for msg in istream: + + async with open_sample_stream(1.) as min_istream: + + # draw everything from scratch on first entry! + for curve_name, hist_viz in hist_chart._vizs.items(): + log.info(f'FORCING CURVE REDRAW -> {curve_name}') + hist_viz.update_graphics(force_redraw=True) + + async for msg in min_istream: + # print(f'SAMPLER MSG: {msg}') profiler = Profiler( msg=f'History chart cycle for: `{ds.fqme}`', @@ -232,6 +240,13 @@ async def increment_history_view( # ms_threshold=4, ) + if ( + 'backfilling' in msg + ): + # for curve_name, hist_viz in hist_chart._vizs.items(): + print(f'FORCING REDRAW!! 
{hist_viz.name}') + hist_viz.update_graphics(force_redraw=True) + # l3 = ds.viz.shm.array[-3:] # print( # f'fast step for {ds.flume.mkt.fqme}:\n' @@ -272,7 +287,7 @@ async def increment_history_view( hist_chart.increment_view(datums=append_diff) profiler('hist tread view') - profiler.finish() + profiler.finish() async def graphics_update_loop( @@ -280,6 +295,8 @@ async def graphics_update_loop( nurse: trio.Nursery, godwidget: GodWidget, feed: Feed, + # min_istream: tractor.MsgStream, + pis: dict[str, list[pgo.PlotItem, pgo.PlotItem]] = {}, wap_in_history: bool = False, vlm_charts: dict[str, ChartPlotWidget] = {}, @@ -429,8 +446,10 @@ async def graphics_update_loop( nurse.start_soon( increment_history_view, + # min_istream, ds, ) + await trio.sleep(0) if ds.hist_vars['i_last'] < ds.hist_vars['i_last_append']: breakpoint() @@ -1214,12 +1233,15 @@ async def display_symbol_data( ) feed: Feed - async with open_feed( - fqmes, - loglevel=loglevel, - tick_throttle=cycles_per_feed, - - ) as feed: + async with ( + # open_sample_stream(1.) as min_istream, + open_feed( + fqmes, + loglevel=loglevel, + tick_throttle=cycles_per_feed, + + ) as feed, + ): # use expanded contract symbols passed back from feed layer. fqmes = list(feed.flumes.keys()) @@ -1491,6 +1513,8 @@ async def display_symbol_data( ln, godwidget, feed, + # min_istream, + pis, wap_in_history, vlm_charts, From c1201c164c6ed7dc8d9a659828c1c76783cf7809 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 18:46:14 -0400 Subject: [PATCH 37/85] Parametrize index margin around gap detection segment --- piker/data/_timeseries.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/piker/data/_timeseries.py b/piker/data/_timeseries.py index 81d380c7a..f43e0c73a 100644 --- a/piker/data/_timeseries.py +++ b/piker/data/_timeseries.py @@ -194,16 +194,33 @@ def slice_from_time( return read_slc -def detect_null_time_gap(shm: ShmArray) -> tuple[float, float] | None: - # detect if there are any zero-epoch stamped rows +def detect_null_time_gap( + shm: ShmArray, + imargin: int = 1, + +) -> tuple[float, float] | None: + ''' + Detect if there are any zero-epoch stamped rows in + the presumed 'time' field-column. + + Filter to the gap and return a surrounding index range. + + NOTE: for now presumes only ONE gap XD + + ''' zero_pred: np.ndarray = shm.array['time'] == 0 zero_t: np.ndarray = shm.array[zero_pred] if zero_t.size: istart, iend = zero_t['index'][[0, -1]] start, end = shm._array['time'][ - [istart - 2, iend + 2] + [istart - imargin, iend + imargin] ] - return istart - 2, start, end, iend + 2 + return ( + istart - imargin, + start, + end, + iend + imargin, + ) return None From f8ab3bde3597d4b6b33c903d98a3a2141b608af6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 18:46:50 -0400 Subject: [PATCH 38/85] Allow sampler step events to overrun; only 1s period --- piker/data/_sampling.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index 98a7603ff..01c5a35e6 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -529,7 +529,9 @@ async def open_sample_stream( assert len(first) > 1 async with ( - ctx.open_stream() as istream, + ctx.open_stream( + allow_overruns=True, + ) as istream, # TODO: we DO need this task-bcasting so that # for eg. 
the history chart update loop eventually From c1546eb043248952db84f56329707d7d937232fd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 18:47:22 -0400 Subject: [PATCH 39/85] Add note about appending parquet files on write --- piker/storage/nativedb.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/piker/storage/nativedb.py b/piker/storage/nativedb.py index e96856d17..9561d4e9a 100644 --- a/piker/storage/nativedb.py +++ b/piker/storage/nativedb.py @@ -296,7 +296,11 @@ def _write_ohlcv( else: df = ohlcv - # TODO: use a proper profiler + # TODO: in terms of managing the ultra long term data + # - use a proper profiler to measure all this IO and + # roundtripping! + # - try out ``fastparquet``'s append writing: + # https://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.write start = time.time() df.write_parquet(path) delay: float = round( From c8f8724887527a9ad3ad9420d31495c7ae1100c0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 18:50:15 -0400 Subject: [PATCH 40/85] Mask out all the duplicate frame detection --- piker/data/history.py | 75 +++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/piker/data/history.py b/piker/data/history.py index eea6e83f4..fce81063d 100644 --- a/piker/data/history.py +++ b/piker/data/history.py @@ -19,9 +19,9 @@ ''' from __future__ import annotations -from collections import ( - Counter, -) +# from collections import ( +# Counter, +# ) from datetime import datetime from functools import partial # import time @@ -86,6 +86,7 @@ def diff_history( else: return array[times >= prepend_until_dt.timestamp()] + async def shm_push_in_between( shm: ShmArray, to_push: np.ndarray, @@ -191,7 +192,7 @@ async def start_backfill( # avoid duplicate history frames with a set of datetime frame # starts and associated counts of how many duplicates we see # per time stamp. 
- starts: Counter[datetime] = Counter() + # starts: Counter[datetime] = Counter() # conduct "backward history gap filling" where we push to # the shm buffer until we have history back until the @@ -201,11 +202,6 @@ async def start_backfill( while last_start_dt > backfill_until_dt: - # if timeframe == 60: - # await tractor.breakpoint() - # else: - # return - log.debug( f'Requesting {timeframe}s frame ending in {last_start_dt}' ) @@ -242,6 +238,7 @@ async def start_backfill( # f"{mkt.fqme}: skipping duplicate frame @ {next_start_dt}" # ) # starts[start_dt] += 1 + # await tractor.breakpoint() # continue # elif starts[next_start_dt] > 6: @@ -250,13 +247,12 @@ async def start_backfill( # ) # return - # only update new start point if not-yet-seen - start_dt: datetime = next_start_dt - starts[start_dt] += 1 + # # only update new start point if not-yet-seen + # starts[next_start_dt] += 1 - assert array['time'][0] == start_dt.timestamp() + assert array['time'][0] == next_start_dt.timestamp() - diff = last_start_dt - start_dt + diff = last_start_dt - next_start_dt frame_time_diff_s = diff.seconds # frame's worth of sample-period-steps, in seconds @@ -279,11 +275,12 @@ async def start_backfill( ) ln = len(to_push) if ln: - log.info(f'{ln} bars for {start_dt} -> {last_start_dt}') + log.info(f'{ln} bars for {next_start_dt} -> {last_start_dt}') else: log.warning( - f'{ln} BARS TO PUSH after diff?!: {start_dt} -> {last_start_dt}' + '0 BARS TO PUSH after diff!?\n' + f'{next_start_dt} -> {last_start_dt}' ) # bail gracefully on shm allocation overrun/full @@ -308,7 +305,7 @@ async def start_backfill( except ValueError as ve: _ve = ve log.error( - f'Shm buffer prepend OVERRUN on: {start_dt} -> {last_start_dt}?' + f'Shm prepend OVERRUN on: {next_start_dt} -> {last_start_dt}?' ) if next_prepend_index < ln: @@ -336,7 +333,7 @@ async def start_backfill( log.info( f'Shm pushed {ln} frame:\n' - f'{start_dt} -> {last_start_dt}' + f'{next_start_dt} -> {last_start_dt}' ) # FINALLY, maybe write immediately to the tsdb backend for @@ -347,7 +344,7 @@ async def start_backfill( ): log.info( f'Writing {ln} frame to storage:\n' - f'{start_dt} -> {last_start_dt}' + f'{next_start_dt} -> {last_start_dt}' ) if mkt.dst.atype not in {'crypto', 'crypto_currency'}: @@ -372,50 +369,52 @@ async def start_backfill( f'Finished filling gap to tsdb start @ {backfill_until_dt}!' ) # conduct tsdb timestamp gap detection and backfill any - # seemingly missing portions! - + # seemingly missing sequence segments.. + # TODO: ideally these never exist but somehow it seems + # sometimes we're writing zero-ed segments on certain + # (teardown) cases? from ._timeseries import detect_null_time_gap - indices: tuple | None = detect_null_time_gap(shm) - if indices: + gap_indices: tuple | None = detect_null_time_gap(shm) + while gap_indices: ( istart, start, end, iend, - ) = indices + ) = gap_indices + + start_dt = from_timestamp(start) + end_dt = from_timestamp(end) ( array, next_start_dt, next_end_dt, ) = await get_hist( timeframe, - start_dt=from_timestamp(start), - end_dt=from_timestamp(end), + start_dt=start_dt, + end_dt=end_dt, ) + await shm_push_in_between( shm, array, prepend_index=iend, update_start_on_prepend=False, ) + + # TODO: UI side needs IPC event to update.. + # - make sure the UI actually always handles + # this update! + # - remember that in the display side, only refersh this + # if the respective history is actually "in view". 
+ # loop await sampler_stream.send({ 'broadcast_all': { 'backfilling': True }, }) - indices: tuple | None = detect_null_time_gap(shm) - if indices: - ( - istart, - start, - end, - iend, - ) = indices - await tractor.breakpoint() - - # TODO: can we only trigger this if the respective - # history in "in view"?!? + gap_indices: tuple | None = detect_null_time_gap(shm) # XXX: extremely important, there can be no checkpoints # in the block above to avoid entering new ``frames`` From 75ff3921b6a0decaf7352d231c6ec98e44524caf Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 19:34:34 -0400 Subject: [PATCH 41/85] ib: fix mega borked hist queries on gappy assets Explains why stuff always seemed wrong before XD Previously whenever a time-gappy asset (like a stock due to it's venue operating hours) was being loaded, we weren't querying for a "durations worth" of bars and this was causing all sorts of actual gaps in our data set that shouldn't exist.. Fix that by always attempting to retrieve a min aggregate-time's worth/duration of bars/datums in the history manager. Actually, i implemented this in both the feed and api layers for this backend since it doesn't seem to strictly work just implementing it at the `Client.bars()` level, not sure why but.. Also, buncha `ruff` linting cleanups and fix the logger nameeee, lel. --- piker/brokers/ib/_util.py | 14 +++------- piker/brokers/ib/api.py | 34 +++++++++++++++++++++--- piker/brokers/ib/feed.py | 54 +++++++++++++++++++++++++++------------ 3 files changed, 71 insertions(+), 31 deletions(-) diff --git a/piker/brokers/ib/_util.py b/piker/brokers/ib/_util.py index 585ea18dc..5e64ab0b9 100644 --- a/piker/brokers/ib/_util.py +++ b/piker/brokers/ib/_util.py @@ -21,22 +21,14 @@ ''' from __future__ import annotations from functools import partial -from typing import ( - Literal, - TYPE_CHECKING, -) +from typing import Literal import subprocess import tractor -from .._util import log - -if TYPE_CHECKING: - from .api import ( - MethodProxy, - ib_Client - ) +from .._util import get_logger +log = get_logger('piker.brokers.ib') _reset_tech: Literal[ 'vnc', diff --git a/piker/brokers/ib/api.py b/piker/brokers/ib/api.py index a5069e95d..5d49b14e9 100644 --- a/piker/brokers/ib/api.py +++ b/piker/brokers/ib/api.py @@ -423,7 +423,7 @@ async def bars( # optional "duration of time" equal to the # length of the returned history frame. - duration: Optional[str] = None, + duration: str | None = None, **kwargs, @@ -475,6 +475,8 @@ async def bars( # whatToShow='MIDPOINT', # whatToShow='TRADES', ) + + # tail case if no history for range or none prior. if not bars: # NOTE: there's 2 cases here to handle (and this should be # read alongside the implementation of @@ -489,6 +491,32 @@ async def bars( # rewrite the method in the first case? right now there's no # way to detect a timeout. + # NOTE XXX: ensure minimum duration in bars B) + # => we recursively call this method until we get at least + # as many bars such that they sum in aggregate to the the + # desired total time (duration) at most. 
+ elif ( + end_dt + and ( + (len(bars) * sample_period_s) < dt_duration.in_seconds() + ) + ): + log.warning( + f'Recursing to get more bars from {end_dt} for {dt_duration}' + ) + end_dt -= dt_duration + ( + r_bars, + r_arr, + r_duration, + ) = await self.bars( + fqme, + start_dt=start_dt, + end_dt=end_dt, + ) + r_bars.extend(bars) + bars = r_bars + nparr = bars_to_np(bars) return bars, nparr, dt_duration @@ -921,7 +949,7 @@ async def get_quote( done, pending = await asyncio.wait( [ready], - timeout=0.1, + timeout=0.01, ) if ready in done: break @@ -1401,7 +1429,7 @@ async def open_client_proxies() -> tuple[ # TODO: maybe this should be the default in tractor? key=tractor.current_actor().uid, - ) as (cache_hit, (clients, from_aio)), + ) as (cache_hit, (clients, _)), AsyncExitStack() as stack ): diff --git a/piker/brokers/ib/feed.py b/piker/brokers/ib/feed.py index b4edae17f..d855539a7 100644 --- a/piker/brokers/ib/feed.py +++ b/piker/brokers/ib/feed.py @@ -30,8 +30,8 @@ from math import isnan import time from typing import ( + Any, Callable, - Optional, Awaitable, ) @@ -180,8 +180,8 @@ async def open_history_client( async def get_hist( timeframe: float, - end_dt: Optional[datetime] = None, - start_dt: Optional[datetime] = None, + end_dt: datetime | None = None, + start_dt: datetime | None = None, ) -> tuple[np.ndarray, str]: nonlocal max_timeout, mean, count @@ -192,6 +192,7 @@ async def get_hist( fqme, timeframe, end_dt=end_dt, + start_dt=start_dt, ) latency = time.time() - query_start if ( @@ -325,6 +326,7 @@ async def wait_on_data_reset( _data_resetter_task: trio.Task | None = None _failed_resets: int = 0 + async def get_bars( proxy: MethodProxy, @@ -333,6 +335,7 @@ async def get_bars( # blank to start which tells ib to look up the latest datum end_dt: str = '', + start_dt: str | None = '', # TODO: make this more dynamic based on measured frame rx latency? # how long before we trigger a feed reset (seconds) @@ -387,15 +390,31 @@ async def query(): bars, bars_array, dt_duration = out + # not enough bars signal, likely due to venue + # operational gaps. 
+ too_little: bool = False if ( - not bars - and end_dt - ): - log.warning( - f'History is blank for {dt_duration} from {end_dt}' + end_dt + and ( + not bars + or (too_little := + start_dt + and (len(bars) * timeframe) + < dt_duration.in_seconds() + ) ) - end_dt -= dt_duration - continue + ): + if ( + end_dt + or too_little + ): + log.warning( + f'History is blank for {dt_duration} from {end_dt}' + ) + end_dt -= dt_duration + continue + + raise NoData(f'{end_dt}') if bars_array is None: raise SymbolNotFound(fqme) @@ -544,6 +563,7 @@ async def query(): await reset_done.wait() _data_resetter_task = None if unset_resetter else _data_resetter_task + assert result return result, data_cs is not None @@ -602,13 +622,12 @@ async def _setup_quote_stream( ''' global _quote_streams - to_trio.send_nowait(None) - async with load_aio_clients( disconnect_on_exit=False, ) as accts2clients: caccount_name, client = get_preferred_data_client(accts2clients) contract = contract or (await client.find_contract(symbol)) + to_trio.send_nowait(contract) # cuz why not ticker: Ticker = client.ib.reqMktData(contract, ','.join(opts)) # NOTE: it's batch-wise and slow af but I guess could @@ -700,7 +719,9 @@ async def open_aio_quote_stream( symbol=symbol, contract=contract, - ) as (first, from_aio): + ) as (contract, from_aio): + + assert contract # cache feed for later consumers _quote_streams[symbol] = from_aio @@ -783,7 +804,6 @@ async def get_mkt_info( # bs_fqme, _, broker = fqme.partition('.') proxy: MethodProxy - get_details: bool = False if proxy is not None: client_ctx = nullcontext(proxy) else: @@ -800,7 +820,6 @@ async def get_mkt_info( raise # TODO: more consistent field translation - init_info: dict = {} atype = _asset_type_map[con.secType] if atype == 'commodity': @@ -912,7 +931,8 @@ async def stream_quotes( con: Contract = details.contract first_ticker: Ticker = await proxy.get_quote(contract=con) first_quote: dict = normalize(first_ticker) - log.runtime(f'FIRST QUOTE: {first_quote}') + + log.warning(f'FIRST QUOTE: {first_quote}') # TODO: we should instead spawn a task that waits on a feed to start # and let it wait indefinitely..instead of this hard coded stuff. @@ -1045,7 +1065,7 @@ async def open_symbol_search( await ctx.started({}) async with ( - open_client_proxies() as (proxies, clients), + open_client_proxies() as (proxies, _), open_data_client() as data_proxy, ): async with ctx.open_stream() as stream: From 937d8c410df121be3fdeb0b6cfebb1640f8d7687 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 8 Jun 2023 19:54:24 -0400 Subject: [PATCH 42/85] binance: add futes API link, freeze the agg tradez struct --- piker/brokers/binance.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/piker/brokers/binance.py b/piker/brokers/binance.py index 6242d0ba9..03840568c 100644 --- a/piker/brokers/binance.py +++ b/piker/brokers/binance.py @@ -55,7 +55,7 @@ DataUnavailable, ) from ._util import ( - log, + get_logger, get_console_log, ) from piker.data.types import Struct @@ -67,6 +67,9 @@ ) +log = get_logger(__name__) + + _url = 'https://api.binance.com' @@ -351,7 +354,7 @@ async def get_client() -> Client: # validation type -class AggTrade(Struct): +class AggTrade(Struct, frozen=True): e: str # Event type E: int # Event time s: str # Symbol @@ -455,10 +458,16 @@ async def stream_messages( def make_sub(pairs: list[str], sub_name: str, uid: int) -> dict[str, str]: - """Create a request subscription packet dict. + ''' + Create a request subscription packet dict. 
- https://binance-docs.github.io/apidocs/spot/en/#live-subscribing-unsubscribing-to-streams - """ + - spot: + https://binance-docs.github.io/apidocs/spot/en/#live-subscribing-unsubscribing-to-streams + + - futes: + https://binance-docs.github.io/apidocs/futures/en/#websocket-market-streams + + ''' return { 'method': 'SUBSCRIBE', 'params': [ From 5251561e20a8c7e96ee8631e26f2dbb6dee5999a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 13 Jun 2023 15:23:46 -0400 Subject: [PATCH 43/85] TOCHERRY: into #486, add polars/apache deps for nix --- develop.nix | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/develop.nix b/develop.nix index 30ae2f08e..205ea1143 100644 --- a/develop.nix +++ b/develop.nix @@ -6,12 +6,22 @@ stdenv.mkDerivation { # System requirements. readline - # Python requirements (enough to get a virtualenv going). - python310Full + # TODO: hacky non-poetry install stuff we need to get rid of!! virtualenv setuptools - pyqt5 pip + + # obviously, and see below for hacked linking + pyqt5 + + # Python requirements (enough to get a virtualenv going). + python310Full + + # numerics deps + python310Packages.python-Levenshtein + python310Packages.fastparquet + python310Packages.polars + ]; src = null; shellHook = '' From 0484e9738264b8960336fd3125e4455dc1568f3b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 14 Jun 2023 14:29:53 -0400 Subject: [PATCH 44/85] Try to not overrun shm during gap backfilling.. --- piker/data/history.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/piker/data/history.py b/piker/data/history.py index fce81063d..b2cf6b680 100644 --- a/piker/data/history.py +++ b/piker/data/history.py @@ -396,9 +396,15 @@ async def start_backfill( end_dt=end_dt, ) + # XXX TODO: pretty sure if i plot tsla, btcusdt.binance + # and mnq.cme.ib this causes a Qt crash XXDDD + + # make sure we don't overrun the buffer start + len_to_push: int = min(iend, array.size) + to_push: np.ndarray = array[-len_to_push:] await shm_push_in_between( shm, - array, + to_push, prepend_index=iend, update_start_on_prepend=False, ) From 89479322898a1d2940281504fce8125d1db937c5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 14 Jun 2023 14:38:50 -0400 Subject: [PATCH 45/85] Use last 16 steps in period detection, not first 16.. --- piker/ui/_dataviz.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/piker/ui/_dataviz.py b/piker/ui/_dataviz.py index 242386fa0..9da45f448 100644 --- a/piker/ui/_dataviz.py +++ b/piker/ui/_dataviz.py @@ -371,8 +371,8 @@ def index_step( # the source data. if self._index_step is None: - index = self.shm.array[self.index_field] - isample = index[:16] + index: np.ndarray = self.shm.array[self.index_field] + isample: np.ndarray = index[-16:] mxdiff: None | float = None for step in np.diff(isample): From 2a1835843fed8d0624212f75e2d791530d1bb0f2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 14 Jun 2023 15:25:41 -0400 Subject: [PATCH 46/85] Drop `wap_in_history` stuff from display loop It's no longer part of the default OHLCV array-buffer schema and just generally we should be processing and managing **any** non source data in the FSP subsystem(s) despite it maybe being provided as a default by some backends. 
--- piker/ui/_display.py | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/piker/ui/_display.py b/piker/ui/_display.py index 775e6e08c..367023b80 100644 --- a/piker/ui/_display.py +++ b/piker/ui/_display.py @@ -203,7 +203,6 @@ class DisplayState(Struct): vlm_chart: ChartPlotWidget | None = None vlm_sticky: YAxisLabel | None = None - wap_in_history: bool = False async def increment_history_view( @@ -298,7 +297,6 @@ async def graphics_update_loop( # min_istream: tractor.MsgStream, pis: dict[str, list[pgo.PlotItem, pgo.PlotItem]] = {}, - wap_in_history: bool = False, vlm_charts: dict[str, ChartPlotWidget] = {}, ) -> None: @@ -452,7 +450,7 @@ async def graphics_update_loop( await trio.sleep(0) if ds.hist_vars['i_last'] < ds.hist_vars['i_last_append']: - breakpoint() + await tractor.breakpoint() # main real-time quotes update loop stream: tractor.MsgStream @@ -478,7 +476,6 @@ async def graphics_update_loop( for fqme, quote in quotes.items(): ds = dss[fqme] ds.quotes = quote - rt_pi, hist_pi = pis[fqme] # chart isn't active/shown so skip render cycle and @@ -509,7 +506,6 @@ def graphics_update_cycle( ds: DisplayState, quote: dict, - wap_in_history: bool = False, trigger_all: bool = False, # flag used by prepend history updates prepend_update_index: int | None = None, @@ -674,10 +670,6 @@ def graphics_update_cycle( ds.last_price_sticky.update_from_data(*end_ic) ds.hist_last_price_sticky.update_from_data(*end_ic) - # update vwap overlay line - # if wap_in_history: - # chart.get_viz('bar_wap').update_graphics() - # update OHLC chart last bars # TODO: fix the only last uppx stuff.... main_viz.draw_last() # only_last_uppx=True) @@ -1378,21 +1370,6 @@ async def display_symbol_data( loglevel, ) - # XXX: FOR SOME REASON THIS IS CAUSING HANGZ!?! - # plot historical vwap if available - wap_in_history = False - # if ( - # brokermod._show_wap_in_history - # and 'bar_wap' in bars.dtype.fields - # ): - # wap_in_history = True - # rt_chart.draw_curve( - # name='bar_wap', - # shm=ohlcv, - # color='default_light', - # add_label=False, - # ) - godwidget.resize_all() await trio.sleep(0) @@ -1516,7 +1493,6 @@ async def display_symbol_data( # min_istream, pis, - wap_in_history, vlm_charts, ) From dd3e4b5a1f95ce6c0ec139f61bd59efce5912caf Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 15 Jun 2023 11:43:58 -0400 Subject: [PATCH 47/85] Emit backfill details in broadcasts Send both the `Viz.name` and `timeframe: int` so that the UI side can match against them and only update a lone curve in a single plot. 
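A subscriber task can then cheaply check whether a given broadcast
concerns the curve it is rendering before committing to a (relatively
expensive) hard redraw. A rough sketch of that predicate (a
hypothetical helper; the real UI-side handling lands in
`piker/ui/_display.py` in the following patch) assuming the
subscriber-side msg shape `{'backfilling': (fqme, timeframe)}`:

    from typing import Any

    def wants_redraw(
        msg: dict[str, Any],
        viz_name: str,
        period_s: int,
    ) -> bool:
        # hypothetical helper: match a sampler broadcast against
        # the (fqme, timeframe) pair this task is rendering.
        bf = msg.get('backfilling')
        if not bf:
            return False

        fqme, timeframe = bf
        return (
            fqme == viz_name
            and timeframe == period_s
        )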
--- piker/data/history.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/piker/data/history.py b/piker/data/history.py index b2cf6b680..d6f5fb5fd 100644 --- a/piker/data/history.py +++ b/piker/data/history.py @@ -294,7 +294,7 @@ async def start_backfill( ) await sampler_stream.send({ 'broadcast_all': { - 'backfilling': True + 'backfilling': (mkt.fqme, timeframe), }, }) @@ -323,7 +323,7 @@ async def start_backfill( ) await sampler_stream.send({ 'broadcast_all': { - 'backfilling': True + 'backfilling': (mkt.fqme, timeframe), }, }) @@ -417,7 +417,7 @@ async def start_backfill( # loop await sampler_stream.send({ 'broadcast_all': { - 'backfilling': True + 'backfilling': (mkt.fqme, timeframe), }, }) gap_indices: tuple | None = detect_null_time_gap(shm) From e1be098406d98618e645cc40a38223d0be4542b2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 15 Jun 2023 12:06:58 -0400 Subject: [PATCH 48/85] Only hard re-render `Viz`s matching backfill deats Avoid unnecessarily re-rendering the wrong (1min OHLC history) chart and/or other such charts with update tasks listening to the sampler stream. Instead only redraw in tasks which are updating vizs which match the actual details of the backfill event. We can probably also eventually match against a range tuple (emitted in the msg) and then have the task further only update the formatter layer unless the range is actually in view? --- piker/data/_sampling.py | 6 +++++- piker/ui/_display.py | 47 ++++++++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index 01c5a35e6..b42d0d8f1 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -296,7 +296,11 @@ async def broadcast_all( self, info: dict | None = None, ) -> None: - for period_s in self.subscribers: + + # NOTE: take a copy of subs since removals can happen + # during the broadcast checkpoint which can cause + # a `RuntimeError` on interation of the underlying `dict`. + for period_s in list(self.subscribers): await self.broadcast( period_s, info=info, diff --git a/piker/ui/_display.py b/piker/ui/_display.py index 367023b80..e3d06e1ee 100644 --- a/piker/ui/_display.py +++ b/piker/ui/_display.py @@ -209,9 +209,11 @@ async def increment_history_view( # min_istream: tractor.MsgStream, ds: DisplayState, ): - hist_chart = ds.hist_chart - hist_viz = ds.hist_viz + hist_chart: ChartPlotWidget = ds.hist_chart + hist_viz: Viz = ds.hist_viz + viz: Viz = ds.viz assert 'hist' in hist_viz.shm.token['shm_name'] + name: str = hist_viz.name # TODO: seems this is more reliable at keeping the slow # chart incremented in view more correctly? @@ -221,15 +223,13 @@ async def increment_history_view( # - we need increment logic that only does the view shift # call when the uppx permits/needs it - async with open_sample_stream(1.) as min_istream: - - # draw everything from scratch on first entry! - for curve_name, hist_viz in hist_chart._vizs.items(): - log.info(f'FORCING CURVE REDRAW -> {curve_name}') - hist_viz.update_graphics(force_redraw=True) + # draw everything from scratch on first entry! + for curve_name, hist_viz in hist_chart._vizs.items(): + log.info(f'Forcing hard redraw -> {curve_name}') + hist_viz.update_graphics(force_redraw=True) + async with open_sample_stream(1.) 
as min_istream: async for msg in min_istream: - # print(f'SAMPLER MSG: {msg}') profiler = Profiler( msg=f'History chart cycle for: `{ds.fqme}`', @@ -239,19 +239,28 @@ async def increment_history_view( # ms_threshold=4, ) + # NOTE: when a backfill msg is broadcast from the + # history mgmt layer, we match against the equivalent + # `Viz` and "hard re-render" (i.e. re-allocate the + # in-mem xy-array formats managed in + # `.data._formatters) its curve graphics to fill + # on-chart gaps. + # TODO: specifically emit/handle range tuples? + # - samplerd could emit the actual update range via + # tuple and then we only enter the below block if that + # range is detected as in-view? if ( - 'backfilling' in msg + (bf_wut := msg.get('backfilling', False)) ): - # for curve_name, hist_viz in hist_chart._vizs.items(): - print(f'FORCING REDRAW!! {hist_viz.name}') - hist_viz.update_graphics(force_redraw=True) + viz_name, timeframe = bf_wut + if viz_name == name: + log.info(f'Forcing hard redraw -> {name}@{timeframe}') + match timeframe: + case 60: + hist_viz.update_graphics(force_redraw=True) + case 1: + viz.update_graphics(force_redraw=True) - # l3 = ds.viz.shm.array[-3:] - # print( - # f'fast step for {ds.flume.mkt.fqme}:\n' - # f'{list(l3["time"])}\n' - # f'{l3}\n' - # ) # check if slow chart needs an x-domain shift and/or # y-range resize. ( From 33ec27715b5ef13503156eb7e879d29936537047 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 15 Jun 2023 12:59:50 -0400 Subject: [PATCH 49/85] Sync shm mod with dev version in `tractor`, drop buffer sizing vars, require `size: int` to all allocators --- piker/data/_sharedmem.py | 56 +++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/piker/data/_sharedmem.py b/piker/data/_sharedmem.py index 3366621b2..78f66f634 100644 --- a/piker/data/_sharedmem.py +++ b/piker/data/_sharedmem.py @@ -37,15 +37,6 @@ from .types import Struct -# how much is probably dependent on lifestyle -_secs_in_day = int(60 * 60 * 24) -# we try for a buncha times, but only on a run-every-other-day kinda week. -_days_worth = 16 -_default_size = _days_worth * _secs_in_day -# where to start the new data append index -_rt_buffer_start = int((_days_worth - 1) * _secs_in_day) - - def cuckoff_mantracker(): ''' Disable all ``multiprocessing``` "resource tracking" machinery since @@ -70,7 +61,6 @@ def ensure_running(self): mantracker._resource_tracker = ManTracker() mantracker.register = mantracker._resource_tracker.register mantracker.ensure_running = mantracker._resource_tracker.ensure_running - # ensure_running = mantracker._resource_tracker.ensure_running mantracker.unregister = mantracker._resource_tracker.unregister mantracker.getfd = mantracker._resource_tracker.getfd @@ -442,10 +432,10 @@ def flush(self) -> None: def open_shm_array( - - key: Optional[str] = None, - size: int = _default_size, # see above - dtype: Optional[np.dtype] = None, + size: int, + key: str | None = None, + dtype: np.dtype | None = None, + append_start_index: int | None = None, readonly: bool = False, ) -> ShmArray: @@ -510,10 +500,13 @@ def open_shm_array( # ``ShmArray._start.value: int = 0`` and the yet-to-be written # real-time section will start at ``ShmArray.index: int``. - # this sets the index to 3/4 of the length of the buffer - # leaving a "days worth of second samples" for the real-time - # section. 
- last.value = first.value = _rt_buffer_start + # this sets the index to nearly 2/3rds into the the length of + # the buffer leaving at least a "days worth of second samples" + # for the real-time section. + if append_start_index is None: + append_start_index = round(size * 0.616) + + last.value = first.value = append_start_index shmarr = ShmArray( array, @@ -527,7 +520,6 @@ def open_shm_array( # "unlink" created shm on process teardown by # pushing teardown calls onto actor context stack - stack = tractor.current_actor().lifetime_stack stack.callback(shmarr.close) stack.callback(shmarr.destroy) @@ -622,7 +614,10 @@ def attach_shm_array( def maybe_open_shm_array( key: str, - dtype: Optional[np.dtype] = None, + size: int, + dtype: np.dtype | None = None, + append_start_index: int | None = None, + readonly: bool = False, **kwargs, ) -> tuple[ShmArray, bool]: @@ -643,11 +638,16 @@ def maybe_open_shm_array( use ``attach_shm_array``. ''' - size = kwargs.pop('size', _default_size) try: # see if we already know this key token = _known_tokens[key] - return attach_shm_array(token=token, **kwargs), False + return ( + attach_shm_array( + token=token, + readonly=readonly, + ), + False, + ) except KeyError: log.debug(f"Could not find {key} in shms cache") if dtype: @@ -666,8 +666,16 @@ def maybe_open_shm_array( # Attempt to open a block and expect # to fail if a block has been allocated # on the OS by someone else. - return open_shm_array(key=key, dtype=dtype, **kwargs), True - + return ( + open_shm_array( + key=key, + size=size, + dtype=dtype, + append_start_index=append_start_index, + readonly=readonly, + ), + True, + ) def try_read( array: np.ndarray From 9eeea51165d5c58e21dfb6c23db486909bc5d188 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 15 Jun 2023 13:04:21 -0400 Subject: [PATCH 50/85] Define shm buffer sizing in `.data.history` Also adjust sizing such that the history buffer will backfill the last six years by default (in 1m OHLC) and the hft buffer will do only 3 days worth. Also ensure the fsp layer passes the src shm's buffer size when allocating since the size is now required by allocators in the shm apis. --- piker/data/history.py | 27 +++++++++++++++++++++++++-- piker/fsp/_api.py | 6 ++++-- piker/ui/_fsp.py | 8 +++++++- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/piker/data/history.py b/piker/data/history.py index d6f5fb5fd..99fd425df 100644 --- a/piker/data/history.py +++ b/piker/data/history.py @@ -49,7 +49,6 @@ from ._sharedmem import ( maybe_open_shm_array, ShmArray, - _secs_in_day, ) from ._source import def_iohlcv_fields from ._sampling import ( @@ -65,6 +64,26 @@ from .feed import _FeedsBus +# `ShmArray` buffer sizing configuration: +_mins_in_day = int(60 * 24) +# how much is probably dependent on lifestyle +# but we reco a buncha times (but only on a +# run-every-other-day kinda week). +_secs_in_day = int(60 * _mins_in_day) +_days_in_week: int = 7 + +_days_worth: int = 3 +_default_hist_size: int = 6 * 365 * _mins_in_day +_hist_buffer_start = int( + _default_hist_size - round(7 * _mins_in_day) +) + +_default_rt_size: int = _days_worth * _secs_in_day +# NOTE: start the append index in rt buffer such that 1 day's worth +# can be appenened before overrun. 
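# NOTE: for reference, plugging in the values above gives:
#   _default_hist_size = 6 * 365 * 1440      -> 3,153,600 x 1m rows
#   _hist_buffer_start = 3,153,600 - 10,080  -> 3,143,520
#       (~6yrs of backfill space, ~1 week of 1m rows left for appends)
#   _default_rt_size   = 3 * 86,400          -> 259,200 x 1s rows
# and the rt append-start index defined just below lands at
#   (3 - 1) * 86,400 -> 172,800
# leaving exactly 1 day (86,400 rows) of live 1s appends before overrun.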
+_rt_buffer_start = int((_days_worth - 1) * _secs_in_day) + + def diff_history( array: np.ndarray, append_until_dt: datetime | None = None, @@ -812,6 +831,9 @@ async def manage_history( # (maybe) allocate shm array for this broker/symbol which will # be used for fast near-term history capture and processing. hist_shm, opened = maybe_open_shm_array( + size=_default_hist_size, + append_start_index=_hist_buffer_start, + key=f'piker.{service}[{uuid[:16]}].{fqme}.hist', # use any broker defined ohlc dtype: @@ -829,6 +851,8 @@ async def manage_history( ) rt_shm, opened = maybe_open_shm_array( + size=_default_rt_size, + append_start_index=_rt_buffer_start, key=f'piker.{service}[{uuid[:16]}].{fqme}.rt', # use any broker defined ohlc dtype: @@ -836,7 +860,6 @@ async def manage_history( # we expect the sub-actor to write readonly=False, - size=3*_secs_in_day, ) # (for now) set the rt (hft) shm array with space to prepend diff --git a/piker/fsp/_api.py b/piker/fsp/_api.py index 11d1e7dca..92f8f2711 100644 --- a/piker/fsp/_api.py +++ b/piker/fsp/_api.py @@ -177,6 +177,7 @@ def fsp( def maybe_mk_fsp_shm( sym: str, target: Fsp, + size: int, readonly: bool = True, ) -> (str, ShmArray, bool): @@ -185,7 +186,8 @@ def maybe_mk_fsp_shm( exists, otherwise load the shm already existing for that token. ''' - assert isinstance(sym, str), '`sym` should be file-name-friendly `str`' + if not isinstance(sym, str): + raise ValueError('`sym: str` should be file-name-friendly') # TODO: load output types from `Fsp` # - should `index` be a required internal field? @@ -204,7 +206,7 @@ def maybe_mk_fsp_shm( shm, opened = maybe_open_shm_array( key, - # TODO: create entry for each time frame + size=size, dtype=fsp_dtype, readonly=True, ) diff --git a/piker/ui/_fsp.py b/piker/ui/_fsp.py index b4aa2b106..f00b1e3d8 100644 --- a/piker/ui/_fsp.py +++ b/piker/ui/_fsp.py @@ -377,7 +377,7 @@ def __init__( # TODO: make this a `.src_flume` and add # a `dst_flume`? # (=> but then wouldn't this be the most basic `Viz`?) - self.flume = flume + self.flume: Flume = flume def rr_next_portal(self) -> tractor.Portal: name, portal = next(self._rr_next_actor) @@ -479,9 +479,15 @@ async def start_engine_task( fqme: str = src_mkt.get_fqme(delim_char='') # allocate an output shm array + + # NOTE: rn we assume the HFT 1s period chart + # is always used! + src_shm: ShmArray = self.flume._rt_shm + key, dst_shm, opened = maybe_mk_fsp_shm( fqme, target=target, + size=src_shm._token.size, readonly=True, ) From 58c096bfad2c299fba45b80c77973e85dc8a0082 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 19 Jun 2023 13:36:06 -0400 Subject: [PATCH 51/85] Bleh go back to using pdbp for REPL in anal --- piker/storage/cli.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/piker/storage/cli.py b/piker/storage/cli.py index f7deb92d9..8cb394401 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -132,10 +132,16 @@ def anal( ) -> np.ndarray: - # import tractor + import tractor async def main(): - async with open_storage_client() as (mod, client): + async with ( + open_piker_runtime( + 'tsdb_polars_anal', + # enable_modules=['piker.service._ahab'] + ), + open_storage_client() as (mod, client), + ): syms: list[str] = await client.list_keys() print(f'{len(syms)} FOUND for {mod.name}') @@ -154,14 +160,13 @@ async def main(): df = tsmod.with_dts(src_df) gaps: pl.DataFrame = tsmod.detect_time_gaps(df) + if gaps: + print(f'Gaps found:\n{gaps}') + # TODO: something better with tab completion.. 
# is there something more minimal but nearly as # functional as ipython? - import code - code.interact( - f'df: {df}\ngaps: {gaps}\n', - local=locals() - ) + await tractor.breakpoint() trio.run(main) From d704d631bac3e7e13f1047d9c61e28fdc116ac92 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 19 Jun 2023 14:29:05 -0400 Subject: [PATCH 52/85] Add `store ldshm` subcmd Changed from the old `store clone` to instead simply load any shm buffer matching a user provided `FQME: str` pattern; writing to parquet file is only done if an explicit option flag is passed by user. Implement new `iter_dfs_from_shms()` generator which allows interatively loading both 1m and 1s buffers delivering the `Path`, `ShmArray` and `polars.DataFrame` instances per matching file B) Also add a todo for a `NativeStorageClient.clear_range()` method. --- piker/storage/cli.py | 226 ++++++++++++++++++++++++-------------- piker/storage/nativedb.py | 27 ++++- 2 files changed, 170 insertions(+), 83 deletions(-) diff --git a/piker/storage/cli.py b/piker/storage/cli.py index 8cb394401..73cf737ee 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -20,10 +20,13 @@ """ from __future__ import annotations from pathlib import Path +import time +from typing import Generator # from typing import TYPE_CHECKING import polars as pl import numpy as np +import tractor # import pendulum from rich.console import Console import trio @@ -32,6 +35,16 @@ from piker.service import open_piker_runtime from piker.cli import cli +from piker.config import get_conf_dir +from piker.data import ( + maybe_open_shm_array, + def_iohlcv_fields, + ShmArray, +) +from piker.data.history import ( + _default_hist_size, + _default_rt_size, +) from . import ( log, ) @@ -132,8 +145,6 @@ def anal( ) -> np.ndarray: - import tractor - async def main(): async with ( open_piker_runtime( @@ -171,25 +182,90 @@ async def main(): trio.run(main) +def iter_dfs_from_shms(fqme: str) -> Generator[ + tuple[Path, ShmArray, pl.DataFrame], + None, + None, +]: + # shm buffer size table based on known sample rates + sizes: dict[str, int] = { + 'hist': _default_hist_size, + 'rt': _default_rt_size, + } + + # load all detected shm buffer files which have the + # passed FQME pattern in the file name. + shmfiles: list[Path] = [] + shmdir = Path('/dev/shm/') + + for shmfile in shmdir.glob(f'*{fqme}*'): + filename: str = shmfile.name + + # skip index files + if ( + '_first' in filename + or '_last' in filename + ): + continue + + assert shmfile.is_file() + log.debug(f'Found matching shm buffer file: {filename}') + shmfiles.append(shmfile) + + for shmfile in shmfiles: + + # lookup array buffer size based on file suffix + # being either .rt or .hist + size: int = sizes[shmfile.name.rsplit('.')[-1]] + + # attach to any shm buffer, load array into polars df, + # write to local parquet file. 
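        # e.g. (hypothetical uuid) a matching kucoin feed shows up as
        # a buffer file pair along the lines of:
        #   piker.brokerd[3595d316-3c15-46].xmrusdt.kucoin.hist -> sizes['hist']
        #   piker.brokerd[3595d316-3c15-46].xmrusdt.kucoin.rt   -> sizes['rt']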
+ shm, opened = maybe_open_shm_array( + key=shmfile.name, + size=size, + dtype=def_iohlcv_fields, + readonly=True, + ) + assert not opened + ohlcv = shm.array + + start = time.time() + + # XXX: thanks to this SO answer for this conversion tip: + # https://stackoverflow.com/a/72054819 + df = pl.DataFrame({ + field_name: ohlcv[field_name] + for field_name in ohlcv.dtype.fields + }) + delay: float = round( + time.time() - start, + ndigits=6, + ) + log.info( + f'numpy -> polars conversion took {delay} secs\n' + f'polars df: {df}' + ) + + yield ( + shmfile, + shm, + df, + ) + + @store.command() -def clone( +def ldshm( fqme: str, + + write_parquet: bool = False, + ) -> None: - import time - from piker.config import get_conf_dir - from piker.data import ( - maybe_open_shm_array, - def_iohlcv_fields, - ) - import polars as pl - - # TODO: actually look up an existing shm buf (set) from - # an fqme and file name parsing.. - # open existing shm buffer for kucoin backend - key: str = 'piker.brokerd[3595d316-3c15-46].xmrusdt.kucoin.hist' - shmpath: Path = Path('/dev/shm') / key - assert shmpath.is_file() + ''' + Linux ONLY: load any fqme file name matching shm buffer from + /dev/shm/ into an OHLCV numpy array and polars DataFrame, + optionally write to .parquet file. + ''' async def main(): async with ( open_piker_runtime( @@ -197,73 +273,59 @@ async def main(): enable_modules=['piker.data._sharedmem'], ), ): - # attach to any shm buffer, load array into polars df, - # write to local parquet file. - shm, opened = maybe_open_shm_array( - key=key, - dtype=def_iohlcv_fields, - ) - assert not opened - ohlcv = shm.array - - start = time.time() - - # XXX: thanks to this SO answer for this conversion tip: - # https://stackoverflow.com/a/72054819 - df = pl.DataFrame({ - field_name: ohlcv[field_name] - for field_name in ohlcv.dtype.fields - }) - delay: float = round( - time.time() - start, - ndigits=6, - ) - print( - f'numpy -> polars conversion took {delay} secs\n' - f'polars df: {df}' - ) - # compute ohlc properties for naming - times: np.ndarray = ohlcv['time'] - secs: float = times[-1] - times[-2] - if secs < 1.: - breakpoint() - raise ValueError( - f'Something is wrong with time period for {shm}:\n{ohlcv}' - ) - - timeframe: str = f'{secs}s' - - # write to parquet file - datadir: Path = get_conf_dir() / 'parqdb' - if not datadir.is_dir(): - datadir.mkdir() - - path: Path = datadir / f'{fqme}.{timeframe}.parquet' - - # write to fs - start = time.time() - df.write_parquet(path) - delay: float = round( - time.time() - start, - ndigits=6, - ) - print( - f'parquet write took {delay} secs\n' - f'file path: {path}' - ) + df: pl.DataFrame | None = None + for shmfile, shm, df in iter_dfs_from_shms(fqme): - # read back from fs - start = time.time() - read_df: pl.DataFrame = pl.read_parquet(path) - delay: float = round( - time.time() - start, - ndigits=6, - ) - print( - f'parquet read took {delay} secs\n' - f'polars df: {read_df}' - ) + # compute ohlc properties for naming + times: np.ndarray = shm.array['time'] + secs: float = times[-1] - times[-2] + if secs < 1.: + breakpoint() + raise ValueError( + f'Something is wrong with time period for {shm}:\n{times}' + ) + + # TODO: maybe only optionally enter this depending + # on some CLI flags and/or gap detection? + await tractor.breakpoint() + + # write to parquet file? 
+ if write_parquet: + timeframe: str = f'{secs}s' + + datadir: Path = get_conf_dir() / 'nativedb' + if not datadir.is_dir(): + datadir.mkdir() + + path: Path = datadir / f'{fqme}.{timeframe}.parquet' + + # write to fs + start = time.time() + df.write_parquet(path) + delay: float = round( + time.time() - start, + ndigits=6, + ) + log.info( + f'parquet write took {delay} secs\n' + f'file path: {path}' + ) + + # read back from fs + start = time.time() + read_df: pl.DataFrame = pl.read_parquet(path) + delay: float = round( + time.time() - start, + ndigits=6, + ) + print( + f'parquet read took {delay} secs\n' + f'polars df: {read_df}' + ) + + if df is None: + log.error(f'No matching shm buffers for {fqme} ?') trio.run(main) diff --git a/piker/storage/nativedb.py b/piker/storage/nativedb.py index 9561d4e9a..ff914245d 100644 --- a/piker/storage/nativedb.py +++ b/piker/storage/nativedb.py @@ -137,6 +137,14 @@ def mk_ohlcv_shm_keyed_filepath( return path +def unpack_fqme_from_parquet_filepath(path: Path) -> str: + + filename: str = str(path.name) + fqme, fmt_descr, suffix = filename.split('.') + assert suffix == 'parquet' + return fqme + + ohlc_key_map = None @@ -347,10 +355,27 @@ async def delete_ts( path.unlink() log.warning(f'Deleting parquet entry:\n{path}') else: - log.warning(f'No path exists:\n{path}') + log.error(f'No path exists:\n{path}') return path + # TODO: allow wiping and refetching a segment of the OHLCV timeseries + # data. + # def clear_range( + # self, + # key: str, + # start_dt: datetime, + # end_dt: datetime, + # timeframe: int | None = None, + # ) -> pl.DataFrame: + # ''' + # Clear and re-fetch a range of datums for the OHLCV time series. + + # Useful for series editing from a chart B) + + # ''' + # ... + @acm async def get_client( From cc3037149c87d07f95465326127c30b98bd90818 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 12 Jun 2023 19:51:55 -0400 Subject: [PATCH 53/85] Factor `brokerd` trade dialog init into acm Connecting to a `brokerd` daemon's trading dialog via a helper `@acm` func is handy so that arbitrary trading middleware clients **and** the ems can setup a trading dialog and, at the least, query existing position state; this is in fact our immediate need when simply querying for an account's position status in the `.accounting.cli.ledger` cli. It's now exposed (for now) as `.clearing._ems.open_brokerd_dialog()` and is called by the `Router.maybe_open_brokerd_dialog()` for every new relay allocation or paper-account engine instance. --- piker/clearing/__init__.py | 4 + piker/clearing/_ems.py | 304 +++++++++++++++++++++++-------------- 2 files changed, 191 insertions(+), 117 deletions(-) diff --git a/piker/clearing/__init__.py b/piker/clearing/__init__.py index b2cc5fa7e..ec796ac9d 100644 --- a/piker/clearing/__init__.py +++ b/piker/clearing/__init__.py @@ -23,11 +23,15 @@ open_ems, OrderClient, ) +from ._ems import ( + open_brokerd_dialog, +) __all__ = [ 'open_ems', 'OrderClient', + 'open_brokerd_dialog', ] diff --git a/piker/clearing/_ems.py b/piker/clearing/_ems.py index e41ddbf17..7abd4a61b 100644 --- a/piker/clearing/_ems.py +++ b/piker/clearing/_ems.py @@ -34,6 +34,7 @@ Callable, Hashable, Optional, + TYPE_CHECKING, ) from bidict import bidict @@ -50,14 +51,8 @@ unpack_fqme, float_digits, ) -from ..data.feed import ( - Feed, - Flume, - maybe_open_feed, -) from ..ui._notify import notify_from_ems_status_msg from ..data.types import Struct -from . 
import _paper_engine as paper from ._messages import ( Order, Status, @@ -70,6 +65,12 @@ BrokerdPosition, ) +if TYPE_CHECKING: + from ..data.feed import ( + Feed, + Flume, + ) + # TODO: numba all of this def mk_check( @@ -307,15 +308,175 @@ class TradesRelay(Struct): # map of symbols to dicts of accounts to pp msgs positions: dict[ - # brokername, acctid + # brokername, acctid -> tuple[str, str], - list[BrokerdPosition], + # fqme -> msg + dict[str, BrokerdPosition], ] # allowed account names accounts: tuple[str] +@acm +async def open_brokerd_dialog( + brokermod: ModuleType, + portal: tractor.Portal, + exec_mode: str, + fqme: str | None = None, + loglevel: str | None = None, + +) -> tuple[ + tractor.MsgStream, + # {(brokername, accountname) -> {fqme -> msg}} + dict[(str, str), dict[str, BrokerdPosition]], + list[str], +]: + ''' + Open either a live trades control dialog or a dialog with a new + paper engine instance depending on live trading support for the + broker backend, configuration, or client code usage. + + ''' + broker: str = brokermod.name + + def mk_paper_ep(): + from . import _paper_engine as paper_mod + + nonlocal brokermod, exec_mode + + # for logging purposes + brokermod = paper_mod + + # for paper mode we need to mock this trades response feed + # so we load bidir stream to a new sub-actor running + # a paper-simulator clearing engine. + + # load the paper trading engine + exec_mode = 'paper' + log.info(f'{broker}: Entering `paper` trading mode') + + # load the paper trading engine as a subactor of this emsd + # actor to simulate the real IPC load it'll have when also + # pulling data from feeds + if not fqme: + log.warning( + f'Paper engine activate for {broker} but no fqme provided?' + ) + + return paper_mod.open_paperboi( + fqme=fqme, + broker=broker, + loglevel=loglevel, + ) + + # TODO: ideally choose only one of these ep names.. + trades_endpoint: Callable + for ep_name in [ + 'trades_dialogue', + 'open_trade_dialog', + ]: + trades_endpoint = getattr( + brokermod, + ep_name, + None, + ) + break + + if ( + trades_endpoint is not None + or exec_mode != 'paper' + ): + # open live brokerd trades endpoint + open_trades_endpoint = portal.open_context( + trades_endpoint, + loglevel=loglevel, + ) + + else: + exec_mode: str = 'paper' + + @acm + async def maybe_open_paper_ep(): + if exec_mode == 'paper': + async with mk_paper_ep() as msg: + yield msg + return + + # open trades-dialog endpoint with backend broker + async with open_trades_endpoint as msg: + ctx, first = msg + + # runtime indication that the backend can't support live + # order ctrl yet, so boot the paperboi B0 + if first == 'paper': + async with mk_paper_ep() as msg: + yield msg + return + else: + # working live ep case B) + yield msg + return + + pps_by_broker_account: dict[(str, str), BrokerdPosition] = {} + + async with ( + maybe_open_paper_ep() as ( + brokerd_ctx, + (position_msgs, accounts), + ), + brokerd_ctx.open_stream() as brokerd_trades_stream, + ): + # XXX: really we only want one stream per `emsd` + # actor to relay global `brokerd` order events + # unless we're going to expect each backend to + # relay only orders affiliated with a particular + # ``trades_dialogue()`` session (seems annoying + # for implementers). So, here we cache the relay + # task and instead of running multiple tasks + # (which will result in multiples of the same + # msg being relayed for each EMS client) we just + # register each client stream to this single + # relay loop in the dialog table. 
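    # e.g. a (hypothetical) middleware consumer that only cares about
    # current position state can do roughly:
    #
    #   async with open_brokerd_dialog(
    #       brokermod,
    #       portal,
    #       exec_mode='paper',  # or 'live'
    #   ) as (brokerd_stream, pps_by_broker_account, accounts):
    #       for (broker, acct), by_fqme in pps_by_broker_account.items():
    #           ...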
+ + # begin processing order events from the target + # brokerd backend by receiving order submission + # response messages, normalizing them to EMS + # messages and relaying back to the piker order + # client set. + + # locally cache and track positions per account with + # a nested table of msgs: + # tuple(brokername, acctid) -> + # (fqme: str -> + # `BrokerdPosition`) + for msg in position_msgs: + + msg = BrokerdPosition(**msg) + log.info( + f'loading pp for {brokermod.__name__}:\n' + f'{pformat(msg.to_dict())}', + ) + + # TODO: state any mismatch here? + account: str = msg.account + assert account in accounts + + pps_by_broker_account.setdefault( + (broker, account), + {}, + )[msg.symbol] = msg + + # should be unique entries, verdad! + assert len(set(accounts)) == len(accounts) + + yield ( + brokerd_trades_stream, + pps_by_broker_account, + accounts, + ) + + class Router(Struct): ''' Order router which manages and tracks per-broker dark book, @@ -407,118 +568,25 @@ async def maybe_open_brokerd_dialog( yield relay return - def mk_paper_ep(): - nonlocal brokermod, exec_mode - - # for logging purposes - brokermod = paper - - # for paper mode we need to mock this trades response feed - # so we load bidir stream to a new sub-actor running - # a paper-simulator clearing engine. - - # load the paper trading engine - exec_mode = 'paper' - log.info(f'{broker}: Entering `paper` trading mode') - - # load the paper trading engine as a subactor of this emsd - # actor to simulate the real IPC load it'll have when also - # pulling data from feeds - return paper.open_paperboi( - fqme=fqme, - loglevel=loglevel, - ) - - trades_endpoint = getattr(brokermod, 'trades_dialogue', None) - if ( - trades_endpoint is not None - or exec_mode != 'paper' - ): - # open live brokerd trades endpoint - open_trades_endpoint = portal.open_context( - trades_endpoint, - loglevel=loglevel, - ) - - else: - exec_mode: str = 'paper' - - @acm - async def maybe_open_paper_ep(): - if exec_mode == 'paper': - async with mk_paper_ep() as msg: - yield msg - return - - # open trades-dialog endpoint with backend broker - async with open_trades_endpoint as msg: - ctx, first = msg - - # runtime indication that the backend can't support live - # order ctrl yet, so boot the paperboi B0 - if first == 'paper': - async with mk_paper_ep() as msg: - yield msg - return - else: - # working live ep case B) - yield msg - return - - positions: list[BrokerdPosition] - accounts: tuple[str] - async with ( - maybe_open_paper_ep() as ( - brokerd_ctx, - (positions, accounts), - ), - brokerd_ctx.open_stream() as brokerd_trades_stream, + async with open_brokerd_dialog( + brokermod=brokermod, + portal=portal, + exec_mode=exec_mode, + fqme=fqme, + loglevel=loglevel, + + ) as ( + brokerd_stream, + pp_msg_table, + accounts, ): - # XXX: really we only want one stream per `emsd` - # actor to relay global `brokerd` order events - # unless we're going to expect each backend to - # relay only orders affiliated with a particular - # ``trades_dialogue()`` session (seems annoying - # for implementers). So, here we cache the relay - # task and instead of running multiple tasks - # (which will result in multiples of the same - # msg being relayed for each EMS client) we just - # register each client stream to this single - # relay loop in the dialog table. - - # begin processing order events from the target - # brokerd backend by receiving order submission - # response messages, normalizing them to EMS - # messages and relaying back to the piker order - # client set. 
- - # locally cache and track positions per account with - # a nested table of msgs: - # tuple(brokername, acctid) -> - # (fqme: str -> - # `BrokerdPosition`) + # create a new relay and sync it's state according + # to brokerd-backend reported position msgs. relay = TradesRelay( - brokerd_stream=brokerd_trades_stream, - positions={}, - accounts=accounts, + brokerd_stream=brokerd_stream, + positions=pp_msg_table, + accounts=tuple(accounts), ) - for msg in positions: - - msg = BrokerdPosition(**msg) - log.info( - f'loading pp for {brokermod.__name__}:\n' - f'{pformat(msg.to_dict())}', - ) - - # TODO: state any mismatch here? - account = msg.account - assert account in accounts - - relay.positions.setdefault( - (broker, account), - {}, - )[msg.symbol] = msg - self.relays[broker] = relay # this context should block here indefinitely until @@ -550,6 +618,8 @@ async def open_trade_relays( indefinitely. ''' + from ..data.feed import maybe_open_feed + async with ( maybe_open_feed( [fqme], From b15e736e3e40b45797843dc7249dba32de737607 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 12 Jun 2023 20:19:18 -0400 Subject: [PATCH 54/85] Change `piker symbol-info` -> `mkt-info` As part of bringing the brokerd agnostic APIs up to date and modernizing wrapping CLIs, this adds a new sub-cmd to allow more or less directly calling the `.get_mkt_info()` broker mod endpoint and dumping the both the backend specific `Pair`-ish and `.accounting.MktPair` normalized version to console. Deatz: - make the click config's `brokermods` entry a `dict` - make `.brokers.core.mkt_info()` strip the broker name part from the input fqme before calling the backend. --- piker/brokers/cli.py | 77 ++++++++++++++++++++++++++++++++++--------- piker/brokers/core.py | 19 +++++++---- piker/cli/__init__.py | 6 ++-- 3 files changed, 77 insertions(+), 25 deletions(-) diff --git a/piker/brokers/cli.py b/piker/brokers/cli.py index 1bfb05d67..5f4543858 100644 --- a/piker/brokers/cli.py +++ b/piker/brokers/cli.py @@ -21,6 +21,7 @@ from functools import partial from operator import attrgetter from operator import itemgetter +from types import ModuleType import click import trio @@ -241,7 +242,7 @@ def quote(config, tickers): ''' # global opts - brokermod = config['brokermods'][0] + brokermod = list(config['brokermods'].values())[0] quotes = trio.run(partial(core.stocks_quote, brokermod, tickers)) if not quotes: @@ -268,7 +269,7 @@ def bars(config, symbol, count): ''' # global opts - brokermod = config['brokermods'][0] + brokermod = list(config['brokermods'].values())[0] # broker backend should return at the least a # list of candle dictionaries @@ -303,7 +304,7 @@ def record(config, rate, name, dhost, filename): ''' # global opts - brokermod = config['brokermods'][0] + brokermod = list(config['brokermods'].values())[0] loglevel = config['loglevel'] log = config['log'] @@ -368,7 +369,7 @@ def optsquote(config, symbol, date): ''' # global opts - brokermod = config['brokermods'][0] + brokermod = list(config['brokermods'].values())[0] quotes = trio.run( partial( @@ -385,26 +386,70 @@ def optsquote(config, symbol, date): @cli.command() @click.argument('tickers', nargs=-1, required=True) @click.pass_obj -def symbol_info(config, tickers): +def mkt_info( + config: dict, + tickers: list[str], +): ''' Print symbol quotes to the console ''' - # global opts - brokermod = config['brokermods'][0] + from msgspec.json import encode, decode + from ..accounting import MktPair + from ..service import ( + open_piker_runtime, + ) - quotes = 
trio.run(partial(core.symbol_info, brokermod, tickers)) - if not quotes: - log.error(f"No quotes could be found for {tickers}?") + # global opts + brokermods: dict[str, ModuleType] = config['brokermods'] + + mkts: list[MktPair] = [] + async def main(): + + async with open_piker_runtime( + name='mkt_info_query', + # loglevel=loglevel, + # debug_mode=True, + + ) as (_, _): + for fqme in tickers: + bs_fqme, _, broker = fqme.partition('.') + brokermod: ModuleType = brokermods[broker] + mkt, bs_pair = await core.mkt_info( + brokermod, + bs_fqme, + ) + mkts.append((mkt, bs_pair)) + + trio.run(main) + + if not mkts: + log.error( + f'No market info could be found for {tickers}' + ) return - if len(quotes) < len(tickers): - syms = tuple(map(itemgetter('symbol'), quotes)) + if len(mkts) < len(tickers): + syms = tuple(map(itemgetter('fqme'), mkts)) for ticker in tickers: if ticker not in syms: - brokermod.log.warn(f"Could not find symbol {ticker}?") - - click.echo(colorize_json(quotes)) + log.warn(f"Could not find symbol {ticker}?") + + + # TODO: use ``rich.Table`` intead here! + for mkt, bs_pair in mkts: + click.echo( + '\n' + '----------------------------------------------------\n' + f'{type(bs_pair)}\n' + '----------------------------------------------------\n' + f'{colorize_json(bs_pair.to_dict())}\n' + '----------------------------------------------------\n' + f'as piker `MktPair` with fqme: {mkt.fqme}\n' + '----------------------------------------------------\n' + # NOTE: roundtrip to json codec for console print + f'{colorize_json(decode(encode(mkt)))}' + ) @cli.command() @@ -416,7 +461,7 @@ def search(config, pattern): ''' # global opts - brokermods = config['brokermods'] + brokermods = list(config['brokermods'].values()) # define tractor entrypoint async def main(func): diff --git a/piker/brokers/core.py b/piker/brokers/core.py index b3651c1d4..f4d670dc7 100644 --- a/piker/brokers/core.py +++ b/piker/brokers/core.py @@ -30,6 +30,7 @@ from . import get_brokermod from ..service import maybe_spawn_brokerd from .._cacheables import open_cached_client +from ..accounting import MktPair async def api(brokername: str, methname: str, **kwargs) -> dict: @@ -116,15 +117,19 @@ async def bars( return await client.bars(symbol, **kwargs) -async def symbol_info( +async def mkt_info( brokermod: ModuleType, - symbol: str, + fqme: str, **kwargs, -) -> Dict[str, Dict[str, Dict[str, Any]]]: - """Return symbol info from broker. - """ - async with brokermod.get_client() as client: - return await client.symbol_info(symbol, **kwargs) + +) -> MktPair: + ''' + Return MktPair info from broker including src and dst assets. 
+ + ''' + return await brokermod.get_mkt_info( + fqme.replace(brokermod.name, '') + ) async def search_w_brokerd(name: str, pattern: str) -> dict: diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index a51fab3a2..a812555e7 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -20,6 +20,7 @@ ''' import os from contextlib import AsyncExitStack +from types import ModuleType import click import trio @@ -100,7 +101,6 @@ async def main(): registry_addr=reg_addr, ) as service_mngr, # normally delivers a ``Services`` handle - trio.open_nursery() as n, AsyncExitStack() as stack, ): @@ -163,7 +163,9 @@ def cli( from piker.brokers import __brokers__ brokers = __brokers__ - brokermods = [get_brokermod(broker) for broker in brokers] + brokermods: dict[str, ModuleType] = { + broker: get_brokermod(broker) for broker in brokers + } assert brokermods reg_addr: None | tuple[str, int] = None From bb02775cabee7a6a9c424500e567160bb33777eb Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 12 Jun 2023 20:23:01 -0400 Subject: [PATCH 55/85] Change `ledger` CLI to use new `open_brokerd_dialog()` Instead of effectively (and poorly) duplicating the trade dialog setup logic, just use the new helper we exposed in the EMS module B) Also, handle paper accounts that have no ledger / positions existing. --- piker/accounting/cli.py | 204 ++++++++++++++++++++++------------------ 1 file changed, 111 insertions(+), 93 deletions(-) diff --git a/piker/accounting/cli.py b/piker/accounting/cli.py index ee91d1b32..c655d8b5d 100644 --- a/piker/accounting/cli.py +++ b/piker/accounting/cli.py @@ -19,8 +19,9 @@ ''' from typing import ( - Any, + AsyncContextManager, ) +from types import ModuleType from rich.console import Console from rich.markdown import Markdown @@ -45,7 +46,11 @@ def broker_init( **start_actor_kwargs, -) -> dict: +) -> tuple[ + ModuleType, + dict, + AsyncContextManager, +]: ''' Given an input broker name, load all named arguments which can be passed to a daemon + context spawn for @@ -83,13 +88,9 @@ def broker_init( from ..brokers._daemon import _setup_persistent_brokerd return ( + brokermod, start_actor_kwargs, # to `ActorNursery.start_actor()` - _setup_persistent_brokerd, # service task ep - getattr( # trades endpoint - brokermod, - 'trades_dialogue', - None, - ), + _setup_persistent_brokerd, # deamon service task ep ) @@ -119,10 +120,11 @@ def sync( console.print(md) return - start_kwargs, _, trades_ep = broker_init( + brokermod, start_kwargs, deamon_ep = broker_init( brokername, loglevel=loglevel, ) + brokername: str = brokermod.name async def main(): @@ -136,96 +138,112 @@ async def main(): tractor.open_nursery() as an, ): - log.info( - f'Piker runtime up as {actor.uid}@{sockaddr}' - ) - - portal = await an.start_actor( - loglevel=loglevel, - debug_mode=pdb, - **start_kwargs, - ) - - if ( - brokername == 'paper' - or trades_ep is None - ): - from ..clearing import _paper_engine as paper - open_trades_endpoint = paper.open_paperboi( - fqme=None, # tell paper to not start clearing loop - broker=brokername, - loglevel=loglevel, + try: + log.info( + f'Piker runtime up as {actor.uid}@{sockaddr}' ) - else: - # open live brokerd trades endpoint - open_trades_endpoint = portal.open_context( - trades_ep, + + portal = await an.start_actor( loglevel=loglevel, + debug_mode=pdb, + **start_kwargs, ) - positions: dict[str, Any] - accounts: list[str] - async with ( - open_trades_endpoint as ( - brokerd_ctx, - (positions, accounts), - ), - ): - assert len(accounts) == 1 - summary: str = ( - '[dim 
underline]Piker Position Summary[/] ' - f'[dim blue underline]{brokername}[/]' - '[dim].[/]' - f'[blue underline]{account}[/]' - f'[dim underline] -> total pps: [/]' - f'[green]{len(positions)}[/]\n' + from ..clearing import ( + open_brokerd_dialog, ) - for ppdict in positions: - ppmsg = BrokerdPosition(**ppdict) - size = ppmsg.size - if size: - ppu: float = round( - ppmsg.avg_price, - ndigits=2, - ) - cost_basis: str = humanize(size * ppu) - h_size: str = humanize(size) - - if size < 0: - pcolor = 'red' - else: - pcolor = 'green' - - # sematic-highlight of fqme - fqme = ppmsg.symbol - tokens = fqme.split('.') - styled_fqme = f'[blue underline]{tokens[0]}[/]' - for tok in tokens[1:]: - styled_fqme += '[dim].[/]' - styled_fqme += f'[dim blue underline]{tok}[/]' - - # TODO: instead display in a ``rich.Table``? - summary += ( - styled_fqme + - '[dim]: [/]' - f'[{pcolor}]{h_size}[/]' - '[dim blue]u @[/]' - f'[{pcolor}]{ppu}[/]' - '[dim blue] = [/]' - f'[{pcolor}]$ {cost_basis}\n[/]' + brokerd_stream: tractor.MsgStream + + async with open_brokerd_dialog( + brokermod, + portal, + exec_mode=( + 'paper' if account == 'paper' + else 'live' + ), + loglevel=loglevel, + ) as ( + brokerd_stream, + pp_msg_table, + accounts, + ): + try: + assert len(accounts) == 1 + if ( + not pp_msg_table + and account == 'paper' + ): + console.print( + '[yellow underline]' + f'No pps found for `{brokername}.paper` account!\n' + 'Do you even have any paper ledger files?' + ) + return + + pps_by_symbol: dict[str, BrokerdPosition] = pp_msg_table[ + brokername, + account, + ] + + summary: str = ( + '[dim underline]Piker Position Summary[/] ' + f'[dim blue underline]{brokername}[/]' + '[dim].[/]' + f'[blue underline]{account}[/]' + f'[dim underline] -> total pps: [/]' + f'[green]{len(pps_by_symbol)}[/]\n' ) - - console.print(summary) - - # exit via ctx cancellation. - await brokerd_ctx.cancel(timeout=1) - # TODO: once ported to newer tractor branch we should - # be able to do a loop like this: - # while brokerd_ctx.cancel_called_remote is None: - # await trio.sleep(0.01) - # await brokerd_ctx.cancel() - - await portal.cancel_actor() + # for ppdict in positions: + for fqme, ppmsg in pps_by_symbol.items(): + # ppmsg = BrokerdPosition(**ppdict) + size = ppmsg.size + if size: + ppu: float = round( + ppmsg.avg_price, + ndigits=2, + ) + cost_basis: str = humanize(size * ppu) + h_size: str = humanize(size) + + if size < 0: + pcolor = 'red' + else: + pcolor = 'green' + + # sematic-highlight of fqme + fqme = ppmsg.symbol + tokens = fqme.split('.') + styled_fqme = f'[blue underline]{tokens[0]}[/]' + for tok in tokens[1:]: + styled_fqme += '[dim].[/]' + styled_fqme += f'[dim blue underline]{tok}[/]' + + # TODO: instead display in a ``rich.Table``? + summary += ( + styled_fqme + + '[dim]: [/]' + f'[{pcolor}]{h_size}[/]' + '[dim blue]u @[/]' + f'[{pcolor}]{ppu}[/]' + '[dim blue] = [/]' + f'[{pcolor}]$ {cost_basis}\n[/]' + ) + + console.print(summary) + + finally: + # exit via ctx cancellation. 
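                        # NOTE: since only the msg stream (and not the
                        # ctx) is delivered by `open_brokerd_dialog()`,
                        # the underlying context handle is pulled off
                        # the stream in order to cancel the dialog.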
+ brokerd_ctx: tractor.Context = brokerd_stream._ctx + await brokerd_ctx.cancel(timeout=1) + + # TODO: once ported to newer tractor branch we should + # be able to do a loop like this: + # while brokerd_ctx.cancel_called_remote is None: + # await trio.sleep(0.01) + # await brokerd_ctx.cancel() + + finally: + await portal.cancel_actor() trio.run(main) From ebbfa7f48d2098832ef38b4bebaef3138b7928a1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 12 Jun 2023 21:58:19 -0400 Subject: [PATCH 56/85] Passthrough kwargs to `open_cached_client()` --- piker/_cacheables.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/piker/_cacheables.py b/piker/_cacheables.py index 9be4d079e..279f31884 100644 --- a/piker/_cacheables.py +++ b/piker/_cacheables.py @@ -110,6 +110,8 @@ async def decorated( @acm async def open_cached_client( brokername: str, + **kwargs, + ) -> 'Client': # noqa ''' Get a cached broker client from the current actor's local vars. @@ -120,5 +122,11 @@ async def open_cached_client( brokermod = get_brokermod(brokername) async with maybe_open_context( acm_func=brokermod.get_client, + kwargs=kwargs, + ) as (cache_hit, client): + + if cache_hit: + log.info(f'Reusing existing {client}') + yield client From 90810dcffd110400d800e03f794329a2ec81a625 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 13 Jun 2023 09:17:00 -0400 Subject: [PATCH 57/85] Right partition the fqme to remove broker part in mkt-info cli --- piker/brokers/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/piker/brokers/cli.py b/piker/brokers/cli.py index 5f4543858..5ebca3e71 100644 --- a/piker/brokers/cli.py +++ b/piker/brokers/cli.py @@ -409,11 +409,11 @@ async def main(): async with open_piker_runtime( name='mkt_info_query', # loglevel=loglevel, - # debug_mode=True, + debug_mode=True, ) as (_, _): for fqme in tickers: - bs_fqme, _, broker = fqme.partition('.') + bs_fqme, _, broker = fqme.rpartition('.') brokermod: ModuleType = brokermods[broker] mkt, bs_pair = await core.mkt_info( brokermod, From c0552fa3527d412ce799ae634fe0c2a6676d622b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 13 Jun 2023 09:20:15 -0400 Subject: [PATCH 58/85] Just use brokermods dict directly in chart entrypoint now --- piker/ui/_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/ui/_app.py b/piker/ui/_app.py index ee4faf570..644e7567d 100644 --- a/piker/ui/_app.py +++ b/piker/ui/_app.py @@ -185,7 +185,7 @@ def _main( func=_async_main, args=( syms, - {mod.name: mod for mod in brokermods}, + brokermods, piker_loglevel, ), main_widget_type=GodWidget, From 921e18728c578e119285d0ccaed2453cfcca7675 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 13 Jun 2023 15:22:51 -0400 Subject: [PATCH 59/85] Move `._cacheables.open_cached_client()` into `.brokers` pkg mod --- piker/_cacheables.py | 33 ----------------------- piker/brokers/__init__.py | 51 ++++++++++++++++++++++++++++++++++- piker/brokers/core.py | 2 +- piker/brokers/deribit/feed.py | 2 +- piker/brokers/kraken/feed.py | 4 ++- piker/brokers/kucoin.py | 4 ++- piker/brokers/questrade.py | 3 ++- piker/ui/_fsp.py | 4 +-- 8 files changed, 62 insertions(+), 41 deletions(-) diff --git a/piker/_cacheables.py b/piker/_cacheables.py index 279f31884..434573648 100644 --- a/piker/_cacheables.py +++ b/piker/_cacheables.py @@ -20,9 +20,6 @@ ''' from collections import OrderedDict -from contextlib import ( - asynccontextmanager as acm, -) from typing import ( Awaitable, Callable, @@ -30,12 +27,8 @@ TypeVar, ) -from 
tractor.trionics import maybe_open_context - -from .brokers import get_brokermod from .log import get_logger - log = get_logger(__name__) T = TypeVar("T") @@ -104,29 +97,3 @@ async def decorated( return decorated return decorator - - -# TODO: move this to `.brokers.utils`.. -@acm -async def open_cached_client( - brokername: str, - **kwargs, - -) -> 'Client': # noqa - ''' - Get a cached broker client from the current actor's local vars. - - If one has not been setup do it and cache it. - - ''' - brokermod = get_brokermod(brokername) - async with maybe_open_context( - acm_func=brokermod.get_client, - kwargs=kwargs, - - ) as (cache_hit, client): - - if cache_hit: - log.info(f'Reusing existing {client}') - - yield client diff --git a/piker/brokers/__init__.py b/piker/brokers/__init__.py index 93393654b..986905e2c 100644 --- a/piker/brokers/__init__.py +++ b/piker/brokers/__init__.py @@ -17,10 +17,34 @@ """ Broker clients, daemons and general back end machinery. """ +from contextlib import ( + asynccontextmanager as acm, +) from importlib import import_module from types import ModuleType -__brokers__ = [ +from tractor.trionics import maybe_open_context + +from ._util import ( + log, + BrokerError, + SymbolNotFound, + NoData, + DataUnavailable, + DataThrottle, + resproc, +) + +__all__: list[str] = [ + 'BrokerError', + 'SymbolNotFound', + 'NoData', + 'DataUnavailable', + 'DataThrottle', + 'resproc', +] + +__brokers__: list[str] = [ 'binance', 'ib', 'kraken', @@ -58,3 +82,28 @@ def iter_brokermods(): ''' for name in __brokers__: yield get_brokermod(name) + + +@acm +async def open_cached_client( + brokername: str, + **kwargs, + +) -> 'Client': # noqa + ''' + Get a cached broker client from the current actor's local vars. + + If one has not been setup do it and cache it. + + ''' + brokermod = get_brokermod(brokername) + async with maybe_open_context( + acm_func=brokermod.get_client, + kwargs=kwargs, + + ) as (cache_hit, client): + + if cache_hit: + log.info(f'Reusing existing {client}') + + yield client diff --git a/piker/brokers/core.py b/piker/brokers/core.py index f4d670dc7..a9a83e7cd 100644 --- a/piker/brokers/core.py +++ b/piker/brokers/core.py @@ -29,7 +29,7 @@ from ._util import log from . import get_brokermod from ..service import maybe_spawn_brokerd -from .._cacheables import open_cached_client +from . 
import open_cached_client from ..accounting import MktPair diff --git a/piker/brokers/deribit/feed.py b/piker/brokers/deribit/feed.py index 04357ef84..ca6a3f54d 100644 --- a/piker/brokers/deribit/feed.py +++ b/piker/brokers/deribit/feed.py @@ -30,7 +30,7 @@ import numpy as np import tractor -from piker._cacheables import open_cached_client +from piker.brokers import open_cached_client from piker.log import get_logger, get_console_log from piker.data import ShmArray from piker.brokers._util import ( diff --git a/piker/brokers/kraken/feed.py b/piker/brokers/kraken/feed.py index 02b2866af..dc70672fc 100644 --- a/piker/brokers/kraken/feed.py +++ b/piker/brokers/kraken/feed.py @@ -42,8 +42,10 @@ Asset, MktPair, ) -from piker._cacheables import ( +from piker.brokers import ( open_cached_client, +) +from piker._cacheables import ( async_lifo_cache, ) from piker.brokers._util import ( diff --git a/piker/brokers/kucoin.py b/piker/brokers/kucoin.py index eebbc5f7c..b2953467b 100755 --- a/piker/brokers/kucoin.py +++ b/piker/brokers/kucoin.py @@ -56,8 +56,10 @@ MktPair, ) from piker import config -from piker._cacheables import ( +from piker.brokers import ( open_cached_client, +) +from piker._cacheables import ( async_lifo_cache, ) from piker.log import get_logger diff --git a/piker/brokers/questrade.py b/piker/brokers/questrade.py index 1d447b23b..31133f232 100644 --- a/piker/brokers/questrade.py +++ b/piker/brokers/questrade.py @@ -40,7 +40,8 @@ import asks from ..calc import humanize, percent_change -from .._cacheables import open_cached_client, async_lifo_cache +from . import open_cached_client +from piker._cacheables import async_lifo_cache from .. import config from ._util import resproc, BrokerError, SymbolNotFound from ..log import ( diff --git a/piker/ui/_fsp.py b/piker/ui/_fsp.py index f00b1e3d8..5202ea975 100644 --- a/piker/ui/_fsp.py +++ b/piker/ui/_fsp.py @@ -30,14 +30,14 @@ ) import msgspec -import tractor import pyqtgraph as pg +import tractor +from tractor.trionics import maybe_open_context import trio from trio_typing import TaskStatus from piker.data.types import Struct from ._axes import PriceAxis -from .._cacheables import maybe_open_context from ..calc import humanize from ..data._sharedmem import ( ShmArray, From 0f8c685735cd7803e74fe657a9a83b2c2112c27d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 14 Jun 2023 13:53:31 -0400 Subject: [PATCH 60/85] .clearing._client: return early on cancel-dead-dialog attempts --- piker/clearing/_client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/piker/clearing/_client.py b/piker/clearing/_client.py index 65a21fef7..f5e2a5e31 100644 --- a/piker/clearing/_client.py +++ b/piker/clearing/_client.py @@ -132,6 +132,8 @@ def _mk_cancel_msg( f'Maybe there is a stale entry or line?\n' f'You should report this as a bug!' ) + return + fqme = str(cmd.symbol) return Cancel( oid=uuid, From 6a1c49be4e359f5a5539e28e3e698206e91bef2c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 14 Jun 2023 13:53:55 -0400 Subject: [PATCH 61/85] view_mode: handle duplicate overlay dispersions Discovered due to originally having a history loading bug between btcusdt futes display where the same time series was being loaded into the graphics system, this avoids the issue where 2 (or more) curves are measured to have the same dispersion and thus do not get added as unique entries to the `overlay_table: dict[float, tuple]` during the scaling phase.. 
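As a boiled down sketch of the failure mode (hypothetical values and
names, not the actual `view_mode.py` code):

    from operator import itemgetter

    # keying only by the scalar dispersion: the 2nd curve silently
    # clobbers the 1st when both measure the same value,
    overlay_table[0.042] = (viz_a, ...)
    overlay_table[0.042] = (viz_b, ...)

    # keying by a (dispersion, Viz) pair keeps both entries while the
    # "major" curve can still be picked by sorting on the scalar,
    overlay_table[(0.042, viz_a)] = (...)
    overlay_table[(0.042, viz_b)] = (...)
    mx_pair = max(overlay_table, key=itemgetter(0))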
Practically speaking this should never really be a problem if the curves (and their backing timeseries) are indeed unique but keying the overlay table by the dispersion and the `Viz` is a minimal performance hit when looping the sorted table and is a lot nicer then you **do want to show** duplicate curves then having one overlay just not be ranged correctly at all XD --- piker/ui/view_mode.py | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/piker/ui/view_mode.py b/piker/ui/view_mode.py index 78e58f7af..313dd8d3a 100644 --- a/piker/ui/view_mode.py +++ b/piker/ui/view_mode.py @@ -19,6 +19,7 @@ ''' from __future__ import annotations +from operator import itemgetter from typing import ( Any, Literal, @@ -197,15 +198,17 @@ def overlay_viewlists( ) -> None: ''' - Calculate and apply y-domain (axis y-range) multi-curve overlay adjustments - a set of ``plots`` based on the requested ``method``. + Calculate and apply y-domain (axis y-range) multi-curve overlay + adjustments a set of ``plots`` based on the requested + ``method``. ''' chart_name: str chart: ChartPlotWidget + for chart_name, chart in plots.items(): - overlay_viz_items = chart._vizs.items() + overlay_viz_items: dict = chart._vizs # Common `PlotItem` maxmin table; presumes that some path # graphics (and thus their backing data sets) are in the @@ -271,6 +274,7 @@ def overlay_viewlists( # determine auto-ranging input for `._set_yrange()`. # this is primarly used for our so called "log-linearized # multi-plot" overlay technique. + # vizs_by_disp: list[tuple[float, Viz]] = [] overlay_table: dict[ float, tuple[ @@ -288,7 +292,7 @@ def overlay_viewlists( ] = {} # multi-curve overlay processing stage - for name, viz in overlay_viz_items: + for name, viz in overlay_viz_items.items(): out = _maybe_calc_yrange( viz, @@ -356,7 +360,7 @@ def overlay_viewlists( # returns scalars r_up = (ymx - y_ref) / y_ref r_down = (ymn - y_ref) / y_ref - disp = r_up - r_down + disp = round(r_up - r_down, ndigits=16) msg = ( f'Viz[{viz.name}][{key}]: @{chart_name}\n' @@ -489,7 +493,15 @@ def overlay_viewlists( # register curves by a "full" dispersion metric for # later sort order in the overlay (technique # ) application loop below. - overlay_table[disp] = ( + pair: tuple[float, Viz] = (disp, viz) + + # time series are so similar they have same + # dispersion with `float` precision.. + if entry := overlay_table.get(pair): + raise RuntimeError('Duplicate entry!? -> {entry}') + + # vizs_by_disp.append(pair) + overlay_table[pair] = ( viz.plot.vb, viz, y_ref, @@ -540,6 +552,7 @@ def overlay_viewlists( mxmns_by_common_pi and not overlay_table ): + print("WAATT THE FUCK") # move to next chart in linked set since # no overlay transforming is needed. continue @@ -548,7 +561,7 @@ def overlay_viewlists( r_up_mx: float r_dn_mn: float - mx_disp = max(overlay_table) + mx_pair: tuple = max(overlay_table, key=itemgetter(0)) if debug_print: # print overlay table in descending dispersion order @@ -564,11 +577,11 @@ def overlay_viewlists( ) if method == 'loglin_ref_to_curve': - mx_entry = overlay_table.pop(mx_disp) + mx_entry = overlay_table.pop(mx_pair) else: - # TODO: for pin to first-in-view we need to no pop this from the + # TODO: for pin to first-in-view we need to NOT pop this from the # table, but can we simplify below code even more? 
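        # NOTE: either way `mx_entry` is the entry for the curve with
        # the largest dispersion, ie. the "major", which the scaling
        # phase below treats as the y-range reference that the other
        # (minor) curves are adjusted against.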
- mx_entry = overlay_table[mx_disp] + mx_entry = overlay_table[mx_pair] ( mx_view, # viewbox @@ -599,7 +612,11 @@ def overlay_viewlists( tuple[Viz, float, float, float, float] ] = {} - for full_disp in reversed(overlay_table): + for pair in sorted( + overlay_table, + key=itemgetter(0), + reverse=True, + ): ( view, viz, @@ -610,7 +627,7 @@ def overlay_viewlists( minor_in_view, r_up, r_dn, - ) = overlay_table[full_disp] + ) = overlay_table[pair] key = 'open' if viz.is_ohlc else viz.name xref = minor_in_view[0]['time'] @@ -839,7 +856,7 @@ def overlay_viewlists( print( 'SCALING PHASE' + '-'*100 + '\n\n' '_________MAJOR INFO___________\n' - f'SIGMA MAJOR C: {mx_viz.name} -> {mx_disp}\n' + f'SIGMA MAJOR C: {mx_viz.name} -> {mx_pair[0]}\n' f'UP MAJOR C: {upt.viz.name} with disp: {upt.rng}\n' f'DOWN MAJOR C: {dnt.viz.name} with disp: {dnt.rng}\n' f'xref: {mx_xref}\n' From 6b2e85e4b391f8bd3acbad72eab597db96d217af Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 14 Jun 2023 14:46:23 -0400 Subject: [PATCH 62/85] Add type-annots to sampler subscription method internals --- piker/data/feed.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 775e8fc67..2e2a63b52 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -145,8 +145,7 @@ def get_subs( key: str, ) -> set[ tuple[ - Union[tractor.MsgStream, trio.MemorySendChannel], - # tractor.Context, + tractor.MsgStream | trio.MemorySendChannel, float | None, # tick throttle in Hz ] ]: @@ -161,7 +160,6 @@ def add_subs( key: str, subs: set[tuple[ tractor.MsgStream | trio.MemorySendChannel, - # tractor.Context, float | None, # tick throttle in Hz ]], ) -> set[tuple]: @@ -169,7 +167,7 @@ def add_subs( Add a ``set`` of consumer subscription entries for the given key. ''' - _subs = self._subscribers[key] + _subs: set[tuple] = self._subscribers[key] _subs.update(subs) return _subs @@ -183,7 +181,7 @@ def remove_subs( Remove a ``set`` of consumer subscription entries for key. ''' - _subs = self.get_subs(key) + _subs: set[tuple] = self.get_subs(key) _subs.difference_update(subs) return _subs From 2e878ca52abc40004d8dbfb5ed529c9ec7013241 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 14 Jun 2023 18:29:40 -0400 Subject: [PATCH 63/85] Don't pass loglevel to trade dialog endpoint It's been getting setup in the `brokerd` daemon-actor spawn task for a while now and worker tasks already get a ref to that global log instance so they don't need to care (in data or trading) task spawn endpoints. Also move to the new `open_trade_dialog()` naming for working broker backends B) --- piker/brokers/_daemon.py | 4 ++++ piker/brokers/deribit/__init__.py | 8 ++++---- piker/brokers/ib/broker.py | 11 ++++------- piker/brokers/kraken/broker.py | 1 - piker/clearing/_ems.py | 5 ++--- piker/clearing/_paper_engine.py | 8 ++++---- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/piker/brokers/_daemon.py b/piker/brokers/_daemon.py index 8a81b1d67..34cbfca43 100644 --- a/piker/brokers/_daemon.py +++ b/piker/brokers/_daemon.py @@ -58,6 +58,10 @@ async def _setup_persistent_brokerd( the broker backend as needed. 
''' + # NOTE: we only need to setup logging once (and only) here + # since all hosted daemon tasks will reference this same + # log instance's (actor local) state and thus don't require + # any further (level) configuration on their own B) log = _util.get_console_log( loglevel or tractor.current_actor().loglevel, name=f'{_util.subsys}.{brokername}', diff --git a/piker/brokers/deribit/__init__.py b/piker/brokers/deribit/__init__.py index f5c48b58d..4c0c18507 100644 --- a/piker/brokers/deribit/__init__.py +++ b/piker/brokers/deribit/__init__.py @@ -21,8 +21,6 @@ from piker.log import get_logger -log = get_logger(__name__) - from .api import ( get_client, ) @@ -30,13 +28,15 @@ open_history_client, open_symbol_search, stream_quotes, - backfill_bars + # backfill_bars, ) # from .broker import ( - # trades_dialogue, + # open_trade_dialog, # norm_trade_records, # ) +log = get_logger(__name__) + __all__ = [ 'get_client', # 'trades_dialogue', diff --git a/piker/brokers/ib/broker.py b/piker/brokers/ib/broker.py index 73477c0ad..a5c68c36c 100644 --- a/piker/brokers/ib/broker.py +++ b/piker/brokers/ib/broker.py @@ -64,7 +64,6 @@ open_pps, PpTable, ) -from .._util import get_console_log from piker.clearing._messages import ( Order, Status, @@ -217,7 +216,7 @@ async def recv_trade_updates( client.inline_errors(to_trio) # sync with trio task - to_trio.send_nowait(None) + to_trio.send_nowait(client.ib) def push_tradesies( eventkit_obj, @@ -513,8 +512,9 @@ async def open_trade_event_stream( async with tractor.to_asyncio.open_channel_from( recv_trade_updates, client=client, - ) as (first, trade_event_stream): + ) as (ibclient, trade_event_stream): + assert ibclient is client.ib task_status.started(trade_event_stream) await trio.sleep_forever() @@ -523,13 +523,10 @@ async def open_trade_event_stream( async def trades_dialogue( ctx: tractor.Context, - loglevel: str = None, + # loglevel: str = None, ) -> AsyncIterator[dict[str, Any]]: - # XXX: required to propagate ``tractor`` loglevel to piker logging - get_console_log(loglevel or tractor.current_actor().loglevel) - accounts_def = config.load_accounts(['ib']) global _client_cache diff --git a/piker/brokers/kraken/broker.py b/piker/brokers/kraken/broker.py index 96ab77ae2..fc2eff62d 100644 --- a/piker/brokers/kraken/broker.py +++ b/piker/brokers/kraken/broker.py @@ -422,7 +422,6 @@ def trades2pps( @tractor.context async def trades_dialogue( ctx: tractor.Context, - loglevel: str = None, ) -> AsyncIterator[dict[str, Any]]: diff --git a/piker/clearing/_ems.py b/piker/clearing/_ems.py index 7abd4a61b..ffd5882b0 100644 --- a/piker/clearing/_ems.py +++ b/piker/clearing/_ems.py @@ -373,8 +373,8 @@ def mk_paper_ep(): # TODO: ideally choose only one of these ep names.. trades_endpoint: Callable for ep_name in [ - 'trades_dialogue', - 'open_trade_dialog', + 'trades_dialogue', # legacy + 'open_trade_dialog', # probably final name? 
]: trades_endpoint = getattr( brokermod, @@ -390,7 +390,6 @@ def mk_paper_ep(): # open live brokerd trades endpoint open_trades_endpoint = portal.open_context( trades_endpoint, - loglevel=loglevel, ) else: diff --git a/piker/clearing/_paper_engine.py b/piker/clearing/_paper_engine.py index fd998e97d..4220bf63e 100644 --- a/piker/clearing/_paper_engine.py +++ b/piker/clearing/_paper_engine.py @@ -527,7 +527,7 @@ async def handle_order_requests( @tractor.context -async def trades_dialogue( +async def open_trade_dialog( ctx: tractor.Context, broker: str, @@ -695,21 +695,21 @@ async def open_paperboi( async with ( tractor.find_actor(service_name) as portal, - tractor.open_nursery() as tn, + tractor.open_nursery() as an, ): # NOTE: only spawn if no paperboi already is up since we likely # don't need more then one actor for simulated order clearing # per broker-backend. if portal is None: log.info('Starting new paper-engine actor') - portal = await tn.start_actor( + portal = await an.start_actor( service_name, enable_modules=[__name__] ) we_spawned = True async with portal.open_context( - trades_dialogue, + open_trade_dialog, broker=broker, fqme=fqme, loglevel=loglevel, From f81ea64cab21cbf1baaa169820479ba5fab8bf73 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 14 Jun 2023 19:02:54 -0400 Subject: [PATCH 64/85] Drop unused `Union` --- piker/data/feed.py | 1 - 1 file changed, 1 deletion(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 2e2a63b52..1871db7d6 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -39,7 +39,6 @@ Optional, Awaitable, Sequence, - Union, ) import trio From e7437cb722925593482b501d7dec50d9176951d2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 15 Jun 2023 10:29:48 -0400 Subject: [PATCH 65/85] Facepalm, break on first matching trades ep.. --- piker/clearing/_ems.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/piker/clearing/_ems.py b/piker/clearing/_ems.py index ffd5882b0..151e10c88 100644 --- a/piker/clearing/_ems.py +++ b/piker/clearing/_ems.py @@ -381,7 +381,12 @@ def mk_paper_ep(): ep_name, None, ) - break + if trades_endpoint: + break + else: + raise RuntimeError( + f'No live trading EP found: {brokermod.name}?' + ) if ( trades_endpoint is not None From a4b8fb2d6b026e701b2c08252f82c65061aefbdf Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 15 Jun 2023 14:32:51 -0400 Subject: [PATCH 66/85] Woops, drop paper mode detection on client side.. --- piker/clearing/_client.py | 8 -------- piker/clearing/_ems.py | 6 +++--- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/piker/clearing/_client.py b/piker/clearing/_client.py index f5e2a5e31..436b4f8e1 100644 --- a/piker/clearing/_client.py +++ b/piker/clearing/_client.py @@ -37,7 +37,6 @@ Cancel, BrokerdPosition, ) -from ..brokers import get_brokermod if TYPE_CHECKING: from ._messages import ( @@ -246,13 +245,6 @@ async def open_ems( loglevel=loglevel, ) as portal: - mod = get_brokermod(broker) - if ( - not getattr(mod, 'trades_dialogue', None) - or mode == 'paper' - ): - mode = 'paper' - from ._ems import _emsd_main async with ( # connect to emsd diff --git a/piker/clearing/_ems.py b/piker/clearing/_ems.py index 151e10c88..68639ae87 100644 --- a/piker/clearing/_ems.py +++ b/piker/clearing/_ems.py @@ -353,7 +353,6 @@ def mk_paper_ep(): # a paper-simulator clearing engine. 
# load the paper trading engine - exec_mode = 'paper' log.info(f'{broker}: Entering `paper` trading mode') # load the paper trading engine as a subactor of this emsd @@ -370,11 +369,12 @@ def mk_paper_ep(): loglevel=loglevel, ) - # TODO: ideally choose only one of these ep names.. + # take the first supported ep we detect + # on the backend mod. trades_endpoint: Callable for ep_name in [ - 'trades_dialogue', # legacy 'open_trade_dialog', # probably final name? + 'trades_dialogue', # legacy ]: trades_endpoint = getattr( brokermod, From 81d5ca9bc2d08226faf4ce3a8dfdc056724f42a1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 16 Jun 2023 17:00:37 -0400 Subject: [PATCH 67/85] ib: drop `ibis` import and use fq object imports instead --- piker/brokers/ib/api.py | 48 ++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/piker/brokers/ib/api.py b/piker/brokers/ib/api.py index 5d49b14e9..c6ad0b502 100644 --- a/piker/brokers/ib/api.py +++ b/piker/brokers/ib/api.py @@ -51,9 +51,18 @@ from tractor import to_asyncio import pendulum from eventkit import Event -import ib_insync as ibis -from ib_insync.contract import ( +from ib_insync import ( + client as ib_client, + IB, Contract, + Crypto, + Commodity, + Forex, + Future, + ContFuture, + Stock, +) +from ib_insync.contract import ( ContractDetails, Option, ) @@ -70,7 +79,6 @@ Wrapper, RequestError, ) -from ib_insync.client import Client as ib_Client import numpy as np # TODO: in hindsight, probably all imports should be @@ -161,7 +169,7 @@ def execDetails( return super().execDetails(reqId, contract, execu) -class NonShittyIB(ibis.IB): +class NonShittyIB(IB): ''' The beginning of overriding quite a few decisions in this lib. @@ -180,7 +188,7 @@ def __init__(self): # XXX: just to override this wrapper self.wrapper = NonShittyWrapper(self) - self.client = ib_Client(self.wrapper) + self.client = ib_client.Client(self.wrapper) self.client._logger = get_logger( 'ib_insync.client', ) @@ -376,7 +384,7 @@ class Client: def __init__( self, - ib: ibis.IB, + ib: IB, ) -> None: self.ib = ib @@ -633,7 +641,7 @@ async def search_symbols( # try get all possible contracts for symbol as per, # https://interactivebrokers.github.io/tws-api/basic_contracts.html#fut - con = ibis.Future( + con = Future( symbol=sym, exchange=exch, ) @@ -681,11 +689,11 @@ async def get_fute( # it's the "front" contract returned here if front: con = (await self.ib.qualifyContractsAsync( - ibis.ContFuture(symbol, exchange=exchange) + ContFuture(symbol, exchange=exchange) ))[0] else: con = (await self.ib.qualifyContractsAsync( - ibis.Future( + Future( symbol, exchange=exchange, lastTradeDateOrContractMonth=expiry, @@ -704,7 +712,7 @@ async def get_con( return self._cons[conid] except KeyError: con: Contract = await self.ib.qualifyContractsAsync( - ibis.Contract(conId=conid) + Contract(conId=conid) ) self._cons[conid] = con return con @@ -815,7 +823,7 @@ async def find_contracts( # if '/' in symbol: # currency = '' # symbol, currency = symbol.split('/') - con = ibis.Forex( + con = Forex( pair=''.join((symbol, currency)), currency=currency, ) @@ -824,12 +832,12 @@ async def find_contracts( # commodities elif exch == 'CMDTY': # eg. 
XAUUSD.CMDTY con_kwargs, bars_kwargs = _adhoc_symbol_map[symbol] - con = ibis.Commodity(**con_kwargs) + con = Commodity(**con_kwargs) con.bars_kwargs = bars_kwargs # crypto$ elif exch == 'PAXOS': # btc.paxos - con = ibis.Crypto( + con = Crypto( symbol=symbol, currency=currency, ) @@ -851,7 +859,7 @@ async def find_contracts( primaryExchange = exch exch = 'SMART' - con = ibis.Stock( + con = Stock( symbol=symbol, exchange=exch, primaryExchange=primaryExchange, @@ -1157,9 +1165,9 @@ def con2fqme( symbol = con.localSymbol.replace(' ', '') case ( - ibis.Commodity() + Commodity() # search API endpoint returns std con box.. - | ibis.Contract(secType='CMDTY') + | Contract(secType='CMDTY') ): # commodities and forex don't have an exchange name and # no real volume so we have to calculate the price @@ -1168,7 +1176,7 @@ def con2fqme( # no real volume on this tract calc_price = True - case ibis.Forex() | ibis.Contract(secType='CASH'): + case Forex() | Contract(secType='CASH'): dst, src = con.localSymbol.split('.') symbol = ''.join([dst, src]) suffix = con.exchange or 'idealpro' @@ -1245,7 +1253,7 @@ async def load_aio_clients( # the API TCP in `ib_insync` connection can be flaky af so instead # retry a few times to get the client going.. connect_retries: int = 3, - connect_timeout: float = 1, + connect_timeout: float = 10, disconnect_on_exit: bool = True, ) -> dict[str, Client]: @@ -1310,7 +1318,7 @@ async def load_aio_clients( ): continue - ib = NonShittyIB() + ib: IB = NonShittyIB() for i in range(connect_retries): try: @@ -1344,7 +1352,7 @@ async def load_aio_clients( ) as ce: _err = ce log.warning( - f'Failed to connect on {port} for {i} time with,\n' + f'Failed to connect on {host}:{port} for {i} time with,\n' f'{ib.client.apiError.value()}\n' 'retrying with a new client id..') From f2c1988536948a7d148b2d615a7df8dd71e5ec30 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 16 Jun 2023 21:06:28 -0400 Subject: [PATCH 68/85] Better empty account console msg styling --- piker/accounting/_mktinfo.py | 2 +- piker/accounting/cli.py | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/piker/accounting/_mktinfo.py b/piker/accounting/_mktinfo.py index 046195caf..2d2ebccdc 100644 --- a/piker/accounting/_mktinfo.py +++ b/piker/accounting/_mktinfo.py @@ -121,7 +121,7 @@ class Asset(Struct, frozen=True): # NOTE: additional info optionally packed in by the backend, but # should not be explicitly required in our generic API. - info: dict = {} # make it frozen? + info: dict | None = None # TODO? # _to_dict_skip = {'info'} diff --git a/piker/accounting/cli.py b/piker/accounting/cli.py index c655d8b5d..0b18a3eb0 100644 --- a/piker/accounting/cli.py +++ b/piker/accounting/cli.py @@ -34,6 +34,7 @@ open_piker_runtime, ) from ..clearing._messages import BrokerdPosition +from ..config import load_ledger from ..calc import humanize @@ -169,14 +170,18 @@ async def main(): ): try: assert len(accounts) == 1 - if ( - not pp_msg_table - and account == 'paper' - ): + if not pp_msg_table: + ld, fpath = load_ledger(brokername, account) + assert not ld, f'WTF did we fail to parse ledger:\n{ld}' + console.print( - '[yellow underline]' - f'No pps found for `{brokername}.paper` account!\n' - 'Do you even have any paper ledger files?' 
+ '[yellow]' + 'No pps found for ' + f'`{brokername}.{account}` ' + 'account!\n\n' + '[/][underline]' + 'None of the following ledger files exist:\n\n[/]' + f'{fpath.as_uri()}\n' ) return From 77dfeb4bf2af1b4dc5345ead2b26da7a39579815 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 16 Jun 2023 21:07:42 -0400 Subject: [PATCH 69/85] Update brokerd msgs with modern type annots, add a "closed" status --- piker/clearing/_messages.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/piker/clearing/_messages.py b/piker/clearing/_messages.py index 126cd3476..240c340b9 100644 --- a/piker/clearing/_messages.py +++ b/piker/clearing/_messages.py @@ -23,7 +23,6 @@ # deque, # ) from typing import ( - Optional, Literal, ) @@ -140,7 +139,7 @@ class Status(Struct): # this maps normally to the ``BrokerdOrder.reqid`` below, an id # normally allocated internally by the backend broker routing system - reqid: Optional[int | str] = None + reqid: int | str | None = None # the (last) source order/request msg if provided # (eg. the Order/Cancel which causes this msg) and @@ -153,7 +152,7 @@ class Status(Struct): # event that wasn't originated by piker's emsd (eg. some external # trading system which does it's own order control but that you # might want to "track" using piker UIs/systems). - src: Optional[str] = None + src: str | None = None # set when a cancel request msg was set for this order flow dialog # but the brokerd dialog isn't yet in a cancelled state. @@ -181,7 +180,7 @@ class BrokerdCancel(Struct): # for setting a unique order id then this value will be relayed back # on the emsd order request stream as the ``BrokerdOrderAck.reqid`` # field - reqid: Optional[int | str] = None + reqid: int | str | None = None action: str = 'cancel' @@ -205,7 +204,7 @@ class BrokerdOrder(Struct): # for setting a unique order id then this value will be relayed back # on the emsd order request stream as the ``BrokerdOrderAck.reqid`` # field - reqid: Optional[int | str] = None + reqid: int | str | None = None # --------------- @@ -233,14 +232,14 @@ class BrokerdOrderAck(Struct): class BrokerdStatus(Struct): - reqid: int | str time_ns: int + reqid: int | str status: Literal[ 'open', 'canceled', - 'fill', 'pending', 'error', + 'closed', ] account: str @@ -259,24 +258,24 @@ class BrokerdStatus(Struct): class BrokerdFill(Struct): ''' - A single message indicating a "fill-details" event from the broker - if avaiable. + A single message indicating a "fill-details" event from the + broker if avaiable. ''' # brokerd timestamp required for order mode arrow placement on x-axis # TODO: maybe int if we force ns? # we need to normalize this somehow since backends will use their # own format and likely across many disparate epoch clocks... + time_ns: int broker_time: float reqid: int | str - time_ns: int # order exeuction related size: float price: float name: str = 'fill' - action: Optional[str] = None + action: str | None = None broker_details: dict = {} # meta-data (eg. commisions etc.) @@ -293,7 +292,7 @@ class BrokerdError(Struct): # if no brokerd order request was actually submitted (eg. we errored # at the ``pikerd`` layer) then there will be ``reqid`` allocated. 
- reqid: Optional[int | str] = None + reqid: int | str | None = None name: str = 'error' broker_details: dict = {} From bc58e42a74a7c8dfb29dd685e0684d06c250d3f0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 16 Jun 2023 21:08:35 -0400 Subject: [PATCH 70/85] Refine accounting related config loading routine doc strings --- piker/config.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/piker/config.py b/piker/config.py index 97989fe2f..0220f3e6d 100644 --- a/piker/config.py +++ b/piker/config.py @@ -309,9 +309,9 @@ def load_account( ) -> tuple[dict, Path]: ''' Load a accounting (with positions) file from - $PIKER_CONFIG_DIR/accounting/account...toml. + $CONFIG_DIR/accounting/account...toml - Where normally $PIKER_CONFIG_DIR = ~/.config/piker/ + Where normally $CONFIG_DIR = ~/.config/piker/ and we implicitly create a accounting subdir which should normally be linked to a git repo managed by the user B) @@ -366,7 +366,13 @@ def load_ledger( acctid: str, ) -> tuple[dict, Path]: + ''' + Load a ledger (TOML) file from user's config directory: + $CONFIG_DIR/accounting/ledgers/trades__.toml + + Return its `dict`-content and file path. + ''' ldir: Path = _config_dir / 'accounting' / 'ledgers' if not ldir.is_dir(): ldir.mkdir() From 909f88021151c3f97691e414e79b30b7e1dbfd0a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 17 Jun 2023 14:41:41 -0400 Subject: [PATCH 71/85] ib: prep for passing `Client` to data reset hacker Since we want to be able to support user-configurable vnc socketaddrs, this preps for passing the piker client direct into the vnc hacker routine so that we can (eventually load) and read the ib brokers config settings into the client and then read those in the `asyncvnc` task spawner. --- piker/brokers/ib/_util.py | 12 ++++++++++-- piker/brokers/ib/broker.py | 4 ++-- piker/brokers/ib/feed.py | 11 +++++++---- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/piker/brokers/ib/_util.py b/piker/brokers/ib/_util.py index 5e64ab0b9..4adb32eac 100644 --- a/piker/brokers/ib/_util.py +++ b/piker/brokers/ib/_util.py @@ -21,13 +21,20 @@ ''' from __future__ import annotations from functools import partial -from typing import Literal +from typing import ( + Literal, + TYPE_CHECKING, +) import subprocess import tractor from .._util import get_logger +if TYPE_CHECKING: + from .api import Client + from ib_insync import IB + log = get_logger('piker.brokers.ib') _reset_tech: Literal[ @@ -42,7 +49,8 @@ async def data_reset_hack( - vnc_host: str, + # vnc_host: str, + client: Client, reset_type: Literal['data', 'connection'], ) -> None: diff --git a/piker/brokers/ib/broker.py b/piker/brokers/ib/broker.py index a5c68c36c..d6c361334 100644 --- a/piker/brokers/ib/broker.py +++ b/piker/brokers/ib/broker.py @@ -512,9 +512,9 @@ async def open_trade_event_stream( async with tractor.to_asyncio.open_channel_from( recv_trade_updates, client=client, - ) as (ibclient, trade_event_stream): + ) as (_, trade_event_stream): - assert ibclient is client.ib + # assert ibclient is client.ib task_status.started(trade_event_stream) await trio.sleep_forever() diff --git a/piker/brokers/ib/feed.py b/piker/brokers/ib/feed.py index d855539a7..f2a008252 100644 --- a/piker/brokers/ib/feed.py +++ b/piker/brokers/ib/feed.py @@ -37,6 +37,7 @@ from async_generator import aclosing from fuzzywuzzy import process as fuzzy +import ib_insync as ibis import numpy as np import pendulum import tractor @@ -50,10 +51,10 @@ ) from .api import ( # _adhoc_futes_set, + Client, con2fqme, log, 
load_aio_clients, - ibis, MethodProxy, open_client_proxies, get_preferred_data_client, @@ -276,7 +277,8 @@ async def wait_on_data_reset( # ) # try to wait on the reset event(s) to arrive, a timeout # will trigger a retry up to 6 times (for now). - client = proxy._aio_ns.ib.client + client: Client = proxy._aio_ns + ib_client: ibis.IB = client.ib done = trio.Event() with trio.move_on_after(timeout) as cs: @@ -285,10 +287,11 @@ async def wait_on_data_reset( log.warning( 'Sending DATA RESET request:\n' - f'{client}' + f'{ib_client.client}' ) res = await data_reset_hack( - vnc_host=client.host, + # vnc_host=client.host, + ib_client=ib_client, reset_type=reset_type, ) From 84613cd5960ee4f48c10d6a70f0ec097e001726c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 17 Jun 2023 16:11:40 -0400 Subject: [PATCH 72/85] clearing._messages: don't require `.symbol` in brokerd side error msgs --- piker/clearing/_messages.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/piker/clearing/_messages.py b/piker/clearing/_messages.py index 240c340b9..219749716 100644 --- a/piker/clearing/_messages.py +++ b/piker/clearing/_messages.py @@ -242,7 +242,8 @@ class BrokerdStatus(Struct): 'closed', ] - account: str + # TODO: do we need this? + account: str | None = None, name: str = 'status' filled: float = 0.0 reason: str = '' @@ -287,9 +288,11 @@ class BrokerdError(Struct): ''' oid: str - symbol: str reason: str + # TODO: drop this right? + symbol: str | None = None + # if no brokerd order request was actually submitted (eg. we errored # at the ``pikerd`` layer) then there will be ``reqid`` allocated. reqid: int | str | None = None From b28b38afab89c154491fd36ebad81a598d009615 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 17 Jun 2023 16:50:56 -0400 Subject: [PATCH 73/85] Fix double cancel bug! Not sure how this lasted so long without complaint (literally since we added history 1m OHLC it seems; guess it means most backends are pretty tolerant XD ) but we've been sending 2 cancels per order (dialog) due to the mirrored lines on each chart: 1s and 1m. This fixes that by reworking the `OrderMode` methods to be a bit more sane and less conflated with the graphics (lines) layer. Deatz: - add new methods: - `.oids_from_lines()` line -> oid extraction, - `.cancel_orders()` which makes the order client cancel requests from a `oids: list[str]`. - re-impl `.cancel_all_orders()` and `.cancel_orders_under_cursor()` to use the above methods thus fixing the original bug B) --- piker/ui/order_mode.py | 89 +++++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 36 deletions(-) diff --git a/piker/ui/order_mode.py b/piker/ui/order_mode.py index 2cd22610c..3e07eb5e9 100644 --- a/piker/ui/order_mode.py +++ b/piker/ui/order_mode.py @@ -31,6 +31,7 @@ ) import uuid +from bidict import bidict import tractor import trio from PyQt5.QtCore import Qt @@ -601,50 +602,65 @@ def on_cancel( ) def cancel_orders_under_cursor(self) -> list[str]: - return self.cancel_orders_from_lines( - self.lines.lines_under_cursor() - ) - - def cancel_all_orders(self) -> list[str]: - ''' - Cancel all orders for the current chart. 
- - ''' - return self.cancel_orders_from_lines( - self.lines.all_lines() + return self.cancel_orders( + self.oids_from_lines( + self.lines.lines_under_cursor() + ) ) - def cancel_orders_from_lines( + def oids_from_lines( self, lines: list[LevelLine], - ) -> list[str]: + ) -> list[Dialog]: - ids: list = [] - if lines: - key = self.multistatus.open_status( - f'cancelling {len(lines)} orders', - final_msg=f'cancelled {len(lines)} orders', - group_key=True - ) + oids: set[str] = set() + for line in lines: + dialog: Dialog = getattr(line, 'dialog', None) + oid: str = dialog.uuid + if ( + dialog + and oid not in oids + ): + oids.add(oid) - # cancel all active orders and triggers - for line in lines: - dialog = getattr(line, 'dialog', None) + return oids - if dialog: - oid = dialog.uuid + def cancel_orders( + self, + oids: list[str], - cancel_status_close = self.multistatus.open_status( - f'cancelling order {oid}', - group_key=key, - ) - dialog.last_status_close = cancel_status_close + ) -> None: + ''' + Cancel all orders from a list of order ids: `oids`. - ids.append(oid) - self.client.cancel_nowait(uuid=oid) + ''' + key = self.multistatus.open_status( + f'cancelling {len(oids)} orders', + final_msg=f'cancelled orders:\n{oids}', + group_key=True + ) + for oid in oids: + dialog: Dialog = self.dialogs[oid] + self.client.cancel_nowait(uuid=oid) + cancel_status_close = self.multistatus.open_status( + f'cancelling order {oid}', + group_key=key, + ) + dialog.last_status_close = cancel_status_close - return ids + def cancel_all_orders(self) -> None: + ''' + Cancel all unique orders / executions by extracting unique + order ids from all order lines and then submitting cancel + requests for each dialog. + + ''' + return self.cancel_orders( + self.oids_from_lines( + self.lines.all_lines() + ) + ) def load_unknown_dialog_from_msg( self, @@ -750,7 +766,7 @@ async def open_order_mode( trackers: dict[str, PositionTracker] = {} # load account names from ``brokers.toml`` - accounts_def = config.load_accounts( + accounts_def: bidict[str, str | None] = config.load_accounts( providers=[mkt.broker], ) @@ -1135,8 +1151,9 @@ async def process_trade_msg( action = order.action details = msg.brokerd_msg - # TODO: put the actual exchange timestamp? - # TODO: some kinda progress system? + # TODO: state tracking: + # - put the actual exchange timestamp? + # - some kinda progress system? # NOTE: currently the ``kraken`` openOrders sub # doesn't deliver their engine timestamp as part of From a149e71fb1012e698e35a43bb4f07f0801653ff2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 19 Jun 2023 09:54:59 -0400 Subject: [PATCH 74/85] ib: pull vnc sockaddrs from brokers.toml config if defined --- piker/brokers/ib/_util.py | 30 ++++++++++++++++++++++++++---- piker/brokers/ib/api.py | 12 +++++++++--- piker/brokers/ib/feed.py | 6 ++---- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/piker/brokers/ib/_util.py b/piker/brokers/ib/_util.py index 4adb32eac..f23aa99b4 100644 --- a/piker/brokers/ib/_util.py +++ b/piker/brokers/ib/_util.py @@ -81,9 +81,20 @@ async def data_reset_hack( that need to be wrangle. ''' + ib_client: IB = client.ib + + # look up any user defined vnc socket address mapped from + # a particular API socket port. 
+ api_port: str = str(ib_client.client.port) + vnc_host: str + vnc_port: int + vnc_host, vnc_port = client.conf['vnc_addrs'].get( + api_port, + ('localhost', 3003) + ) no_setup_msg:str = ( - 'No data reset hack test setup for {vnc_host}!\n' + f'No data reset hack test setup for {vnc_host}!\n' 'See setup @\n' 'https://github.com/pikers/piker/tree/master/piker/brokers/ib' ) @@ -96,6 +107,7 @@ async def data_reset_hack( partial( vnc_click_hack, host=vnc_host, + port=vnc_port, ) ) except OSError: @@ -104,7 +116,7 @@ async def data_reset_hack( return False try: - import i3ipc + import i3ipc # noqa (since a deps dynamic check) except ModuleNotFoundError: log.warning(no_setup_msg) return False @@ -128,7 +140,8 @@ async def data_reset_hack( async def vnc_click_hack( - host: str = 'localhost', + host: str, + port: int, reset_type: str = 'data' ) -> None: ''' @@ -154,8 +167,12 @@ async def vnc_click_hack( async with asyncvnc.connect( host, - port=3003, + port=port, + + # TODO: doesn't work see: + # https://github.com/barneygale/asyncvnc/issues/7 # password='ibcansmbz', + ) as client: # move to middle of screen @@ -169,6 +186,11 @@ async def vnc_click_hack( def i3ipc_xdotool_manual_click_hack() -> None: + ''' + Do the data reset hack but expecting a local X-window using `xdotool`. + + ''' + import i3ipc i3 = i3ipc.Connection() # TODO: might be worth offering some kinda api for grabbing diff --git a/piker/brokers/ib/api.py b/piker/brokers/ib/api.py index c6ad0b502..171578aa9 100644 --- a/piker/brokers/ib/api.py +++ b/piker/brokers/ib/api.py @@ -385,8 +385,14 @@ def __init__( self, ib: IB, + config: dict[str, Any], ) -> None: + + # stash `brokers.toml` config on client for user settings + # as needed throughout this backend (eg. vnc sockaddr). + self.conf = config + self.ib = ib self.ib.RaiseRequestErrors = True @@ -1267,7 +1273,7 @@ async def load_aio_clients( ''' global _accounts2clients, _client_cache, _scan_ignore - conf = get_config() + conf: dict[str, Any] = get_config() ib = None client = None @@ -1333,7 +1339,7 @@ async def load_aio_clients( timeout=connect_timeout, ) # create and cache client - client = Client(ib) + client = Client(ib=ib, config=conf) # update all actor-global caches log.info(f"Caching client for {sockaddr}") @@ -1466,7 +1472,7 @@ def get_preferred_data_client( ''' conf = get_config() - data_accounts = conf['prefer_data_account'] + data_accounts: list[str] = conf['prefer_data_account'] for name in data_accounts: client = clients.get(f'ib.{name}') diff --git a/piker/brokers/ib/feed.py b/piker/brokers/ib/feed.py index f2a008252..2ceb8acb0 100644 --- a/piker/brokers/ib/feed.py +++ b/piker/brokers/ib/feed.py @@ -278,7 +278,6 @@ async def wait_on_data_reset( # try to wait on the reset event(s) to arrive, a timeout # will trigger a retry up to 6 times (for now). 
client: Client = proxy._aio_ns - ib_client: ibis.IB = client.ib done = trio.Event() with trio.move_on_after(timeout) as cs: @@ -287,11 +286,10 @@ async def wait_on_data_reset( log.warning( 'Sending DATA RESET request:\n' - f'{ib_client.client}' + f'{client.ib.client}' ) res = await data_reset_hack( - # vnc_host=client.host, - ib_client=ib_client, + client=client, reset_type=reset_type, ) From 80461e18a5cf04d07b7afc8324543ea818ed052f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 19 Jun 2023 13:05:34 -0400 Subject: [PATCH 75/85] Use `MktPair.price_tick: Decimal` in dark triggers This was actually incorrect prior, we were rounding triggered limit orders with the `.size_tick` value's digits when we should have been using the `.price_tick` (facepalm). So fix that and compute the rounding number of digits (as passed to the round(, ndigits=)` builtin) and store it in the `DarkBook.triggers` tuples so that at trigger/match time the round call is done *just prior* to msg send to `brokerd` given the last known live L1 queue price. --- piker/clearing/_ems.py | 66 ++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/piker/clearing/_ems.py b/piker/clearing/_ems.py index 68639ae87..967821412 100644 --- a/piker/clearing/_ems.py +++ b/piker/clearing/_ems.py @@ -24,6 +24,7 @@ # ChainMap, ) from contextlib import asynccontextmanager as acm +from decimal import Decimal from math import isnan from pprint import pformat import time @@ -49,7 +50,7 @@ from ..data._normalize import iterticks from ..accounting._mktinfo import ( unpack_fqme, - float_digits, + dec_digits, ) from ..ui._notify import notify_from_ems_status_msg from ..data.types import Struct @@ -130,11 +131,16 @@ class DarkBook(Struct): triggers: dict[ str, # symbol dict[ - str, # uuid + str, # uuid for triggerable execution tuple[ Callable[[float], bool], # predicate - str, # name - dict, # cmd / msg type + tuple[str, ...], # tickfilter + dict | Order, # cmd / msg type + + # live submission constraint parameters + float, # percent_away max price diff + float, # abs_diff_away max price diff + int, # min_tick_digits to round the clearable price ] ] ] = {} @@ -177,7 +183,8 @@ async def clear_dark_triggers( async for quotes in quote_stream: # start = time.time() for sym, quote in quotes.items(): - execs = book.triggers.get(sym, {}) + # TODO: make this a msg-compat struct + execs: tuple = book.triggers.get(sym, {}) for tick in iterticks( quote, # dark order price filter(s) @@ -200,7 +207,8 @@ async def clear_dark_triggers( # TODO: send this msg instead? cmd, percent_away, - abs_diff_away + abs_diff_away, + price_tick_digits, ) in ( tuple(execs.items()) ): @@ -233,8 +241,11 @@ async def clear_dark_triggers( size=size, ): bfqme: str = symbol.replace(f'.{broker}', '') - submit_price = price + abs_diff_away - resp = 'triggered' # hidden on client-side + submit_price: float = round( + price + abs_diff_away, + ndigits=price_tick_digits, + ) + resp: str = 'triggered' # hidden on client-side log.info( f'Dark order triggered for price {price}\n' @@ -264,11 +275,11 @@ async def clear_dark_triggers( ) # remove exec-condition from set - log.info(f'removing pred for {oid}') - pred = execs.pop(oid, None) - if not pred: + log.info(f'Removing trigger for {oid}') + trigger: tuple | None = execs.pop(oid, None) + if not trigger: log.warning( - f'pred for {oid} was already removed!?' + f'trigger for {oid} was already removed!?' 
) # update actives @@ -1215,14 +1226,15 @@ async def process_client_order_cmds( and status.resp == 'dark_open' ): # remove from dark book clearing - entry = dark_book.triggers[fqme].pop(oid, None) + entry: tuple | None = dark_book.triggers[fqme].pop(oid, None) if entry: ( pred, tickfilter, cmd, percent_away, - abs_diff_away + abs_diff_away, + min_tick_digits, ) = entry # tell client side that we've cancelled the @@ -1357,33 +1369,36 @@ async def process_client_order_cmds( # TODO: make this configurable from our top level # config, prolly in a .clearing` section? spread_slap: float = 5 - min_tick = float(flume.mkt.size_tick) - min_tick_digits = float_digits(min_tick) + min_tick = Decimal(flume.mkt.price_tick) + min_tick_digits: int = dec_digits(min_tick) + + tickfilter: tuple[str, ...] + percent_away: float if action == 'buy': tickfilter = ('ask', 'last', 'trade') - percent_away = 0.005 + percent_away: float = 0.005 # TODO: we probably need to scale this based # on some near term historical spread # measure? - abs_diff_away = round( + abs_diff_away = float(round( spread_slap * min_tick, ndigits=min_tick_digits, - ) + )) elif action == 'sell': tickfilter = ('bid', 'last', 'trade') - percent_away = -0.005 - abs_diff_away = round( + percent_away: float = -0.005 + abs_diff_away: float = float(round( -spread_slap * min_tick, ndigits=min_tick_digits, - ) + )) else: # alert tickfilter = ('trade', 'utrade', 'last') - percent_away = 0 - abs_diff_away = 0 + percent_away: float = 0 + abs_diff_away: float = 0 # submit execution/order to EMS scan loop # NOTE: this may result in an override of an existing @@ -1395,7 +1410,8 @@ async def process_client_order_cmds( tickfilter, req, percent_away, - abs_diff_away + abs_diff_away, + min_tick_digits, ) resp = 'dark_open' From 736bbbff7746dc88e10a2ffec7bce899017ddc65 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 19 Jun 2023 14:34:47 -0400 Subject: [PATCH 76/85] view_mode: drop rounding dispersions and "debug print" --- piker/ui/view_mode.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/piker/ui/view_mode.py b/piker/ui/view_mode.py index 313dd8d3a..d785c67a2 100644 --- a/piker/ui/view_mode.py +++ b/piker/ui/view_mode.py @@ -358,9 +358,9 @@ def overlay_viewlists( start_t = row_start['time'] # returns scalars - r_up = (ymx - y_ref) / y_ref - r_down = (ymn - y_ref) / y_ref - disp = round(r_up - r_down, ndigits=16) + r_up: float = (ymx - y_ref) / y_ref + r_down: float = (ymn - y_ref) / y_ref + disp: float = r_up - r_down msg = ( f'Viz[{viz.name}][{key}]: @{chart_name}\n' @@ -552,7 +552,6 @@ def overlay_viewlists( mxmns_by_common_pi and not overlay_table ): - print("WAATT THE FUCK") # move to next chart in linked set since # no overlay transforming is needed. continue From 020a3955d2fec37705af69680db31d489ae17d73 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 20 Jun 2023 12:29:50 -0400 Subject: [PATCH 77/85] Always use fully expanded FQME throughout `.clearing` Since crypto backends now also may expand an FQME like `xbteur.kraken` -> `xbteur.spot.kraken` (by filling in the venue token), we need to use this identifier when looking up per-market order dialogs or submitting new requests. The simple fix is to simply look up that expanded from from the `Feed.flumes` table which is always keyed by the `MktPair.fqme: str` - the expanded form. 
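
(Editorial aside, not part of the patch below: the fix described above amounts to always re-keying on whatever fully-expanded fqme the feed layer reports. Here is a minimal, self-contained sketch of that keying rule, assuming a plain dict stands in for `Feed.flumes` and using a hypothetical `expand_fqme()` helper; the patch itself simply takes the single `feed.flumes` key since the feed is opened for exactly one market.)

    # illustrative only: a plain dict stands in for `Feed.flumes`, which
    # is keyed by the fully expanded `MktPair.fqme`; `expand_fqme()` is
    # a hypothetical helper, not an API introduced by this patch set.
    def expand_fqme(
        requested: str,
        flumes: dict[str, object],
    ) -> str:
        if requested in flumes:
            # already fully qualified, eg. 'xbteur.spot.kraken'
            return requested

        # otherwise match on symbol + broker and let the venue token
        # (eg. '.spot.') filled in by the backend bridge the gap.
        sym, _, broker = requested.partition('.')
        for fqme in flumes:
            if (
                fqme.startswith(f'{sym}.')
                and fqme.endswith(f'.{broker}')
            ):
                return fqme

        raise KeyError(f'No flume keyed for {requested!r}')

    # eg.:
    # >>> expand_fqme('xbteur.kraken', {'xbteur.spot.kraken': None})
    # 'xbteur.spot.kraken'
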
--- piker/clearing/_ems.py | 22 ++++++++++++---------- piker/clearing/_paper_engine.py | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/piker/clearing/_ems.py b/piker/clearing/_ems.py index 967821412..1bb57ae79 100644 --- a/piker/clearing/_ems.py +++ b/piker/clearing/_ems.py @@ -395,9 +395,10 @@ def mk_paper_ep(): if trades_endpoint: break else: - raise RuntimeError( + log.warning( f'No live trading EP found: {brokermod.name}?' ) + exec_mode: str = 'paper' if ( trades_endpoint is not None @@ -408,9 +409,6 @@ def mk_paper_ep(): trades_endpoint, ) - else: - exec_mode: str = 'paper' - @acm async def maybe_open_paper_ep(): if exec_mode == 'paper': @@ -523,6 +521,7 @@ class Router(Struct): ] = defaultdict(set) # TODO: mapping of ems dialog ids to msg flow history + # - use the new ._util.OrderDialogs? # msgflows: defaultdict[ # str, # ChainMap[dict[str, dict]], @@ -641,6 +640,9 @@ async def open_trade_relays( loglevel=loglevel, ) as feed, ): + # extract expanded fqme in case input was of a less + # qualified form, eg. xbteur.kraken -> xbteur.spot.kraken + fqme: str = list(feed.flumes.keys())[0] brokername, _, _, _ = unpack_fqme(fqme) brokermod = feed.mods[brokername] broker = brokermod.name @@ -675,7 +677,7 @@ async def open_trade_relays( client_ready = trio.Event() task_status.started( - (relay, feed, client_ready) + (fqme, relay, feed, client_ready) ) # sync to the client side by waiting for the stream @@ -1468,13 +1470,13 @@ async def cached_mngr( loglevel: str = 'info', ): - relay, feed, client_ready = await _router.nursery.start( + fqme, relay, feed, client_ready = await _router.nursery.start( _router.open_trade_relays, fqme, exec_mode, loglevel, ) - yield relay, feed, client_ready + yield fqme, relay, feed, client_ready async with tractor.trionics.maybe_open_context( acm_func=cached_mngr, @@ -1487,13 +1489,13 @@ async def cached_mngr( key=cache_on_fqme_unless_paper, ) as ( cache_hit, - (relay, feed, client_ready) + (fqme, relay, feed, client_ready) ): if cache_hit: log.info(f'Reusing existing trades relay for {fqme}:\n' f'{relay}\n') - yield relay, feed, client_ready + yield fqme, relay, feed, client_ready @tractor.context @@ -1576,7 +1578,7 @@ async def _emsd_main( fqme, exec_mode, loglevel, - ) as (relay, feed, client_ready): + ) as (fqme, relay, feed, client_ready): brokerd_stream = relay.brokerd_stream dark_book = _router.get_dark_book(broker) diff --git a/piker/clearing/_paper_engine.py b/piker/clearing/_paper_engine.py index 4220bf63e..34e7ec58e 100644 --- a/piker/clearing/_paper_engine.py +++ b/piker/clearing/_paper_engine.py @@ -574,7 +574,7 @@ async def open_trade_dialog( if fqme: bs_fqme, _, broker = fqme.rpartition('.') mkt, _ = await brokermod.get_mkt_info(bs_fqme) - mkt_by_fqme[fqme] = mkt + mkt_by_fqme[mkt.fqme] = mkt # for each sym in the ledger load it's `MktPair` info for tid, txdict in ledger.data.items(): From ad31631a8f2d227e79ba58542277d11865010094 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 20 Jun 2023 12:37:48 -0400 Subject: [PATCH 78/85] Always round order pane $limit to 3 digits --- piker/ui/_position.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/piker/ui/_position.py b/piker/ui/_position.py index a2e6c19ea..0cf181369 100644 --- a/piker/ui/_position.py +++ b/piker/ui/_position.py @@ -294,7 +294,10 @@ def apply_setting( f'limit must > then current pp: {dsize}' ) # reset position size value - alloc.currency_limit = dsize + alloc.currency_limit = round( + dsize, + ndigits=3, + ) return False 
alloc.currency_limit = value From 6c10c2f6234d5dd0d447181435bf05a788cbaf9e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 20 Jun 2023 14:40:57 -0400 Subject: [PATCH 79/85] order_mode: add comment around `Order` being a dict bug --- piker/ui/order_mode.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/piker/ui/order_mode.py b/piker/ui/order_mode.py index 3e07eb5e9..0b3a18970 100644 --- a/piker/ui/order_mode.py +++ b/piker/ui/order_mode.py @@ -1143,13 +1143,17 @@ async def process_trade_msg( case Status(resp='fill'): # handle out-of-piker fills reporting? - order: Order = client._sent_orders.get(oid) - if not order: + order: Order | None + if not (order := client._sent_orders.get(oid)): + + # set it from last known request msg log.warning(f'order {oid} is unknown') order = msg.req - action = order.action - details = msg.brokerd_msg + # XXX TODO: have seen order be a dict here!? + # that should never happen tho? + action: str = order.action + details: dict = msg.brokerd_msg # TODO: state tracking: # - put the actual exchange timestamp? From c57d4b2181be87b444c64fe45bfaf6586794999f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 20 Jun 2023 15:24:07 -0400 Subject: [PATCH 80/85] ib: map some tick types particulary "volumeRate" to avoid auto-range issue --- piker/brokers/ib/feed.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/piker/brokers/ib/feed.py b/piker/brokers/ib/feed.py index 2ceb8acb0..07e9b44f2 100644 --- a/piker/brokers/ib/feed.py +++ b/piker/brokers/ib/feed.py @@ -73,6 +73,7 @@ from piker.data.validate import FeedInit +# XXX NOTE: See available types table docs: # https://interactivebrokers.github.io/tws-api/tick_types.html tick_types = { 77: 'trade', @@ -92,9 +93,9 @@ # ``ib_insync`` already packs these into # quotes under the following fields. - # 55: 'trades_per_min', # `'tradeRate'` - # 56: 'vlm_per_min', # `'volumeRate'` - # 89: 'shortable', # `'shortableShares'` + 55: 'trades_per_min', # `'tradeRate'` + 56: 'vlm_per_min', # `'volumeRate'` + 89: 'shortable_units', # `'shortableShares'` } @@ -1048,7 +1049,6 @@ async def reset_on_feed(): async for ticker in stream: quote = normalize(ticker) fqme = quote['fqme'] - # print(f'sending {fqme}:\n{quote}') await send_chan.send({fqme: quote}) # ugh, clear ticks since we've consumed them From d42aa60325ec1e36b5be83698c14e30c57145cf4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 20 Jun 2023 15:54:28 -0400 Subject: [PATCH 81/85] Define the flattened "fundamental double auction" emitted tick type set --- piker/data/_sampling.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index b42d0d8f1..641edf539 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -740,13 +740,21 @@ async def sample_and_broadcast( ) -# a working tick-type-classes template +# tick-type-classes template for all possible "lowest level" events +# that can can be emitted by the "top of book" L1 queues and +# price-matching (with eventual clearing) in a double auction +# market (queuing) system. _tick_groups = { 'clears': {'trade', 'dark_trade', 'last'}, 'bids': {'bid', 'bsize'}, 'asks': {'ask', 'asize'}, } +# XXX alo define the flattened set of all such "fundamental ticks" +# so that it can be used as filter, eg. 
in the graphics display +# loop to compute running windowed y-ranges B) +_auction_ticks: set[str] = set.union(*_tick_groups.values()) + def frame_ticks( first_quote: dict, From c4277ebd8e7a2f8b9acc37833fdf07791c5b9b7e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 20 Jun 2023 15:54:47 -0400 Subject: [PATCH 82/85] .ui._display: filter y-ranging to `_auction_ticks` Since we only ever want to do incremental y-range calcs based on the price always skip any tick types emitted by the data daemon which aren't defined in the fundamental set. Further, toss in a new `debug_n_trade: bool` toggle which by default turns off all loggin and profiler calls; if you want to do profiling this has to now be adjusted manually! --- piker/ui/_display.py | 101 ++++++++++++++++++++++++++++++------------- 1 file changed, 70 insertions(+), 31 deletions(-) diff --git a/piker/ui/_display.py b/piker/ui/_display.py index e3d06e1ee..1884d018b 100644 --- a/piker/ui/_display.py +++ b/piker/ui/_display.py @@ -50,6 +50,7 @@ ) from ..data._sampling import ( _tick_groups, + _auction_ticks, open_sample_stream, ) from ._axes import YAxisLabel @@ -518,15 +519,21 @@ def graphics_update_cycle( trigger_all: bool = False, # flag used by prepend history updates prepend_update_index: int | None = None, + # NOTE: this has to be manually turned on in code (or by + # caller) to get profiling since by default we never want the + # overhead! + debug_n_trace: bool = False, + ) -> None: - profiler = Profiler( - msg=f'Graphics loop cycle for: `{ds.fqme}`', - disabled=not pg_profile_enabled(), - ms_threshold=ms_slower_then, - delayed=True, - # ms_threshold=4, - ) + if debug_n_trace: + profiler = Profiler( + msg=f'Graphics loop cycle for: `{ds.fqme}`', + disabled=not pg_profile_enabled(), + ms_threshold=ms_slower_then, + delayed=True, + # ms_threshold=4, + ) # TODO: SPEEDing this all up.. # - optimize this whole graphics stack with ``numba`` hopefully @@ -558,7 +565,8 @@ def graphics_update_cycle( do_rt_update, should_tread, ) = main_viz.incr_info(ds=ds) - profiler('`.incr_info()`') + if debug_n_trace: + profiler('`.incr_info()`') # TODO: we should only run mxmn when we know # an update is due via ``do_px_step`` above. @@ -596,7 +604,8 @@ def graphics_update_cycle( # since .interact_graphics_cycle() also calls it? # I guess we can add a guard in there? _, i_read_range, _ = main_viz.update_graphics() - profiler('`Viz.update_graphics()` call') + if debug_n_trace: + profiler('`Viz.update_graphics()` call') # don't real-time "shift" the curve to the # left unless we get one of the following: @@ -611,7 +620,8 @@ def graphics_update_cycle( # if vlm_chart: # vlm_chart.increment_view(datums=append_diff) - profiler('view incremented') + if debug_n_trace: + profiler('view incremented') # NOTE: do this **after** the tread to ensure we take the yrange # from the most current view x-domain. @@ -623,16 +633,25 @@ def graphics_update_cycle( i_read_range, main_viz, ds.vlm_viz, - profiler, + profiler if debug_n_trace else None, ) - profiler(f'{fqme} `multi_maxmin()` call') + if debug_n_trace: + profiler(f'{fqme} `multi_maxmin()` call') # iterate frames of ticks-by-type such that we only update graphics # using the last update per type where possible. 
ticks_by_type = quote.get('tbt', {}) for typ, ticks in ticks_by_type.items(): + if typ not in _auction_ticks: + if debug_n_trace: + log.warning( + 'Skipping non-auction-native `{typ}` ticks:\n' + f'{ticks}\n' + ) + continue + # NOTE: ticks are `.append()`-ed to the `ticks_by_type: dict` by the # `._sampling.uniform_rate_send()` loop tick = ticks[-1] # get most recent value @@ -652,16 +671,18 @@ def graphics_update_cycle( if ( price < mn ): + if debug_n_trace: + log.info(f'{this_viz.name} new MN from TICK {mn} -> {price}') mn = price yrange_margin = 0.16 - # # print(f'{this_viz.name} new MN from TICK {mn}') if ( price > mx ): + if debug_n_trace: + log.info(f'{this_viz.name} new MX from TICK {mx} -> {price}') mx = price yrange_margin = 0.16 - # # print(f'{this_viz.name} new MX from TICK {mx}') # mx = max(price, mx) # mn = min(price, mn) @@ -719,7 +740,8 @@ def graphics_update_cycle( ): l1.bid_label.update_fields({'level': price, 'size': size}) - profiler('L1 labels updates') + if debug_n_trace: + profiler('L1 labels updates') # Y-autoranging: adjust y-axis limits based on state tracking # of previous "last" L1 values which are in view. @@ -737,9 +759,14 @@ def graphics_update_cycle( # complain about out-of-range outliers which can show up # in certain annoying feeds (like ib).. if ( - abs(mx_diff) > .25 * lmx - or - abs(mn_diff) > .25 * lmn + lmx + and lmn + and ( + abs(mx_diff) > .25 * lmx + or + abs(mn_diff) > .25 * lmn + ) + and debug_n_trace ): log.error( f'WTF MN/MX IS WAY OFF:\n' @@ -750,6 +777,9 @@ def graphics_update_cycle( f'mx_diff: {mx_diff}\n' f'mn_diff: {mn_diff}\n' ) + chart.pause_all_feeds() + breakpoint() + chart.resume_all_feeds() # TODO: track local liv maxmin without doing a recompute all the # time..plus, just generally the user is more likely to be @@ -792,7 +822,8 @@ def graphics_update_cycle( }, } ) - profiler('main vb y-autorange') + if debug_n_trace: + profiler('main vb y-autorange') # SLOW CHART y-auto-range resize casd # (NOTE: still is still inside the y-range @@ -820,7 +851,8 @@ def graphics_update_cycle( # f'datetime: {dt}\n' # ) - # profiler('hist `Viz.incr_info()`') + # if debug_n_trace: + # profiler('hist `Viz.incr_info()`') # hist_chart = ds.hist_chart # if ( @@ -876,8 +908,8 @@ def graphics_update_cycle( # `draw_last_datum()` .. only_last_uppx=True, ) - - profiler('overlays updates') + if debug_n_trace: + profiler('overlays updates') # volume chart logic.. # TODO: can we unify this with the above loop? @@ -925,7 +957,8 @@ def graphics_update_cycle( # connected to update accompanying overlay # graphics.. ) - profiler('`main_vlm_viz.update_graphics()`') + if debug_n_trace: + profiler('`main_vlm_viz.update_graphics()`') if ( mx_vlm_in_view @@ -948,7 +981,8 @@ def graphics_update_cycle( }, }, ) - profiler('`vlm_chart.view.interact_graphics_cycle()`') + if debug_n_trace: + profiler('`vlm_chart.view.interact_graphics_cycle()`') # update all downstream FSPs for curve_name, viz in vlm_vizs.items(): @@ -968,7 +1002,8 @@ def graphics_update_cycle( curve_name, array_key=curve_name, ) - profiler(f'vlm `Viz[{viz.name}].update_graphics()`') + if debug_n_trace: + profiler(f'vlm `Viz[{viz.name}].update_graphics()`') # is this even doing anything? 
# (pretty sure it's the real-time @@ -980,9 +1015,10 @@ def graphics_update_cycle( # do_linked_charts=False, # do_overlay_scaling=False, # ) - profiler( - f'Viz[{viz.name}].plot.vb.interact_graphics_cycle()`' - ) + if debug_n_trace: + profiler( + f'Viz[{viz.name}].plot.vb.interact_graphics_cycle()`' + ) # even if we're downsampled bigly # draw the last datum in the final @@ -996,11 +1032,14 @@ def graphics_update_cycle( # always update the last datum-element # graphic for all vizs viz.draw_last(array_key=curve_name) - profiler(f'vlm `Viz[{viz.name}].draw_last()`') + if debug_n_trace: + profiler(f'vlm `Viz[{viz.name}].draw_last()`') - profiler('vlm Viz all updates complete') + if debug_n_trace: + profiler('vlm Viz all updates complete') - profiler.finish() + if debug_n_trace: + profiler.finish() async def link_views_with_region( From a44bc4aeb313a204919b7bf686071822f22acad3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 20 Jun 2023 16:46:30 -0400 Subject: [PATCH 83/85] binance: pre-#520 fixes for `open_cached_client()` import and struct-field casting --- piker/brokers/binance.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/piker/brokers/binance.py b/piker/brokers/binance.py index 03840568c..a8791ae9f 100644 --- a/piker/brokers/binance.py +++ b/piker/brokers/binance.py @@ -48,11 +48,11 @@ MktPair, digits_to_dec, ) -from .._cacheables import open_cached_client -from ._util import ( +from . import ( resproc, SymbolNotFound, DataUnavailable, + open_cached_client, ) from ._util import ( get_logger, @@ -443,15 +443,14 @@ async def stream_messages( # decode/encode, see: # https://jcristharif.com/msgspec/structs.html#type-validation msg = AggTrade(**msg) - msg.typecast() yield 'trade', { 'symbol': msg.s, 'last': msg.p, 'brokerd_ts': time.time(), 'ticks': [{ 'type': 'trade', - 'price': msg.p, - 'size': msg.q, + 'price': float(msg.p), + 'size': float(msg.q), 'broker_ts': msg.T, }], } From 35359861bb93dd19b11deb3c57d5b6ff5e31e87c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 20 Jun 2023 17:57:00 -0400 Subject: [PATCH 84/85] .brokers._daemon: add notes around needed brokerd respawn tech --- piker/brokers/_daemon.py | 58 +++++++++++++++++++++++++++------------- piker/data/feed.py | 2 +- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/piker/brokers/_daemon.py b/piker/brokers/_daemon.py index 34cbfca43..368e81164 100644 --- a/piker/brokers/_daemon.py +++ b/piker/brokers/_daemon.py @@ -19,9 +19,12 @@ ``brokerd``. ''' +from __future__ import annotations from contextlib import ( asynccontextmanager as acm, ) +from typing import TYPE_CHECKING +import exceptiongroup as eg import tractor import trio @@ -29,6 +32,9 @@ from . import _util from . import get_brokermod +if TYPE_CHECKING: + from ..data import _FeedsBus + # `brokerd` enabled modules # TODO: move this def to the `.data` subpkg.. 
# NOTE: keeping this list as small as possible is part of our caps-sec @@ -69,24 +75,40 @@ async def _setup_persistent_brokerd( # set global for this actor to this new process-wide instance B) _util.log = log - from piker.data.feed import ( - _bus, - get_feed_bus, - ) - global _bus - assert not _bus - - async with trio.open_nursery() as service_nursery: - # assign a nursery to the feeds bus for spawning - # background tasks from clients - get_feed_bus(brokername, service_nursery) - - # unblock caller - await ctx.started() - - # we pin this task to keep the feeds manager active until the - # parent actor decides to tear it down - await trio.sleep_forever() + from piker.data import feed + assert not feed._bus + + # allocate a nursery to the bus for spawning background + # tasks to service client IPC requests, normally + # `tractor.Context` connections to explicitly required + # `brokerd` endpoints such as: + # - `stream_quotes()`, + # - `manage_history()`, + # - `allocate_persistent_feed()`, + # - `open_symbol_search()` + # NOTE: see ep invocation details inside `.data.feed`. + try: + async with trio.open_nursery() as service_nursery: + bus: _FeedsBus = feed.get_feed_bus( + brokername, + service_nursery, + ) + assert bus is feed._bus + + # unblock caller + await ctx.started() + + # we pin this task to keep the feeds manager active until the + # parent actor decides to tear it down + await trio.sleep_forever() + + except eg.ExceptionGroup: + # TODO: likely some underlying `brokerd` IPC connection + # broke so here we handle a respawn and re-connect attempt! + # This likely should pair with development of the OCO task + # nusery in dev over @ `tractor` B) + # https://github.com/goodboy/tractor/pull/363 + raise async def spawn_brokerd( diff --git a/piker/data/feed.py b/piker/data/feed.py index 1871db7d6..ea7f360b9 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -190,7 +190,7 @@ def remove_subs( def get_feed_bus( brokername: str, - nursery: Optional[trio.Nursery] = None, + nursery: trio.Nursery | None = None, ) -> _FeedsBus: ''' From 27ec461f3e64b6217870b5a072bef57dab635eb6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 10 Jul 2023 09:40:37 -0400 Subject: [PATCH 85/85] .nativedb: ignore an `expired/` subdir --- piker/storage/nativedb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/storage/nativedb.py b/piker/storage/nativedb.py index ff914245d..274bf0399 100644 --- a/piker/storage/nativedb.py +++ b/piker/storage/nativedb.py @@ -187,7 +187,7 @@ async def list_keys(self) -> list[str]: def index_files(self): for path in self._datadir.iterdir(): - if 'borked' in path.name: + if path.name in {'borked', 'expired',}: continue key: str = path.name.rstrip('.parquet')
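
(Editorial aside appended after the series: the respawn/re-connect behaviour flagged by the TODO in `_setup_persistent_brokerd()` is not implemented anywhere above. The following is only a speculative sketch of the retry-on-`ExceptionGroup` shape that TODO hints at, reusing the `trio` + `exceptiongroup` deps the module already imports; the `run_feed_bus` callable is hypothetical.)

    import exceptiongroup as eg
    import trio

    async def keep_feed_bus_alive(
        run_feed_bus,  # hypothetical async fn: (nursery) -> runs forever
        max_respawns: int = 3,
    ) -> None:
        # speculative: re-open a fresh service nursery (and thus feed bus)
        # whenever an underlying IPC failure surfaces as an exception group.
        for _ in range(max_respawns):
            try:
                async with trio.open_nursery() as service_nursery:
                    service_nursery.start_soon(run_feed_bus, service_nursery)
                    await trio.sleep_forever()
            except eg.ExceptionGroup:
                # likely a broken `brokerd` IPC connection; tear down and
                # retry with a clean slate.
                # NOTE: older `trio` may propagate a lone child error
                # unwrapped, which this sketch would not catch.
                continue

        raise RuntimeError('feed bus kept crashing; giving up on respawn')
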