# Description

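This notebook demonstrates how to use the `MarketData` classes: `ImClientMarketData` and `StitchedMarketData`. An example for `ReplayedMarketData` is still to be added.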

In [1]:
# Reload imported modules automatically so that edits to library code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook.
%matplotlib inline

In [2]:
# Imports are grouped as: standard library, third-party, project-local.
import logging

import pandas as pd
from tqdm.autonotebook import tqdm

import core.config as cconfig
import core.finance.bid_ask as cfibiask
import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hprint as hprint
import im_v2.ccxt.data.client as icdcl
import im_v2.common.universe as ivcu
import market_data.market_data_example as mdmadaex


In [3]:
# Initialize logging for the notebook at INFO verbosity.
log_level = logging.INFO
hdbg.init_logger(verbosity=log_level)

# Logger used by the cells below.
_LOG = logging.getLogger(__name__)

# Log the first element of the system signature (the cell output shows Git and
# machine info).
_LOG.info("%s", henv.get_system_signature()[0])

# Apply the project's notebook configuration.
# NOTE(review): presumably display / plotting options — confirm in `hprint`.
hprint.config_notebook()

[0m[36mINFO[0m: > cmd='/venv/lib/python3.9/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-1dbb370d-c8db-4d0f-9565-fe05bcc897d7.json'
INFO  # Git
  branch_name='CmTask7174_Centralize_get_bid_ask_columns_by_level'
  hash='8d8485293'
  # Last commits:
    * 8d8485293 Toma Jordania CmTask7170: expose universe_version to execution analysis notebook (#7176) (   5 hours ago) Tue Feb 13 12:54:15 2024  (HEAD -> CmTask7174_Centralize_get_bid_ask_columns_by_level, origin/master, origin/HEAD)
    * 66874fec8 Sameep Pote CmTask7179 Display account balance before flattening (#7180)      (  18 hours ago) Mon Feb 12 23:07:04 2024           
    * 45ff312b4 Samarth KaPatel update (#7182)                                                    (  20 hours ago) Mon Feb 12 21:38:17 2024           
# Machine info
  system=Linux
  node name=617f61f51a8b
  release=5.15.0-1052-aws
  version=#57~20.04.1-Ubuntu SMP Mon Jan 15 17:04:56 UTC 2024
  machine=x86_64
  processor=x86_64


# Get asset ids

In [4]:
# Universe version shared by the universe lookup and the IM client configs.
universe_version = "v7.4"
# Keyword arguments forwarded to `ivcu.get_vendor_universe()`.
universe_config = dict(
    vendor="CCXT",
    version=universe_version,
    mode="trade",
    as_full_symbol=True,
)

In [5]:
# Load the vendor universe and keep only a subset (elements 4 and 5) for the
# demonstration.
full_symbols = ivcu.get_vendor_universe(**universe_config)[4:6]
_LOG.info("Full symbols=%s", full_symbols)

INFO  Full symbols=['binance::BNB_USDT', 'binance::BTC_USDT']


In [6]:
# Build the numerical id -> full symbol mapping and keep only the numerical ids.
asset_id_to_full_symbol = ivcu.build_numerical_to_string_id_mapping(full_symbols)
asset_ids = list(asset_id_to_full_symbol.keys())
_LOG.info("Asset ids=%s", asset_ids)

INFO  Asset ids=[8968126878, 1467591036]


# `ImClientMarketData`

In [7]:
# Config for the `ImClientMarketData` example: the query interval, the kwargs
# for the IM client and the options passed to the `MarketData` builder.
im_client_market_data_config = cconfig.Config().from_dict(
    {
        "start_timestamp": pd.Timestamp("2023-09-11T00:00:00", tz="UTC"),
        "end_timestamp": pd.Timestamp("2023-09-11T04:00:00", tz="UTC"),
        # Keyword arguments for `icdcl.CcxtHistoricalPqByTileClient`.
        "im_client": {
            "universe_version": universe_version,
            "root_dir": "s3://cryptokaizen-data-test/v3",
            "partition_mode": "by_year_month",
            "dataset": "ohlcv",
            "contract_type": "futures",
            "data_snapshot": "",
            "aws_profile": "ck",
            "resample_1min": False,
            "version": "v1_0_0",
            "download_universe_version": "v7_3",
            "tag": "downloaded_1min",
        },
        "ts_col_name": "timestamp",
        "columns": None,
        "column_remap": None,
        "filter_data_mode": "assert",
        "wall_clock_time": pd.Timestamp("2100-01-01 00:00:00+00:00"),
    }
)
print(im_client_market_data_config)

start_timestamp: 2023-09-11 00:00:00+00:00
end_timestamp: 2023-09-11 04:00:00+00:00
im_client: 
  universe_version: v7.4
  root_dir: s3://cryptokaizen-data-test/v3
  partition_mode: by_year_month
  dataset: ohlcv
  contract_type: futures
  data_snapshot: 
  aws_profile: ck
  resample_1min: False
  version: v1_0_0
  download_universe_version: v7_3
  tag: downloaded_1min
ts_col_name: timestamp
columns: None
column_remap: None
filter_data_mode: assert
wall_clock_time: 2100-01-01 00:00:00+00:00


In [8]:
# Instantiate the IM client from the `im_client` section of the config.
im_client_kwargs = im_client_market_data_config["im_client"]
ohlcv_im_client = icdcl.CcxtHistoricalPqByTileClient(**im_client_kwargs)
# Build the `MarketData` object on top of the IM client for the selected assets.
market_data_kwargs = {
    "wall_clock_time": im_client_market_data_config["wall_clock_time"],
    "filter_data_mode": im_client_market_data_config["filter_data_mode"],
}
ohlcv_market_data = mdmadaex.get_HistoricalImClientMarketData_example1(
    ohlcv_im_client,
    asset_ids,
    im_client_market_data_config["columns"],
    im_client_market_data_config["column_remap"],
    **market_data_kwargs,
)
# Query the configured interval and preview the first rows.
ohlcv_data = ohlcv_market_data.get_data_for_interval(
    im_client_market_data_config["start_timestamp"],
    im_client_market_data_config["end_timestamp"],
    im_client_market_data_config["ts_col_name"],
    asset_ids,
)
ohlcv_data.head(3)

Unnamed: 0_level_0,asset_id,full_symbol,open,high,low,close,volume,knowledge_timestamp,start_ts
end_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-09-10 20:00:00-04:00,1467591036,binance::BTC_USDT,25829.0,25830.5,25828.4,25828.4,35.063,2023-09-11 01:23:22.496982+00:00,2023-09-10 19:59:00-04:00
2023-09-10 20:00:00-04:00,8968126878,binance::BNB_USDT,212.25,212.29,212.25,212.28,120.72,2023-09-11 01:23:28.294152+00:00,2023-09-10 19:59:00-04:00
2023-09-10 20:01:00-04:00,1467591036,binance::BTC_USDT,25828.4,25835.8,25828.1,25835.7,152.568,2023-09-12 01:23:44.163254+00:00,2023-09-10 20:00:00-04:00


# `StitchedMarketData`

In [9]:
# Config for the `StitchedMarketData` example. It holds the shared query
# interval plus one section per component:
# - `ohlcv_market_data`: IM client kwargs and `MarketData` options for OHLCV data
# - `bid_ask_market_data`: IM client kwargs and `MarketData` options for bid/ask data
# - `stitched_market_data`: options for the stitched `MarketData` object
stitched_market_data_config = {
    "start_timestamp": pd.Timestamp("2023-09-11T00:00:00", tz="UTC"),
    "end_timestamp": pd.Timestamp("2023-09-11T04:00:00", tz="UTC"),
    "ohlcv_market_data": {
        "im_client": {
            "universe_version": universe_version,
            "root_dir": "s3://cryptokaizen-data-test/v3",
            "partition_mode": "by_year_month",
            "dataset": "ohlcv",
            "contract_type": "futures",
            "data_snapshot": "",
            "aws_profile": "ck",
            "resample_1min": False,
            "version": "v1_0_0",
            "download_universe_version": "v7_3",
            "tag": "downloaded_1min",
        },
        "ts_col_name": "timestamp",
        "columns": None,
        "column_remap": None,
        "filter_data_mode": "assert",
    },
    "bid_ask_market_data": {
        # Each key appears exactly once: a repeated key in a dict literal is
        # silently overridden by its last occurrence.
        "im_client": {
            "universe_version": universe_version,
            "dataset": "bid_ask",
            "contract_type": "futures",
            # Data snapshot is not applicable for data version = "v3".
            "data_snapshot": "",
            # Data currently residing in the test bucket.
            "root_dir": "s3://cryptokaizen-data-test/v3",
            "partition_mode": "by_year_month",
            "version": "v1_0_0",
            # Download universe version.
            "download_universe_version": "v7",
            "tag": "resampled_1min",
            "aws_profile": "ck",
        },
        "ts_col_name": "timestamp",
        # TODO(Grisha): for some reason the current filtering mechanism filters out `asset_ids` which
        # makes it impossible to stitch the 2 market data dfs. So adding the necessary columns manually.
        "columns": cfibiask.get_bid_ask_columns_by_level(1)
        + ["asset_id", "full_symbol", "start_ts", "knowledge_timestamp"],
        "column_remap": None,
        "filter_data_mode": "assert",
    },
    "stitched_market_data": {
        "ts_col_name": "timestamp",
        "columns": None,
        "column_remap": None,
        # TODO(Grisha): check why it fails when the mode is `assert`.
        "filter_data_mode": "warn_and_trim",
    },
}
stitched_market_data_config = cconfig.Config().from_dict(
    stitched_market_data_config
)
print(stitched_market_data_config)

start_timestamp: 2023-09-11 00:00:00+00:00
end_timestamp: 2023-09-11 04:00:00+00:00
ohlcv_market_data: 
  im_client: 
    universe_version: v7.4
    root_dir: s3://cryptokaizen-data-test/v3
    partition_mode: by_year_month
    dataset: ohlcv
    contract_type: futures
    data_snapshot: 
    aws_profile: ck
    resample_1min: False
    version: v1_0_0
    download_universe_version: v7_3
    tag: downloaded_1min
  ts_col_name: timestamp
  columns: None
  column_remap: None
  filter_data_mode: assert
bid_ask_market_data: 
  im_client: 
    universe_version: v7.4
    dataset: bid_ask
    contract_type: futures
    data_snapshot: 
    root_dir: s3://cryptokaizen-data-test/v3
    partition_mode: by_year_month
    version: v1_0_0
    download_universe_version: v7
    tag: resampled_1min
    aws_profile: ck
  ts_col_name: timestamp
  columns: ['level_1.bid_price.open', 'level_1.bid_price.high', 'level_1.bid_price.low', 'level_1.bid_price.close', 'level_1.bid_price.mean', 'level_1.bid_size.op

In [10]:
# Build the OHLCV `MarketData` from the `ohlcv_market_data` section of the
# stitched config.
ohlcv_cfg = stitched_market_data_config["ohlcv_market_data"]
ohlcv_im_client = icdcl.CcxtHistoricalPqByTileClient(**ohlcv_cfg["im_client"])
ohlcv_market_data = mdmadaex.get_HistoricalImClientMarketData_example1(
    ohlcv_im_client,
    asset_ids,
    ohlcv_cfg["columns"],
    ohlcv_cfg["column_remap"],
    filter_data_mode=ohlcv_cfg["filter_data_mode"],
)
# Query the shared interval and preview the first rows.
ohlcv_data = ohlcv_market_data.get_data_for_interval(
    stitched_market_data_config["start_timestamp"],
    stitched_market_data_config["end_timestamp"],
    ohlcv_cfg["ts_col_name"],
    asset_ids,
)
ohlcv_data.head(3)



Unnamed: 0_level_0,asset_id,full_symbol,open,high,low,close,volume,knowledge_timestamp,start_ts
end_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-09-10 20:00:00-04:00,1467591036,binance::BTC_USDT,25829.0,25830.5,25828.4,25828.4,35.063,2023-09-11 01:23:22.496982+00:00,2023-09-10 19:59:00-04:00
2023-09-10 20:00:00-04:00,8968126878,binance::BNB_USDT,212.25,212.29,212.25,212.28,120.72,2023-09-11 01:23:28.294152+00:00,2023-09-10 19:59:00-04:00
2023-09-10 20:01:00-04:00,1467591036,binance::BTC_USDT,25828.4,25835.8,25828.1,25835.7,152.568,2023-09-12 01:23:44.163254+00:00,2023-09-10 20:00:00-04:00


In [11]:
# Build the bid/ask `MarketData` from the `bid_ask_market_data` section of the
# stitched config.
bid_ask_cfg = stitched_market_data_config["bid_ask_market_data"]
bid_ask_im_client = icdcl.CcxtHistoricalPqByTileClient(**bid_ask_cfg["im_client"])
bid_ask_market_data = mdmadaex.get_HistoricalImClientMarketData_example1(
    bid_ask_im_client,
    asset_ids,
    bid_ask_cfg["columns"],
    bid_ask_cfg["column_remap"],
    filter_data_mode=bid_ask_cfg["filter_data_mode"],
)

In [12]:
# Combine the bid/ask and OHLCV `MarketData` objects into a single stitched one.
stitched_cfg = stitched_market_data_config["stitched_market_data"]
stitched_mdata = mdmadaex.get_HorizontalStitchedMarketData_example1(
    bid_ask_market_data,
    ohlcv_market_data,
    asset_ids,
    stitched_cfg["columns"],
    stitched_cfg["column_remap"],
    filter_data_mode=stitched_cfg["filter_data_mode"],
)
# Query the shared interval and preview the first rows.
stitched_mdata_df = stitched_mdata.get_data_for_interval(
    stitched_market_data_config["start_timestamp"],
    stitched_market_data_config["end_timestamp"],
    stitched_cfg["ts_col_name"],
    asset_ids,
)
stitched_mdata_df.head(3)

Unnamed: 0_level_0,asset_id,full_symbol,level_1.bid_price.open,level_1.bid_price.high,level_1.bid_price.low,level_1.bid_price.close,level_1.bid_price.mean,level_1.bid_size.open,level_1.bid_size.max,level_1.bid_size.min,level_1.bid_size.close,level_1.bid_size.mean,level_1.ask_price.open,level_1.ask_price.high,level_1.ask_price.low,level_1.ask_price.close,level_1.ask_price.mean,level_1.ask_size.open,level_1.ask_size.max,level_1.ask_size.min,level_1.ask_size.close,level_1.ask_size.mean,start_ts,open,high,low,close,volume,knowledge_timestamp
end_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
2023-09-10 20:00:00-04:00,1467591036,binance::BTC_USDT,,,,,,,,,,,,,,,,,,,,,2023-09-10 19:59:00-04:00,25829.0,25830.5,25828.4,25828.4,35.063,2023-09-11 01:23:22.496982+00:00
2023-09-10 20:00:00-04:00,8968126878,binance::BNB_USDT,,,,,,,,,,,,,,,,,,,,,2023-09-10 19:59:00-04:00,212.25,212.29,212.25,212.28,120.72,2023-09-11 01:23:28.294152+00:00
2023-09-10 20:01:00-04:00,1467591036,binance::BTC_USDT,25828.4,25835.7,25828.4,25835.7,25833.086667,3.307,40.841,0.047,4.403,16.718008,25828.5,25835.8,25828.5,25835.8,25833.18875,9.038,24.754,0.005,10.631,6.13795,2023-09-10 20:00:00-04:00,25828.4,25835.8,25828.1,25835.7,152.568,2023-09-19 14:47:21.627940+00:00


# `ReplayedMarketData`

In [13]:
# TODO(Dan): Add reference code for `ReplayedMarketData`.