# Description


This notebook showcases locations and basic structure of raw data from:

- S3 (parquet datasets)
- IM DB (Postgres)

The secondary purpose is to provide a guide on how to use `RawDataReader`

## Specs
- This notebook:
  - is a catalog of all the datasets that exist in the our system
  - shows how to load data using our low-level functions or specific API for specific datasets
  - shows how a snippet of the data looks like (for this we want to load the minimal amount of data)
  - doesn't compute any statistics
  - should be quickly to execute, like < 1min, so we can run it in the unit tests

## Life cycle
- Any time a new dataset is added (e.g., in real-time DB, Parquet) we add some information on how to load it and how it looks like
- In general we try not to delete any data but we only add data loaders

## Monster dataset matrix spreadsheet

The gallery should match 1-to-1 with the dataset matrix

https://docs.google.com/spreadsheets/d/1aN2TBTtDqX5itnlG70lS2otkKCHKPN2yE_Hu3JPhPVo/edit#gid=0

# Imports

In [1]:
import datetime
import logging

import pandas as pd

import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hpandas as hpandas
import helpers.hprint as hprint
import im_v2.common.data.client.im_raw_data_client as imvcdcimrdc

  from tqdm.autonotebook import tqdm


In [2]:
hdbg.init_logger(verbosity=logging.INFO)
log_level = logging.INFO

_LOG = logging.getLogger(__name__)

_LOG.info("%s", henv.get_system_signature()[0])

hprint.config_notebook()

[0m[36mINFO[0m: > cmd='/venv/lib/python3.9/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-164b1e6c-92c2-4758-a2f4-9051d5477e89.json'
INFO  # Git
  branch_name='CmampTask7077_Add_OKX_dataset_to_the_dataset_matrix_and_data_gallery'
  hash='f3647e605'
  # Last commits:
    * f3647e605 Sameep Pote CmTask7044 Backup reconcile dir if it exists 2 (#7067)            (18 minutes ago) Tue Feb 6 08:40:22 2024  (HEAD -> CmampTask7077_Add_OKX_dataset_to_the_dataset_matrix_and_data_gallery, origin/master, origin/HEAD)
    * 26ac32c86 Nina Lee CmTask6284_lint_files (#7095)                                     (  11 hours ago) Mon Feb 5 21:53:15 2024           
    * afcbe5684 Toma Jordania CmTask7078: fix run_notebooks (#7084)                             (  17 hours ago) Mon Feb 5 15:33:30 2024           
# Machine info
  system=Linux
  node name=a4f1759c3b24
  release=5.15.0-1051-aws
  version=#56~20.04.1-Ubuntu SMP Tue Nov 28 15:43:31 UTC 2023
  machine=x86_64
  

# Realtime (the DB data)

## realtime.airflow.resampled_1min.postgres.bid_ask.futures.v7.ccxt.binance.v1_0_0

In [3]:
signature = "realtime.airflow.resampled_1min.postgres.bid_ask.futures.v7.ccxt.binance.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Unable to fetch DB credentials from environment variables: 
	'POSTGRES_HOST'
	Attempting env file method.
INFO  Unable to fetch DB credentials from env file: 
	
################################################################################
* Failed assertion *
File '/app/amp/im_v2/devops/env/prod.im_db_config.env' doesn't exist
################################################################################

	Attempting AWS SecretsManager method.
INFO  Fetching secret: prod.im_data_db.read_only
INFO  Created prod DB connection: 
 None
INFO  Enabled connection to the `ccxt_bid_ask_futures_resampled_1min` DB table
INFO  Enabled connection to the `ccxt_bid_ask_futures_resampled_1min` DB table
INFO  Exe

  df = pd.read_sql_query(query, connection)


Unnamed: 0,timestamp,bid_size,bid_price,ask_size,ask_price,currency_pair,exchange_id,level,end_download_timestamp,knowledge_timestamp
0.0,1702274460000,20500.4,7.268303,38345.05,7.27911,AXS_USDT,binance,6,,2023-12-11 06:01:20.996784+00:00
1.0,1702274460000,18011.433333,7.272497,16529.066667,7.275167,AXS_USDT,binance,2,,2023-12-11 06:01:20.996784+00:00
2.0,1702274460000,23583.083333,7.265144,43114.433333,7.28215,AXS_USDT,binance,9,,2023-12-11 06:01:20.996784+00:00
,...,...,...,...,...,...,...,...,...,...
7.0,1702274460000,9551.366667,7.273376,7092.2,7.274042,AXS_USDT,binance,1,,2023-12-11 06:01:20.996784+00:00
8.0,1702274460000,25538.1,7.266469,39129.266667,7.281209,AXS_USDT,binance,8,,2023-12-11 06:01:20.996784+00:00
9.0,1702274460000,12207.418333,2.29848,5977.248333,2.300519,WAVES_USDT,binance,10,,2023-12-11 06:01:20.996784+00:00


INFO  None


## realtime.airflow.downloaded_200ms.postgres.bid_ask.futures.v7.ccxt.binance.v1_0_0

In [4]:
signature = "realtime.airflow.downloaded_200ms.postgres.bid_ask.futures.v7.ccxt.binance.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Enabled connection to the `ccxt_bid_ask_futures_raw` DB table
INFO  Enabled connection to the `ccxt_bid_ask_futures_raw` DB table
INFO  Executing query: 
	SELECT * FROM ccxt_bid_ask_futures_raw WHERE exchange_id = 'binance' ORDER BY timestamp DESC LIMIT 10


  df = pd.read_sql_query(query, connection)


Unnamed: 0,timestamp,bid_size,bid_price,ask_size,ask_price,currency_pair,exchange_id,level,end_download_timestamp,knowledge_timestamp


INFO  None


## realtime.airflow.downloaded_200ms.postgres.bid_ask.futures.v7_6.ccxt.okx.v1_0_0

In [5]:
# This works with stage 'preprod'
signature = "realtime.airflow.downloaded_200ms.postgres.bid_ask.futures.v7_6.ccxt.okx.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Enabled connection to the `ccxt_bid_ask_futures_raw` DB table
INFO  Enabled connection to the `ccxt_bid_ask_futures_raw` DB table
INFO  Executing query: 
	SELECT * FROM ccxt_bid_ask_futures_raw WHERE exchange_id = 'okx' ORDER BY timestamp DESC LIMIT 10


  df = pd.read_sql_query(query, connection)


Unnamed: 0,timestamp,bid_size,bid_price,ask_size,ask_price,currency_pair,exchange_id,level,end_download_timestamp,knowledge_timestamp


INFO  None


## realtime.airflow.downloaded_1min.postgres.ohlcv.futures.v7.ccxt.binance.v1_0_0

In [6]:
signature = "realtime.airflow.downloaded_1min.postgres.ohlcv.futures.v7.ccxt.binance.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Enabled connection to the `ccxt_ohlcv_futures` DB table
INFO  Enabled connection to the `ccxt_ohlcv_futures` DB table
INFO  Executing query: 
	SELECT * FROM ccxt_ohlcv_futures WHERE exchange_id = 'binance' ORDER BY timestamp DESC LIMIT 10


  df = pd.read_sql_query(query, connection)


Unnamed: 0,timestamp,open,high,low,close,volume,currency_pair,exchange_id,end_download_timestamp,knowledge_timestamp
0.0,1702980240000,0.744,0.7458,0.744,0.7456,8646.0,CTK_USDT,binance,2023-12-19 10:05:10.113941+00:00,2023-12-19 10:05:10.131729+00:00
1.0,1702980240000,74.834,74.849,74.762,74.784,6153.0,SOL_USDT,binance,2023-12-19 10:05:10.113476+00:00,2023-12-19 10:05:10.131729+00:00
2.0,1702980240000,0.09243,0.09248,0.09242,0.09245,1576605.0,DOGE_USDT,binance,2023-12-19 10:05:10.112997+00:00,2023-12-19 10:05:10.131729+00:00
,...,...,...,...,...,...,...,...,...,...
7.0,1702980240000,2.389,2.396,2.387,2.392,108273.0,NEAR_USDT,binance,2023-12-19 10:05:10.110538+00:00,2023-12-19 10:05:10.131729+00:00
8.0,1702980240000,0.2612,0.2612,0.2589,0.2591,1497995.0,BAKE_USDT,binance,2023-12-19 10:05:10.110098+00:00,2023-12-19 10:05:10.131729+00:00
9.0,1702980240000,2244.74,2244.74,2243.41,2243.95,1427.707,ETH_USDT,binance,2023-12-19 10:05:10.101885+00:00,2023-12-19 10:05:10.131729+00:00


INFO  None


## realtime.airflow.downloaded_1min.postgres.ohlcv.futures.v7_6.ccxt.okx.v1_0_0

In [7]:
# This works with stage 'preprod'
signature = "realtime.airflow.downloaded_1min.postgres.ohlcv.futures.v7_6.ccxt.okx.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Enabled connection to the `ccxt_ohlcv_futures` DB table
INFO  Enabled connection to the `ccxt_ohlcv_futures` DB table
INFO  Executing query: 
	SELECT * FROM ccxt_ohlcv_futures WHERE exchange_id = 'okx' ORDER BY timestamp DESC LIMIT 10


  df = pd.read_sql_query(query, connection)


Unnamed: 0,timestamp,open,high,low,close,volume,currency_pair,exchange_id,end_download_timestamp,knowledge_timestamp


INFO  None


# Historical (data updated daily)

## periodic_daily.airflow.downloaded_1min.csv.ohlcv.futures.v7.ccxt.binance.v1_0_0

In [8]:
#  the dataset reside under previous, deprecated schema:
# s3://cryptokaizen-data/reorg/daily_staged.airflow.pq/

# signature = "periodic_daily.airflow.downloaded_1min.csv.ohlcv.futures.v7.ccxt.binance.v1_0_0"
# reader = imvcdcimrdc.RawDataReader(signature)
# data = reader.read_data_head()
# _LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

## periodic_daily.airflow.downloaded_1min.parquet.ohlcv.futures.v7.ccxt.binance.v1_0_0

In [9]:
signature = "periodic_daily.airflow.downloaded_1min.parquet.ohlcv.futures.v7.ccxt.binance.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading the data from `s3://cryptokaizen-data/v3/periodic_daily/airflow/downloaded_1min/parquet/ohlcv/futures/v7/ccxt/binance/v1_0_0`


Unnamed: 0,timestamp,open,high,low,close,volume,exchange_id,knowledge_timestamp,currency_pair,year,month
2023-12-19 00:00:00+00:00,1702944000000,0.499,0.4995,0.4981,0.4982,403860.0,binance,2023-12-20 00:25:23.340503+00:00,SAND_USDT,2023,12
2023-12-19 00:01:00+00:00,1702944060000,0.4983,0.4984,0.4974,0.4975,127231.0,binance,2023-12-20 00:25:23.340503+00:00,SAND_USDT,2023,12
2023-12-19 00:02:00+00:00,1702944120000,0.4974,0.4977,0.497,0.4976,91335.0,binance,2023-12-20 00:25:23.340503+00:00,SAND_USDT,2023,12
,...,...,...,...,...,...,...,...,...,...,...
2023-12-19 00:07:00+00:00,1702944420000,0.4956,0.4957,0.495,0.4957,95430.0,binance,2023-12-20 00:25:23.340503+00:00,SAND_USDT,2023,12
2023-12-19 00:08:00+00:00,1702944480000,0.4958,0.4972,0.4958,0.4971,297091.0,binance,2023-12-20 00:25:23.340503+00:00,SAND_USDT,2023,12
2023-12-19 00:09:00+00:00,1702944540000,0.4972,0.4975,0.4968,0.4975,83471.0,binance,2023-12-20 00:25:23.340503+00:00,SAND_USDT,2023,12


INFO  None


## periodic_daily.airflow.downloaded_1min.parquet.ohlcv.spot.v7.ccxt.binance.v1_0_0

In [10]:
signature = "periodic_daily.airflow.downloaded_1min.parquet.ohlcv.futures.v7.ccxt.binance.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading the data from `s3://cryptokaizen-data/v3/periodic_daily/airflow/downloaded_1min/parquet/ohlcv/futures/v7/ccxt/binance/v1_0_0`


Unnamed: 0,timestamp,open,high,low,close,volume,exchange_id,knowledge_timestamp,currency_pair,year,month
2023-12-19 00:00:00+00:00,1702944000000,0.499,0.4995,0.4981,0.4982,403860.0,binance,2023-12-20 00:25:23.340503+00:00,SAND_USDT,2023,12
2023-12-19 00:01:00+00:00,1702944060000,0.4983,0.4984,0.4974,0.4975,127231.0,binance,2023-12-20 00:25:23.340503+00:00,SAND_USDT,2023,12
2023-12-19 00:02:00+00:00,1702944120000,0.4974,0.4977,0.497,0.4976,91335.0,binance,2023-12-20 00:25:23.340503+00:00,SAND_USDT,2023,12
,...,...,...,...,...,...,...,...,...,...,...
2023-12-19 00:07:00+00:00,1702944420000,0.4956,0.4957,0.495,0.4957,95430.0,binance,2023-12-20 00:25:23.340503+00:00,SAND_USDT,2023,12
2023-12-19 00:08:00+00:00,1702944480000,0.4958,0.4972,0.4958,0.4971,297091.0,binance,2023-12-20 00:25:23.340503+00:00,SAND_USDT,2023,12
2023-12-19 00:09:00+00:00,1702944540000,0.4972,0.4975,0.4968,0.4975,83471.0,binance,2023-12-20 00:25:23.340503+00:00,SAND_USDT,2023,12


INFO  None


## periodic_daily.airflow.downloaded_1min.parquet.ohlcv.spot.v7.ccxt.binanceus.v1_0_0

In [11]:
signature = "periodic_daily.airflow.downloaded_1min.parquet.ohlcv.spot.v7.ccxt.binanceus.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading the data from `s3://cryptokaizen-data/v3/periodic_daily/airflow/downloaded_1min/parquet/ohlcv/spot/v7/ccxt/binanceus/v1_0_0`


Unnamed: 0,timestamp,open,high,low,close,volume,exchange_id,knowledge_timestamp,currency_pair,year,month
2023-12-19 00:00:00+00:00,1702944000000,0.6127,0.6127,0.6127,0.6127,0.0,binanceus,2023-12-20 00:17:44.396895+00:00,XRP_USDT,2023,12
2023-12-19 00:01:00+00:00,1702944060000,0.6127,0.6127,0.6127,0.6127,0.0,binanceus,2023-12-20 00:17:44.396895+00:00,XRP_USDT,2023,12
2023-12-19 00:02:00+00:00,1702944120000,0.6127,0.6127,0.6127,0.6127,0.0,binanceus,2023-12-20 00:17:44.396895+00:00,XRP_USDT,2023,12
,...,...,...,...,...,...,...,...,...,...,...
2023-12-19 00:07:00+00:00,1702944420000,0.6108,0.6108,0.6108,0.6108,0.0,binanceus,2023-12-20 00:17:44.396895+00:00,XRP_USDT,2023,12
2023-12-19 00:08:00+00:00,1702944480000,0.6108,0.6108,0.6108,0.6108,0.0,binanceus,2023-12-20 00:17:44.396895+00:00,XRP_USDT,2023,12
2023-12-19 00:09:00+00:00,1702944540000,0.6108,0.6108,0.6108,0.6108,0.0,binanceus,2023-12-20 00:17:44.396895+00:00,XRP_USDT,2023,12


INFO  None


## periodic_daily.airflow.downloaded_1min.parquet.ohlcv.futures.v3.crypto_chassis.binance.v1_0_0

In [12]:
signature = "periodic_daily.airflow.downloaded_1min.parquet.ohlcv.futures.v3.crypto_chassis.binance.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading the data from `s3://cryptokaizen-data/v3/periodic_daily/airflow/downloaded_1min/parquet/ohlcv/futures/v3/crypto_chassis/binance/v1_0_0`


Unnamed: 0,timestamp,open,high,low,close,volume,vwap,number_of_trades,twap,exchange_id,knowledge_timestamp,currency_pair,year,month
2023-03-01 00:00:00+00:00,1677628800,0.3761,0.3762,0.3758,0.3759,1940393.5,0.37596,152,0.375929,binance,2023-03-02 00:17:09.917800+00:00,XRP_USDT,2023,3
2023-03-01 00:01:00+00:00,1677628860,0.3758,0.3761,0.3757,0.3759,1025109.2,0.375924,126,0.375914,binance,2023-03-02 00:17:09.917800+00:00,XRP_USDT,2023,3
2023-03-01 00:02:00+00:00,1677628920,0.376,0.376,0.3758,0.3759,800839.5,0.375923,84,0.375885,binance,2023-03-02 00:17:09.917800+00:00,XRP_USDT,2023,3
,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-01 00:07:00+00:00,1677629220,0.3755,0.3756,0.3752,0.3754,836788.0,0.375344,84,0.37536,binance,2023-03-02 00:17:09.917800+00:00,XRP_USDT,2023,3
2023-03-01 00:08:00+00:00,1677629280,0.3754,0.3756,0.3751,0.3752,1099763.6,0.375405,73,0.375284,binance,2023-03-02 00:17:09.917800+00:00,XRP_USDT,2023,3
2023-03-01 00:09:00+00:00,1677629340,0.3751,0.3753,0.3751,0.3752,490412.7,0.375221,76,0.375217,binance,2023-03-02 00:17:09.917800+00:00,XRP_USDT,2023,3


INFO  None


## periodic_daily.airflow.downloaded_1sec.parquet.bid_ask.futures.v3.crypto_chassis.binance.v1_0_0

In [13]:
signature = "periodic_daily.airflow.downloaded_1sec.parquet.bid_ask.futures.v3.crypto_chassis.binance.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading the data from `s3://cryptokaizen-data/v3/periodic_daily/airflow/downloaded_1sec/parquet/bid_ask/futures/v3/crypto_chassis/binance/v1_0_0`


Unnamed: 0,timestamp,bid_price_l1,bid_size_l1,bid_price_l2,bid_size_l2,bid_price_l3,bid_size_l3,bid_price_l4,bid_size_l4,bid_price_l5,bid_size_l5,bid_price_l6,bid_size_l6,bid_price_l7,bid_size_l7,bid_price_l8,bid_size_l8,bid_price_l9,bid_size_l9,bid_price_l10,bid_size_l10,ask_price_l1,ask_size_l1,ask_price_l2,ask_size_l2,ask_price_l3,ask_size_l3,ask_price_l4,ask_size_l4,ask_price_l5,ask_size_l5,ask_price_l6,ask_size_l6,ask_price_l7,ask_size_l7,ask_price_l8,ask_size_l8,ask_price_l9,ask_size_l9,ask_price_l10,ask_size_l10,exchange_id,knowledge_timestamp,currency_pair,year,month
2023-05-25 00:00:00+00:00,1684972800,0.4527,94582.1,0.4526,238532.7,0.4525,318108.7,0.4524,234131.4,0.4523,244834.4,0.4522,287343.1,0.4521,209828.7,0.452,450113.4,0.4519,224532.8,0.4518,137232.7,0.4528,195813.7,0.4529,247508.9,0.453,389605.5,0.4531,274644.0,0.4532,327111.9,0.4533,104264.4,0.4534,327910.9,0.4535,291567.2,0.4536,273069.0,0.4537,125406.9,binance,2023-05-26 11:02:56.333619+00:00,XRP_USDT,2023,5
2023-05-25 00:00:01+00:00,1684972801,0.4527,96417.5,0.4526,215962.9,0.4525,314114.8,0.4524,181673.5,0.4523,219119.8,0.4522,223983.8,0.4521,257760.4,0.452,406923.9,0.4519,225980.7,0.4518,88439.1,0.4528,192489.1,0.4529,246653.8,0.453,352176.1,0.4531,225902.9,0.4532,271303.9,0.4533,112338.9,0.4534,278927.7,0.4535,241481.3,0.4536,292041.6,0.4537,142539.8,binance,2023-05-26 11:02:56.333619+00:00,XRP_USDT,2023,5
2023-05-25 00:00:04+00:00,1684972804,0.4527,95843.3,0.4526,215962.9,0.4525,314114.8,0.4524,181673.5,0.4523,219119.8,0.4522,222884.7,0.4521,213522.1,0.452,406923.9,0.4519,225980.7,0.4518,88439.1,0.4528,192489.1,0.4529,246653.8,0.453,352176.1,0.4531,225902.9,0.4532,271303.9,0.4533,112338.9,0.4534,278927.7,0.4535,241481.3,0.4536,292041.6,0.4537,142539.8,binance,2023-05-26 11:02:56.333619+00:00,XRP_USDT,2023,5
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-25 00:00:10+00:00,1684972810,0.4526,182406.9,0.4525,94436.5,0.4524,356446.9,0.4523,144691.8,0.4522,217475.4,0.4521,300742.8,0.452,317142.6,0.4519,221717.9,0.4518,207148.4,0.4517,348817.9,0.4527,67402.8,0.4528,211817.5,0.4529,156633.7,0.453,284660.3,0.4531,284074.9,0.4532,390195.2,0.4533,124777.4,0.4534,329753.2,0.4535,293339.2,0.4536,165001.8,binance,2023-05-26 11:02:56.333619+00:00,XRP_USDT,2023,5
2023-05-25 00:00:11+00:00,1684972811,0.4524,87761.2,0.4523,285138.7,0.4522,277054.0,0.4521,251072.5,0.452,313119.2,0.4519,230758.2,0.4518,244251.5,0.4517,469503.3,0.4516,255341.8,0.4515,371912.7,0.4525,47356.7,0.4526,148782.9,0.4527,137532.2,0.4528,291235.5,0.4529,366460.4,0.453,218088.8,0.4531,159114.7,0.4532,236985.0,0.4533,132626.4,0.4534,318109.7,binance,2023-05-26 11:02:56.333619+00:00,XRP_USDT,2023,5
2023-05-25 00:00:12+00:00,1684972812,0.4524,106484.7,0.4523,243601.8,0.4522,306354.8,0.4521,217339.3,0.452,301149.7,0.4519,229513.4,0.4518,323599.6,0.4517,485389.8,0.4516,250579.2,0.4515,357326.0,0.4525,106970.1,0.4526,182159.2,0.4527,134391.9,0.4528,385906.2,0.4529,303583.6,0.453,202247.9,0.4531,166568.1,0.4532,241499.0,0.4533,150186.6,0.4534,265674.0,binance,2023-05-26 11:02:56.333619+00:00,XRP_USDT,2023,5


INFO  None


## periodic_daily.airflow.resampled_1min.parquet.bid_ask.futures.v3.crypto_chassis.binance.v1_0_0

In [14]:
signature = "periodic_daily.airflow.resampled_1min.parquet.bid_ask.futures.v3.crypto_chassis.binance.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading the data from `s3://cryptokaizen-data/v3/periodic_daily/airflow/resampled_1min/parquet/bid_ask/futures/v3/crypto_chassis/binance/v1_0_0`


Unnamed: 0,timestamp,bid_price_l1,bid_size_l1,ask_price_l1,ask_size_l1,bid_price_l2,bid_size_l2,ask_price_l2,ask_size_l2,bid_price_l3,bid_size_l3,ask_price_l3,ask_size_l3,bid_price_l4,bid_size_l4,ask_price_l4,ask_size_l4,bid_price_l5,bid_size_l5,ask_price_l5,ask_size_l5,bid_price_l6,bid_size_l6,ask_price_l6,ask_size_l6,bid_price_l7,bid_size_l7,ask_price_l7,ask_size_l7,bid_price_l8,bid_size_l8,ask_price_l8,ask_size_l8,bid_price_l9,bid_size_l9,ask_price_l9,ask_size_l9,bid_price_l10,bid_size_l10,ask_price_l10,ask_size_l10,exchange_id,knowledge_timestamp,currency_pair,year,month
2023-05-01 00:00:00+00:00,1682899200,0.079467,14402045.0,0.079466,16416679.0,0.079452,17415718.0,0.079474,18163641.0,0.079446,25863738.0,0.079499,25072570.0,0.079435,34349488.0,0.079513,40708428.0,0.07943,42400632.0,0.07952,45824766.0,0.07941,40020922.0,0.079532,50353128.0,0.079399,34533506.0,0.079534,42839719.0,0.079387,43974278.0,0.07954,44790974.0,0.079385,56132153.0,0.079549,36367318.0,0.079379,69440354.0,0.079554,34693305.0,binance,2023-05-01 11:06:08.004309+00:00,DOGE_USDT,2023,5
2023-05-01 00:01:00+00:00,1682899260,0.079418,11216503.0,0.07943,15358128.0,0.079403,17882854.0,0.07943,22848786.0,0.079392,24342427.0,0.079435,28709878.0,0.079379,30858540.0,0.079448,36879665.0,0.079368,34065090.0,0.079463,41894186.0,0.079357,32033015.0,0.079473,39618126.0,0.079349,36815418.0,0.079477,40893607.0,0.079339,37739930.0,0.079487,36630803.0,0.079328,46825390.0,0.079499,39561209.0,0.079319,48302668.0,0.079498,41985107.0,binance,2023-05-02 11:06:23.772871+00:00,DOGE_USDT,2023,5
2023-05-01 00:02:00+00:00,1682899320,0.079424,14576594.0,0.079455,20149840.0,0.079416,21185749.0,0.079459,25174929.0,0.079417,28079831.0,0.079466,28325292.0,0.079408,37954006.0,0.079467,36430946.0,0.079399,40201744.0,0.079482,36879835.0,0.079384,34294571.0,0.079499,40039127.0,0.079373,29766726.0,0.079507,42584890.0,0.079365,38356351.0,0.079521,34304860.0,0.07936,45870289.0,0.079537,39670140.0,0.079344,55694648.0,0.079531,45841128.0,binance,2023-05-02 11:06:23.772871+00:00,DOGE_USDT,2023,5
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-01 00:07:00+00:00,1682899620,0.07958,12615047.0,0.079587,18099163.0,0.079567,25561157.0,0.079596,26694606.0,0.079556,24743497.0,0.079607,36911395.0,0.079548,31338251.0,0.079616,37072240.0,0.079538,33503402.0,0.079626,47253798.0,0.079526,40869029.0,0.079637,51582941.0,0.079515,33783846.0,0.079648,50281779.0,0.079505,42392683.0,0.079656,44874584.0,0.079496,42794353.0,0.079666,58335305.0,0.079485,66989644.0,0.079677,77216007.0,binance,2023-05-02 11:06:23.772871+00:00,DOGE_USDT,2023,5
2023-05-01 00:08:00+00:00,1682899680,0.079566,16237705.0,0.079581,22035762.0,0.079556,22896789.0,0.079592,29821508.0,0.079548,28452861.0,0.079603,29324122.0,0.079537,33566104.0,0.079611,36339891.0,0.079531,38396723.0,0.07962,52153382.0,0.079522,35269517.0,0.079631,50193944.0,0.079508,35945070.0,0.079639,55440717.0,0.079497,42695376.0,0.07965,53306652.0,0.079485,49711997.0,0.079659,52400667.0,0.079477,59593645.0,0.079672,58321467.0,binance,2023-05-02 11:06:23.772871+00:00,DOGE_USDT,2023,5
2023-05-01 00:09:00+00:00,1682899740,0.079581,15293988.0,0.079596,14880004.0,0.079575,13956723.0,0.079605,22855460.0,0.079559,29367299.0,0.079614,28880096.0,0.079551,37994404.0,0.079627,40193930.0,0.079543,46850310.0,0.079635,48779908.0,0.079536,42570843.0,0.079641,39459051.0,0.079524,40087865.0,0.079654,47431217.0,0.079511,47904113.0,0.079662,42987565.0,0.079501,39986009.0,0.079672,42711318.0,0.079491,51553976.0,0.079686,50535998.0,binance,2023-05-02 11:06:23.772871+00:00,DOGE_USDT,2023,5


INFO  None


## periodic_daily.airflow.downloaded_1sec.parquet.bid_ask.spot.v3.crypto_chassis.binance.v1_0_0

In [15]:
# The dataset reside under previous schema:
# s3://cryptokaizen-data/reorg/daily_staged.airflow.pq/

# TODO(Juraj): Spot bid ask spot data are not collected currently
# signature = "periodic_daily.airflow.downloaded_1sec.parquet.bid_ask.spot.v3.crypto_chassis.binance.v1_0_0"
# reader = imvcdcimrdc.RawDataReader(signature)
# data = reader.read_data_head()
# _LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

## periodic_daily.airflow.resampled_1min.parquet.bid_ask.spot.v3.crypto_chassis.binance.v1_0_0

In [16]:
# The dataset reside under previous schema:
# s3://cryptokaizen-data/reorg/daily_staged.airflow.pq/

# TODO(Juraj): Spot bid ask spot data are not collected currently
# signature = "periodic_daily.airflow.downloaded_1sec.parquet.bid_ask.spot.v3.crypto_chassis.binance.v1_0_0"
# reader = imvcdcimrdc.RawDataReader(signature)
# data = reader.read_data_head()
# _LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

## periodic_daily.airflow.downloaded_1min.parquet.ohlcv.spot.v3.crypto_chassis.binance.v1_0_0

In [17]:
signature = "periodic_daily.airflow.downloaded_1min.parquet.ohlcv.spot.v3.crypto_chassis.binance.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading the data from `s3://cryptokaizen-data/v3/periodic_daily/airflow/downloaded_1min/parquet/ohlcv/spot/v3/crypto_chassis/binance/v1_0_0`


Unnamed: 0,timestamp,open,high,low,close,volume,vwap,number_of_trades,twap,exchange_id,knowledge_timestamp,currency_pair,year,month
2022-10-01 00:00:00+00:00,1664582400,0.4346,0.4347,0.4341,0.4341,28292.6,0.434331,31,0.434419,binance,2022-10-02 00:16:43.552325+00:00,ADA_USDT,2022,10
2022-10-01 00:01:00+00:00,1664582460,0.434,0.4341,0.4339,0.4341,51737.3,0.433961,15,0.43394,binance,2022-10-02 00:16:43.552325+00:00,ADA_USDT,2022,10
2022-10-01 00:02:00+00:00,1664582520,0.4341,0.4343,0.4341,0.4342,38364.8,0.43419,11,0.434191,binance,2022-10-02 00:16:43.552325+00:00,ADA_USDT,2022,10
,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-10-01 00:07:00+00:00,1664582820,0.4352,0.4353,0.4352,0.4352,9646.3,0.435264,9,0.435256,binance,2022-10-02 00:16:43.552325+00:00,ADA_USDT,2022,10
2022-10-01 00:08:00+00:00,1664582880,0.4353,0.4355,0.4353,0.4354,41853.8,0.435395,27,0.435448,binance,2022-10-02 00:16:43.552325+00:00,ADA_USDT,2022,10
2022-10-01 00:09:00+00:00,1664582940,0.4354,0.4358,0.4354,0.4356,251143.6,0.435609,31,0.435597,binance,2022-10-02 00:16:43.552325+00:00,ADA_USDT,2022,10


INFO  None


## periodic_daily.airflow.downloaded_1sec.parquet.trades.futures.v3_1.crypto_chassis.binance.v1_0_0

In [18]:
signature = "periodic_daily.airflow.downloaded_1sec.parquet.trades.futures.v3_1.crypto_chassis.binance.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading the data from `s3://cryptokaizen-data/v3/periodic_daily/airflow/downloaded_1sec/parquet/trades/futures/v3_1/crypto_chassis/binance/v1_0_0`


Unnamed: 0,timestamp,price,size,is_buyer_maker,exchange_id,knowledge_timestamp,currency_pair,year,month
2023-05-25 21:57:10+00:00,1685051830,0.4524,50.0,1,binance,2023-05-26 10:35:00.030731+00:00,XRP_USDT,2023,5
2023-05-25 21:57:11+00:00,1685051831,0.4525,217.3,0,binance,2023-05-26 10:35:00.030731+00:00,XRP_USDT,2023,5
2023-05-25 21:57:12+00:00,1685051832,0.4524,1115.6,1,binance,2023-05-26 10:35:00.030731+00:00,XRP_USDT,2023,5
,...,...,...,...,...,...,...,...,...
2023-05-25 21:57:17+00:00,1685051837,0.4525,187.4,0,binance,2023-05-26 10:35:00.030731+00:00,XRP_USDT,2023,5
2023-05-25 21:57:19+00:00,1685051839,0.4524,50.0,1,binance,2023-05-26 10:35:00.030731+00:00,XRP_USDT,2023,5
2023-05-25 21:57:20+00:00,1685051840,0.4525,13.0,0,binance,2023-05-26 10:35:00.030731+00:00,XRP_USDT,2023,5


INFO  None


## periodic_daily.airflow.downloaded_1min.parquet.ohlcv.futures.v7_3.ccxt.okx.v1_0_0

In [19]:
signature = "periodic_daily.airflow.downloaded_1min.parquet.ohlcv.futures.v7_3.ccxt.okx.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data_head()
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading the data from `s3://cryptokaizen-data/v3/periodic_daily/airflow/downloaded_1min/parquet/ohlcv/futures/v7_3/ccxt/okx/v1_0_0`


Unnamed: 0,timestamp,open,high,low,close,volume,exchange_id,knowledge_timestamp,currency_pair,year,month
2023-12-19 15:00:00+00:00,1702998000000,73.14,73.25,73.03,73.03,3369.515674,okx,2023-12-20 00:17:32.064560+00:00,SOL_USDT,2023,12
2023-12-19 15:01:00+00:00,1702998060000,73.06,73.08,72.97,72.98,1206.250574,okx,2023-12-20 00:17:32.064560+00:00,SOL_USDT,2023,12
2023-12-19 15:02:00+00:00,1702998120000,72.98,72.98,72.82,72.9,1920.901061,okx,2023-12-20 00:17:32.064560+00:00,SOL_USDT,2023,12
,...,...,...,...,...,...,...,...,...,...,...
2023-12-19 15:07:00+00:00,1702998420000,73.43,73.62,73.3,73.56,2228.349973,okx,2023-12-20 00:17:32.064560+00:00,SOL_USDT,2023,12
2023-12-19 15:08:00+00:00,1702998480000,73.55,73.62,73.54,73.56,1348.934377,okx,2023-12-20 00:17:32.064560+00:00,SOL_USDT,2023,12
2023-12-19 15:09:00+00:00,1702998540000,73.57,73.73,73.57,73.7,1390.305672,okx,2023-12-20 00:17:32.064560+00:00,SOL_USDT,2023,12


INFO  None


## bulk.airflow.downloaded_1min.parquet.ohlcv.futures.v7_5.ccxt.binance.v1_0_0

_This dataset is in the test stage only_

In [20]:
signature = "bulk.airflow.downloaded_1min.parquet.ohlcv.futures.v7_5.ccxt.binance.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature, stage="test")
# 4 months of data is available.
start_timestamp = pd.Timestamp("2023-02-01T00:00:00+00:00")
end_timestamp = pd.Timestamp("2023-06-01T00:00:00+00:00")
binance_ohlcv_data = reader.read_data(start_timestamp, end_timestamp)
_LOG.log(log_level, hpandas.df_to_str(binance_ohlcv_data.head(), log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3


Unnamed: 0_level_0,timestamp,open,high,low,close,volume,exchange_id,knowledge_timestamp,currency_pair,year,month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-02-01 00:00:00+00:00,1675209600000,0.5141,0.5143,0.5137,0.5142,59009.0,binance,2023-06-20 17:19:02.877891+00:00,1INCH_USDT,2023,2
2023-02-01 00:01:00+00:00,1675209660000,0.5142,0.5143,0.514,0.514,26966.0,binance,2023-06-20 17:19:02.877891+00:00,1INCH_USDT,2023,2
2023-02-01 00:02:00+00:00,1675209720000,0.5141,0.5157,0.5141,0.5154,49452.0,binance,2023-06-20 17:19:02.877891+00:00,1INCH_USDT,2023,2
2023-02-01 00:03:00+00:00,1675209780000,0.5154,0.5162,0.5153,0.5157,60371.0,binance,2023-06-20 17:19:02.877891+00:00,1INCH_USDT,2023,2
2023-02-01 00:04:00+00:00,1675209840000,0.5155,0.5157,0.5147,0.5148,24003.0,binance,2023-06-20 17:19:02.877891+00:00,1INCH_USDT,2023,2


INFO  None


# Archived data (data transferred from IM DB to postgres)

TODO(Juraj): #CmTask3376 Update once the support for archive data has been added to the `RawDataReader`

- So far only single dataset stored in s3://cryptokaizen-data/db_archive/prod/ccxt_bid_ask_futures_raw/timestamp/
   - can be retrieved using `hparquet.from_parquet`
   - be aware of the large footprint of the dataset

# RawDataReader Guide

## Loading parquet data with filters

TODO(Juraj): Support for filtering by level for parquet bid/ask datasets will be added once
#3694 is finished.

In [21]:
signature = "periodic_daily.airflow.downloaded_1min.parquet.ohlcv.futures.v7.ccxt.binance.v1_0_0"
start_timestamp = pd.Timestamp(
    datetime.datetime.utcnow() - datetime.timedelta(minutes=10, days=2), tz="UTC"
)
end_timestamp = start_timestamp + datetime.timedelta(minutes=10)
currency_pairs = ["BTC_USDT", "ETH_USDT"]
reader = imvcdcimrdc.RawDataReader(signature)
data = reader.read_data(
    start_timestamp, end_timestamp, currency_pairs=currency_pairs
)
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3


Unnamed: 0_level_0,timestamp,open,high,low,close,volume,exchange_id,knowledge_timestamp,currency_pair,year,month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


INFO  None


In [22]:
# This works with stage preprod.
signature = "periodic_daily.airflow.downloaded_1min.parquet.ohlcv.futures.v7_6.ccxt.okx.v1_0_0"
start_timestamp = pd.Timestamp(
    datetime.datetime.utcnow() - datetime.timedelta(minutes=10, days=1), tz="UTC"
)
end_timestamp = start_timestamp + datetime.timedelta(minutes=10)
currency_pairs = ["BTC_USDT", "ETH_USDT"]
reader = imvcdcimrdc.RawDataReader(signature, stage='preprod')
data = reader.read_data(
    start_timestamp, end_timestamp, currency_pairs=currency_pairs
)
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3


Unnamed: 0_level_0,timestamp,open,high,low,close,volume,exchange_id,knowledge_timestamp,currency_pair,year,month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


INFO  None


## Loading postgres data with filters

In [23]:
signature = "realtime.airflow.resampled_1min.postgres.bid_ask.futures.v7.ccxt.binance.v1_0_0"
reader = imvcdcimrdc.RawDataReader(signature)
start_timestamp = pd.Timestamp(
    datetime.datetime.utcnow() - datetime.timedelta(minutes=10, days=2), tz="UTC"
)
end_timestamp = start_timestamp + datetime.timedelta(minutes=10)
currency_pairs = ["BTC_USDT", "ETH_USDT"]
bid_ask_levels = [1, 2]
data = reader.read_data(
    start_timestamp,
    end_timestamp,
    currency_pairs=currency_pairs,
    bid_ask_levels=bid_ask_levels,
)
_LOG.log(log_level, hpandas.df_to_str(data, log_level=log_level))

INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Loading dataset schema file: /app/amp/data_schema/dataset_schema_versions/dataset_schema_v3.json
INFO  Loaded dataset schema version v3
INFO  Enabled connection to the `ccxt_bid_ask_futures_resampled_1min` DB table
INFO  Executing query: 
	SELECT * FROM ccxt_bid_ask_futures_resampled_1min WHERE timestamp >= 1707036762243 AND timestamp <= 1707037362243 AND currency_pair IN ('BTC_USDT', 'ETH_USDT') AND level IN (1, 2) AND exchange_id = 'binance'


  df = pd.read_sql_query(query, connection)


Unnamed: 0_level_0,currency_pair,exchange_id,end_download_timestamp,knowledge_timestamp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


INFO  None
