# Imports

In [None]:
import logging
import os

import pandas as pd

import core.config.config_ as cconconf
import core.statistics as cstats
import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hparquet as hparque
import helpers.hprint as hprint
import im_v2.ccxt.data.client as icdcl
import research_amp.cc.statistics as ramccsta

  from tqdm.autonotebook import tqdm


In [2]:
# Initialize logging for the notebook at INFO verbosity.
hdbg.init_logger(verbosity=logging.INFO)

# Module-level logger used by the cells below.
_LOG = logging.getLogger(__name__)

# Log the first element of the system signature (see the recorded output
# below for what it contains in this run).
_LOG.info("%s", henv.get_system_signature()[0])

# Apply the project's notebook configuration.
hprint.config_notebook()

[0m[36mINFO[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-dec019e3-5363-43ff-b414-2e56f99e09a8.json'
>>ENV<<: is_inside_container=True: code_version=1.0.9, container_version=1.0.9, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CI=''
>>ENV<<: AM_AWS_PROFILE=True AM_ECR_BASE_PATH=True AM_S3_BUCKET=True AM_TELEGRAM_TOKEN=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=False AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True
INFO  generated new fontManager
INFO  # Git
    branch_name='CMTask1680_Research_available_CCXT_data_at_ck_AWS_profile'
    hash='d968be24b'
    # Last commits:
      * d968be24b Nina Lee fixed                                                             (19 minutes ago) Wed Apr 20 21:40:11 2022  (HEAD -> CMTask1680_Research_available_CCXT_data_at_ck_AWS_profile, origin/CMTask1680_Research_available_CCXT_data_at_ck_AWS_profile)
      * 5cb0933d8 Nina Lee linter                       

# Configs

In [33]:
def get_cmtask1680_config_ccxt() -> cconconf.Config:
    """
    Build the config used by the CMTask1680 notebook.

    The config consists of three sections, added in this order:
       - `load`: AWS profile and root dir of the historical data
       - `data`: vendor name and data snapshot
       - `column_names`: column names
    """
    # TODO(Nina): @all replace `s3://cryptokaizen-data` with `get_s3_bucket()` after #1667 is implemented.
    historical_dir = os.path.join("s3://cryptokaizen-data", "historical")
    # Map section name to its parameters. Insertion order is preserved, so
    # sections and keys are added in exactly the order they are listed here.
    sections = {
        # Load parameters.
        "load": {
            "aws_profile": "ck",
            "data_dir": historical_dir,
        },
        # Data parameters.
        "data": {
            "vendor": "CCXT",
            "data_snapshot": "latest",
        },
        # Column names.
        "column_names": {
            "close_price": "close",
            "currency_pair": "currency_pair",
            "exchange_id": "exchange_id",
        },
    }
    config = cconconf.Config()
    for section_name, params in sections.items():
        config.add_subconfig(section_name)
        for param_name, param_value in params.items():
            config[section_name][param_name] = param_value
    return config

In [34]:
# Build the notebook-wide config and print it for reference.
config = get_cmtask1680_config_ccxt()
print(config)

load:
  aws_profile: ck
  data_dir: s3://cryptokaizen-data/historical
data:
  vendor: CCXT
  data_snapshot: latest
column_names:
  close_price: close
  currency_pair: currency_pair
  exchange_id: exchange_id


# Functions

In [31]:
# TODO(Nina): @all Use functions from `research_amp.cc.statistics` instead.
def compute_stats_per_currency_pair(currency_pair_list: list) -> pd.DataFrame:
    """
    For each currency pair in the list compute stats.

    The input is the notebook-level `data` DataFrame (not a parameter): it
    must have `currency_pair` and `close` columns and an index whose values
    can be subtracted to get an object with a `.days` attribute.

    Statistics include:
       - minimum timestamp
       - maximum timestamp
       - the number of data points
       - days of data available
       - coverage, i.e. the number of not NaN data points divided
         by the number of all data points as percentage
       - average data points per day

    :param currency_pair_list: list of currency pairs to compute stats for.
    :return: stats table indexed by currency pair, one row per pair; if the
        list is empty, an empty table with the stats columns
    """
    columns = [
        "min_ts",
        "max_ts",
        "n_data_points",
        "coverage",
        "days_available",
        "avg_data_points_per_day",
    ]
    res = {}
    # Iterate over currency pairs.
    for currency_pair in currency_pair_list:
        data_currency_pair = data.loc[data["currency_pair"] == currency_pair]
        # Compute the number of days available.
        days_available = (
            data_currency_pair.last_valid_index()
            - data_currency_pair.first_valid_index()
        ).days
        # Compute the number of data points.
        n_data_points = data_currency_pair.close.count()
        # Compute data coverage.
        coverage = 100 * (1 - cstats.compute_frac_nan(data_currency_pair.close))
        # The per-day average is undefined when the data spans less than one
        # full day: report NaN instead of dividing by zero.
        if days_available != 0:
            avg_data_points_per_day = n_data_points / days_available
        else:
            avg_data_points_per_day = float("nan")
        # Store the stats in the same order as `columns`.
        res[currency_pair] = [
            data_currency_pair.index.min(),
            data_currency_pair.index.max(),
            n_data_points,
            coverage,
            days_available,
            avg_data_points_per_day,
        ]
    # Convert into a DataFrame once, after all the pairs are processed, so
    # that the table is not rebuilt on every iteration and the result is
    # defined even when there are no pairs.
    df_res = pd.DataFrame(
        data=list(res.values()),
        columns=columns,
        index=list(res.keys()),
    )
    return df_res


def get_file_path_for_exchange(config: cconconf.Config, exchange: str) -> str:
    """
    Build the path to the exchange-specific data.

    The path is `<data_dir>/<vendor in lower case>/<data_snapshot>/<exchange>`,
    e.g., `"s3://cryptokaizen-data/historical/ccxt/latest/binance"`.

    :param config: config providing `["load"]["data_dir"]`,
        `["data"]["vendor"]` and `["data"]["data_snapshot"]`
    :param exchange: name of the exchange dir, e.g., `"binance"`
    :return: the path components joined with `os.path.join()`
    """
    root_dir = config["load"]["data_dir"]
    data_params = config["data"]
    # The vendor name is lower-cased before it is used as a dir name.
    vendor_dir = data_params["vendor"].lower()
    snapshot_dir = data_params["data_snapshot"]
    return os.path.join(root_dir, vendor_dir, snapshot_dir, exchange)

# Load CCXT data from the historical bucket

## binance stats

In [13]:
# TODO(Nina): @all Usage of the client is very slow due to CMTask1726.
#  Until this issue is fixed, you can speed up the client by replacing `apply`
#  with the vectorized counterpart: `df['exchange_id'] + "::" + df['currency_pair']`.
# Create the client that reads the historical CCXT data from the dir and with
# the AWS profile specified in the config.
universe_version = "v3"
resample_1min = True
ccxt_historical_client = icdcl.ccxt_clients.CcxtHistoricalPqByTileClient(
    universe_version,
    resample_1min,
    config["load"]["data_dir"],
    # NOTE(review): presumably the partitioning mode of the Parquet data --
    # confirm against the client's signature.
    "by_year_month",
    aws_profile=config["load"]["aws_profile"],
)

In [14]:
# Get the universe from the client and display it; the recorded output below
# lists full symbols in the `exchange_id::currency_pair` format.
universe = ccxt_historical_client.get_universe()
universe

['binance::ADA_USDT',
 'binance::AVAX_USDT',
 'binance::BNB_USDT',
 'binance::BTC_USDT',
 'binance::DOGE_USDT',
 'binance::EOS_USDT',
 'binance::ETH_USDT',
 'binance::LINK_USDT',
 'binance::SOL_USDT',
 'ftx::BNB_USDT',
 'ftx::BTC_USDT',
 'ftx::DOGE_USDT',
 'ftx::ETH_USDT',
 'ftx::LINK_USDT',
 'ftx::SOL_USDT',
 'ftx::XRP_USDT',
 'gateio::ADA_USDT',
 'gateio::AVAX_USDT',
 'gateio::BNB_USDT',
 'gateio::BTC_USDT',
 'gateio::DOGE_USDT',
 'gateio::EOS_USDT',
 'gateio::ETH_USDT',
 'gateio::FIL_USDT',
 'gateio::LINK_USDT',
 'gateio::SOL_USDT',
 'gateio::XRP_USDT',
 'kucoin::ADA_USDT',
 'kucoin::AVAX_USDT',
 'kucoin::BNB_USDT',
 'kucoin::BTC_USDT',
 'kucoin::DOGE_USDT',
 'kucoin::EOS_USDT',
 'kucoin::ETH_USDT',
 'kucoin::FIL_USDT',
 'kucoin::LINK_USDT',
 'kucoin::SOL_USDT',
 'kucoin::XRP_USDT']

In [8]:
# TODO(Nina): @all The kernel dies when trying to load data for the whole universe due to CMTask1726.
# Load all the data available for the 1st full symbol in the universe.
# Both time bounds are `None`, i.e. no start / end timestamp is passed to
# the client.
start_ts = None
end_ts = None
data = ccxt_historical_client.read_data([universe[0]], start_ts, end_ts)

In [15]:
# Log the shape of the loaded data and display its first rows.
_LOG.info(data.shape)
data.head(3)

INFO  (1833200, 6)


Unnamed: 0_level_0,full_symbol,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-08-17 00:00:00+00:00,binance::ADA_USDT,0.0946,0.0948,0.09442,0.09479,41334.2
2018-08-17 00:01:00+00:00,binance::ADA_USDT,0.09479,0.0948,0.09425,0.09473,98801.4
2018-08-17 00:02:00+00:00,binance::ADA_USDT,0.09434,0.09457,0.09421,0.09456,19992.1


In [10]:
# TODO(Nina): @all Refactor functions from `research_amp.cc.statistics` to properly work with
# `ImClient` data.
# NOTE: this call currently fails with the `AssertionError` recorded below:
# the columns `currency_pair` and `exchange_id` named in the config are not
# present in the client data, which has `full_symbol` instead.
compute_start_end_stats = ramccsta.compute_start_end_stats(data, config)
compute_start_end_stats

AssertionError: 
################################################################################
* Failed assertion *
val1=['close', 'currency_pair', 'exchange_id']
issubset
val2=['close', 'full_symbol', 'high', 'low', 'open', 'volume']
val1 - val2=['currency_pair', 'exchange_id']
################################################################################


In [37]:
# TODO(Nina): @all all exchange ids in a bucket could be extracted via `listdir()` from `helpers.hs3`.
# Load the binance Parquet data directly from the exchange-specific path.
# This overwrites the notebook-level `data` variable.
binance_exchange = "binance"
file_path = get_file_path_for_exchange(config, binance_exchange)
data = hparque.from_parquet(file_path, aws_profile=config["load"]["aws_profile"])
# Log the shape of the loaded data and display its first rows.
_LOG.info(data.shape)
data.head(3)

INFO  (43913620, 9)


Unnamed: 0_level_0,timestamp,open,high,low,close,volume,currency_pair,year,month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-10-01 00:00:00+00:00,1538352000000,0.08511,0.08522,0.08507,0.08509,108759.7,ADA_USDT,2018,10
2018-10-01 00:01:00+00:00,1538352060000,0.08514,0.08559,0.08509,0.08559,1074230.8,ADA_USDT,2018,10
2018-10-01 00:02:00+00:00,1538352120000,0.08538,0.08558,0.08537,0.08553,62781.7,ADA_USDT,2018,10


In [13]:
# Compute the stats for every currency pair present in the loaded data.
# `compute_stats_per_currency_pair()` reads the notebook-level `data`, so the
# data for this exchange must be loaded before running this cell.
currency_pairs = list(data["currency_pair"].unique())
dfb = compute_stats_per_currency_pair(currency_pairs)
# Tag every row with the exchange and the vendor.
dfb["exchange_id"] = binance_exchange
dfb["vendor"] = config["data"]["vendor"]
dfb

Unnamed: 0,min_ts,max_ts,n_data_points,coverage,days_available,avg_data_points_per_day,exchange_id,vendor
ADA_USDT,2018-08-17 00:00:00+00:00,2022-02-10 01:19:00+00:00,10127280,100.0,1228,8246.970684,binance,CCXT
AVAX_USDT,2020-09-22 06:30:00+00:00,2022-02-10 01:19:00+00:00,3519308,100.0,497,7081.102616,binance,CCXT
BNB_USDT,2018-08-17 00:00:00+00:00,2022-02-10 01:19:00+00:00,5064230,100.0,1228,4123.965798,binance,CCXT
BTC_USDT,2018-08-17 00:00:00+00:00,2022-02-10 01:19:00+00:00,5058925,100.0,1228,4119.645765,binance,CCXT
DOGE_USDT,2019-07-05 12:00:00+00:00,2022-02-10 01:19:00+00:00,3675393,100.0,863,4258.856315,binance,CCXT
EOS_USDT,2018-08-17 00:00:00+00:00,2022-02-10 01:19:00+00:00,5064528,100.0,1228,4124.208469,binance,CCXT
ETH_USDT,2018-08-17 00:00:00+00:00,2022-02-10 01:19:00+00:00,5059418,100.0,1228,4120.047231,binance,CCXT
LINK_USDT,2019-01-16 10:00:00+00:00,2022-02-10 01:19:00+00:00,4407090,100.0,1120,3934.901786,binance,CCXT
SOL_USDT,2020-08-11 06:00:00+00:00,2022-02-10 01:19:00+00:00,1937448,100.0,497,3898.285714,binance,CCXT


## bitfinex stats

In [38]:
# Load the bitfinex Parquet data directly from the exchange-specific path.
# This overwrites the notebook-level `data` variable.
bitfinex_exchange = "bitfinex"
file_path = get_file_path_for_exchange(config, bitfinex_exchange)
data = hparque.from_parquet(file_path, aws_profile=config["load"]["aws_profile"])
# Log the shape of the loaded data and display its first rows.
_LOG.info(data.shape)
data.head(3)

INFO  (28734778, 10)


Unnamed: 0_level_0,timestamp,open,high,low,close,volume,knowledge_timestamp,currency_pair,year,month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-10-01 00:05:00+00:00,1601510700000,0.10134,0.10134,0.10134,0.10134,10.256057,,ADA_USDT,2020,10
2020-10-01 00:14:00+00:00,1601511240000,0.1015,0.1015,0.1015,0.1015,11.231263,,ADA_USDT,2020,10
2020-10-01 00:22:00+00:00,1601511720000,0.10284,0.10284,0.10284,0.10284,10.521761,,ADA_USDT,2020,10


In [14]:
# Compute the stats for every currency pair present in the loaded data.
# `compute_stats_per_currency_pair()` reads the notebook-level `data`, so the
# data for this exchange must be loaded before running this cell.
currency_pairs = list(data["currency_pair"].unique())
dfb = compute_stats_per_currency_pair(currency_pairs)
# Tag every row with the exchange and the vendor.
dfb["exchange_id"] = bitfinex_exchange
dfb["vendor"] = config["data"]["vendor"]
dfb

Unnamed: 0,min_ts,max_ts,n_data_points,coverage,days_available,avg_data_points_per_day,exchange_id,vendor
ADA_USDT,2020-08-06 10:11:00+00:00,2022-02-11 16:10:00+00:00,2873571,100.0,498,5770.222892,bitfinex,CCXT
AVAX_USDT,2020-09-23 12:03:00+00:00,2022-02-11 16:10:00+00:00,2873342,100.0,498,5769.763052,bitfinex,CCXT
BTC_USDT,2019-03-11 10:05:00+00:00,2022-02-11 10:39:00+00:00,2871979,100.0,864,3324.049769,bitfinex,CCXT
DOGE_USDT,2021-04-21 11:03:00+00:00,2022-02-11 16:10:00+00:00,2874478,100.0,133,21612.616541,bitfinex,CCXT
EOS_USDT,2019-04-12 10:02:00+00:00,2022-02-11 16:16:00+00:00,2874177,100.0,864,3326.59375,bitfinex,CCXT
ETH_USDT,2019-03-11 10:03:00+00:00,2022-02-11 10:45:00+00:00,2873510,100.0,864,3325.821759,bitfinex,CCXT
FIL_USDT,2020-10-15 18:54:00+00:00,2022-02-11 16:18:00+00:00,2876303,100.0,483,5955.078675,bitfinex,CCXT
LINK_USDT,2020-08-21 09:04:00+00:00,2022-02-11 16:20:00+00:00,2873642,100.0,498,5770.365462,bitfinex,CCXT
SOL_USDT,2021-02-25 11:10:00+00:00,2022-02-11 16:22:00+00:00,2870334,100.0,133,21581.458647,bitfinex,CCXT
XRP_USDT,2020-12-18 09:53:00+00:00,2022-02-11 16:22:00+00:00,2873442,100.0,420,6841.528571,bitfinex,CCXT


## ftx stats

In [15]:
# Load the ftx Parquet data directly from the exchange-specific path.
# This overwrites the notebook-level `data` variable.
ftx_exchange = "ftx"
file_path = get_file_path_for_exchange(config, ftx_exchange)
data = hparque.from_parquet(file_path, aws_profile=config["load"]["aws_profile"])
# Log the shape of the loaded data and display its first rows.
_LOG.info(data.shape)
data.head(3)

INFO  (10741864, 9)


Unnamed: 0_level_0,timestamp,open,high,low,close,volume,currency_pair,year,month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-10-01 00:00:00+00:00,1601510400000,29.252,29.302,29.252,29.252,0.0,BNB_USDT,2020,10
2020-10-01 00:01:00+00:00,1601510460000,29.252,29.288,29.252,29.283,522.40872,BNB_USDT,2020,10
2020-10-01 00:02:00+00:00,1601510520000,29.283,29.311,29.282,29.299,0.0,BNB_USDT,2020,10


In [16]:
# Compute the stats for every currency pair present in the loaded data.
# `compute_stats_per_currency_pair()` reads the notebook-level `data`, so the
# data for this exchange must be loaded before running this cell.
currency_pairs = list(data["currency_pair"].unique())
dfb = compute_stats_per_currency_pair(currency_pairs)
# Tag every row with the exchange and the vendor.
dfb["exchange_id"] = ftx_exchange
dfb["vendor"] = config["data"]["vendor"]
dfb

Unnamed: 0,min_ts,max_ts,n_data_points,coverage,days_available,avg_data_points_per_day,exchange_id,vendor
BNB_USDT,2020-04-09 20:55:00+00:00,2022-02-10 01:19:00+00:00,1729566,100.0,497,3480.012072,ftx,CCXT
BTC_USDT,2020-03-28 14:40:00+00:00,2022-02-10 01:19:00+00:00,1764654,100.0,497,3550.61167,ftx,CCXT
DOGE_USDT,2021-01-13 04:06:00+00:00,2022-02-10 01:19:00+00:00,928264,100.0,392,2368.020408,ftx,CCXT
ETH_USDT,2020-03-28 14:40:00+00:00,2022-02-10 01:19:00+00:00,1764697,100.0,497,3550.698189,ftx,CCXT
LINK_USDT,2020-04-21 02:34:00+00:00,2022-02-10 01:19:00+00:00,1697181,100.0,497,3414.851107,ftx,CCXT
SOL_USDT,2020-07-27 00:13:00+00:00,2022-02-10 01:19:00+00:00,1418020,100.0,497,2853.158954,ftx,CCXT
XRP_USDT,2020-07-19 13:37:00+00:00,2022-02-10 01:19:00+00:00,1439482,100.0,497,2896.342052,ftx,CCXT


## gateio stats

In [40]:
# Load the gateio Parquet data directly from the exchange-specific path.
# This overwrites the notebook-level `data` variable.
gateio_exchange = "gateio"
file_path = get_file_path_for_exchange(config, gateio_exchange)
data = hparque.from_parquet(file_path, aws_profile=config["load"]["aws_profile"])
# Log the shape of the loaded data and display its first rows.
_LOG.info(data.shape)
data.head(3)

INFO  (4265385, 9)


Unnamed: 0_level_0,timestamp,open,high,low,close,volume,currency_pair,year,month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-11-12 11:15:00+00:00,1636715700000,2.02,2.0218,2.0179,2.0218,6927.864861,ADA_USDT,2021,11
2021-11-12 11:16:00+00:00,1636715760000,2.0218,2.0241,2.021,2.0218,5126.790028,ADA_USDT,2021,11
2021-11-12 11:17:00+00:00,1636715820000,2.0218,2.023,2.0215,2.023,9198.983848,ADA_USDT,2021,11


In [18]:
# Compute the stats for every currency pair present in the loaded data.
# `compute_stats_per_currency_pair()` reads the notebook-level `data`, so the
# data for this exchange must be loaded before running this cell.
currency_pairs = list(data["currency_pair"].unique())
dfb = compute_stats_per_currency_pair(currency_pairs)
# Tag every row with the exchange and the vendor.
dfb["exchange_id"] = gateio_exchange
dfb["vendor"] = config["data"]["vendor"]
dfb

Unnamed: 0,min_ts,max_ts,n_data_points,coverage,days_available,avg_data_points_per_day,exchange_id,vendor
ADA_USDT,2021-06-22 20:35:00+00:00,2022-02-10 01:19:00+00:00,387735,100.0,89,4356.573034,gateio,CCXT
AVAX_USDT,2021-06-23 00:26:00+00:00,2022-02-10 01:19:00+00:00,387261,100.0,89,4351.247191,gateio,CCXT
BNB_USDT,2021-06-22 22:30:00+00:00,2022-02-10 01:19:00+00:00,387481,100.0,89,4353.719101,gateio,CCXT
BTC_USDT,2021-06-22 14:50:00+00:00,2022-02-10 01:19:00+00:00,388181,100.0,89,4361.58427,gateio,CCXT
DOGE_USDT,2021-06-22 19:38:00+00:00,2022-02-10 01:19:00+00:00,387801,100.0,89,4357.314607,gateio,CCXT
EOS_USDT,2021-06-22 23:28:00+00:00,2022-02-10 01:19:00+00:00,387329,100.0,89,4352.011236,gateio,CCXT
ETH_USDT,2021-06-22 15:48:00+00:00,2022-02-10 01:19:00+00:00,388145,100.0,89,4361.179775,gateio,CCXT
FIL_USDT,2021-06-22 18:40:00+00:00,2022-02-10 01:19:00+00:00,387881,100.0,89,4358.213483,gateio,CCXT
LINK_USDT,2021-06-22 21:33:00+00:00,2022-02-10 01:19:00+00:00,387523,100.0,89,4354.191011,gateio,CCXT
SOL_USDT,2021-06-22 16:45:00+00:00,2022-02-10 01:19:00+00:00,388088,100.0,89,4360.539326,gateio,CCXT


## kucoin stats

In [41]:
# Load the kucoin Parquet data directly from the exchange-specific path.
# This overwrites the notebook-level `data` variable.
kucoin_exchange = "kucoin"
file_path = get_file_path_for_exchange(config, kucoin_exchange)
data = hparque.from_parquet(file_path, aws_profile=config["load"]["aws_profile"])
# Log the shape of the loaded data and display its first rows.
_LOG.info(data.shape)
data.head(3)

INFO  (29533167, 9)


Unnamed: 0_level_0,timestamp,open,high,low,close,volume,currency_pair,year,month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-10-01 00:00:00+00:00,1569888000000,0.038753,0.038753,0.038753,0.038753,17.2192,ADA_USDT,2019,10
2019-10-01 00:01:00+00:00,1569888060000,0.038753,0.038753,0.038753,0.038753,0.0,ADA_USDT,2019,10
2019-10-01 00:02:00+00:00,1569888120000,0.038753,0.038753,0.038753,0.038753,0.0,ADA_USDT,2019,10


In [20]:
# Compute the stats for every currency pair present in the loaded data.
# `compute_stats_per_currency_pair()` reads the notebook-level `data`, so the
# data for this exchange must be loaded before running this cell.
currency_pairs = list(data["currency_pair"].unique())
dfb = compute_stats_per_currency_pair(currency_pairs)
# Tag every row with the exchange and the vendor.
dfb["exchange_id"] = kucoin_exchange
dfb["vendor"] = config["data"]["vendor"]
dfb

Unnamed: 0,min_ts,max_ts,n_data_points,coverage,days_available,avg_data_points_per_day,exchange_id,vendor
ADA_USDT,2019-07-04 10:27:00+00:00,2022-02-10 14:32:00+00:00,3491352,100.0,863,4045.599073,kucoin,CCXT
AVAX_USDT,2021-03-05 10:00:00+00:00,2022-02-10 14:53:00+00:00,847887,100.0,125,6783.096,kucoin,CCXT
BNB_USDT,2019-06-19 10:00:00+00:00,2022-02-10 14:56:00+00:00,3547671,100.0,863,4110.858633,kucoin,CCXT
BTC_USDT,2018-08-17 00:00:00+00:00,2022-02-10 14:59:00+00:00,4583370,100.0,1228,3732.385993,kucoin,CCXT
DOGE_USDT,2021-02-09 03:00:00+00:00,2022-02-10 15:02:00+00:00,951951,100.0,125,7615.608,kucoin,CCXT
EOS_USDT,2018-08-17 00:10:00+00:00,2022-02-10 15:06:00+00:00,4174923,100.0,1228,3399.77443,kucoin,CCXT
ETH_USDT,2018-08-17 00:01:00+00:00,2022-02-10 15:09:00+00:00,4514736,100.0,1228,3676.495114,kucoin,CCXT
FIL_USDT,2020-10-15 17:25:00+00:00,2022-02-10 15:12:00+00:00,1454115,100.0,482,3016.8361,kucoin,CCXT
LINK_USDT,2020-08-20 10:00:00+00:00,2022-02-10 15:24:00+00:00,1698492,100.0,497,3417.488934,kucoin,CCXT
SOL_USDT,2021-08-04 10:00:00+00:00,2022-02-10 15:28:00+00:00,189027,100.0,125,1512.216,kucoin,CCXT


In [1]:
# The cells below contain the stats for the buckets `cryptokaizen-data2/historical/`
# and `cryptokaizen-data/daily_staged`. We decided not to include them in the analysis
# at the moment, so the code is commented out. Feel free to remove it if it is not needed.

# Load CCXT data from data2 bucket

In [6]:
# def read_exchange_df(paths: list) -> pd.DataFrame:
#     """
#     Read csv files from `s3://cryptokaizen-data2/historical/` and convert them to
#     a DataFrame.
#     """
#     all_data = []
#     for currency_pair, path in paths:
#         data = hpandas.read_csv_to_df(path)
#         data["currency_pair"] = currency_pair
#         all_data.append(data)
#     df = pd.concat(all_data)
#     return df

## binance stat

In [10]:
# paths = [
#     (
#         "ADA_USDT",
#         "s3://cryptokaizen-data2/historical/binance/ADA_USDT_20220210-104334.csv",
#     ),
#     (
#         "AVAX_USDT",
#         "s3://cryptokaizen-data2/historical/binance/AVAX_USDT_20220210-105623.csv",
#     ),
#     (
#         "BNB_USDT",
#         "s3://cryptokaizen-data2/historical/binance/BNB_USDT_20220210-110910.csv",
#     ),
#     (
#         "BTC_USDT",
#         "s3://cryptokaizen-data2/historical/binance/BTC_USDT_20220210-112208.csv",
#     ),
#     (
#         "DOGE_USDT",
#         "s3://cryptokaizen-data2/historical/binance/DOGE_USDT_20220210-113502.csv",
#     ),
#     (
#         "EOS_USDT",
#         "s3://cryptokaizen-data2/historical/binance/EOS_USDT_20220210-114748.csv",
#     ),
#     (
#         "ETH_USDT",
#         "s3://cryptokaizen-data2/historical/binance/ETH_USDT_20220210-120031.csv",
#     ),
#     (
#         "LINK_USDT",
#         "s3://cryptokaizen-data2/historical/binance/LINK_USDT_20220210-121311.csv",
#     ),
#     (
#         "SOL_USDT",
#         "s3://cryptokaizen-data2/historical/binance/SOL_USDT_20220210-122551.csv",
#     ),
# ]
# data = read_exchange_df(paths)
# data.head()

Unnamed: 0,timestamp,open,high,low,close,volume,end_download_timestamp,currency_pair
0,1627776000000,1.3193,1.3198,1.3184,1.3191,231817.54,2022-02-10 10:30:48.347909+00:00,ADA_USDT
1,1627776060000,1.3191,1.3194,1.3141,1.3145,392782.42,2022-02-10 10:30:48.347909+00:00,ADA_USDT
2,1627776120000,1.3145,1.3154,1.3108,1.3119,656717.86,2022-02-10 10:30:48.347909+00:00,ADA_USDT
3,1627776180000,1.3118,1.3137,1.3105,1.3133,451947.63,2022-02-10 10:30:48.347909+00:00,ADA_USDT
4,1627776240000,1.3135,1.3153,1.3129,1.3152,132722.44,2022-02-10 10:30:48.347909+00:00,ADA_USDT


In [22]:
# hdateti.convert_unix_epoch_to_timestamp(data.timestamp.max())

Timestamp('2022-02-10 01:19:00+0000', tz='UTC')

In [23]:
# hdateti.convert_unix_epoch_to_timestamp(data.timestamp.min())

Timestamp('2021-08-01 00:00:00+0000', tz='UTC')

## bitfinex stat

In [24]:
# paths = [
#     (
#         "ADA_USDT",
#         "s3://cryptokaizen-data2/historical/bitfinex/ADA_USDT_20220211-161045.csv",
#     ),
#     (
#         "AVAX_USDT",
#         "s3://cryptokaizen-data2/historical/bitfinex/AVAX_USDT_20220211-161212.csv",
#     ),
#     (
#         "BTC_USDT",
#         "s3://cryptokaizen-data2/historical/bitfinex/BTC_USDT_20220211-161338.csv",
#     ),
#     (
#         "DOGE_USDT",
#         "s3://cryptokaizen-data2/historical/bitfinex/DOGE_USDT_20220211-161507.csv",
#     ),
#     (
#         "EOS_USDT",
#         "s3://cryptokaizen-data2/historical/bitfinex/EOS_USDT_20220211-161634.csv",
#     ),
#     (
#         "ETH_USDT",
#         "s3://cryptokaizen-data2/historical/bitfinex/ETH_USDT_20220211-161801.csv",
#     ),
#     (
#         "FIL_USDT",
#         "s3://cryptokaizen-data2/historical/bitfinex/FIL_USDT_20220211-161926.csv",
#     ),
#     (
#         "LINK_USDT",
#         "s3://cryptokaizen-data2/historical/bitfinex/LINK_USDT_20220211-162053.csv",
#     ),
#     (
#         "SOL_USDT",
#         "s3://cryptokaizen-data2/historical/bitfinex/SOL_USDT_20220211-162219.csv",
#     ),
#     (
#         "XRP_USDT",
#         "s3://cryptokaizen-data2/historical/bitfinex/XRP_USDT_20220211-162345.csv",
#     ),
# ]
# data = read_exchange_df(paths)
# data.head()

Unnamed: 0,timestamp,open,high,low,close,volume,end_download_timestamp,currency_pair,knowledge_timestamp
0,1627776420000,1.3165,1.3165,1.3165,1.3165,10.68932,2022-02-11 16:09:26.959325+00:00,ADA_USDT,20220211-161045
1,1627776960000,1.3173,1.3173,1.3173,1.3173,11.998408,2022-02-11 16:09:26.959325+00:00,ADA_USDT,20220211-161045
2,1627777560000,1.3269,1.3269,1.3269,1.3269,10.397264,2022-02-11 16:09:26.959325+00:00,ADA_USDT,20220211-161045
3,1627777620000,1.3292,1.3292,1.3292,1.3292,11.041356,2022-02-11 16:09:26.959325+00:00,ADA_USDT,20220211-161045
4,1627778100000,1.3281,1.3296,1.3281,1.3296,21.982842,2022-02-11 16:09:26.959325+00:00,ADA_USDT,20220211-161045


In [25]:
# hdateti.convert_unix_epoch_to_timestamp(data.timestamp.max())

Timestamp('2022-02-11 16:22:00+0000', tz='UTC')

In [26]:
# hdateti.convert_unix_epoch_to_timestamp(data.timestamp.min())

Timestamp('2021-08-01 00:00:00+0000', tz='UTC')

## ftx stat

In [27]:
# paths = [
#     (
#         "BNB_USDT",
#         "s3://cryptokaizen-data2/historical/ftx/BNB_USDT_20220210-104642.csv",
#     ),
#     (
#         "BNB_USDT",
#         "s3://cryptokaizen-data2/historical/ftx/BNB_USDT_20220210-123958.csv",
#     ),
#     (
#         "BTC_USDT",
#         "s3://cryptokaizen-data2/historical/ftx/BTC_USDT_20220210-110047.csv",
#     ),
#     (
#         "BTC_USDT",
#         "s3://cryptokaizen-data2/historical/ftx/BTC_USDT_20220210-125404.csv",
#     ),
#     (
#         "DOGE_USDT",
#         "s3://cryptokaizen-data2/historical/ftx/DOGE_USDT_20220210-111452.csv",
#     ),
#     (
#         "ETH_USDT",
#         "s3://cryptokaizen-data2/historical/ftx/ETH_USDT_20220210-112851.csv",
#     ),
#     (
#         "LINK_USDT",
#         "s3://cryptokaizen-data2/historical/ftx/LINK_USDT_20220210-114240.csv",
#     ),
#     (
#         "SOL_USDT",
#         "s3://cryptokaizen-data2/historical/ftx/SOL_USDT_20220210-115701.csv",
#     ),
#     (
#         "XRP_USDT",
#         "s3://cryptokaizen-data2/historical/ftx/XRP_USDT_20220210-121122.csv",
#     ),
# ]
# data = read_exchange_df(paths)
# data.head()

Unnamed: 0,timestamp,open,high,low,close,volume,end_download_timestamp,currency_pair
0,1627776000000,332.6,332.677,332.37,332.546,41009.83096,2022-02-10 10:32:48.602579+00:00,BNB_USDT
1,1627776060000,332.546,332.635,331.679,332.034,3413.42262,2022-02-10 10:32:48.602579+00:00,BNB_USDT
2,1627776120000,332.052,332.2,330.893,331.314,149071.66973,2022-02-10 10:32:48.602579+00:00,BNB_USDT
3,1627776180000,331.314,331.422,330.867,331.214,37332.95273,2022-02-10 10:32:48.602579+00:00,BNB_USDT
4,1627776240000,331.214,332.051,331.214,332.051,39343.20763,2022-02-10 10:32:48.602579+00:00,BNB_USDT


In [28]:
# hdateti.convert_unix_epoch_to_timestamp(data.timestamp.max())

Timestamp('2022-02-10 01:19:00+0000', tz='UTC')

In [29]:
# hdateti.convert_unix_epoch_to_timestamp(data.timestamp.min())

Timestamp('2021-08-01 00:00:00+0000', tz='UTC')

## gateio stat

In [10]:
# paths = [
#     (
#         "ADA_USDT",
#         "s3://cryptokaizen-data2/historical/gateio/ADA_USDT_20220210-112115.csv",
#     ),
#     (
#         "AVAX_USDT",
#         "s3://cryptokaizen-data2/historical/gateio/AVAX_USDT_20220210-113306.csv",
#     ),
#     (
#         "BNB_USDT",
#         "s3://cryptokaizen-data2/historical/gateio/BNB_USDT_20220210-114500.csv",
#     ),
#     (
#         "BTC_USDT",
#         "s3://cryptokaizen-data2/historical/gateio/BTC_USDT_20220210-115659.csv",
#     ),
#     (
#         "DOGE_USDT",
#         "s3://cryptokaizen-data2/historical/gateio/DOGE_USDT_20220210-120851.csv",
#     ),
#     (
#         "EOS_USDT",
#         "s3://cryptokaizen-data2/historical/gateio/EOS_USDT_20220210-122048.csv",
#     ),
#     (
#         "ETH_USDT",
#         "s3://cryptokaizen-data2/historical/gateio/ETH_USDT_20220210-123244.csv",
#     ),
#     (
#         "FIL_USDT",
#         "s3://cryptokaizen-data2/historical/gateio/FIL_USDT_20220210-124438.csv",
#     ),
#     (
#         "LINK_USDT",
#         "s3://cryptokaizen-data2/historical/gateio/LINK_USDT_20220210-125629.csv",
#     ),
#     (
#         "SOL_USDT",
#         "s3://cryptokaizen-data2/historical/gateio/SOL_USDT_20220210-130821.csv",
#     ),
#     (
#         "XRP_USDT",
#         "s3://cryptokaizen-data2/historical/gateio/XRP_USDT_20220210-132013.csv",
#     ),
# ]
# data = read_exchange_df(paths)
# data.head()

Unnamed: 0,timestamp,open,high,low,close,volume,end_download_timestamp,currency_pair
0,1636715700000,2.02,2.0218,2.0179,2.0218,6927.864861,2022-02-10 11:15:44.436692+00:00,BNB_USDT
1,1636715760000,2.0218,2.0241,2.021,2.0218,5126.790028,2022-02-10 11:15:44.436692+00:00,BNB_USDT
2,1636715820000,2.0218,2.023,2.0215,2.023,9198.983848,2022-02-10 11:15:44.436692+00:00,BNB_USDT
3,1636715880000,2.0238,2.0248,2.0237,2.0245,837.509648,2022-02-10 11:15:44.436692+00:00,BNB_USDT
4,1636715940000,2.023,2.0269,2.023,2.0256,22465.93896,2022-02-10 11:15:44.436692+00:00,BNB_USDT


In [31]:
# hdateti.convert_unix_epoch_to_timestamp(data.timestamp.max())

Timestamp('2022-02-10 01:19:00+0000', tz='UTC')

In [32]:
# hdateti.convert_unix_epoch_to_timestamp(data.timestamp.min())

Timestamp('2021-11-12 11:15:00+0000', tz='UTC')

# Load CCXT data from daily staged bucket

In [7]:
# file_path = "%s/ccxt/binance/" % config["load"]["data_dir"]
# kwargs = {"aws_profile": "ck"}
# data = hparque.from_parquet(file_path, **kwargs)
# data.head()

Unnamed: 0_level_0,timestamp,open,high,low,close,volume,end_download_timestamp,exchange_id,knowledge_timestamp,currency_pair,year,month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-12-30 00:00:00+00:00,1640822400000,1.333,1.334,1.331,1.333,128170.6,2022-03-02 21:23:55.844095+00:00,binance,2022-03-02 21:24:07.954222+00:00,ADA_USDT,2021,12
2021-12-30 00:01:00+00:00,1640822460000,1.334,1.335,1.333,1.333,76045.7,2022-03-02 21:23:55.844095+00:00,binance,2022-03-02 21:24:07.954222+00:00,ADA_USDT,2021,12
2021-12-30 00:02:00+00:00,1640822520000,1.334,1.335,1.327,1.328,378727.3,2022-03-02 21:23:55.844095+00:00,binance,2022-03-02 21:24:07.954222+00:00,ADA_USDT,2021,12
2021-12-30 00:03:00+00:00,1640822580000,1.328,1.329,1.323,1.326,216804.3,2022-03-02 21:23:55.844095+00:00,binance,2022-03-02 21:24:07.954222+00:00,ADA_USDT,2021,12
2021-12-30 00:04:00+00:00,1640822640000,1.326,1.327,1.323,1.327,122692.4,2022-03-02 21:23:55.844095+00:00,binance,2022-03-02 21:24:07.954222+00:00,ADA_USDT,2021,12
