In [None]:
%load_ext lab_black

In [None]:
import os
import io
import json
import pathlib
import enum
import datetime as dt

import pycurl
import pandas as pd

import shared


logger = shared.get_logger()

In [None]:
def download_spot_1m_btc_klines(date: dt.date) -> pd.DataFrame:
    """Download one day of 1-minute BTCUSDT spot klines from Binance.

    POSTs a JSON request to Binance's ``listDownloadData2`` endpoint to obtain
    the URL of the data file for ``date``, then reads that URL with
    ``pd.read_csv``.

    Args:
        date: Day to download; sent as both ``startDay`` and ``endDay``.

    Returns:
        The file's content as a DataFrame. The file is read as header-less and
        the columns are named after the local ``columns`` list.

    Raises:
        pycurl.error: If the HTTP transfer fails (e.g. the 3 s timeout expires).
        AssertionError: If the response status is not 200, or the response does
            not list exactly one downloadable file.
    """
    columns = [
        "open_time",
        "open",
        "high",
        "low",
        "close",
        "volume",
        "kline_close_time",
        "quote_asset_volume",
        "number_of_trades",
        "taker_buy_base_asset_volume",
        "taker_buy_quote_asset_volume",
        "t",
    ]
    url = (
        "https://www.binance.com/bapi/bigdata"
        "/v1/public/bigdata/finance/exchange/listDownloadData2"
    )
    day = date.isoformat()
    body_dict = {
        "bizType": "SPOT",
        "productName": "klines",
        "symbolRequestItems": [
            {
                "endDay": day,
                "granularityList": ["1m"],
                "interval": "daily",
                "startDay": day,
                "symbol": "BTCUSDT",
            }
        ],
    }
    # Send explicit bytes so POSTFIELDSIZE is a byte count, not a character count.
    body_bytes = json.dumps(body_dict).encode("utf-8")
    req_body_buf = io.BytesIO(body_bytes)
    res_body_buf = io.BytesIO()
    curl = pycurl.Curl()
    try:
        curl.setopt(pycurl.URL, url)
        curl.setopt(pycurl.HTTPHEADER, ["Content-Type: application/json"])
        curl.setopt(pycurl.POST, 1)
        curl.setopt(pycurl.TIMEOUT_MS, 3000)
        curl.setopt(pycurl.READDATA, req_body_buf)
        curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
        curl.setopt(pycurl.WRITEDATA, res_body_buf)
        curl.perform()
        status_code = curl.getinfo(pycurl.RESPONSE_CODE)
    finally:
        # Release the handle even when setopt()/perform() raises.
        curl.close()
    assert status_code == 200, f"unexpected HTTP status {status_code} for {date}"
    res_body_str = res_body_buf.getvalue().decode()
    logger.debug("downloaded meta data for %s", date)
    downloaded_list = json.loads(res_body_str)["data"]["downloadItemList"]
    assert (
        len(downloaded_list) == 1
    ), f"expected exactly one download item for {date}, got {len(downloaded_list)}"
    file_url = downloaded_list[0]["url"]
    out_df = pd.read_csv(file_url, header=None, names=columns)
    logger.debug("downloaded content for %s", date)
    return out_df


def download_spot_btc_trades(date: dt.date) -> pd.DataFrame:
    """Download one day of BTCUSDT spot trades from Binance.

    POSTs a JSON request to Binance's ``listDownloadData2`` endpoint to obtain
    the URL of the data file for ``date``, then reads that URL with
    ``pd.read_csv``.

    Args:
        date: Day to download; sent as both ``startDay`` and ``endDay``.

    Returns:
        The file's content as a DataFrame. The file is read as header-less and
        the columns are named after the local ``columns`` list.

    Raises:
        pycurl.error: If the HTTP transfer fails (e.g. the 3 s timeout expires).
        AssertionError: If the response status is not 200, or the response does
            not list exactly one downloadable file.
    """
    columns = ["id", "price", "qty", "base_qty", "time", "is_buyer", "is_maker"]
    url = (
        "https://www.binance.com/bapi/bigdata"
        "/v1/public/bigdata/finance/exchange/listDownloadData2"
    )
    day = date.isoformat()
    body_dict = {
        "bizType": "SPOT",
        "productName": "trades",
        "symbolRequestItems": [
            {
                "endDay": day,
                "granularityList": [],
                "interval": "daily",
                "startDay": day,
                "symbol": "BTCUSDT",
            }
        ],
    }
    # Send explicit bytes so POSTFIELDSIZE is a byte count, not a character count.
    body_bytes = json.dumps(body_dict).encode("utf-8")
    req_body_buf = io.BytesIO(body_bytes)
    res_body_buf = io.BytesIO()
    curl = pycurl.Curl()
    try:
        curl.setopt(pycurl.URL, url)
        curl.setopt(pycurl.HTTPHEADER, ["Content-Type: application/json"])
        curl.setopt(pycurl.POST, 1)
        curl.setopt(pycurl.TIMEOUT_MS, 3000)
        curl.setopt(pycurl.READDATA, req_body_buf)
        curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
        curl.setopt(pycurl.WRITEDATA, res_body_buf)
        curl.perform()
        status_code = curl.getinfo(pycurl.RESPONSE_CODE)
    finally:
        # Release the handle even when setopt()/perform() raises.
        curl.close()
    assert status_code == 200, f"unexpected HTTP status {status_code} for {date}"
    res_body_str = res_body_buf.getvalue().decode()
    logger.debug("downloaded meta data for %s", date)
    downloaded_list = json.loads(res_body_str)["data"]["downloadItemList"]
    assert (
        len(downloaded_list) == 1
    ), f"expected exactly one download item for {date}, got {len(downloaded_list)}"
    file_url = downloaded_list[0]["url"]
    out_df = pd.read_csv(file_url, header=None, names=columns)
    logger.debug("downloaded content for %s", date)
    return out_df


class DataType(enum.Enum):
    """Kind of Binance spot dataset that ``upload_date_range`` can fetch."""

    # Selects download_spot_1m_btc_klines in upload_date_range.
    klines = "klines"
    # Selects download_spot_btc_trades in upload_date_range.
    trades = "trades"


def upload_date_range(
    data_type: DataType, date_start: dt.date, date_end: dt.date
) -> None:
    match data_type:
        case DataType.klines:
            prefix = "klines"
            upload_df_f = download_spot_1m_btc_klines
        case DataType.trades:
            prefix = "trades"
            upload_df_f = download_spot_btc_trades
        case _:
            raise NotImplementedError
    dest_dir_path = (
        pathlib.Path("/home/jovyan/.var/binance-local/spot") / prefix / "1m/BTCUSDT"
    )
    os.makedirs(dest_dir_path, exist_ok=True)
    date = date_start
    while date <= date_end:
        file_name = f"BTCUSDT-{prefix}-{date.isoformat()}.csv"
        file_path = dest_dir_path / file_name
        if file_path.exists():
            logger.info("file %s already exists", file_path)
            date = date + dt.timedelta(days=1)
            continue
        df = upload_df_f(date=date)
        if df.empty:
            logger.info("there is not data on %s", date)
        else:
            df.to_csv(file_path, index=False)
            logger.info("saved for %s in %s", date, file_path)
        date = date + dt.timedelta(days=1)
    logger.info("loaded for %s - %s", date_start, date_end)

In [None]:
# Fetch 1-minute klines from 2023-08-01 through yesterday (inclusive);
# days whose CSV already exists are skipped by upload_date_range.
upload_date_range(
    data_type=DataType.klines,
    date_start=dt.date.fromisoformat("2023-08-01"),
    date_end=dt.date.today() - dt.timedelta(days=1),
)

In [None]:
# Fetch trades from 2023-08-01 through yesterday (inclusive);
# days whose CSV already exists are skipped by upload_date_range.
upload_date_range(
    data_type=DataType.trades,
    date_start=dt.date.fromisoformat("2023-08-01"),
    date_end=dt.date.today() - dt.timedelta(days=1),
)