In [None]:
import os
import io
import json
import pathlib
import datetime as dt

import pycurl
import pandas as pd

import shared


# Notebook-wide logger obtained from the `shared` helper module; the cells
# below use it for debug/info progress messages.
logger = shared.get_logger()

In [None]:
# Download window; the download loop treats both bounds as inclusive.
date_start = dt.date(2023, 8, 1)
date_end = dt.date(2024, 4, 1)

In [None]:
def download_spot_1m_btc_klines(date: dt.date) -> pd.DataFrame:
    """Download one day of BTCUSDT spot 1-minute klines from Binance.

    The download happens in two steps: a JSON POST to the Binance
    download-listing endpoint returns the items available for ``date``, and
    the URL of the single listed item is then read with ``pd.read_csv``.

    Args:
        date: The day to fetch; sent as both ``startDay`` and ``endDay``.

    Returns:
        A DataFrame with the twelve kline columns defined below. When the
        listing contains no items for ``date``, an empty DataFrame with the
        same columns is returned.

    Raises:
        pycurl.error: If the listing request fails or exceeds the 3 s timeout.
        AssertionError: If the listing request does not answer with HTTP 200,
            or if more than one item is listed for the day.
    """
    columns = ['open_time', 'open', 'high', 'low', 'close', 'volume',
               'kline_close_time', 'quote_asset_volume', 'number_of_trades',
               'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume',
               't']
    url = (
        'https://www.binance.com/bapi/bigdata'
        '/v1/public/bigdata/finance/exchange/listDownloadData2'
    )
    body_dict = {
        "bizType": "SPOT",
        "productName": "klines",
        "symbolRequestItems":[{
            "endDay": date.isoformat(),
            "granularityList": ["1m"],
            "interval": "daily",
            "startDay": date.isoformat(),
            "symbol": "BTCUSDT",
        }],
    }
    # Encode once so the payload handed to libcurl is bytes and its length is
    # measured in bytes rather than in characters.
    body_bytes = json.dumps(body_dict).encode('utf-8')
    res_body_buf = io.BytesIO()
    curl = pycurl.Curl()
    try:
        curl.setopt(pycurl.URL, url)
        curl.setopt(pycurl.HTTPHEADER, ['Content-Type: application/json'])
        curl.setopt(pycurl.TIMEOUT_MS, 3000)
        # POSTFIELDS switches the request to POST and supplies the body.
        curl.setopt(pycurl.POSTFIELDS, body_bytes)
        curl.setopt(pycurl.WRITEDATA, res_body_buf)
        curl.perform()
        status_code = curl.getinfo(pycurl.RESPONSE_CODE)
    finally:
        # Release the handle even when perform()/getinfo() raises.
        curl.close()
    assert status_code == 200, (
        f'unexpected HTTP status {status_code} for {date}'
    )
    res_body_str = res_body_buf.getvalue().decode()
    logger.debug('downloaded meta data for %s', date)
    downloaded_list = json.loads(res_body_str)['data']['downloadItemList']
    if len(downloaded_list) == 0:
        logger.debug('there is no data on %s', date)
        return pd.DataFrame([], columns=columns)
    # Exactly one symbol/granularity/day was requested, so exactly one item
    # is expected back.
    assert len(downloaded_list) == 1, (
        f'expected 1 download item for {date}, got {len(downloaded_list)}'
    )
    file_url = downloaded_list[0]['url']
    out_df = pd.read_csv(file_url, header=None, names=columns)
    logger.debug('downloaded content for %s', date)
    return out_df

In [None]:
# Directory that receives one CSV file per downloaded day.
dest_dir_path_str = '/home/jovyan/.var/binance-local/spot/klines/1m/BTCUSDT'
dest_dir_path = pathlib.Path(dest_dir_path_str)
dest_dir_path.mkdir(parents=True, exist_ok=True)

# Walk the inclusive range [date_start, date_end] one calendar day at a time.
date = date_start
while date <= date_end:
    day_df = download_spot_1m_btc_klines(date=date)
    if day_df.empty:
        # Nothing was listed for this day: skip it rather than writing a
        # header-only CSV.
        logger.info('there is no data on %s', date)
    else:
        file_name = f'BTCUSDT-klines-{date.isoformat()}.csv'
        file_path = dest_dir_path / file_name
        day_df.to_csv(file_path, index=False)
        logger.info('saved for %s in %s', date, file_path)
    date += dt.timedelta(days=1)
logger.info('loaded for %s - %s', date_start, date_end)