In [1]:
import requests
import xml.etree.ElementTree as ET
from tqdm import tqdm
import pandas as pd
import os
from zipfile import ZipFile
import io

In [2]:
# Run parameters shared by the cells below.
config = dict(
    symbol='ETHBTC',   # trading pair; used in the listing prefix and in the output filename
    dfreq='15m',       # kline interval; used in the listing prefix and in the output filename
    pair_id=0,         # this is the pair_id foreign key in the database
)

In [3]:
def download_files(config):
    """Download all kline ``.zip`` archives for the configured pair into ``temp/``.

    The public ``data.binance.vision`` bucket is listed for both the ``daily``
    and ``monthly`` prefixes of ``config['symbol']`` / ``config['dfreq']``.
    Every listed key ending in ``.zip`` is then fetched, unless a file with the
    target name already exists locally.

    Parameters
    ----------
    config : dict
        Must provide ``'symbol'``, ``'dfreq'`` and ``'pair_id'``.

    Returns
    -------
    list of str
        Local paths (``temp/<pair_id>_<key with '/' replaced by '_'>``), one
        per listed archive, in listing order (daily first, then monthly).

    Raises
    ------
    requests.HTTPError
        If a listing or download request returns an error status.
    """
    ns = '{http://s3.amazonaws.com/doc/2006-03-01/}'
    request_timeout = 60  # seconds; without a timeout a stalled connection hangs forever

    # The download loop writes into this directory; create it on first use.
    os.makedirs('temp', exist_ok=True)

    file_list = []
    for ufreq in ['daily','monthly']:
        url = f"https://s3-ap-northeast-1.amazonaws.com/data.binance.vision?delimiter=/&prefix=data/spot/{ufreq}/klines/{config['symbol']}/{config['dfreq']}/"
        marker = None
        while True:
            # The first request is sent exactly as before; follow-up pages add
            # `marker` so the listing resumes after the last key already seen.
            req = requests.get(url, params={'marker': marker} if marker else None,
                               timeout=request_timeout)
            req.raise_for_status()
            root = ET.fromstring(req.content)
            keys = [item.text for item in root.findall(f"{ns}Contents/{ns}Key") if item.text]
            file_list += [key for key in keys if key.endswith('.zip')]

            # A single listing response is capped; keep paging while the
            # service reports that more keys remain.
            if root.findtext(f"{ns}IsTruncated") != 'true':
                break
            marker = root.findtext(f"{ns}NextMarker") or (keys[-1] if keys else None)
            if not marker:
                break

    ret_list = []
    for f in tqdm(file_list):
        f_ = 'temp/' + str(config['pair_id']) + "_" + f.replace("/","_")
        ret_list.append(f_)
        if not os.path.exists(f_):
            furl = f'https://data.binance.vision/{f}'
            r = requests.get(furl, allow_redirects=True, timeout=request_timeout)
            # Fail loudly rather than caching an error page under a .zip name.
            r.raise_for_status()
            # Write to a sibling file and rename, so an interrupted run never
            # leaves a truncated archive that the exists() check would trust.
            partial = f_ + '.part'
            with open(partial, 'wb') as fh:
                fh.write(r.content)
            os.replace(partial, f_)

    return ret_list

In [4]:
# Column names assigned to each CSV. Because `names=` is passed to read_csv,
# the first row of every file is treated as data, not as a header.
# The last column is labelled 'pair_id' and is overwritten below with
# config['pair_id'], so whatever the file holds in that position is discarded.
columns = ['open_time','open','high','low','close','volume','close_time','quote_asset_volume',
         'number_of_trades','taker_buy_base_asset_volume','taker_buy_quote_asset_volume','pair_id']

# Single source of truth for the output filename (written and printed below).
out_path = f"{config['pair_id']}_klines_{config['symbol']}_{config['dfreq']}.csv"

dfs = []
for f in download_files(config):
    # Close each archive as soon as its payload is read, and decompress only
    # the first member instead of every member.
    with ZipFile(f) as input_zip:
        data = input_zip.read(input_zip.namelist()[0])
    df = pd.read_csv(io.BytesIO(data), names=columns)
    df['pair_id'] = config['pair_id']
    # Timestamps are interpreted as epoch milliseconds.
    df['open_time'] = pd.to_datetime(df['open_time'], unit='ms')
    df['close_time'] = pd.to_datetime(df['close_time'], unit='ms')
    df = df.set_index('open_time')
    dfs.append(df)

# The same open_time can appear in more than one archive; concat preserves
# download order, so keep='first' keeps the row from the earlier-listed file.
df = pd.concat(dfs)
df = df[~df.index.duplicated(keep='first')].sort_index()

df.to_csv(out_path)

print(out_path)

100%|██████████| 167/167 [00:00<00:00, 75773.34it/s]


0_klines_ETHBTC_15m.csv


In [5]:
# Number of rows in the combined, de-duplicated frame.
df.shape[0]

138309