In [1]:
import requests
import xml.etree.ElementTree as ET
from tqdm import tqdm
import pandas as pd
import os
from zipfile import ZipFile
import io

In [2]:
# Run parameters: which kline dataset to pull and how to label its rows.
config = dict(
    symbol='ETHUSDT',  # symbol segment of the kline download path
    dfreq='15m',       # data-frequency segment of the kline download path
    pair_id=2,         # this is the pair_id foreign key in the database
)

In [3]:
def download_files(config):
    """Fetch every kline archive for ``config`` into the local ``temp/`` cache.

    The ``data.binance.vision`` bucket is listed for both the ``daily`` and
    ``monthly`` kline prefixes of ``config['symbol']`` / ``config['dfreq']``.
    Each ``.zip`` key found is downloaded unless a file with the target name
    already exists locally.

    Args:
        config: Mapping with the keys ``'symbol'``, ``'dfreq'`` and ``'pair_id'``.

    Returns:
        List of local paths of the form
        ``temp/<pair_id>_<key with '/' replaced by '_'>``; daily archives come
        first, then monthly ones, each in listing order.

    Raises:
        requests.HTTPError: If a listing or download request returns an error
            status. Failing loudly prevents an error page from being stored
            (and later trusted) as a cached ``.zip``.
    """
    ns = '{http://s3.amazonaws.com/doc/2006-03-01/}'
    timeout = 60  # seconds; without a timeout a stalled connection hangs the notebook

    file_list = []
    for ufreq in ['daily','monthly']:
        # aggTrades instead of klines works too, but the column list used when
        # loading the CSVs must then be updated to match.
        url = f"https://s3-ap-northeast-1.amazonaws.com/data.binance.vision?delimiter=/&prefix=data/spot/{ufreq}/klines/{config['symbol']}/{config['dfreq']}/"

        # A listing response may be truncated; keep requesting pages until the
        # service reports that the listing is complete.
        marker = None
        while True:
            req = requests.get(url, params={'marker': marker} if marker else None, timeout=timeout)
            req.raise_for_status()
            root = ET.fromstring(req.content)
            keys = [item.text for item in root.findall(f"{ns}Contents/{ns}Key") if item.text]
            file_list += [key for key in keys if key.endswith('.zip')]

            if root.findtext(f"{ns}IsTruncated") != 'true':
                break
            # Prefer the explicit continuation token; fall back to the last key seen.
            next_marker = root.findtext(f"{ns}NextMarker") or (keys[-1] if keys else None)
            if not next_marker or next_marker == marker:
                break  # cannot advance any further; never loop forever
            marker = next_marker

    # The cache directory may not exist on a fresh checkout.
    os.makedirs('temp', exist_ok=True)

    ret_list = []
    for f in tqdm(file_list):
        f_ = 'temp/' + str(config['pair_id']) + "_" + f.replace("/","_")
        ret_list.append(f_)
        if not os.path.exists(f_):
            furl = f'https://data.binance.vision/{f}'
            r = requests.get(furl, allow_redirects=True, timeout=timeout)
            r.raise_for_status()
            # Write to a side file and rename on success so an interrupted
            # download is never mistaken for a finished cache entry.
            partial = f_ + '.part'
            with open(partial, 'wb') as fh:
                fh.write(r.content)
            os.replace(partial, f_)

    return ret_list

In [4]:
# Names applied positionally to the header-less kline CSV columns (for klines).
# The last name, 'pair_id', is only a slot: whatever is parsed into it is
# overwritten below with config['pair_id'].
columns = [
    'open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time',
    'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume',
    'taker_buy_quote_asset_volume', 'pair_id',
]

dfs = []
for f in download_files(config):
    # Close each archive as soon as its payload is in memory, and read only
    # the first member (the only one that is used).
    with ZipFile(f) as input_zip:
        data = input_zip.read(input_zip.namelist()[0])

    klines = pd.read_csv(io.BytesIO(data), names=columns)
    klines['pair_id'] = config['pair_id']
    # NOTE(review): timestamps are interpreted as epoch milliseconds - confirm
    # this holds for every archive being loaded.
    klines['open_time'] = pd.to_datetime(klines['open_time'], unit='ms')
    klines['close_time'] = pd.to_datetime(klines['close_time'], unit='ms')
    dfs.append(klines.set_index('open_time'))

if not dfs:
    raise ValueError(
        f"no kline archives found for {config['symbol']} {config['dfreq']}"
    )

# Daily and monthly archives are both loaded, so the same open_time can occur
# more than once; keep the first occurrence, then order chronologically.
df = pd.concat(dfs)
df = df[~df.index.duplicated(keep='first')].sort_index()
# Only the index (open_time) becomes timezone-aware; close_time stays naive.
df = df.tz_localize('UTC')

out_path = f"{config['pair_id']}_klines_{config['symbol']}_{config['dfreq']}.csv"
df.to_csv(out_path)

print(out_path)

100%|██████████| 167/167 [00:00<00:00, 59749.96it/s]


2_klines_ETHUSDT_15m.csv


In [5]:
# Number of rows left in the merged frame after duplicate open_time values were dropped.
df.shape[0]

135101

In [6]:
# Show the merged frame; the notebook renders only the leading and trailing rows.
df

Unnamed: 0_level_0,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,pair_id
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-08-17 04:00:00+00:00,301.13,301.13,298.00,298.00,5.80167,2017-08-17 04:14:59.999,1.744766e+03,22,5.48392,1.649449e+03,2
2017-08-17 04:15:00+00:00,298.00,300.80,298.00,299.39,31.44065,2017-08-17 04:29:59.999,9.396918e+03,26,12.11712,3.625167e+03,2
2017-08-17 04:30:00+00:00,299.39,300.79,299.39,299.60,52.93579,2017-08-17 04:44:59.999,1.585108e+04,39,28.38159,8.499793e+03,2
2017-08-17 04:45:00+00:00,299.60,302.57,299.60,301.61,35.49066,2017-08-17 04:59:59.999,1.069204e+04,42,34.58114,1.041903e+04,2
2017-08-17 05:00:00+00:00,301.61,302.57,300.95,302.01,81.69235,2017-08-17 05:14:59.999,2.462072e+04,52,80.26344,2.418975e+04,2
...,...,...,...,...,...,...,...,...,...,...,...
2021-06-29 22:45:00+00:00,2180.43,2191.48,2170.58,2170.71,7810.79060,2021-06-29 22:59:59.999,1.704841e+07,17380,4045.31809,8.832929e+06,2
2021-06-29 23:00:00+00:00,2170.83,2183.87,2166.17,2166.80,6961.08403,2021-06-29 23:14:59.999,1.513453e+07,10450,2991.42554,6.504847e+06,2
2021-06-29 23:15:00+00:00,2166.81,2176.10,2165.47,2174.73,3942.84901,2021-06-29 23:29:59.999,8.561939e+06,6965,1786.18576,3.878385e+06,2
2021-06-29 23:30:00+00:00,2174.73,2183.00,2174.37,2175.29,4266.75855,2021-06-29 23:44:59.999,9.294051e+06,6740,2223.68471,4.843582e+06,2
