# Data Downloader

In [1]:
# Imports
import time
import pandas as pd
from binance.client import Client
import datetime
import json
from IPython.display import clear_output

def get_api_keys(site: str, api_type: str)->str:
    """
    Look up a single credential in the JSON document ../api-keys/api-keys.txt.

    site: top-level key of the document, e.g. 'binance'
    api_type: key nested under the site entry, e.g. 'api' or 'secret'
    returns: the value stored at [site][api_type]
    raises: KeyError if either key is absent from the document
    """
    with open('../api-keys/api-keys.txt') as keys_handle:
        credentials = json.load(keys_handle)
    site_entry = credentials[site]
    return site_entry[api_type]
# Constants
# Both credentials are read from the "binance" entry of the key file ("api" first, then "secret").
api_key, api_secret = (get_api_keys("binance", key_kind) for key_kind in ("api", "secret"))

# Binance client shared by the download cells below.
bclient = Client(api_secret=api_secret, api_key=api_key)

## Download agg-trades-data

In [None]:
# Download BTCUSDT aggregate trades in batches and write them to numbered CSV files.
# Resume point: `group` is the number of the next CSV file to write, `index` is the
# first aggregate-trade id to request.
group = 99
index = 495000000
limit = 500        # trades requested per API call
i_limit = 10000    # API calls collected into one CSV file
i = i_limit
while True:
    agg_trade_list = []
    reached_end = False

    while i>0:
        # Retry the same request until it succeeds.
        while True:
            try:
                start_time = time.time()
                agg_trades = list(bclient.get_aggregate_trades(symbol='BTCUSDT', fromId=str(index), limit=limit))
                break
            except Exception as error:
                # `Exception` instead of a bare `except`, so interrupting the kernel
                # (KeyboardInterrupt) stops the download rather than being retried,
                # and the actual failure is shown.
                print(f"FAILED for: {index}, sleeping 10 seconds ({error!r})")
                time.sleep(10)
        if not agg_trades:
            # Nothing was returned at or after `index`: treat it as the end of the
            # available history instead of spending the remaining calls of this group.
            reached_end = True
            break
        agg_trade_list += agg_trades
        clear_output()
        print(f"index got: {index}, i is: {i}")
        # Advance by the number of trades actually received so a short response does
        # not make the cursor jump over ids (assumes ids are consecutive, as the
        # fixed `limit` step did before).
        index += len(agg_trades)
        i -= 1
        print("--- %s seconds ABOVE 0.05 seconds---" % (time.time() - start_time))
    if not agg_trade_list:
        # Nothing new was downloaded for this group, so there is nothing to save.
        break
    print(f"saving group {group} up to index {index}")
    df = pd.DataFrame(agg_trade_list)
    name = f"../data/BTCUSDT-trades-{str(group)}.csv"
    df.to_csv(name, index=False)
    df = None  # drop the reference so the batch can be garbage-collected
    group += 1
    i = i_limit
    # A new client is created after every group (presumably to refresh the connection — TODO confirm).
    bclient = Client(api_key=api_key, api_secret=api_secret)
    if reached_end:
        break


index got: 625977500, i is: 8045
--- 0.12797284126281738 seconds ABOVE 0.05 seconds---


## Download klines

In [None]:
def binance_downloader(symbol:str, kline_interval:object, interval_name:str, path="../data/", start="1 Jan 1900"):
    """
    Download klines for one symbol from Binance and save them as a CSV file.

    symbol: trading pair, e.g. 'BTCUSDT'
    kline_interval: one of the Client.KLINE_INTERVAL_* constants,
        e.g. Client.KLINE_INTERVAL_1MINUTE, Client.KLINE_INTERVAL_1DAY, Client.KLINE_INTERVAL_1MONTH
    interval_name: only used for the csv name: <symbol>-<interval_name>.csv
    path: directory the csv is written to; defaults to '../data/', the location
        that was previously hard-coded (the argument used to be ignored)
    start: first day to download, formatted like '1 Jan 1900' ('%d %b %Y');
        keep the default to download from the very beginning
    raises: ValueError if `start` does not match '%d %b %Y'

    Uses the module-level `bclient`. The csv is indexed by the kline open time
    ('timestamp', converted from milliseconds).
    """
    stamp_format = "%d %b %Y %H:%M:%S"
    start_date = datetime.datetime.strptime(start, '%d %b %Y')
    # The end of the window is taken in UTC: the string sent below carries no timezone,
    # and python-binance is understood to read such strings as UTC (confirm against the
    # installed version). Local wall-clock time would shift the window by the UTC offset.
    end_date = datetime.datetime.now(datetime.timezone.utc)

    # Honour `path`, adding a separator only when the caller left it out.
    directory = path
    if directory and not directory.endswith(("/", "\\")):
        directory += "/"
    filename = directory + symbol + "-" + interval_name + ".csv"

    print(f"downloading {filename}", end='\r')
    klines = bclient.get_historical_klines(
        symbol,
        kline_interval,
        start_date.strftime(stamp_format),
        end_date.strftime(stamp_format),
        limit=1000,
    )
    data = pd.DataFrame(
        klines,
        columns=['timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time',
                 'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore'],
    )
    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms')
    data = data.set_index('timestamp')
    data.to_csv(filename)
    print(f"downloaded {filename}   ")

In [7]:
# Fetch the monthly BTCBUSD klines and report how long the download took.
start_time = time.time()
binance_downloader(symbol="BTCBUSD", 
                   kline_interval=Client.KLINE_INTERVAL_1MONTH, 
                   interval_name="month-test", #hour
                   start="1 Jan 1900")
# Elapsed wall-clock time for the call above.
print("--- %s seconds ---" % (time.time() - start_time))

downloaded ../data/BTCBUSD-month-test.csv   
--- 0.27462100982666016 seconds ---
