# Binance API

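Reference (python-binance client documentation): https://python-binance.readthedocs.io/en/latest/binance.html

Note: the cells below call the Binance REST API directly with `requests` rather than through the python-binance client.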

In [1]:
import requests, os, json, time, sys, calendar
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# Order-book snapshot for BTCUSDT, limited to 5 price levels per side.
r = requests.get(
    "https://api.binance.com/api/v3/depth",
    params={"symbol": "BTCUSDT", "limit": 5},
)
r.json()

{'lastUpdateId': 7780617324,
 'bids': [['33214.20000000', '0.15548900'],
  ['33212.53000000', '0.00541900'],
  ['33211.33000000', '0.02977700'],
  ['33211.32000000', '0.07462800'],
  ['33205.64000000', '0.02077700']],
 'asks': [['33214.21000000', '0.01550000'],
  ['33216.17000000', '0.10113300'],
  ['33218.14000000', '0.15935600'],
  ['33218.80000000', '0.00813800'],
  ['33219.28000000', '0.15000000']]}

In [3]:
# Current bid/ask price and quantity for BTCUSDT from the bookTicker endpoint.
r = requests.get(
    "https://api.binance.com/api/v3/ticker/bookTicker",
    params={"symbol": "BTCUSDT"},
)
r.json()

{'symbol': 'BTCUSDT',
 'bidPrice': '33214.20000000',
 'bidQty': '0.09513800',
 'askPrice': '33214.21000000',
 'askQty': '0.47617000'}

In [17]:
# Scratch arithmetic cell: it uses no names from the rest of the notebook and
# evaluates to roughly 52.78. NOTE(review): its purpose is not stated anywhere
# in this notebook — confirm whether it is still needed.
(4/6-0.4*5.25/6)/0.6*100

52.77777777777777

In [4]:
def print_state(df, from_date, current_date, current_date2, to_date):
    """Print a one-line progress report that overwrites itself in place.

    The percentage is how far ``current_date`` lies between ``from_date`` and
    ``to_date``, rounded to two decimals. ``len(df)`` is reported as the number
    of trades fetched and ``current_date2`` is shown as the last date. The line
    ends with a carriage return instead of a newline so the next report is
    drawn over it.
    """
    elapsed = current_date - from_date
    total = to_date - from_date
    percent = round(100 * elapsed / total, 2)
    status = (
        f"•{percent}%  • Total nb trades fetched: {len(df)}"
        f"  • Last date: {current_date2!s}"
    )
    print(status, end='\r')
    
# Use the 100th daily candle (not the very first one) as the "genesis" date,
# for sanity reasons.
def get_date_of_first_trade(symbol):
    """Return the open time of the symbol's 100th daily candle.

    Requests up to 100 daily klines starting from timestamp 0 and converts the
    first field of the 100th one (a millisecond Unix timestamp) to a naive
    datetime expressed in UTC, matching the naive-UTC convention used by the
    other helpers in this file.

    Args:
        symbol: Binance trading pair, e.g. ``'BTCUSDT'``.

    Returns:
        datetime: naive UTC datetime of the 100th daily candle.

    Raises:
        requests.HTTPError: if Binance answers with an error status
            (for example an unknown symbol).
        requests.RequestException: on network failure or timeout.
        IndexError: if fewer than 100 daily candles are returned.
    """
    r = requests.get(
        'https://api.binance.com/api/v3/klines',
        params={"symbol": symbol, "interval": '1d', "startTime": 0, "limit": 100},
        timeout=10,
    )
    # Fail loudly on an error payload instead of trying to index into it.
    r.raise_for_status()

    klines = r.json()
    if len(klines) < 100:
        raise IndexError(
            f'{symbol}: only {len(klines)} daily candles available, need 100'
        )

    open_time_ms = klines[99][0]
    # Exact, non-deprecated equivalent of datetime.utcfromtimestamp(ms / 1000).
    return datetime(1970, 1, 1) + timedelta(milliseconds=open_time_ms)

def get_unix_ms_from_date(date):
    """Convert a datetime to a Unix timestamp in whole milliseconds.

    Naive datetimes are interpreted as UTC, as before. Timezone-aware
    datetimes are converted to UTC first; previously their offset was silently
    ignored. The result is computed with integer arithmetic only, so
    sub-millisecond microseconds are always floored, including for dates
    before the epoch.

    Args:
        date: a ``datetime`` (naive values are taken to be UTC).

    Returns:
        int: milliseconds since 1970-01-01 00:00:00 UTC.
    """
    seconds = calendar.timegm(date.utctimetuple())
    return seconds * 1000 + date.microsecond // 1000

def get_first_trade_id_from_start_date(symbol, from_date, delta=1):
    """Return the id of the first aggregated trade at or after ``from_date``.

    Queries the aggTrades endpoint for the window
    ``[from_date, from_date + delta seconds]``. If the window holds no trade
    it is widened by 5 seconds and queried again. Failed requests (network
    error or non-200 status) are retried after a 10 second pause, keeping the
    current window size. The function loops instead of recursing, so a long
    gap or a long outage cannot exhaust the recursion limit; it keeps trying
    until a trade is found.

    Args:
        symbol: Binance trading pair, e.g. ``'BTCUSDT'``.
        from_date: naive UTC datetime marking the start of the window.
        delta: initial window length in seconds.

    Returns:
        The ``'a'`` field (aggregate trade id) of the first trade returned.
    """
    start_ms = get_unix_ms_from_date(from_date)

    while True:
        # Keep the window small: per the original author's note, when a window
        # holds more than 1000 trades the ones kept are the last 1000
        # (not verified here).
        end_ms = get_unix_ms_from_date(from_date + timedelta(seconds=delta))
        try:
            r = requests.get(
                'https://api.binance.com/api/v3/aggTrades',
                params={"symbol": symbol, "startTime": start_ms,
                        "endTime": end_ms, "limit": 1000},
                timeout=10,
            )
        except requests.RequestException as exc:
            print('somethings wrong!', exc)
            print('sleeping for 10s... will retry')
            time.sleep(10)
            continue

        if r.status_code != 200:
            print('somethings wrong!', r.status_code)
            print('sleeping for 10s... will retry')
            time.sleep(10)
            # Retry the same window; the old code reset delta to 1 here.
            continue

        response = r.json()
        if not response:
            print('no trade in the range, increase end date')
            delta += 5
            continue

        return response[0]['a']

In [5]:
# Requests are limited to 1000 trades --> we fetch in pages of 1000 and trim the extras.
def fetch_trades(symbol, from_date, to_date, save=False):
    """Download aggregated trades for ``symbol`` between two naive UTC datetimes.

    If ``from_date`` is earlier than the date returned by
    ``get_date_of_first_trade``, the whole window is shifted forward so that it
    starts there and keeps its length. Trades are then paged through the
    aggTrades endpoint by trade id, 1000 per request, until the last trade seen
    is at or past ``to_date``. Failed requests are retried after a pause.
    Progress is reported through ``print_state``.

    Args:
        symbol: Binance trading pair, e.g. ``'BTCUSDT'``.
        from_date: naive UTC datetime, start of the window.
        to_date: naive UTC datetime, end of the window (inclusive).
        save: when true, also write the result to
            ``data.nosync/<symbol>/<YYYY-MM-DD>.h5`` (key ``'s'``), named after
            the effective start date.

    Returns:
        pandas.DataFrame indexed by ``timestamp`` with columns ``trade_id``,
        ``price``, ``volume``, ``first_id``, ``last_id``, ``is_buyer_maker``
        and ``is_trade_best_price``; or ``None`` if ``to_date < from_date``.
    """
    genesis_trade = get_date_of_first_trade(symbol)
    if from_date < genesis_trade:
        # Slide the window forward, preserving its length.
        delta = to_date - from_date
        from_date = genesis_trade
        to_date = from_date + delta

    if to_date < from_date:
        print('Error: to_date < from_date')
        return None

    # aggTrades JSON keys -> readable column names (in response order).
    column_names = {
        "a": "trade_id",
        "p": "price",
        "q": "volume",
        "f": "first_id",
        "l": "last_id",
        "T": "timestamp",
        "m": "is_buyer_maker",
        "M": "is_trade_best_price",
    }
    epoch = datetime(1970, 1, 1)
    # print_state divides by the window length, so skip it for an empty window.
    show_progress = to_date > from_date
    to_ms = get_unix_ms_from_date(to_date)  # loop-invariant

    from_id = get_first_trade_id_from_start_date(symbol, from_date)
    current_time = 0
    trades = []  # raw trade dicts; turned into a DataFrame once at the end

    while current_time < to_ms:
        try:
            r = requests.get(
                "https://api.binance.com/api/v3/aggTrades",
                params={"symbol": symbol, "limit": 1000, "fromId": from_id},
                timeout=10,
            )
            if r.status_code != 200:
                print('somethings wrong!', r.status_code)
                print('sleeping for 10s... will retry')
                time.sleep(10)
                continue

            page = r.json()
            last_trade = page[-1]
            next_id, last_ms = last_trade['a'], last_trade['T']
        except (requests.RequestException, ValueError, LookupError, TypeError):
            # Network failure, unparsable body, or an empty/malformed page.
            print('somethings wrong....... sleeping for 15s')
            time.sleep(15)
            continue

        # Resume from the last trade seen. fromId is inclusive, so each page
        # repeats one trade; duplicates are dropped below.
        from_id = next_id
        current_time = last_ms
        trades.extend(page)

        if show_progress:
            current_date = epoch + timedelta(milliseconds=current_time)
            print_state(trades, from_date, current_date, current_date, to_date)

        # don't exceed request limits
        time.sleep(0.2)

    df = pd.DataFrame(trades, columns=list(column_names)).rename(columns=column_names)
    df = df.drop_duplicates(subset='trade_id')
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    # The last page usually overshoots to_date; trim the extras.
    df = df[df['timestamp'] <= to_date]
    if show_progress and not df.empty:
        print_state(df, from_date, to_date, df['timestamp'].iloc[-1], to_date)
    df = df.set_index('timestamp')

    if save:
        out_dir = f'data.nosync/{symbol}'
        os.makedirs(out_dir, exist_ok=True)
        name = str(from_date)[:10]
        df.to_hdf(f'{out_dir}/{name}.h5', key='s')
    return df

### Example: reading saved data

In [6]:
#pd.read_hdf('data.nosync/BTCUSDT/2020-12-01.h5')

### Download LINKUSDT trades (2021-01-06 to 2021-01-09), one file per day

In [11]:
# Fetch and save trades one day at a time: each fetch_trades call covers
# [from_date, from_date + 1 day] and is saved under that day's start date.
symbols = ['LINKUSDT']
for symbol in symbols:
    from_date = datetime(2021, 1, 6)
    while from_date < datetime(2021, 1, 10):
        to_date = from_date + timedelta(days=1)
        fetch_trades(symbol, from_date, to_date, save=True)
        # The next day starts where this one ended.
        from_date = to_date

•100.0%  • Total nb trades fetched: 288170  • Last date: 2021-01-09 23:59:59.8100000