# Binance API

https://python-binance.readthedocs.io/en/latest/binance.html

In [4]:
import requests, os, json, time, sys, calendar
import pandas as pd
from datetime import datetime, timedelta

In [50]:
def print_state(df, from_date, current_date, current_date2, to_date):
    """Print a one-line progress report for a trade download.

    Args:
        df: Collection of trades fetched so far; only ``len(df)`` is used.
        from_date: Start of the requested range.
        current_date: Date reached so far, used to compute the percentage.
        current_date2: Date displayed after "Last date:".
        to_date: End of the requested range.
    """
    total_span = to_date - from_date
    if total_span:
        percent = round(100 * (current_date - from_date) / total_span, 2)
    else:
        # An empty range has nothing left to fetch; avoid dividing by zero.
        percent = 100.0

    # end='\r' moves the cursor back to the start of the line so the next
    # report overwrites this one instead of scrolling.
    print('•', percent,
          '%  • Total nb trades fetched: ', len(df),
          '  • Last date: ', current_date2, sep='', end='\r')
    
# Use the 100th daily candle instead of the first one, for sanity reasons.
def get_date_of_first_trade(symbol):
    """Return the earliest date from which trades for ``symbol`` are fetched.

    Requests up to 100 daily klines starting at timestamp 0 and converts the
    first field of the 100th one (a millisecond timestamp; the kline open
    time per the Binance API documentation) to a naive UTC ``datetime``.
    When the symbol has fewer than 100 daily klines, the earliest one is
    used so that recently listed symbols remain usable.

    Args:
        symbol: Trading pair passed to the API, e.g. ``'BTCUSDT'``.

    Returns:
        A naive ``datetime`` expressed in UTC.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        ValueError: If the API returns no klines for ``symbol``.
    """
    r = requests.get('https://api.binance.com/api/v3/klines',
                     params={"symbol": symbol, "interval": '1d',
                             "startTime": 0, "limit": 100})
    # Fail loudly on an error payload instead of indexing into it.
    r.raise_for_status()

    klines = r.json()
    if not klines:
        raise ValueError(f'no klines returned for {symbol}')

    kline = klines[99] if len(klines) >= 100 else klines[0]
    return datetime.utcfromtimestamp(kline[0] / 1000.0)

def get_unix_ms_from_date(date):
    """Convert a datetime to a Unix timestamp in milliseconds.

    The datetime's calendar fields are interpreted as UTC by
    ``calendar.timegm``. The microsecond part is added as fractional
    milliseconds and the total is truncated by ``int()``.
    """
    whole_seconds = calendar.timegm(date.timetuple())
    fractional_ms = date.microsecond / 1000
    return int(whole_seconds * 1000 + fractional_ms)

def get_first_trade_id_from_start_date(symbol, from_date, delta=1):
    """Return the id of the first aggregated trade at or after ``from_date``.

    Queries the aggTrades endpoint for the window
    ``[from_date, from_date + delta seconds]``. If the window holds no trade
    it is widened by 5 seconds and queried again. A non-200 answer is retried
    after a 10 second pause using the *same* window, so a window that was
    already widened is not reset.

    Args:
        symbol: Trading pair passed to the API, e.g. ``'BTCUSDT'``.
        from_date: Start of the search window (``datetime``).
        delta: Initial window length in seconds.

    Returns:
        The ``'a'`` field (aggregate trade id) of the first trade returned,
        as a plain integer.
    """
    start_ms = get_unix_ms_from_date(from_date)

    # Loop instead of recursing so that long outages or long quiet periods
    # cannot exhaust the recursion limit.
    while True:
        # Keep the window small because each request is capped at 1000 trades.
        # NOTE(review): the original comment says the 1000 trades kept are the
        # last ones of the window -- confirm against the API documentation.
        end_ms = get_unix_ms_from_date(from_date + timedelta(seconds=delta))
        r = requests.get('https://api.binance.com/api/v3/aggTrades',
                         params={"symbol": symbol, "startTime": start_ms,
                                 "endTime": end_ms, "limit": 1000})

        if r.status_code != 200:
            print('somethings wrong!', r.status_code)
            print('sleeping for 10s... will retry')
            time.sleep(10)
            continue

        trades = r.json()
        if not trades:
            print('no trade in the range, increase end date')
            delta += 5
            continue

        return trades[0]['a']

In [56]:
# Requests are limited to 1000 trades, so we fetch in batches of 1000 and
# trim the extras at the end.
def fetch_trades(symbol, from_date, to_date, save=False):
    """Download aggregated trades for ``symbol`` between two dates.

    ``from_date`` is moved forward to the date returned by
    ``get_date_of_first_trade`` when it is earlier than that date. Trades are
    then fetched in batches of up to 1000, starting from the first trade id
    at or after ``from_date``, until a batch reaches ``to_date`` or the API
    has no further trades to return.

    Args:
        symbol: Trading pair passed to the API, e.g. ``'BTCUSDT'``.
        from_date: Start of the range (naive ``datetime``, converted to a
            timestamp as UTC).
        to_date: End of the range, inclusive (naive ``datetime``, UTC).
        save: When true, also write the result to
            ``data.nosync/<symbol>/from_<from>_to_<to>.h5`` under key ``'s'``.

    Returns:
        A DataFrame indexed by ``timestamp`` with the columns ``trade_id``,
        ``price``, ``volume``, ``first_id``, ``last_id``, ``is_buyer_maker``
        and ``is_trade_best_price``; or ``None`` when ``to_date`` is earlier
        than the (possibly adjusted) ``from_date``.
    """
    genesis_trade = get_date_of_first_trade(symbol)
    if from_date < genesis_trade:
        from_date = genesis_trade

    if to_date < from_date:
        print('Error: to_date < from_date')
        return None

    # API field name -> column name of the returned frame.
    column_names = {
        "a": "trade_id", "p": "price", "q": "volume",
        "f": "first_id", "l": "last_id", "T": "timestamp",
        "m": "is_buyer_maker", "M": "is_trade_best_price",
    }

    from_id = get_first_trade_id_from_start_date(symbol, from_date)
    to_ms = get_unix_ms_from_date(to_date)
    current_time = 0
    # Accumulate raw trade dicts and build the frame once at the end;
    # concatenating a growing frame on every batch is quadratic.
    all_trades = []

    while current_time < to_ms:
        try:
            r = requests.get("https://api.binance.com/api/v3/aggTrades",
                             params={"symbol": symbol, "limit": 1000, "fromId": from_id})
            if r.status_code != 200:
                print('somethings wrong!', r.status_code)
                print('sleeping for 10s... will retry')
                time.sleep(10)
                continue
            trades = r.json()
        except (requests.exceptions.RequestException, ValueError):
            # Network failure or undecodable body: wait and retry this batch.
            print('somethings wrong....... sleeping for 15s')
            time.sleep(15)
            continue

        if not trades:
            # Nothing exists at or after from_id (to_date lies beyond the
            # latest trade); stop instead of polling forever.
            break

        all_trades.extend(trades)

        # fromId is inclusive, so resume one past the last trade received.
        from_id = trades[-1]['a'] + 1
        current_time = trades[-1]['T']
        current_date = datetime.utcfromtimestamp(current_time / 1000.0)

        print_state(all_trades, from_date, current_date, current_date, to_date)

        # don't exceed request limits
        time.sleep(0.2)

    # Selecting columns by name keeps this working for an empty result and
    # if the API adds fields to the payload.
    df = pd.DataFrame(all_trades, columns=list(column_names))
    df = df.drop_duplicates(subset='a').rename(columns=column_names)
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    # The last batch usually overshoots to_date; drop the surplus trades.
    df = df[df['timestamp'] <= to_date]
    if not df.empty:
        print_state(df, from_date, to_date, df.iloc[-1].timestamp, to_date)
    df = df.set_index('timestamp')

    if save:
        os.makedirs(f'data.nosync/{symbol}', exist_ok=True)
        # Yields e.g. from_2020-12-01(11-00-00)_to_2020-12-01(11-03-00)
        name = f'from_{from_date})_to_{to_date})'.replace(':', '-').replace(' ', '(')
        df.to_hdf(f'data.nosync/{symbol}/{name}.h5', key='s')
    return df

### Example not saving data:

From 2020-12-01 11:00:00 to 2020-12-01 11:03:00 (UTC)

In [54]:
# Fetch three minutes of BTCUSDT aggregated trades without writing to disk.
symbol = 'BTCUSDT'
# Naive datetimes; get_unix_ms_from_date converts them with calendar.timegm,
# i.e. their fields are read as UTC.
from_date = datetime(2020, 12, 1, 11, 0)
to_date = datetime(2020, 12, 1, 11, 3)
df = fetch_trades(symbol, from_date, to_date, save=False)
# Last expression of the cell: displays the first rows of the result.
df.head()

•100.0%  • Total nb trades fetched: 4023  • Last date: 2020-12-01 11:02:59.8820000

Unnamed: 0_level_0,trade_id,price,volume,first_id,last_id,is_buyer_maker,is_trade_best_price
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-12-01 11:00:00.014,444198688,19739.51,0.3,492095282,492095282,False,True
2020-12-01 11:00:00.318,444198689,19739.5,0.002883,492095283,492095283,True,True
2020-12-01 11:00:00.318,444198690,19739.36,0.00073,492095284,492095284,True,True
2020-12-01 11:00:00.318,444198691,19738.67,1.7e-05,492095285,492095285,True,True
2020-12-01 11:00:00.617,444198692,19739.1,0.002753,492095286,492095286,False,True


### Example with a start date before the first available data

In [60]:
# Reuses `symbol` from the previous example.
# The output below starts on 2017-11-24 rather than 2016-12-01 -- presumably
# because fetch_trades moves from_date forward to the date returned by
# get_date_of_first_trade when the requested start is earlier.
from_date = datetime(2016, 12, 1, 11, 0)
to_date = datetime(2017, 11, 25, 0, 0)
df = fetch_trades(symbol, from_date, to_date, save=False)
# Last expression of the cell: displays the first rows of the result.
df.head()

•100.0%  • Total nb trades fetched: 18662  • Last date: 2017-11-24 23:59:59.485000

Unnamed: 0_level_0,trade_id,price,volume,first_id,last_id,is_buyer_maker,is_trade_best_price
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-11-24 00:00:00.824,915450,8019.97,0.092977,990308,990308,False,True
2017-11-24 00:00:00.824,915451,8019.98,0.031107,990309,990309,False,True
2017-11-24 00:00:01.193,915452,8000.03,0.000557,990310,990310,True,True
2017-11-24 00:00:01.193,915453,8000.02,0.034591,990311,990311,True,True
2017-11-24 00:00:01.210,915454,8000.02,0.0352,990312,990312,True,True


### Example: reading saved data

In [None]:
#pd.read_hdf('data.nosync/BTCUSDT/from_2020-12-01(11-00-00)_to_2020-12-01(11-03-00).h5')

datetime.datetime(2017, 11, 24, 0, 0)