# This is only a test of getting data and saving to csv

In [1]:
from binance.client import Client
from enum import Enum, unique

@unique
class DataType(Enum):
    INTERVAL_1MINUTE = Client.KLINE_INTERVAL_1MINUTE
    INTERVAL_3MINUTE = Client.KLINE_INTERVAL_3MINUTE
    INTERVAL_5MINUTE = Client.KLINE_INTERVAL_5MINUTE
    INTERVAL_15MINUTE = Client.KLINE_INTERVAL_15MINUTE
    INTERVAL_30MINUTE = Client.KLINE_INTERVAL_30MINUTE
    INTERVAL_1HOUR = Client.KLINE_INTERVAL_1HOUR
    INTERVAL_2HOUR = Client.KLINE_INTERVAL_2HOUR
    INTERVAL_4HOUR = Client.KLINE_INTERVAL_4HOUR
    INTERVAL_6HOUR = Client.KLINE_INTERVAL_6HOUR
    INTERVAL_8HOUR = Client.KLINE_INTERVAL_8HOUR
    INTERVAL_12HOUR = Client.KLINE_INTERVAL_12HOUR
    INTERVAL_1DAY = Client.KLINE_INTERVAL_1DAY
    INTERVAL_3DAY = Client.KLINE_INTERVAL_3DAY
    INTERVAL_1WEEK = Client.KLINE_INTERVAL_1WEEK
    INTERVAL_1MONTH = Client.KLINE_INTERVAL_1MONTH

interval = DataType.INTERVAL_1DAY 
print(interval.value)

1d


In [2]:
from datetime import datetime, timedelta
import dateparser
import pytz

def date_to_milliseconds(date_str):
    """Convert UTC date to milliseconds

    If using offset strings add "UTC" to date string e.g. "now UTC", "11 hours ago UTC"

    See dateparse docs for formats http://dateparser.readthedocs.io/en/latest/

    :param date_str: date in readable format, i.e. "January 01, 2018", "11 hours ago UTC", "now UTC"
    :type date_str: str
    """
    # get epoch value in UTC
    epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    # parse our date string
    d = dateparser.parse(date_str)
    # if the date is not timezone aware apply UTC timezone
    if d.tzinfo is None or d.tzinfo.utcoffset(d) is None:
        d = d.replace(tzinfo=pytz.utc)

    # return the difference in time
    return int((d - epoch).total_seconds() * 1000.0)

def milliseconds_to_date(ms: int) -> str:
    # get epoch value in UTC
    epoch = datetime.utcfromtimestamp(0)
    return str(epoch + timedelta(milliseconds=ms))

milliseconds_to_date(date_to_milliseconds("now"))


'2022-05-21 22:24:18.279000'

In [3]:
def interval_to_milliseconds(interval: DataType):
    """Convert a Binance interval string to milliseconds

    :param interval: Binance interval string 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d, 3d, 1w
    :type interval: str

    :return:
        None if unit not one of m, h, d or w
        None if string not in correct format
        int value of interval in milliseconds
    """
    ms = None
    seconds_per_unit = {
        "m": 60,
        "h": 60 * 60,
        "d": 24 * 60 * 60,
        "w": 7 * 24 * 60 * 60
    }

    unit = interval.value[-1]
    if unit in seconds_per_unit:
        try:
            ms = int(interval.value[:-1]) * seconds_per_unit[unit] * 1000
        except ValueError:
            pass
    return ms

interval_to_milliseconds(DataType.INTERVAL_1DAY)

86400000

In [4]:
import time

def get_historical_klines(symbol, interval: DataType, start_str, end_str=None):
    """Get Historical Klines from Binance

    See dateparse docs for valid start and end string formats http://dateparser.readthedocs.io/en/latest/

    If using offset strings for dates add "UTC" to date string e.g. "now UTC", "11 hours ago UTC"

    :param symbol: Name of symbol pair e.g BNBBTC
    :type symbol: str
    :param interval: Biannce Kline interval
    :type interval: str
    :param start_str: Start date string in UTC format
    :type start_str: str
    :param end_str: optional - end date string in UTC format
    :type end_str: str

    :return: list of OHLCV values

    """
    proxies = {
        "http": "http://127.0.0.1:8900",
        "https": "http://127.0.0.1:8900",
    }

    # create the Binance client, no need for api key
    client = Client("", "",  {'proxies': proxies})

    # init our list
    output_data = []

    # setup the max limit
    limit = 500

    # convert interval to useful value in seconds
    timeframe = interval_to_milliseconds(interval)

    # convert our date strings to milliseconds
    start_ts = date_to_milliseconds(start_str)

    # if an end time was passed convert it
    end_ts = None
    if end_str:
        end_ts = date_to_milliseconds(end_str)

    idx = 0
    # it can be difficult to know when a symbol was listed on Binance so allow start time to be before list date
    symbol_existed = False
    while True:
        # fetch the klines from start_ts up to max 500 entries or the end_ts if set
        temp_data = client.get_klines(
            symbol=symbol,
            interval=interval.value,
            limit=limit,
            startTime=start_ts,
            endTime=end_ts
        )

        # handle the case where our start date is before the symbol pair listed on Binance
        if not symbol_existed and len(temp_data):
            symbol_existed = True

        if symbol_existed:
            # append this loops data to our output data
            output_data += temp_data

            # update our start timestamp using the last value in the array and add the interval timeframe
            start_ts = temp_data[len(temp_data) - 1][0] + timeframe
        else:
            # it wasn't listed yet, increment our start date
            start_ts += timeframe

        idx += 1
        # check if we received less than the required limit and exit the loop
        if len(temp_data) < limit:
            # exit the while loop
            break

        # sleep after every 3rd call to be kind to the API
        if idx % 3 == 0:
            time.sleep(1)

    return output_data
    
symbol = "BTCUSDT"
interval = DataType.INTERVAL_1DAY
start = "10 days ago"
end = "now"
data_list = get_historical_klines(symbol, interval, start, end)
data_list

[[1652313600000,
  '29103.94000000',
  '30243.00000000',
  '26700.00000000',
  '29029.75000000',
  '204507.26313800',
  1652399999999,
  '5795451310.94557822',
  3951233,
  '101465.27825700',
  '2877462252.21222825',
  '0'],
 [1652400000000,
  '29029.74000000',
  '31083.37000000',
  '28751.67000000',
  '29287.05000000',
  '97872.36957000',
  1652486399999,
  '2956855804.85611210',
  2051900,
  '48827.72764000',
  '1475641181.96274990',
  '0'],
 [1652486400000,
  '29287.05000000',
  '30343.27000000',
  '28630.00000000',
  '30086.74000000',
  '51095.87863000',
  1652572799999,
  '1501483453.50089050',
  1204626,
  '25908.17135000',
  '761545954.19045650',
  '0'],
 [1652572800000,
  '30086.74000000',
  '31460.00000000',
  '29480.00000000',
  '31328.89000000',
  '46275.66912000',
  1652659199999,
  '1401499926.23041760',
  1016769,
  '24163.36200000',
  '732332763.60944860',
  '0'],
 [1652659200000,
  '31328.89000000',
  '31328.90000000',
  '29087.04000000',
  '29874.01000000',
  '73082.19

In [5]:
import pandas as pd
data_df = pd.DataFrame(data_list, columns =['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_assert_volume',
                                  'number_of_trades', 'taker_buy_volume', 'taker_buy_quote_asset_volume', 'ignore'])
data_df = data_df.drop(columns=['ignore'])
# data_df[['open_time', 'close_time']].apply(lambda x: x.map(milliseconds_to_date))
data_df


Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_assert_volume,number_of_trades,taker_buy_volume,taker_buy_quote_asset_volume
0,1652313600000,29103.94,30243.0,26700.0,29029.75,204507.263138,1652399999999,5795451310.945578,3951233,101465.278257,2877462252.212228
1,1652400000000,29029.74,31083.37,28751.67,29287.05,97872.36957,1652486399999,2956855804.856112,2051900,48827.72764,1475641181.96275
2,1652486400000,29287.05,30343.27,28630.0,30086.74,51095.87863,1652572799999,1501483453.5008905,1204626,25908.17135,761545954.1904565
3,1652572800000,30086.74,31460.0,29480.0,31328.89,46275.66912,1652659199999,1401499926.2304175,1016769,24163.362,732332763.6094487
4,1652659200000,31328.89,31328.9,29087.04,29874.01,73082.19658,1652745599999,2185567735.6820626,1561954,36284.05379,1085179422.7219367
5,1652745600000,29874.01,30788.37,29450.38,30444.93,56724.13307,1652831999999,1715109131.0802064,1175382,28600.36238,865031716.5404288
6,1652832000000,30444.93,30709.99,28654.47,28715.32,59749.15799,1652918399999,1762843836.1269379,1379212,29501.76769,870623227.207057
7,1652918400000,28715.33,30545.18,28691.38,30319.23,67877.36415,1653004799999,2014360135.785272,1860780,35339.65787,1049163442.9020371
8,1653004800000,30319.22,30777.33,28730.0,29201.01,60517.25325,1653091199999,1800589029.270878,1694004,30890.40127,919467696.6181592
9,1653091200000,29201.01,29566.0,28947.28,29302.58,11837.51499,1653177599999,346633278.1638538,374473,5882.1109,172253532.4864833


In [6]:
file_loc = 'data/{}_{}.csv'.format(symbol, interval.value)
data_df.to_csv(file_loc, index=False)

Test reading from csv

In [7]:
history_data = pd.read_csv(file_loc)
history_data

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_assert_volume,number_of_trades,taker_buy_volume,taker_buy_quote_asset_volume
0,1652313600000,29103.94,30243.0,26700.0,29029.75,204507.263138,1652399999999,5795451000.0,3951233,101465.278257,2877462000.0
1,1652400000000,29029.74,31083.37,28751.67,29287.05,97872.36957,1652486399999,2956856000.0,2051900,48827.72764,1475641000.0
2,1652486400000,29287.05,30343.27,28630.0,30086.74,51095.87863,1652572799999,1501483000.0,1204626,25908.17135,761546000.0
3,1652572800000,30086.74,31460.0,29480.0,31328.89,46275.66912,1652659199999,1401500000.0,1016769,24163.362,732332800.0
4,1652659200000,31328.89,31328.9,29087.04,29874.01,73082.19658,1652745599999,2185568000.0,1561954,36284.05379,1085179000.0
5,1652745600000,29874.01,30788.37,29450.38,30444.93,56724.13307,1652831999999,1715109000.0,1175382,28600.36238,865031700.0
6,1652832000000,30444.93,30709.99,28654.47,28715.32,59749.15799,1652918399999,1762844000.0,1379212,29501.76769,870623200.0
7,1652918400000,28715.33,30545.18,28691.38,30319.23,67877.36415,1653004799999,2014360000.0,1860780,35339.65787,1049163000.0
8,1653004800000,30319.22,30777.33,28730.0,29201.01,60517.25325,1653091199999,1800589000.0,1694004,30890.40127,919467700.0
9,1653091200000,29201.01,29566.0,28947.28,29302.58,11837.51499,1653177599999,346633300.0,374473,5882.1109,172253500.0
