# This is only a test of fetching data and saving it to CSV

In [2]:
from binance.client import Client
from enum import Enum, unique

@unique
class DataType(Enum):
    """Kline intervals, each member wrapping the matching ``Client.KLINE_INTERVAL_*`` constant.

    ``@unique`` makes the class definition fail if two members end up with the
    same value. Read ``member.value`` to get the wrapped constant that is
    passed on to the client (for example ``DataType.INTERVAL_1DAY.value``).
    """
    # Minute-based intervals
    INTERVAL_1MINUTE = Client.KLINE_INTERVAL_1MINUTE
    INTERVAL_3MINUTE = Client.KLINE_INTERVAL_3MINUTE
    INTERVAL_5MINUTE = Client.KLINE_INTERVAL_5MINUTE
    INTERVAL_15MINUTE = Client.KLINE_INTERVAL_15MINUTE
    INTERVAL_30MINUTE = Client.KLINE_INTERVAL_30MINUTE
    # Hour-based intervals
    INTERVAL_1HOUR = Client.KLINE_INTERVAL_1HOUR
    INTERVAL_2HOUR = Client.KLINE_INTERVAL_2HOUR
    INTERVAL_4HOUR = Client.KLINE_INTERVAL_4HOUR
    INTERVAL_6HOUR = Client.KLINE_INTERVAL_6HOUR
    INTERVAL_8HOUR = Client.KLINE_INTERVAL_8HOUR
    INTERVAL_12HOUR = Client.KLINE_INTERVAL_12HOUR
    # Day, week and month intervals
    INTERVAL_1DAY = Client.KLINE_INTERVAL_1DAY
    INTERVAL_3DAY = Client.KLINE_INTERVAL_3DAY
    INTERVAL_1WEEK = Client.KLINE_INTERVAL_1WEEK
    INTERVAL_1MONTH = Client.KLINE_INTERVAL_1MONTH

# Quick check: pick the daily interval and print the raw constant it wraps.
interval = DataType.INTERVAL_1DAY
print(interval.value)

1d


In [3]:
from datetime import datetime, timedelta
import dateparser
import pytz

def date_to_milliseconds(date_str):
    """Convert a readable date string to milliseconds since the Unix epoch (UTC).

    If using offset strings add "UTC" to date string e.g. "now UTC", "11 hours ago UTC"

    See dateparser docs for formats http://dateparser.readthedocs.io/en/latest/

    :param date_str: date in readable format, i.e. "January 01, 2018", "11 hours ago UTC", "now UTC"
    :type date_str: str

    :return: whole milliseconds between 1970-01-01T00:00:00Z and the parsed date
    :rtype: int

    :raises ValueError: if ``date_str`` cannot be parsed into a date
    """
    # parse our date string; dateparser signals failure by returning None
    d = dateparser.parse(date_str)
    if d is None:
        raise ValueError("Unable to parse date string: {!r}".format(date_str))

    # if the date is not timezone aware apply UTC timezone
    if d.tzinfo is None or d.tzinfo.utcoffset(d) is None:
        d = d.replace(tzinfo=pytz.utc)

    # epoch as an aware UTC datetime (datetime.utcfromtimestamp is deprecated)
    epoch = datetime(1970, 1, 1, tzinfo=pytz.utc)

    # Integer division of timedeltas is exact; going through
    # total_seconds() * 1000.0 and int() can lose a millisecond to float rounding.
    return (d - epoch) // timedelta(milliseconds=1)

def milliseconds_to_date(ms: int) -> str:
    """Render a Unix timestamp in milliseconds as a local-time date string.

    The instant itself is converted to local time, so the UTC offset (including
    any daylight-saving shift) is the one in force at that instant. Adding the
    offset to the local-time epoch instead would apply the offset that was in
    force on 1970-01-01 and be wrong whenever the two differ.

    :param ms: milliseconds since 1970-01-01T00:00:00Z
    :type ms: int

    :return: ``str()`` of the naive local datetime, e.g. '2022-05-24 22:13:12.370000'
    :rtype: str
    """
    # Split into whole seconds and leftover milliseconds so the sub-second part
    # is added exactly rather than through a float timestamp.
    seconds, millis = divmod(ms, 1000)
    return str(datetime.fromtimestamp(seconds) + timedelta(milliseconds=millis))

# Round trip: parse a relative date string to epoch milliseconds, then render
# those milliseconds back as a date string for display.
milliseconds_to_date(date_to_milliseconds("now UTC+8"))
# milliseconds_to_date(1653222960000)
# date_to_milliseconds("now")


'2022-05-24 22:13:12.370000'

In [4]:
def interval_to_milliseconds(interval: "DataType | str"):
    """Convert a Binance kline interval to its length in milliseconds.

    :param interval: a ``DataType`` member (its ``.value`` is used) or the raw
        Binance interval string: 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d, 3d, 1w
    :type interval: DataType or str

    :return:
        None if unit not one of m, h, d or w (this includes the monthly
        interval, which has no fixed length)
        None if string not in correct format
        int value of interval in milliseconds
    """
    seconds_per_unit = {
        "m": 60,
        "h": 60 * 60,
        "d": 24 * 60 * 60,
        "w": 7 * 24 * 60 * 60
    }

    # Enum members carry the interval string in .value; plain strings are used as-is.
    interval_str = getattr(interval, "value", interval)
    if not isinstance(interval_str, str) or not interval_str:
        return None

    count, unit = interval_str[:-1], interval_str[-1]
    if unit not in seconds_per_unit:
        return None

    try:
        return int(count) * seconds_per_unit[unit] * 1000
    except ValueError:
        # the part before the unit is not an integer, e.g. "xh" or a bare "m"
        return None

# Sanity check: one day should come out as 24 * 60 * 60 * 1000 milliseconds.
interval_to_milliseconds(DataType.INTERVAL_1DAY)

86400000

In [5]:
import time

def get_historical_klines(symbol, interval: DataType, start_str, end_str=None, client=None):
    """Get Historical Klines from Binance

    Requests pages of up to 500 klines, moving the start time forward one
    interval past the last kline received, until a page comes back empty or
    short, or the next start time would pass the end time.

    See dateparser docs for valid start and end string formats http://dateparser.readthedocs.io/en/latest/

    If using offset strings for dates add "UTC" to date string e.g. "now UTC", "11 hours ago UTC"

    :param symbol: Name of symbol pair e.g BNBBTC
    :type symbol: str
    :param interval: Binance Kline interval
    :type interval: DataType
    :param start_str: Start date string in UTC format
    :type start_str: str
    :param end_str: optional - end date string in UTC format
    :type end_str: str
    :param client: optional - object exposing
        ``get_klines(symbol=, interval=, limit=, startTime=, endTime=)``.
        When omitted, an unauthenticated ``Client`` routed through the local
        proxy at 127.0.0.1:8900 is created.

    :return: list of OHLCV values

    :raises ValueError: if the interval has no fixed millisecond length
        (``interval_to_milliseconds`` returned None), since pages cannot be
        advanced by it

    """
    # convert interval to useful value in milliseconds; fail before any request
    # instead of with a TypeError when the start timestamp is advanced
    timeframe = interval_to_milliseconds(interval)
    if timeframe is None:
        raise ValueError(
            "Interval {!r} cannot be converted to milliseconds".format(interval)
        )

    if client is None:
        proxies = {
            "http": "http://127.0.0.1:8900",
            "https": "http://127.0.0.1:8900",
        }

        # create the Binance client, no need for api key
        client = Client("", "",  {'proxies': proxies})

    # setup the max limit
    limit = 500

    # convert our date strings to milliseconds
    start_ts = date_to_milliseconds(start_str)

    # if an end time was passed convert it
    end_ts = date_to_milliseconds(end_str) if end_str else None

    # init our list
    output_data = []

    request_count = 0
    while True:
        # fetch the klines from start_ts up to max 500 entries or the end_ts if set
        temp_data = client.get_klines(
            symbol=symbol,
            interval=interval.value,
            limit=limit,
            startTime=start_ts,
            endTime=end_ts
        )
        request_count += 1

        # An empty page means there is nothing (more) to fetch. This also covers
        # a follow-up page after a full one, where indexing the last row of the
        # empty list would raise IndexError.
        if not temp_data:
            break

        # append this loops data to our output data
        output_data += temp_data

        # check if we received less than the required limit and exit the loop
        if len(temp_data) < limit:
            break

        # update our start timestamp using the last value in the array and add the interval timeframe
        start_ts = temp_data[-1][0] + timeframe

        # the next page would start after the requested end: nothing left to fetch
        if end_ts is not None and start_ts > end_ts:
            break

        # sleep after every 3rd call to be kind to the API
        if request_count % 3 == 0:
            time.sleep(1)

    return output_data
    
# Demo fetch: 1-minute klines for one symbol over a short window given as
# relative date strings (they are passed on to get_historical_klines as-is).
symbol = "BTCBUSD"
interval = DataType.INTERVAL_1MINUTE
start = "10 minutes ago UTC+8"
end = "7 minutes ago UTC+8"
data_list = get_historical_klines(symbol, interval, start, end)
# Bare last expression so the notebook displays the fetched rows.
data_list

[[1653401040000,
  '28952.45000000',
  '28953.24000000',
  '28886.23000000',
  '28918.36000000',
  '29.17111000',
  1653401099999,
  '843705.34758050',
  651,
  '12.77976000',
  '369602.04789640',
  '0'],
 [1653401100000,
  '28918.34000000',
  '28954.70000000',
  '28878.48000000',
  '28890.41000000',
  '76.79360000',
  1653401159999,
  '2220226.81876930',
  812,
  '27.09509000',
  '783360.93187570',
  '0'],
 [1653401160000,
  '28893.06000000',
  '28894.88000000',
  '28800.00000000',
  '28807.31000000',
  '124.40390000',
  1653401219999,
  '3586257.69918880',
  2555,
  '53.05909000',
  '1529751.18677910',
  '0']]

In [16]:
import pandas as pd
# Build the kline table in one chain: label the 12 fields of each row, then
# drop the trailing 'ignore' field.
# NOTE(review): 'quote_assert_volume' looks like a typo for 'quote_asset_volume';
# left unchanged here because renaming the column changes the table's schema.
data_df = pd.DataFrame(
    data_list,
    columns=[
        'open_time', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_assert_volume', 'number_of_trades',
        'taker_buy_volume', 'taker_buy_quote_asset_volume', 'ignore',
    ],
).drop(columns=['ignore'])
# Alternatives tried for rendering the timestamp columns as dates:
# data_df[['open_time', 'close_time']].apply(lambda x: x.map(milliseconds_to_date))
# data_df['open_time'].map(milliseconds_to_date)

# Show only the last two rows, renumbered from 0.
data_df.tail(2).reset_index(drop=True)


Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_assert_volume,number_of_trades,taker_buy_volume,taker_buy_quote_asset_volume
0,1653401100000,28918.34,28954.7,28878.48,28890.41,76.7936,1653401159999,2220226.8187693,812,27.09509,783360.9318757
1,1653401160000,28893.06,28894.88,28800.0,28807.31,124.4039,1653401219999,3586257.6991888,2555,53.05909,1529751.1867791


In [6]:
# import os
# file_loc = 'data/{}_{}.csv'.format(symbol, interval.value)
# if os.path.exists(file_loc):
#     # If file exist, append.
#     data_df.to_csv(file_loc, mode='a', index=False, header=False)
# else:
#     data_df.to_csv(file_loc, index=False)

Test reading from csv

In [7]:
# history_data = None
# if os.path.exists(file_loc):
#     history_data = pd.read_csv(file_loc)
# history_data

Get the target element

In [9]:
# data_df.iloc[-1]['open_time']
# milliseconds_to_date(int(data_df['open_time'].values[0]))
# data_df['open_time'].values[-1]

Rows and columns

In [17]:
# data_df.shape[0]
# len(data_df)
# data_df.shape[1]

11

In [43]:
# Build a two-column frame straight from the raw value arrays.
# NOTE(review): both columns, including the one labelled 'open_time', are filled
# from data_df['open'] — confirm whether 'open_time' was meant as the source.
# new_data.assign(
#     open_time = data_df['open'].values,
#     open = data_df['open'].values)
new_data = pd.DataFrame({
    'open_time': data_df['open'].values,
    'ot': data_df['open'].values,
})
# Last two rows, all columns, keeping their original index labels.
new_data.tail(2)

Unnamed: 0,open_time,ot
1,29229.66,29229.66
2,29254.0,29254.0
