In [1]:
import datetime as dt
import json
import requests
import pandas as pd
from dateutil.relativedelta import relativedelta

# Request to Binance

In [2]:
def get_binance_data(symbol='ETHUSDT', interval='1h', start_date:str=None):
    # Define the start and end times for the data
    if start_date:
        start_time = dt.datetime.strptime(start_date, '%Y-%m-%d')
        end_time = start_time + relativedelta(months=1)
    else:
        end_time = dt.datetime.now()
        start_time = end_time + relativedelta(months=-1)
    
    
    # Convert the times to Unix timestamps in milliseconds
    start_timestamp = int(start_time.timestamp() * 1000)
    end_timestamp = int(end_time.timestamp() * 1000)
    
    # Define the Binance API endpoint for K-line data
    endpoint = 'https://api.binance.com/api/v3/klines'
    
    # Define the parameters for the API request
    symbol = 'ETHUSDT'
    interval = '1h'
    limit=10000
    params = {'symbol': symbol, 'interval': interval, 'startTime': start_timestamp, 'endTime': end_timestamp, 'limit': limit}
    
    # Send the API request and store the response data in a list
    data = []
    
    response = requests.get(endpoint, params=params)
    klines = json.loads(response.text)
    data = pd.DataFrame(klines)
    data.columns = ['open_time',
                  'open', 'high', 'low', 'close', 'volume',
                  'close_time', 'qav', 'num_trades',
                  'taker_base_vol', 'taker_quote_vol', 'ignore']
    data.index = [dt.datetime.fromtimestamp(x / 1000.0) for x in data.open_time]
    data['open_time'] = [dt.datetime.fromtimestamp(x / 1000.) for x in data.open_time]
    data['close_time'] = [dt.datetime.fromtimestamp(x / 1000.) for x in data.close_time]
    data.drop(data.tail(1).index, inplace=True)
    return data

In [3]:
df = get_binance_data(symbol='BTCUSDT', start_date='2022-01-01')

In [4]:
df.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,qav,num_trades,taker_base_vol,taker_quote_vol,ignore
2022-01-01 00:00:00,2022-01-01 00:00:00,3645.04,3701.85,3633.84,3688.41,13070.9293,2022-01-01 00:59:59.999,47926729.240767,32205,6910.6642,25347125.964657,0
2022-01-01 01:00:00,2022-01-01 01:00:00,3688.12,3712.5,3673.55,3681.8,8131.409,2022-01-01 01:59:59.999,30031978.972415,21380,3829.3638,14142151.656249,0
2022-01-01 02:00:00,2022-01-01 02:00:00,3681.8,3697.9,3672.22,3676.23,6004.291,2022-01-01 02:59:59.999,22146433.615268,13214,3367.8659,12423892.882739,0
2022-01-01 03:00:00,2022-01-01 03:00:00,3676.22,3730.0,3676.22,3723.04,9023.374,2022-01-01 03:59:59.999,33430053.335683,21702,4552.4363,16864789.87679,0
2022-01-01 04:00:00,2022-01-01 04:00:00,3723.04,3748.45,3714.1,3724.89,8997.7569,2022-01-01 04:59:59.999,33572623.190461,21666,4156.575,15508157.024431,0


In [5]:
df.tail()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,qav,num_trades,taker_base_vol,taker_quote_vol,ignore
2022-01-31 19:00:00,2022-01-31 19:00:00,2620.66,2692.79,2613.35,2685.0,48694.3771,2022-01-31 19:59:59.999,129324027.545149,79454,27985.7234,74347906.276201,0
2022-01-31 20:00:00,2022-01-31 20:00:00,2684.78,2696.33,2669.28,2686.82,31359.7943,2022-01-31 20:59:59.999,84113003.686906,55708,16817.5758,45106664.359355,0
2022-01-31 21:00:00,2022-01-31 21:00:00,2686.9,2694.92,2673.04,2679.17,14535.9691,2022-01-31 21:59:59.999,39006146.260717,36940,6994.6519,18771936.870232,0
2022-01-31 22:00:00,2022-01-31 22:00:00,2679.17,2706.77,2661.53,2672.72,26051.81,2022-01-31 22:59:59.999,69974339.894553,56117,14091.078,37878473.326162,0
2022-01-31 23:00:00,2022-01-31 23:00:00,2672.5,2694.85,2662.51,2683.97,17418.7787,2022-01-31 23:59:59.999,46649444.708565,34074,8337.793,22327641.048081,0


In [6]:
start_date = (dt.datetime.today() - relativedelta(months=1)).__str__().split(' ')[0][:-2] + '01'
int(start_date[:4])

2024

In [7]:
start_date

'2024-01-01'

In [9]:
start_date = (dt.datetime.today() - relativedelta(months=1)).__str__().split(' ')[0][:-2] + '01'
while int(start_date[:4]) > 2017:
    df = get_binance_data(symbol='BTCUSDT', start_date=start_date)
    file_name = '../data/raw/btc-usdt/' + start_date + '.csv'
    df.to_csv(file_name)
    print(f'file {file_name} wrote.')
    start_date = dt.datetime.strptime(start_date, '%Y-%m-%d') + relativedelta(months=-1)
    start_date = start_date.__str__().split(' ')[0][:-2] + '01'

file ../data/raw/btc-usdt/2024-01-01.csv wrote.
file ../data/raw/btc-usdt/2023-12-01.csv wrote.
file ../data/raw/btc-usdt/2023-11-01.csv wrote.
file ../data/raw/btc-usdt/2023-10-01.csv wrote.
file ../data/raw/btc-usdt/2023-09-01.csv wrote.
file ../data/raw/btc-usdt/2023-08-01.csv wrote.
file ../data/raw/btc-usdt/2023-07-01.csv wrote.
file ../data/raw/btc-usdt/2023-06-01.csv wrote.
file ../data/raw/btc-usdt/2023-05-01.csv wrote.
file ../data/raw/btc-usdt/2023-04-01.csv wrote.
file ../data/raw/btc-usdt/2023-03-01.csv wrote.
file ../data/raw/btc-usdt/2023-02-01.csv wrote.
file ../data/raw/btc-usdt/2023-01-01.csv wrote.
file ../data/raw/btc-usdt/2022-12-01.csv wrote.
file ../data/raw/btc-usdt/2022-11-01.csv wrote.
file ../data/raw/btc-usdt/2022-10-01.csv wrote.
file ../data/raw/btc-usdt/2022-09-01.csv wrote.
file ../data/raw/btc-usdt/2022-08-01.csv wrote.
file ../data/raw/btc-usdt/2022-07-01.csv wrote.
file ../data/raw/btc-usdt/2022-06-01.csv wrote.
file ../data/raw/btc-usdt/2022-05-01.csv

In [ ]:
start_date = dt.datetime.today().__str__().split(' ')[0][:-2] + '01'