In [7]:
import os
from pathlib import Path

import sys
import csv
import ccxt

# Make the local ``python`` directory importable from this notebook.
# ``os.path.abspath('')`` resolves to the current working directory.
root = os.path.abspath('')
sys.path.append(f'{root}/python')


def retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    """Fetch one batch of OHLCV candles, retrying when the request fails.

    Args:
        exchange: Object exposing ``fetch_ohlcv(symbol, timeframe, since, limit)``.
        max_retries: Number of retries allowed after the first failed attempt,
            so at most ``max_retries + 1`` calls are made.
        symbol: Market symbol forwarded to ``fetch_ohlcv``.
        timeframe: Candle timeframe forwarded to ``fetch_ohlcv``.
        since: Start timestamp forwarded to ``fetch_ohlcv``.
        limit: Maximum number of candles forwarded to ``fetch_ohlcv``.

    Returns:
        Whatever ``exchange.fetch_ohlcv`` returns on the first successful call.

    Raises:
        Exception: The error from the last failed attempt once the retry
            budget is exhausted.
    """
    num_retries = 0
    while True:
        num_retries += 1
        try:
            return exchange.fetch_ohlcv(symbol, timeframe, since, limit)
        except Exception:
            # Give up only after the initial attempt plus ``max_retries``
            # retries have all failed; otherwise loop and try again instead
            # of silently returning None.
            if num_retries > max_retries:
                raise


def scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    """Collect OHLCV candles by paging backwards from the current time.

    Each iteration requests ``limit`` candles starting one window
    (``limit`` * timeframe duration) before the earliest candle collected so
    far, and prepends the new batch to the result.

    Args:
        exchange: Object exposing ``milliseconds()``, ``parse_timeframe()``,
            ``iso8601()`` and the fetch call used by ``retry_fetch_ohlcv``.
        max_retries: Retry budget forwarded to ``retry_fetch_ohlcv``.
        symbol: Market symbol to fetch.
        timeframe: Candle timeframe string understood by ``parse_timeframe``.
        since: Timestamp in milliseconds; paging stops once a request window
            starts before it.
        limit: Number of candles requested per batch.

    Returns:
        A list of candles ordered oldest to newest, each candle being a
        sequence whose first element is its timestamp. The list may be empty
        and may contain candles older than ``since`` from the final batch.
    """
    earliest_timestamp = exchange.milliseconds()
    timeframe_duration_in_seconds = exchange.parse_timeframe(timeframe)
    timeframe_duration_in_ms = timeframe_duration_in_seconds * 1000
    timedelta = limit * timeframe_duration_in_ms
    all_ohlcv = []
    while True:
        fetch_since = earliest_timestamp - timedelta
        ohlcv = retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, fetch_since, limit)
        # An empty (or missing) batch means there is nothing more to page
        # through; stop instead of indexing into it.
        if not ohlcv:
            break
        # No candle older than what we already hold: no further progress.
        if ohlcv[0][0] >= earliest_timestamp:
            break
        if all_ohlcv:
            # When the data has gaps a batch can reach forward into candles
            # that were already collected; keep only the strictly older ones
            # so the result contains no duplicates.
            oldest_collected = all_ohlcv[0][0]
            ohlcv = [candle for candle in ohlcv if candle[0] < oldest_collected]
        earliest_timestamp = ohlcv[0][0]
        all_ohlcv = ohlcv + all_ohlcv
        print(len(all_ohlcv), symbol, 'candles in total from', exchange.iso8601(all_ohlcv[0][0]), 'to',
              exchange.iso8601(all_ohlcv[-1][0]))
        # if we have reached the checkpoint
        if fetch_since < since:
            break
    return all_ohlcv


def write_to_csv(filename, exchange, data):
    """Write rows to ``./data/raw/<exchange>/<filename>`` as CSV.

    Args:
        filename: File name, optionally with sub-directories, relative to the
            exchange directory.
        exchange: Value whose ``str()`` names the per-exchange directory.
        data: Iterable of rows passed to ``csv.writer.writerows``.

    Returns:
        The ``Path`` of the file that was written.
    """
    full_path = Path("./data/raw/", str(exchange)) / str(filename)
    # Create the directory of the target file itself so a filename that
    # carries sub-directories does not fail with FileNotFoundError.
    full_path.parent.mkdir(parents=True, exist_ok=True)
    with full_path.open('w', newline='') as output_file:
        csv_writer = csv.writer(output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerows(data)
    return full_path


def scrape_candles_to_csv(path, exchange_id, max_retries, symbol, timeframe, since, limit):
    """Download candles for one market and save them as a CSV file.

    Args:
        path: Prefix prepended to the generated file name.
        exchange_id: Name of the exchange class to instantiate from ``ccxt``.
        max_retries: Retry budget forwarded to ``scrape_ohlcv``.
        symbol: Market symbol; must be a key of the exchange's loaded markets.
        timeframe: Candle timeframe, also used in the file name.
        since: Start of the requested range, either a timestamp or a string
            that is converted with ``exchange.parse8601``.
        limit: Number of candles requested per batch.

    Raises:
        ValueError: If ``symbol`` is not among the exchange's loaded markets.
    """
    exchange = getattr(ccxt, exchange_id)({
        'enableRateLimit': True,
    })
    if isinstance(since, str):
        since = exchange.parse8601(since)
    exchange.load_markets()

    market = exchange.markets.get(symbol)
    if market is None:
        raise ValueError(f'Unknown symbol {symbol!r} on exchange {exchange_id!r}')
    # File name is <base>_<quote>_<timeframe>.csv, e.g. ETH_USDT_15m.csv.
    filename = f'{path}{market["base"]}_{market["quote"]}_{timeframe}.csv'
    ohlcv = scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit)
    if not ohlcv:
        # Nothing to report on or save; avoid writing an empty file and
        # indexing into an empty result below.
        print('No', symbol, 'candles fetched; nothing saved')
        return
    write_to_csv(filename, exchange, ohlcv)
    print('Saved', len(ohlcv), 'candles from', exchange.iso8601(ohlcv[0][0]), 'to', exchange.iso8601(ohlcv[-1][0]),
          'to', filename)

In [9]:
# Market and candle size to download.
symbol = 'ETH/USDT'
timeframe = '15m'

# Scrape the candles from the 'binanceusdm' exchange back to mid-July 2019,
# 1000 candles per request, and save them as CSV.
scrape_candles_to_csv(
    path='./',
    exchange_id='binanceusdm',
    max_retries=3,
    symbol=symbol,
    timeframe=timeframe,
    since='2019-07-15T00:00:00Z',
    limit=1000,
)

1000 ETH/USDT candles in total from 2022-07-05T12:00:00.000Z to 2022-07-15T21:45:00.000Z
2000 ETH/USDT candles in total from 2022-06-25T02:00:00.000Z to 2022-07-15T21:45:00.000Z
3000 ETH/USDT candles in total from 2022-06-14T16:00:00.000Z to 2022-07-15T21:45:00.000Z
4000 ETH/USDT candles in total from 2022-06-04T06:00:00.000Z to 2022-07-15T21:45:00.000Z
5000 ETH/USDT candles in total from 2022-05-24T20:00:00.000Z to 2022-07-15T21:45:00.000Z
6000 ETH/USDT candles in total from 2022-05-14T10:00:00.000Z to 2022-07-15T21:45:00.000Z
7000 ETH/USDT candles in total from 2022-05-04T00:00:00.000Z to 2022-07-15T21:45:00.000Z
8000 ETH/USDT candles in total from 2022-04-23T14:00:00.000Z to 2022-07-15T21:45:00.000Z
9000 ETH/USDT candles in total from 2022-04-13T04:00:00.000Z to 2022-07-15T21:45:00.000Z
10000 ETH/USDT candles in total from 2022-04-02T18:00:00.000Z to 2022-07-15T21:45:00.000Z
11000 ETH/USDT candles in total from 2022-03-23T08:00:00.000Z to 2022-07-15T21:45:00.000Z
12000 ETH/USDT cand