In [4]:
import os
import sys
import ccxt
import pandas as pd
import csv
from datetime import datetime
import time
# import pytz
import json
import numpy as np 
from pymongo import MongoClient
from bson.objectid import ObjectId
from pprint import pprint

import schedule


In [33]:
mongo_client = MongoClient()

In [62]:
def retry_fetch_ohlcv(exchange_id, max_retries, symbol, timeframe, since, limit, params={}):
    exchange = getattr(ccxt, exchange_id)({
        'enableRateLimit': True,  # required by the Manual
    })
    num_retries = 0
    try:
        num_retries += 1
        ohlcv = exchange.fetch_ohlcv(symbol, timeframe, since, limit, params=params)
        # print('Fetched', len(ohlcv), symbol, 'candles from', exchange.iso8601 (ohlcv[0][0]), 'to', exchange.iso8601 (ohlcv[-1][0]))
        return ohlcv
    except Exception:
        if num_retries > max_retries:
            raise  # Exception('Failed to fetch', timeframe, symbol, 'OHLCV in', max_retries, 'attempts')


def scrape_ohlcv(exchange_id, max_retries, symbol, timeframe, since, limit, params={}):
    exchange = getattr(ccxt, exchange_id)({
        'enableRateLimit': True,  # required by the Manual
    })
    timeframe_duration_in_seconds = exchange.parse_timeframe(timeframe)
    timeframe_duration_in_ms = timeframe_duration_in_seconds * 1000
    timedelta = limit * timeframe_duration_in_ms
    now = exchange.milliseconds()
    all_ohlcv = []
    fetch_since = since
    while fetch_since < now:
        ohlcv = retry_fetch_ohlcv(exchange_id, max_retries, symbol, timeframe, fetch_since, limit, params=params)
        fetch_since = (ohlcv[-1][0] + 1) if len(ohlcv) else (fetch_since + timedelta)
        all_ohlcv = all_ohlcv + ohlcv
        if len(all_ohlcv):
            print(len(all_ohlcv), 'candles in total from', exchange.iso8601(all_ohlcv[0][0]), 'to', exchange.iso8601(all_ohlcv[-1][0]))
        else:
            print(len(all_ohlcv), 'candles in total from', exchange.iso8601(fetch_since))
    return exchange.filter_by_since_limit(all_ohlcv, since, None, key=0)


def scrape_candles_to_csv(filename, exchange_id, max_retries, symbol, timeframe, since, limit):
    # instantiate the exchange by id
    exchange = getattr(ccxt, exchange_id)({
        'enableRateLimit': True,  # required by the Manual
    })
    # convert since from string to milliseconds integer if needed
    if isinstance(since, str):
        since = exchange.parse8601(since)
    # preload all markets from the exchange
    exchange.load_markets()
    # fetch all candles
    ohlcv = scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit)
    # save them to csv file
    write_to_csv(filename, ohlcv)
    print('Saved', len(ohlcv), 'candles from', exchange.iso8601(ohlcv[0][0]), 'to', exchange.iso8601(ohlcv[-1][0]), 'to', filename)


def scrape_candles_to_db(exchange_id, symbol, timeframe, since, max_retries=3, limit=100):
    # instantiate the exchange by id
    exchange = getattr(ccxt, exchange_id)({
        'enableRateLimit': True,  # required by the Manual
    })
    # convert since from string to milliseconds integer if needed
    if isinstance(since, str):
        since = exchange.parse8601(since)
    # preload all markets from the exchange
    exchange.load_markets()
    # fetch all candles
    ohlcv = scrape_ohlcv(exchange_id, max_retries, symbol, timeframe, since, limit)
    # ohlcv = ohlcv[0:-1]
    if len(ohlcv) > 0:
        df = pd.DataFrame(ohlcv)
        df.columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
        df['date'] = pd.to_datetime(df['timestamp'], unit='ms', utc=False)
        df = df[['timestamp', 'date', 'open', 'high', 'low', 'close', 'volume']]

        db = mongo_client[exchange_id]
        collection = db[f'{symbol}-{timeframe}']
        collection.delete_many({'timestamp': {'$gte': since}})
        collection.insert_many(df.to_dict("records"))

    print('Saved to DB', len(ohlcv), 'candles from', exchange.iso8601(ohlcv[0][0]), 'to', exchange.iso8601(ohlcv[-1][0]))


In [3]:

def job():
    print("I'm working...")

schedule.every(3).seconds.do(job)
# schedule.every(10).minutes.do(job)
# schedule.every().hour.do(job)
# schedule.every().day.at("10:30").do(job)
# schedule.every(5).to(10).minutes.do(job)
# schedule.every().monday.do(job)
# schedule.every().wednesday.at("13:15").do(job)
# schedule.every().minute.at(":17").do(job)

while True:
    schedule.run_pending()
    time.sleep(1)

I'm working...
I'm working...


KeyboardInterrupt: 

In [90]:
tframe2msec = {
  '1m': 1000 * 60 * 1,
  '5m': 1000 * 60 * 5,
  '15m': 1000 * 60 * 15,
  '30m': 1000 * 60 * 30,
  '1h': 1000 * 60 * 60,
  '8h': 1000 * 60 * 60 * 8,
  '1d': 1000 * 60 * 60 * 24,
}

def get_now_btimestamp(timeframe):
  delta = tframe2msec[timeframe]
  now_stamp = int(datetime.now().timestamp() * 1000)
  now_bstamp = divmod(now_stamp, delta)[0] * delta
  return now_bstamp


def get_latest_to_db(exchange_id, symbol, timeframe):
  db = mongo_client[exchange_id]
  collection_name = f'{symbol}-{timeframe}'

  now_bstamp = get_now_btimestamp(timeframe) 
  collection = db[collection_name]
  res = list(collection.find({'timestamp': {'$lt' : now_bstamp}}).sort([('timestamp', -1)]))
  since = now_bstamp
  if (len(res) == 0):
    since = now_bstamp - 5 * delta
  elif (now_bstamp - delta > res[0]['timestamp']):
    since = res[0]['timestamp']

  if (since < now_bstamp):
    print('Scraping to db:', exchange_id, symbol, timeframe)
    scrape_candles_to_db(exchange_id, symbol, timeframe, since)


In [92]:

exchange_id = 'kucoinfutures'
symbol = 'ADAUSDTM'
timeframe = '5m'

get_latest_to_db(exchange_id, symbol, timeframe)

Scraping to db: kucoinfutures ADAUSDTM 5m
2 candles in total from 2022-02-02T23:50:00.000Z to 2022-02-02T23:55:00.000Z
2 candles in total from 2022-02-02T23:50:00.000Z to 2022-02-02T23:55:00.000Z
Saved to DB 2 candles from 2022-02-02T23:50:00.000Z to 2022-02-02T23:55:00.000Z


1

In [60]:
[1,2,3,4][0:-1]

[1, 2, 3]