In [None]:
import os
import sys
import ccxt
import pandas as pd
import csv
from datetime import datetime
import time
# import pytz
import json
import numpy as np 
from pymongo import MongoClient
from bson.objectid import ObjectId
from pprint import pprint



# Show the interpreter and CCXT versions in use.
print(f'python {sys.version}')
print(f'CCXT Version: {ccxt.__version__}')

In [None]:
# MongoDB client shared by the cells and helper functions below.
# MongoClient() is called without arguments, so the driver's default connection settings apply.
mongo_client = MongoClient()


In [None]:
# Build one ccxt client per enabled exchange listed in config.json.
# Result: `exchanges` maps the exchange key to its instantiated ccxt client.
exchanges = {}
with open('config.json') as js:
    config = json.load(js)

for exkey, exconf in config['exchanges'].items():
    if not exconf['enabled'] or exkey in exchanges:
        continue
    excred = exconf['cred']
    for crk, crv in excred.items():
        # Only string values of the form "$NAME" are placeholders for an
        # environment variable; empty strings and non-string values (numbers,
        # booleans, nested objects) are passed to ccxt unchanged.
        if isinstance(crv, str) and crv.startswith('$'):
            env_value = os.getenv(crv[1:])
            if env_value is None:
                print(f'Warning: environment variable {crv[1:]} (used by {exkey}.{crk}) is not set')
            excred[crk] = env_value

    exchanges[exkey] = getattr(ccxt, exkey)(excred)

In [None]:
def _resolve_exchange(exchange_or_id):
    """Return a ccxt exchange client for an exchange id string, or the instance itself if one is given."""
    if isinstance(exchange_or_id, str):
        return getattr(ccxt, exchange_or_id)({
            'enableRateLimit': True,  # required by the Manual
        })
    return exchange_or_id


def retry_fetch_ohlcv(exchange_id, max_retries, symbol, timeframe, since, limit, params=None):
    """Fetch one page of OHLCV candles, retrying when the request fails.

    Args:
        exchange_id: ccxt exchange id (e.g. 'binance') or an already constructed
            exchange instance. Passing an instance reuses its rate limiter.
        max_retries: number of retries allowed after the first failed attempt.
        symbol: market symbol passed to ``fetch_ohlcv``.
        timeframe: timeframe label passed to ``fetch_ohlcv`` (e.g. '1h').
        since: start timestamp in milliseconds.
        limit: maximum number of candles to request.
        params: optional dict of exchange-specific parameters forwarded to
            ``fetch_ohlcv``.

    Returns:
        The list of candles returned by ``exchange.fetch_ohlcv``.

    Raises:
        Exception: the error of the last attempt, once ``max_retries`` retries
            have been used up.
    """
    exchange = _resolve_exchange(exchange_id)
    extra = params if params is not None else {}
    num_attempts = 0
    while True:
        num_attempts += 1
        try:
            return exchange.fetch_ohlcv(symbol, timeframe, since, limit, extra)
        except Exception:
            # Give up only after the initial attempt plus max_retries retries.
            if num_attempts > max_retries:
                raise


def scrape_ohlcv(exchange_id, max_retries, symbol, timeframe, since, limit, params=None):
    """Page through OHLCV candles from ``since`` up to the current time.

    Args:
        exchange_id: ccxt exchange id or an already constructed exchange instance.
        max_retries: retries allowed per page (see ``retry_fetch_ohlcv``).
        symbol: market symbol.
        timeframe: timeframe label understood by ``exchange.parse_timeframe``.
        since: start timestamp in milliseconds.
        limit: number of candles requested per page.
        params: optional dict of exchange-specific parameters forwarded with
            every page request.

    Returns:
        All fetched candles with timestamp >= ``since``, as filtered by
        ``exchange.filter_by_since_limit``.
    """
    exchange = _resolve_exchange(exchange_id)
    timeframe_duration_in_ms = exchange.parse_timeframe(timeframe) * 1000
    # Window skipped when a page comes back empty.
    timedelta = limit * timeframe_duration_in_ms
    now = exchange.milliseconds()
    all_ohlcv = []
    fetch_since = since
    while fetch_since < now:
        # Reuse the single exchange instance so rate limiting applies across pages.
        ohlcv = retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, fetch_since, limit, params)
        # Continue right after the last candle received, or skip one window if none came back.
        fetch_since = (ohlcv[-1][0] + 1) if len(ohlcv) else (fetch_since + timedelta)
        all_ohlcv.extend(ohlcv)
        if len(all_ohlcv):
            print(len(all_ohlcv), 'candles in total from', exchange.iso8601(all_ohlcv[0][0]), 'to', exchange.iso8601(all_ohlcv[-1][0]))
        else:
            print(len(all_ohlcv), 'candles in total from', exchange.iso8601(fetch_since))
    return exchange.filter_by_since_limit(all_ohlcv, since, None, key=0)


def write_to_csv(filename, data):
    """Write ``data`` (an iterable of rows) to ``filename`` as comma-separated values.

    The file is overwritten. No header row is written.
    """
    # newline='' lets the csv module control line endings itself; without it
    # platforms that translate '\n' end up with blank lines between rows.
    with open(filename, mode='w', newline='') as output_file:
        csv_writer = csv.writer(output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerows(data)


def scrape_candles_to_csv(filename, exchange_id, max_retries, symbol, timeframe, since, limit):
    """Download candles for one symbol/timeframe and save them to a CSV file.

    Args:
        filename: path of the CSV file to (over)write.
        exchange_id: ccxt exchange id, e.g. 'binance'.
        max_retries: retries allowed per page request.
        symbol: market symbol.
        timeframe: timeframe label, e.g. '1h'.
        since: start time, either milliseconds or an ISO-8601 string.
        limit: number of candles requested per page.
    """
    # instantiate the exchange by id
    exchange = getattr(ccxt, exchange_id)({
        'enableRateLimit': True,  # required by the Manual
    })
    # convert since from string to milliseconds integer if needed
    if isinstance(since, str):
        since = exchange.parse8601(since)
    # preload all markets from the exchange
    exchange.load_markets()
    # fetch all candles; scrape_ohlcv takes the exchange id, not the instance
    ohlcv = scrape_ohlcv(exchange_id, max_retries, symbol, timeframe, since, limit)
    # save them to csv file
    write_to_csv(filename, ohlcv)
    if len(ohlcv):
        print('Saved', len(ohlcv), 'candles from', exchange.iso8601(ohlcv[0][0]), 'to', exchange.iso8601(ohlcv[-1][0]), 'to', filename)
    else:
        # Nothing was returned, so there is no first/last candle to report.
        print('Saved', 0, 'candles to', filename)


def scrape_candles_to_db(exchange_id, max_retries, symbol, timeframe, since, limit):
    """Download candles for one symbol/timeframe and store them in MongoDB.

    Candles go to database ``exchange_id``, collection ``'{symbol}-{timeframe}'``.
    Documents already stored with ``timestamp >= since`` are deleted before the
    new ones are inserted. Nothing is written when no candles are returned.

    Args:
        exchange_id: ccxt exchange id, also used as the MongoDB database name.
        max_retries: retries allowed per page request.
        symbol: market symbol.
        timeframe: timeframe label, e.g. '1h'.
        since: start time, either milliseconds or an ISO-8601 string.
        limit: number of candles requested per page.
    """
    # instantiate the exchange by id
    exchange = getattr(ccxt, exchange_id)({
        'enableRateLimit': True,  # required by the Manual
    })
    # convert since from string to milliseconds integer if needed
    if isinstance(since, str):
        since = exchange.parse8601(since)
    # preload all markets from the exchange
    exchange.load_markets()
    # fetch all candles
    ohlcv = scrape_ohlcv(exchange_id, max_retries, symbol, timeframe, since, limit)
    if len(ohlcv) == 0:
        # No first/last candle exists to report, and stored data is left untouched.
        print('Saved to DB', 0, 'candles')
        return

    df = pd.DataFrame(ohlcv)
    df.columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    df['date'] = pd.to_datetime(df['timestamp'], unit='ms', utc=False)
    df = df[['timestamp', 'date', 'open', 'high', 'low', 'close', 'volume']]

    db = mongo_client[exchange_id]
    collection = db[f'{symbol}-{timeframe}']
    # Replace everything from `since` onwards with the freshly fetched candles.
    collection.delete_many({'timestamp': {'$gte': since}})
    collection.insert_many(df.to_dict("records"))

    print('Saved to DB', len(ohlcv), 'candles from', exchange.iso8601(ohlcv[0][0]), 'to', exchange.iso8601(ohlcv[-1][0]))



In [None]:
#########################################
# download klines from exchange to MongoDB
# Calls scrape_candles_to_db for every exchange / symbol / timeframe combination
# listed below, with max_retries=3 and limit=500 candles per request.
# The loop variables (exchange_id, symbol, timeframe) and `since` stay defined at
# notebook level after this cell has run.

# exchange_ids = ['kucoinfutures']
# symbols = ['XBTUSDTM', 'ETHUSDTM', 'ADAUSDTM', 'DOTUSDTM', 'SOLUSDTM']
since = '2021-01-01T00:00:00Z'  # ISO-8601 string; scrape_candles_to_db converts it with parse8601
exchange_ids = ['binance']
symbols = ['BTCUSDT', 'ETHUSDT', 'ADAUSDT', 'DOTUSDT', 'SOLUSDT']
timeframes = ['1d', '8h', '4h', '1h']
for exchange_id in exchange_ids:
  for symbol in symbols:
    for timeframe in timeframes:
      scrape_candles_to_db(exchange_id, 3, symbol, timeframe, since, 500)


# scrape_candles_to_db('kucoinfutures', 3, 'ALGOUSDTM', '8h', '2022-01-31T00:00:00Z', 100)

In [None]:
#########################################
# export all klines from MongoDB to csv files
# One header-less CSV per collection is written to ./data/<exchange_id>/<symbol>-<timeframe>.csv
# with the columns timestamp, open, high, low, close, volume.

for exchange_id in ['kucoinfutures']:
  data_dir = f'./data/{exchange_id}'
  os.makedirs(data_dir, exist_ok=True)
  for coll_name in mongo_client[exchange_id].list_collection_names():
    # Collections are named '<symbol>-<timeframe>'; split on the last '-' so a
    # symbol that itself contains '-' still parses.
    symbol, timeframe = coll_name.rsplit('-', 1)
    print(f'Exporting {coll_name}...')
    collection = mongo_client[exchange_id][coll_name]
    df = pd.DataFrame(collection.find())
    if df.empty:
      # An empty collection has no 'timestamp' column to index by.
      print(f'Skipping {coll_name}: no documents')
      continue
    # Sort by timestamp so the export order does not depend on storage order.
    df = df.set_index('timestamp').drop(columns=['_id', 'date']).sort_index()
    csv_fname = f'{data_dir}/{symbol}-{timeframe}.csv'
    df.to_csv(csv_fname, header=False)

In [None]:
# Make sure the per-exchange data directory exists.
# NOTE(review): `exchange_id` is not assigned in this cell; it relies on the value
# left behind by an earlier cell.
data_dir = f'./data/{exchange_id}'
os.makedirs(data_dir, exist_ok=True)

In [None]:
# Download one symbol/timeframe straight to a CSV file.
# NOTE(review): `exchange_id` and `data_dir` are not assigned here; they come from earlier cells.
symbol = 'ALGOUSDTM'
timeframe = '1d'
# Kept as an ISO-8601 string: scrape_candles_to_csv converts string values itself,
# and no `exchange` object exists at notebook level to convert it here.
since = '2020-01-01T00:00:00Z'
limit = 500
max_retries = 3

csv_fname = f'{data_dir}/{symbol}-{timeframe}.csv'
scrape_candles_to_csv(csv_fname, exchange_id, max_retries, symbol, timeframe, since, limit)


In [None]:
def csv_to_db(exchange_id, symbol, timeframe):
  """Load ./data/<exchange_id>/<symbol>-<timeframe>.csv into MongoDB.

  The CSV is expected to have no header row and the columns
  timestamp, open, high, low, close, volume. The target collection
  '<symbol>-<timeframe>' in database `exchange_id` is dropped and refilled.
  """
  data_dir = f'./data/{exchange_id}'
  os.makedirs(data_dir, exist_ok=True)
  collection_name = f'{symbol}-{timeframe}'
  csv_fname = f'{data_dir}/{symbol}-{timeframe}.csv'
  # The files are written without a header row, so the column names must be
  # supplied here; otherwise the first candle is consumed as the header.
  df = pd.read_csv(
    csv_fname,
    header=None,
    names=['timestamp', 'open', 'high', 'low', 'close', 'volume'],
  )

  df['date'] = pd.to_datetime(df['timestamp'], unit='ms', utc=False)
  df = df[['timestamp', 'date', 'open', 'high', 'low', 'close', 'volume']]

  db = mongo_client[exchange_id]
  dbcoll = db[collection_name]
  # The file is read before the drop, so an unreadable file leaves stored data intact.
  dbcoll.drop()
  dbcoll.insert_many(df.to_dict("records"))

In [None]:
#################################
# import klines from csv to DB
# Loads the CSV of every listed symbol/timeframe into MongoDB via csv_to_db.

exchange_id = 'kucoinfutures'
# Select the database only after exchange_id is set in this cell, so the cell
# does not depend on a value left over from an earlier one.
db = mongo_client[exchange_id]
data_dir = f'./data/{exchange_id}'
symbols = ['ALGOUSDTM']
timeframes = ['5m', '15m', '1h', '8h', '1d']
for symbol in symbols:
  for timeframe in timeframes:
    csv_to_db(exchange_id, symbol, timeframe)

In [None]:
#################################
# import all csv files into the DB
# Every '<symbol>-<timeframe>.csv' file in the exchange's data directory is loaded via csv_to_db.

exchange_id = 'kucoinfutures'
data_dir = f'./data/{exchange_id}'
os.makedirs(data_dir, exist_ok=True)

from os import listdir
from os.path import isfile, join

# Only regular *.csv files are considered; other files in the directory would
# not parse as '<symbol>-<timeframe>'.
csv_files = sorted(
  f for f in listdir(data_dir)
  if isfile(join(data_dir, f)) and f.endswith('.csv')
)
for csv_fname in csv_files:
  # Strip only the final extension and split on the last '-', so extra '.' or
  # '-' characters inside the symbol survive.
  symbol, timeframe = os.path.splitext(csv_fname)[0].rsplit('-', 1)
  print(symbol,timeframe)
  csv_to_db(exchange_id, symbol, timeframe)

In [None]:
# Length of one candle in milliseconds, keyed by timeframe label.
tframe2msec = {
  label: 1000 * 60 * minutes
  for label, minutes in [
    ('1m', 1),
    ('5m', 5),
    ('15m', 15),
    ('30m', 30),
    ('1h', 60),
    ('4h', 60 * 4),
    ('8h', 60 * 8),
    ('1d', 60 * 24),
  ]
}

def dbload_klines(exchange_id,symbol, tframe):
  """Load every document of collection '<symbol>-<tframe>' from database `exchange_id`.

  Returns a DataFrame indexed by (and sorted on) the 'timestamp' values; the
  'timestamp' column itself is kept alongside the index.
  """
  klines = pd.DataFrame(mongo_client[exchange_id][f'{symbol}-{tframe}'].find())
  # Indexing by the Series (rather than by column name) keeps the column in the frame.
  return klines.set_index(klines["timestamp"]).sort_index(axis=0)

def find_missing(df, delta):
  """Return the rows whose distance to the previous row's timestamp is not `delta`.

  Args:
    df: DataFrame with a 'timestamp' column, ordered by timestamp. It is not modified.
    delta: expected difference between consecutive timestamps.

  Returns:
    A new DataFrame with the columns 'timestamp' and 'prev_timestamp', one row
    per detected gap. The first row of `df` has no predecessor and is never reported.
  """
  gaps = df[['timestamp']].copy()
  # -1 marks "no previous row".
  gaps['prev_timestamp'] = gaps['timestamp'].shift(fill_value=-1)
  gaps = gaps[gaps['prev_timestamp'] != -1]
  gaps = gaps[(gaps['timestamp'] - gaps['prev_timestamp']) != delta]
  # Hand back an independent frame so callers can add columns without
  # pandas chained-assignment warnings.
  return gaps[['timestamp', 'prev_timestamp']].copy()

def dbload_missing(exchange,symbol, tframe):
  """Load one collection's candles and return its gaps, labelled with their origin.

  Args:
    exchange: exchange id, used as the MongoDB database name.
    symbol: market symbol part of the collection name.
    tframe: timeframe label; must be a key of `tframe2msec`.

  Returns:
    The DataFrame produced by `find_missing`, with the extra columns
    'exchange_id', 'symbol' and 'tframe'.
  """
  df = dbload_klines(exchange,symbol, tframe)
  mdf = find_missing(df, tframe2msec[tframe])
  # Label with the exchange passed in, not with a notebook-level `exchange_id`.
  mdf['exchange_id'] = exchange
  mdf['symbol'] = symbol
  mdf['tframe'] = tframe
  return mdf

In [None]:
# Show the gaps in the stored candles of a single collection.
# These assignments rebind the notebook-level exchange_id / symbol / tframe names.
exchange_id = 'kucoinfutures'
symbol = 'ALGOUSDTM'
tframe = '8h'
dbload_missing(exchange_id, symbol, tframe)
# df = dbload_klines(exchange_id, symbol, tframe)
# df

In [156]:
################################
# find all missing
# Collect the gaps of every '<symbol>-<tframe>' collection of every listed exchange.
exchange_ids = ['kucoinfutures']
mdf = pd.concat([
    # Split on the last '-' so the pieces map onto (symbol, tframe).
    dbload_missing(exchange_id, *collection.rsplit('-', 1))
    # The exchange loop must come first: the collections listed have to belong
    # to the exchange currently being iterated.
    for exchange_id in exchange_ids
    for collection in mongo_client[exchange_id].list_collection_names()
  ])[['exchange_id', 'symbol', 'tframe', 'timestamp', 'prev_timestamp']]

missing = list(mdf.to_records(index=False))
missing

[('kucoinfutures', 'ETHUSDTM', '4h', 1624809600000, 1624766400000),
 ('kucoinfutures', 'ALGOUSDTM', '15m', 1643087700000, 1643084100000),
 ('kucoinfutures', 'ETHUSDTM', '1h', 1638424800000, 1638417600000),
 ('kucoinfutures', 'ALGOUSDTM', '8h', 1624809600000, 1624752000000),
 ('kucoinfutures', 'SOLUSDTM', '4h', 1624809600000, 1624766400000),
 ('kucoinfutures', 'XBTUSDTM', '1h', 1638424800000, 1638417600000),
 ('kucoinfutures', 'ADAUSDTM', '1m', 1643877600000, 1643877480000),
 ('kucoinfutures', 'ADAUSDTM', '1m', 1643878980000, 1643878860000),
 ('kucoinfutures', 'SOLUSDTM', '8h', 1624809600000, 1624752000000),
 ('kucoinfutures', 'DOTUSDTM', '4h', 1624809600000, 1624766400000),
 ('kucoinfutures', 'DOTUSDTM', '1m', 1643877600000, 1643877480000),
 ('kucoinfutures', 'DOTUSDTM', '1m', 1643880660000, 1643880540000),
 ('kucoinfutures', 'ALGOUSDTM', '5m', 1643088000000, 1643084700000),
 ('kucoinfutures', 'ALGOUSDTM', '5m', 1643715300000, 1643714700000),
 ('kucoinfutures', 'ALGOUSDTM', '5m', 16437

In [157]:
# Add datetime versions of both millisecond timestamp columns, then display the table.
for ts_col, date_col in (('timestamp', 'date'), ('prev_timestamp', 'prev_date')):
  mdf[date_col] = pd.to_datetime(mdf[ts_col], unit='ms', utc=False)
mdf

Unnamed: 0_level_0,exchange_id,symbol,tframe,timestamp,prev_timestamp,date,prev_date
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1624809600000,kucoinfutures,ETHUSDTM,4h,1624809600000,1624766400000,2021-06-27 16:00:00,2021-06-27 04:00:00
1643087700000,kucoinfutures,ALGOUSDTM,15m,1643087700000,1643084100000,2022-01-25 05:15:00,2022-01-25 04:15:00
1638424800000,kucoinfutures,ETHUSDTM,1h,1638424800000,1638417600000,2021-12-02 06:00:00,2021-12-02 04:00:00
1624809600000,kucoinfutures,ALGOUSDTM,8h,1624809600000,1624752000000,2021-06-27 16:00:00,2021-06-27 00:00:00
1624809600000,kucoinfutures,SOLUSDTM,4h,1624809600000,1624766400000,2021-06-27 16:00:00,2021-06-27 04:00:00
...,...,...,...,...,...,...,...
1643086800000,kucoinfutures,XBTUSDTM,15m,1643086800000,1643085000000,2022-01-25 05:00:00,2022-01-25 04:30:00
1624809600000,kucoinfutures,ETHUSDTM,8h,1624809600000,1624752000000,2021-06-27 16:00:00,2021-06-27 00:00:00
1638424800000,kucoinfutures,DOTUSDTM,1h,1638424800000,1638417600000,2021-12-02 06:00:00,2021-12-02 04:00:00
1638424800000,kucoinfutures,ALGOUSDTM,1h,1638424800000,1638417600000,2021-12-02 06:00:00,2021-12-02 04:00:00


In [155]:
#########################################
# strip last n klines from MongoDB
# For every collection, delete the n documents with the highest timestamps.
n = 2
for exchange_id in ['kucoinfutures']:
  for coll_name in mongo_client[exchange_id].list_collection_names():
    print(f'Stripping last {n} records from {coll_name}...')
    dbcoll = mongo_client[exchange_id][coll_name]
    # Newest first, skip n-1 documents: what remains is the n-th newest one.
    nth_newest = list(dbcoll.find({}, {"timestamp":1}).sort('timestamp',-1).skip(n - 1).limit(1))
    if not nth_newest:
      # Fewer than n documents stored: leave the collection untouched.
      print(f'Skipping {coll_name}: fewer than {n} records')
      continue
    since = nth_newest[0]['timestamp']
    dbcoll.delete_many({'timestamp': {'$gte': since}})


Stripping last 2 records from ETHUSDTM-4h...
Stripping last 2 records from ALGOUSDTM-15m...
Stripping last 2 records from ETHUSDTM-1h...
Stripping last 2 records from XBTUSDTM-1d...
Stripping last 2 records from ALGOUSDTM-8h...
Stripping last 2 records from SOLUSDTM-4h...
Stripping last 2 records from XBTUSDTM-1h...
Stripping last 2 records from ADAUSDTM-1h...
Stripping last 2 records from ALGOUSDTM-1d...
Stripping last 2 records from SOLUSDTM-8h...
Stripping last 2 records from ETHUSDTM-1d...
Stripping last 2 records from DOTUSDTM-1d...
Stripping last 2 records from DOTUSDTM-4h...
Stripping last 2 records from ETHUSDTM-15m...
Stripping last 2 records from ALGOUSDTM-5m...
Stripping last 2 records from ADAUSDTM-5m...
Stripping last 2 records from SOLUSDTM-1h...
Stripping last 2 records from SOLUSDTM-15m...
Stripping last 2 records from DOTUSDTM-8h...
Stripping last 2 records from XBTUSDTM-4h...
Stripping last 2 records from XBTUSDTM-8h...
Stripping last 2 records from ADAUSDTM-8h...
Str

In [None]:
# Fetch the candles of one symbol/timeframe starting at `since`; `to` is handed to
# scrape_ohlcv as the exchange-specific parameter 'endAt'.
# NOTE(review): this call needs a scrape_ohlcv that accepts a `params` keyword —
# confirm the definition in use does.
exchange_id = 'kucoinfutures'
# exchange = exchange_id
symbol = 'ALGOUSDTM'
timeframe = '8h'
max_retries = 3
since = 1624752000000  # start timestamp, in milliseconds
to = 1624809600000  # end timestamp, in milliseconds
limit = 10
scrape_ohlcv(exchange_id, max_retries, symbol, timeframe, since, limit, params={'endAt': to})