In [8]:
import pandas as pd
import numpy as np
import math
import sys
import os.path
import time

from bitmex import bitmex
from datetime import timedelta
from dateutil import parser
from IPython.display import clear_output

In [9]:
def update_progress(progress, message):
    ### Arguements
    # progress - int or float of percentage done
    # message  - anything you want to relay to the right of the progress-bar
    sys.stdout.write('\r[{0}{1}] {2}'.format('#'*int(progress/4), '_'*(25 - int(progress/4)), f"{round(progress,2)}% {message}")),

In [10]:
def save_to_csv(df, filename):
    print("")
    sys.stdout.write('\rSaving...'.format(filename)),
    df.to_csv(filename)
    sys.stdout.write('\r{0} saved..!\n'.format(filename)),

In [11]:
def download_bitmex_data_to_csv(symbols_to_get, kline_size):
    global new_data
    new_data = False
    print("Downloading data...")
    for symbol in symbols_to_get:
        
        filename = f"{symbol}-{kline_size}-data.csv"
        
        if os.path.isfile(filename):
            mydf = pd.read_csv(filename)
            oldest_point_of_data = parser.parse(mydf["timestamp"].iloc[-1]) + timedelta(minutes = + binsizes[kline_size])
            latest_point_of_data = client.Trade.Trade_getBucketed(symbol=symbol, binSize=kline_size, count=1, reverse=True).result()[0][0]['timestamp']
        else:
            mydf = pd.DataFrame()
            oldest_point_of_data = client.Trade.Trade_getBucketed(symbol=symbol, binSize=kline_size, count=1, reverse=False).result()[0][0]['timestamp']
            latest_point_of_data = client.Trade.Trade_getBucketed(symbol=symbol, binSize=kline_size, count=1, reverse=True).result()[0][0]['timestamp']
        
        delta_time = latest_point_of_data - oldest_point_of_data
        delta_minutes = delta_time.days * 24 * 60 + delta_time.seconds / 60
        available_data = math.ceil(delta_minutes/binsizes[kline_size])
        rounds = math.ceil(available_data / batch_size)
        
        if rounds == 0:
            print(f"No new data for {symbol}..!")
            
        else:
            new_data = True
            print("\n" + str(delta_time) + " data available for " + symbol + " (" + str(available_data) + " points of " + kline_size + " data)")
            for x in range(rounds):
                time.sleep(1) # To prevent API overflow, only allowed to call once pr. second.
                new_time = (oldest_point_of_data + timedelta(minutes = x * batch_size * binsizes[kline_size]))
                data = client.Trade.Trade_getBucketed(symbol=symbol, binSize=kline_size, count=batch_size, startTime = new_time).result()[0]
                temp_df = pd.DataFrame(data)
                mydf = mydf.append(temp_df)
                progress = (x + 1) / rounds * 100
                update_progress(progress, f"Downloading round {x + 1} / {rounds}")

            mydf.set_index("timestamp", inplace=True)
            mydf = mydf[["close", "high", "low", "open", "trades", "volume", "vwap", "turnover"]] # Which columns to save
            
            save_to_csv(mydf, filename)

    print("\nAll caught up..!\n")

In [12]:
def merge_and_clean(symbols_to_get, kline_size, save = False):
    if new_data:
        print("Merging data...")
        for symbol in symbols_to_get:

            progress = (symbols_to_get.index(symbol) + 1) / len(symbols_to_get) * 100
            update_progress(progress, f"Adding {symbol} to dataframe")
            filename = f"{symbol}-{kline_size}-data.csv"
            temp_df = pd.read_csv(filename)

            # rename volume and close to include the ticker so we can still which close/volume is which:
            temp_df.rename(columns={"close": f"{symbol}_close",
                               "high": f"{symbol}_high",
                               "low": f"{symbol}_low",
                               "open": f"{symbol}_open",
                               "trades": f"{symbol}_trades",
                               "volume": f"{symbol}_volume",
                               "vwap": f"{symbol}_vwap",
                               "turnover": f"{symbol}_turnover"}, inplace=True)

            temp_df.set_index("timestamp", inplace=True)  # set time as index so we can join them on this shared time
            temp_df = temp_df[[f"{symbol}_close",
                               f"{symbol}_high",
                               f"{symbol}_low",
                               f"{symbol}_open",
                               f"{symbol}_trades",
                               f"{symbol}_volume",
                               f"{symbol}_vwap",
                               f"{symbol}_turnover"]]  # ignore the other columns besides price and volume

            if symbols_to_get.index(symbol)==0:
                mydf = temp_df
            else:
                mydf = mydf.join(temp_df)

        update_progress(100.0, f"Dataframe is complete..!")

        mydf = mydf.astype("float64") # Makes sure that all values are treated as the same type
        mydf[mydf==np.inf]=np.nan # Converts any inf values to nan
        mydf.fillna(method="ffill", inplace=True)  # Any gaps in the data is filled with previously known values (ffill = forward fill)
        mydf.dropna(inplace=True) # Despite the fact that there shouldn't be any nans left we still use dropna to make sure

        if save:
            filename = f"Merged-{kline_size}-data.csv"
            save_to_csv(mydf, filename)
    else:
        print("No new data, no merging required...")

In [13]:
### API
api_key = '[REDACTED]'
api_secret = '[REDACTED]'

### SETTINGS
symbols_to_get = ["XBTUSD", "ETHUSD", "XRPZ18", "LTCZ18", "EOSZ18", "BCHZ18", "ADAZ18", "TRXZ18"]
kline_size = "1m"

### CONSTANTS / GLOBALS

binsizes = {"1m": 1, "5m": 5, "1h": 60, "1d": 1440}
batch_size = 750
client = bitmex(test=False, api_key=api_key, api_secret=api_secret)

In [14]:
### It's runtime baby
clear_output()
download_bitmex_data_to_csv(symbols_to_get, kline_size)
merge_and_clean(symbols_to_get, kline_size, save = True)
if not new_data:
    time.sleep(5) # This should prevent API overflow in case of constant looping when checking encountering no new data.

Downloading data...

0:02:00 data available for XBTUSD (2 points of 1m data)
[#########################] 100.0% Downloading round 1 / 1
XBTUSD-1m-data.csv saved..!

0:03:00 data available for ETHUSD (3 points of 1m data)
[#########################] 100.0% Downloading round 1 / 1
ETHUSD-1m-data.csv saved..!

0:02:00 data available for XRPZ18 (2 points of 1m data)
[#########################] 100.0% Downloading round 1 / 1
XRPZ18-1m-data.csv saved..!

0:02:00 data available for LTCZ18 (2 points of 1m data)
[#########################] 100.0% Downloading round 1 / 1
LTCZ18-1m-data.csv saved..!

0:02:00 data available for EOSZ18 (2 points of 1m data)
[#########################] 100.0% Downloading round 1 / 1
EOSZ18-1m-data.csv saved..!

0:02:00 data available for BCHZ18 (2 points of 1m data)
[#########################] 100.0% Downloading round 1 / 1
BCHZ18-1m-data.csv saved..!

0:01:00 data available for ADAZ18 (1 points of 1m data)
[#########################] 100.0% Downloading round 1 / 1
