In [1]:
# OrderBook class in use
import gzip
import os
import datetime
import json
import requests
from threading import Thread
import time
import random
import sys
import doctest
from decimal import Decimal, Context, ROUND_HALF_EVEN

class OrderBook(Thread):
    """Fetch, keep and serialize order book snapshots of one bitcoin exchange.

    An instance is a thread: once started it polls the selected exchange,
    stores every snapshot in ``orderbook_dict`` and from time to time flushes
    the collected snapshots to gzip files.

    Args:
        exchange (str, optional): Key of ``EXCHANGES``. Any other value silently
            falls back to ``DEFAULT_EXCHANGE``. Defaults to ``DEFAULT_EXCHANGE``.
        price_num_digits (int, optional): Max number of significant digits to keep
            in each serialized price. Defaults to 8.
        amount_num_digits (int, optional): Max number of significant digits to keep
            in each serialized amount. Defaults to 3.

    Attributes:
        price_num_digits (int): Max number of digits to keep in price.
        amount_num_digits (int): Max number of digits to keep in amount.
        orderbook_dict (dict): Dict representing a collection of orderbooks
            (all bids and asks for a given epoch), keyed by epoch.
        exchange (str): Current exchange.
        DEFAULT_EXCHANGE (str): Exchange used when an unknown one is requested.
        EXCHANGES (dict): Dict of available exchanges.
    """

    DEFAULT_EXCHANGE = 'bitstamp'
    # Per-exchange settings:
    #   url / params: endpoint and query parameters passed to requests.get
    #   best_shift:   decimal shifts applied to price / amount when serializing
    #   delay:        seconds; used both as request timeout and polling interval
    #   pair:         traded pair
    #   asks / bids:  callables turning the raw JSON into [[price, amount], ...]
    EXCHANGES = {
        'bitstamp':{
            'url':'http://www.bitstamp.net/api/order_book/',
            'params':None,
            'best_shift':{'price':2, 'amount':8},
            'delay':180,
            'pair':{'BTC','USD'},
            'asks':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['asks']]),
            'bids':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['bids']], reverse=True),
        },
        'mercado_bitcoin':{
            'url':'https://www.mercadobitcoin.com.br/api/orderbook/',
            'params':None,
            'best_shift':{'price':5, 'amount':5},
            'delay':300,
            'pair':{'BTC','BRL'},
            'asks':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['asks']]),
            'bids':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['bids']], reverse=True),
        },
        'okcoin':{
            'url':'https://www.okcoin.com/api/v1/depth.do',
            'params':{'symbol':'btc_usd'},
            'best_shift':{'price':2, 'amount':8},
            'delay':180,
            'pair':{'BTC','USD'},
            'asks':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['asks']]),
            'bids':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['bids']], reverse=True),
        },
        'korbit':{
            'url':'https://api.korbit.co.kr/v1/orderbook',
            'params':None,
            'best_shift':{'price':-2, 'amount':5},
            'delay':180,
            'pair':{'BTC','KRW'},
            'asks':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['asks']]),
            'bids':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['bids']], reverse=True),
        },
        'itbit':{
            'url':'https://api.itbit.com/v1/markets/XBTUSD/order_book',
            'params':None,
            'best_shift':{'price':2, 'amount':4},
            'delay':180,
            'pair':{'BTC','USD'},
            'asks':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['asks']]),
            'bids':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['bids']], reverse=True),
        },
        'bitfinex':{
            'url':'https://api.bitfinex.com/v1/book/btcusd',
            'params':None,
            'best_shift':{'price':2, 'amount':4},
            'delay':180,
            'pair':{'BTC','USD'},
            'asks':lambda d:sorted([[float(t['price']), float(t['amount'])] for t in d['asks']]),
            'bids':lambda d:sorted([[float(t['price']), float(t['amount'])] for t in d['bids']], reverse=True),
        },
        'hitbtc':{
            'url':'https://api.hitbtc.com/api/1/public/BTCUSD/orderbook',
            'params':None,
            'best_shift':{'price':2, 'amount':2},
            'delay':180,
            'pair':{'BTC','USD'},
            'asks':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['asks']]),
            'bids':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['bids']], reverse=True),
        },
        'poloniex':{#poloniex has much more pairs!, problem of decimals on price
            'url':'https://poloniex.com/public',
            'params':{'command':'returnOrderBook',
                    'currencyPair':'USDT_BTC',
                    'depth':'3000'},
            'best_shift':{'price':2, 'amount':8},
            'delay':120,
            'pair':{'BTC','USD'},
            'asks':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['asks']]),
            'bids':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['bids']], reverse=True),
        },
        'kraken':{#kraken has much more pairs!
            'url':'https://api.kraken.com/0/public/Depth',
            'params':{'pair':'XBTUSD'},
            'best_shift':{'price':2, 'amount':3},
            'delay':180,
            'pair':{'BTC','USD'},
            'asks':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['result']['XXBTZUSD']['asks']]),
            'bids':lambda d:sorted([[float(t[0]), float(t[1])] for t in d['result']['XXBTZUSD']['bids']], reverse=True),
        },
        'coindesk':{#NOT ORDERBOOK! ONLY LAST TICK
            'url':'https://api.coindesk.com/v1/bpi/currentprice/btcusd',
            'params':None,
            'best_shift':{'price':4, 'amount':0},
            'delay':120,
            'pair':{'BTC','USD'},
            'asks':lambda d:sorted([[float(d['bpi']['USD']['rate_float']), 1.0]]),
            'bids':lambda d:sorted([[float(d['bpi']['USD']['rate_float']), 1.0]]),
        },
        'test':{
            'url':None,
            'params':None,
            'best_shift':{'price':2, 'amount':8},
            'delay':2,
            'pair':{'BTC','USD'},
            'asks':lambda d:[[6563.59, 1.61],[6563.62, 1.5],[6571.79, 1.8],[6571.8, 0.01]],
            'bids':lambda d:[[6559.9, 0.02],[6559.62, 0.02],[6559.61, 0.2],[6558.1, 0.01]],
        },
    }

    def __init__(self, exchange = DEFAULT_EXCHANGE, price_num_digits=8, amount_num_digits=3):
        super(OrderBook, self).__init__()
        # Polled by run(); stop() clears it to end the polling loop.
        self._keep_running = True
        self.price_num_digits = price_num_digits
        self.amount_num_digits = amount_num_digits
        self.orderbook_dict = {}
        self.exchange = exchange if exchange in self.EXCHANGES else self.DEFAULT_EXCHANGE

    def get_json(self):
        """Returns a dict representing bids/asks for the current exchange.

        Exchanges configured without a url (the 'test' entry) perform no
        request and yield an empty string instead.
        """
        settings = self.EXCHANGES[self.exchange]
        url = settings['url']
        params = settings['params']
        delay = settings['delay']
        return requests.get(url, params=params, timeout=delay).json() if url else ''

    def _add_orderbook(self, utcepoch, standard_orderbook):
        """Add an entry to the orderbook dict.

        Args:
            utcepoch (int): A integer representing an epoch.
            standard_orderbook (dict): A standard orderbook
                (``{'asks': [...], 'bids': [...]}``).
        """
        self.orderbook_dict[utcepoch] = standard_orderbook

    def _convert_to_standard_orderbook(self, orderbook_json):
        """Returns a standard orderbook representing bids/asks for the current exchange.

        Args:
            orderbook_json: Raw payload, handed to the exchange's 'asks' and
                'bids' parsers.

        Returns:
            dict: ``{'asks': [[price, amount], ...], 'bids': [[price, amount], ...]}``.
        """
        settings = self.EXCHANGES[self.exchange]
        asks = settings['asks'](orderbook_json)
        bids = settings['bids'](orderbook_json)
        return {'asks':asks, 'bids':bids}

    def fetch_one(self):
        """Fetch one snapshot of the current exchange's orderbook and add It to the orderbook dict.

        Any failure is printed and swallowed so a polling thread survives
        transient network or parsing errors.
        """
        try:
            orderbook_json = self.get_json()
            standard_orderbook = self._convert_to_standard_orderbook(orderbook_json)
            # time.time() is the real Unix epoch on every platform. The former
            # utcnow().strftime('%s') is platform specific and interprets the
            # naive UTC value as local time, shifting the key by the UTC offset.
            utcepoch = int(time.time())
            self._add_orderbook(utcepoch, standard_orderbook)
        except Exception as err:
            print('ERR:', self.exchange, str(datetime.datetime.utcnow()), err)

    def _num_to_tuple(self, n, shift=0, max_num_digits=None):
        """Function to convert any number in it's normalized tuple.

        The number is rounded (half even) to ``max_num_digits`` significant
        digits and its exponent is increased by ``shift``.

        Args:
            n (float or str): Number to be converted.
            shift (int): Number of digits to be shifted.
            max_num_digits (int): Max number of digits to keep. A falsy value
                keeps every digit.

        Returns:
            DecimalTuple(sign, digits, exponent): A tuple representing the number.

        Examples:
            >>> c = OrderBook()
            >>> c._num_to_tuple('001')
            DecimalTuple(sign=0, digits=(1,), exponent=0)
            >>> c._num_to_tuple('00300.00')
            DecimalTuple(sign=0, digits=(3,), exponent=2)
            >>> c._num_to_tuple(008.003)
            DecimalTuple(sign=0, digits=(8, 0, 0, 3), exponent=-3)
            >>> c._num_to_tuple('500.43210000', max_num_digits=3)
            DecimalTuple(sign=0, digits=(5,), exponent=2)
            >>> c._num_to_tuple(500.43210000, max_num_digits=4, shift=1)
            DecimalTuple(sign=0, digits=(5, 0, 0, 4), exponent=0)
            >>> c._num_to_tuple('-00000000.0056000000', max_num_digits=2, shift=4)
            DecimalTuple(sign=1, digits=(5, 6), exponent=0)
            >>> c._num_to_tuple('-0000.0058064171659814651465', max_num_digits=1, shift=4)
            DecimalTuple(sign=1, digits=(6,), exponent=1)
        """
        number = Decimal(str(n)).normalize().as_tuple()
        l = len(number.digits)
        max_num_digits = max_num_digits if max_num_digits else l
        context = Context(prec=max_num_digits, rounding=ROUND_HALF_EVEN)
        # Round the bare digits as the fraction 0.d1d2d3... so the context
        # precision counts significant digits only.
        digits_round = context.create_decimal((0, number.digits, -l)).normalize().as_tuple()
        # Undo the 10**-l scaling, then apply the requested shift.
        return Decimal(
                (number.sign,
                 digits_round.digits,
                 number.exponent+digits_round.exponent+l+shift)
               ).normalize().as_tuple()

    @staticmethod
    def _tuple_to_text(number):
        """Render a DecimalTuple as compact text.

        Exponent 0 adds nothing, exponent 1 adds a single '0', any other
        exponent is written as ``e<exponent>``.
        """
        text = '-' if number.sign else ''
        text += ''.join(map(str, number.digits))
        if number.exponent == 1:
            text += '0'
        elif number.exponent != 0:
            text += 'e' + str(number.exponent)
        return text

    def _textfy_orderbook_dict(self, epoch_list=None):
        """Returns a compressed text of the selected timestamps of a orderbook_dict.

        One line is produced per epoch, in ascending epoch order::

            <epoch>|<n_asks> <n_bids>|<asks>|<bids>

        Each side is a space separated list of ``<price>,<amount>`` entries,
        where every price is the difference to the previous price of that side
        (the first one is relative to 0). A side without orders is an empty
        field, so a line always has four '|' separated fields.

        Args:
            epoch_list (list[int]): A list of valid epoch times from the orderbook keys.
                Unknown epochs are skipped; a falsy value selects every epoch.

        Returns:
            Compressed orderbook collection(str): A text containing a compressed orderbook_dict.
        """
        best_shift = self.EXCHANGES[self.exchange]['best_shift']
        price_shift = best_shift['price']
        amount_shift = best_shift['amount']
        if not epoch_list:
            epoch_list = self.orderbook_dict.keys()
        lines = []
        for epoch_time in sorted(epoch_list):
            if epoch_time not in self.orderbook_dict:
                continue
            snapshot = self.orderbook_dict[epoch_time]
            fields = [
                str(epoch_time),
                '{} {}'.format(len(snapshot['asks']), len(snapshot['bids'])),
            ]
            for type_order in ('asks', 'bids'):
                last_price = 0
                entries = []
                for order in snapshot[type_order]:
                    current_price = order[0]
                    price = self._num_to_tuple(current_price-last_price, price_shift, self.price_num_digits)
                    last_price = current_price
                    amount = self._num_to_tuple(order[1], amount_shift, self.amount_num_digits)
                    entries.append(self._tuple_to_text(price) + ',' + self._tuple_to_text(amount))
                # Joining keeps the field even when the side is empty; trimming
                # the last character used to eat a '|' separator in that case.
                fields.append(' '.join(entries))
            lines.append('|'.join(fields) + '\n')
        return ''.join(lines)

    def _compress_bin(self, text):
        """Returns a gzip compressed binary from a text.

        Args:
            text (str): Text to be compressed.
        """
        return gzip.compress(bytes(text, 'utf-8'))

    def save_orderbook(self, base_path=None):
        """Save orderbook dict to a file.

        Snapshots are grouped by the calendar date of their epoch, as given by
        ``datetime.fromtimestamp`` (local time), and each group is appended as
        a gzip member to ``<base_path>/<exchange>/<date>.gz``.

        Args:
            base_path (str, optional): Root output directory. Defaults to './orderbook'.
        """
        if not base_path:
            base_path = './orderbook'
        base_path = os.path.join(base_path, self.exchange)
        os.makedirs(base_path, exist_ok=True)
        dates_dict = {}
        for k in sorted(self.orderbook_dict.keys()):
            tstr = str(datetime.datetime.fromtimestamp(k).date())
            dates_dict.setdefault(tstr, []).append(k)
        for dt, dt_epoch_list in dates_dict.items():
            orderbook_text = self._textfy_orderbook_dict(dt_epoch_list)
            out = self._compress_bin(orderbook_text)
            file_path = os.path.join(base_path, '{}.gz'.format(dt))
            # 'ab' creates the file when missing and appends otherwise.
            with open(file_path, 'ab') as f:
                f.write(out)

    def _clean_orderbook_dict(self):
        """Clean the orderbook dict."""
        self.orderbook_dict = {}

    def flush_orderbook(self, base_path=None):
        """Save orderbook dict to a file and clean the orderbook dict."""
        self.save_orderbook(base_path)
        self._clean_orderbook_dict()

    def read_orderbook_files(self, filepath_list):
        """Read orderbook files and converts It to a standard orderbook.

        Not implemented yet: the method currently does nothing.

        Args:
            filepath_list (list[str]): List of paths to valid orderbook files.
        """
        pass

    def update_exchange(self, exchange):
        """Update current exchange.

        Snapshots collected so far are discarded (not saved) when the
        requested exchange differs from the current one.

        Args:
            exchange (str): A valid exchange. Unknown names fall back to
                ``DEFAULT_EXCHANGE``.
        """
        if exchange!=self.exchange:
            self._clean_orderbook_dict()
        self.exchange = exchange if exchange in self.EXCHANGES else self.DEFAULT_EXCHANGE

    def run(self):
        """Thread body: poll the exchange until stop() is called.

        Waits a random 1..delay seconds first, then repeatedly fetches a
        snapshot, flushes to disk on roughly 10% of the iterations and sleeps
        ``delay`` seconds.
        """
        delay = self.EXCHANGES[self.exchange]['delay']
        time.sleep(random.randint(1, delay))
        while self._keep_running:
            print(self.exchange, '-', str(datetime.datetime.utcnow()))
            self.fetch_one()
            if random.randint(1, 100)<=10:
                print('> Saving', self.exchange)
                self.flush_orderbook()
            time.sleep(delay)
        print('Finishing', self.exchange)

    def stop(self):
        """Ask the polling loop in run() to end after its current iteration."""
        self._keep_running = False

    def terminate(self):
        """Stop polling and flush the snapshots collected so far."""
        self.stop()
        self.flush_orderbook()

# Command-line driver kept as an inert string literal: it is only assigned to
# ``x`` and is not executed by this cell. As written, it builds one OrderBook
# per exchange named in sys.argv (unknown names map to 'test'; with no
# arguments, every exchange except 'test') and then reads commands from input():
# 'r' starts all threads, 'q' terminates them and exits, 't' runs the doctests.
x="""
if __name__=="__main__":
    if len(sys.argv)>1:
        orderbook_instances = {}
        for k in sys.argv[1:]:
            exchange = k if k in OrderBook.EXCHANGES.keys() else 'test'
            orderbook_instances[exchange] = OrderBook(exchange)
    else:
        orderbook_instances = {k:OrderBook(k) for k in OrderBook.EXCHANGES.keys() if k!='test'}

    print(list(orderbook_instances.keys()))
    cmd = ''
    running = False
    print('Starting:')
    try:
        while True:
            if cmd=='r' and not running:
                running = True
                print('Running all')
                for k in orderbook_instances.keys():
                    orderbook_instances[k].start()
            if cmd=='q':
                print('Exiting')
                for k in orderbook_instances.keys():
                    orderbook_instances[k].terminate()
                sys.exit()
            if cmd=='t':
                doctest.testmod(verbose=True)
            cmd = input()
    except:
        for k in orderbook_instances.keys():
            orderbook_instances[k].terminate()
        sys.exit()
"""

In [None]:
#convert raw files to compact files
import gzip
import json
import os
M=1000  # number of raw files converted between two flushes to disk
A = -28800  # seconds added to the epoch parsed from each raw file name (-8 hours)
source = '/home/eol/Documents/bitcoin_listener/data/'
dest = '/home/eol/Documents/bitcoin_read_and_convert/convert/'
exchanges = os.listdir(source)
for e in exchanges:
    os.makedirs(os.path.join(dest, e), exist_ok=True)
for e in exchanges:
    files = sorted(os.listdir(os.path.join(source, e)))
    o = OrderBook(e)
    # Convert in chunks of M files so the in-memory orderbook dict stays bounded.
    for start in range(0, len(files), M):
        for estr in files[start:start+M]:
            # The epoch depends on the file name only (name minus its last three
            # characters, presumably the '.gz' suffix), so parse it once per file.
            # A malformed name is reported and skipped; previously the handler
            # below could hit an undefined `epoch`.
            try:
                epoch = int(estr[:-3])+A
            except ValueError as err:
                print(e, estr, err)
                continue
            with gzip.open(os.path.join(source,e,estr),'r') as gf:
                for line in gf:
                    try:
                        d = json.loads(line.decode("utf-8"))
                        d = o._convert_to_standard_orderbook(d)
                        o._add_orderbook(epoch, d)
                    except Exception as err:
                        print(e, epoch, err)
        o.flush_orderbook(dest)

In [87]:
from bitcoin_listener import OrderBook
import numpy
import datetime
def convert_file(text, exchange='bitstamp', lim=None):
    """Decode compact orderbook text back into prices and amounts.

    Every non-empty line is expected as ``<epoch>|<n_asks> <n_bids>|<asks>|<bids>``
    where each side is a space separated list of ``<price_delta>,<amount>``
    entries. Price deltas are accumulated into prices, and both values are
    divided by 10 to the power of the exchange's ``best_shift``.

    A side without orders may show up as an empty field or, in files where
    the separator was dropped, as a missing field; both decode to an empty
    list instead of raising.

    Args:
        text (str): Decompressed content of one or more orderbook files.
        exchange (str): Key of ``OrderBook.EXCHANGES`` the text belongs to.
        lim (int, optional): Max number of orders kept per side. A falsy
            value keeps them all.

    Returns:
        list: One ``[datetime, [n_asks, n_bids], asks, bids]`` entry per line,
        with asks/bids as lists of ``(price, amount)`` tuples. The datetime
        comes from ``datetime.fromtimestamp`` (local time).
    """
    lim = lim if lim else None
    adj_price = OrderBook.EXCHANGES[exchange]['best_shift']['price']
    adj_amount = OrderBook.EXCHANGES[exchange]['best_shift']['amount']

    def parse_side(field):
        # Decode one side; empty tokens (from an empty field) are ignored.
        orders = [p.split(',') for p in field.split(' ')[:lim] if p]
        prices = numpy.cumsum([float(p[0])/10**adj_price for p in orders])
        amounts = [float(p[1])/10**adj_amount for p in orders]
        return list(zip(prices, amounts))

    list_data = []
    for line in text.split('\n'):
        if not line:
            continue
        data = line.split('|')
        epoch = datetime.datetime.fromtimestamp(int(data[0]))
        qtd_askbid = [float(v) for v in data[1].split(' ')]
        sides = data[2:4]
        # With a single side field and zero asks announced, that field holds the bids.
        if len(sides) == 1 and qtd_askbid[0] == 0:
            sides.insert(0, '')
        sides += [''] * (2 - len(sides))
        list_ask_zip = parse_side(sides[0])
        list_bid_zip = parse_side(sides[1])
        list_data.append([epoch, qtd_askbid, list_ask_zip, list_bid_zip])
    return list_data

import os
source_orderbook = '/home/eol/Documents/bitcoin_compress/orderbook/'
dest = '/home/eol/Documents/bitcoin_compress/tests/'
list_exchanges = sorted(os.listdir(source_orderbook))
# Available dates are taken from the bitstamp directory (file names minus extension).
list_dates = [n.split('.')[0] for n in sorted(os.listdir(os.path.join(source_orderbook, 'bitstamp')))]

import gzip
import pandas
list_exchanges_filtered = ['bitstamp']
list_dates_filtered = list_dates[-3:]  # three last dates in sorted order

df_dict = {}

for exchange in list_exchanges_filtered:
    texts = []
    for date in list_dates_filtered:
        with gzip.open(os.path.join(source_orderbook, exchange, "{}.gz".format(date)),'r') as gf:
            texts.append(gf.read().decode('utf-8'))
    s = ''.join(texts)
    # Parse once per exchange; building the frame inside the date loop re-parsed
    # all previously read text on every iteration.
    df_dict[exchange] = pandas.DataFrame(convert_file(s, lim=3), columns=['dt', 'qtd', 'asks', 'bids'])
df_dict[exchange]

Unnamed: 0,dt,qtd,asks,bids
0,2017-11-16 00:02:40,"[1660.0, 4855.0]","[(7282.01, 1.0), (7292.13, 13.0), (7297.29, 11...","[(7282.0, 0.168), (7281.51, 0.528), (7280.5, 0..."
1,2017-11-16 00:05:41,"[1671.0, 4852.0]","[(7260.65, 1.0), (7265.16, 1.08), (7268.65, 13...","[(7260.64, 3.5), (7259.65, 0.57), (7254.57, 0...."
2,2017-11-16 00:08:43,"[1684.0, 4835.0]","[(7234.98, 0.2), (7235.0, 5.0), (7237.05, 0.135)]","[(7229.64, 1.34), (7227.4, 2.0), (7220.02, 1.79)]"
3,2017-11-16 00:11:44,"[1687.0, 4837.0]","[(7240.0, 0.0334), (7253.11, 0.2), (7256.57, 1...","[(7239.99, 1.33), (7232.25, 0.452), (7232.23, ..."
4,2017-11-16 00:14:46,"[1685.0, 4841.0]","[(7258.61, 0.755), (7259.0, 2.63), (7260.46, 0...","[(7243.46, 2.0), (7243.45, 0.572), (7242.95, 0..."
5,2017-11-16 00:17:48,"[1686.0, 4842.0]","[(7256.81, 0.992), (7256.84, 0.755), (7258.98,...","[(7240.0, 5.0), (7239.51, 0.529), (7239.01, 0...."
6,2017-11-16 00:20:49,"[1687.0, 4833.0]","[(7238.89, 1.71), (7239.0, 1.15), (7239.49, 0....","[(7224.05, 2.76), (7224.0, 2.0), (7221.38, 8.82)]"
7,2017-11-16 00:23:51,"[1689.0, 4842.0]","[(7245.0, 0.259), (7245.68, 0.13), (7247.49, 0...","[(7242.41, 0.0338), (7229.17, 1.38), (7229.08,..."
8,2017-11-16 00:26:59,"[1688.0, 4846.0]","[(7265.01, 0.17), (7266.83, 0.178), (7268.65, ...","[(7253.03, 4.0), (7252.03, 0.77), (7251.05, 0...."
9,2017-11-16 00:30:02,"[1687.0, 4849.0]","[(7275.09, 2.0), (7275.1, 11.8), (7275.11, 0.1...","[(7256.35, 0.755), (7256.33, 1.0), (7256.16, 1..."
