# Tick Bar, Volume Bar and Dollar Bar

In [1]:
import gc
import glob
import os
from typing import Dict, List

import dask.bag as db
import numpy as np
import pandas as pd
from dask.diagnostics import ProgressBar

In [2]:
from tqdm.notebook import tqdm
tqdm.pandas()

  from pandas import Panel


In [3]:
from utils import aggregate, aggregate_trade, convert_to_bar

In [4]:
# Quick check of the aggregate helper on a small unsorted series.
aggregate(pd.Series([3,2,1,4,5,6]))

{'open': 3, 'high': 6, 'low': 1, 'close': 6, 'mean': 3.5, 'median': 3.5}

## Tick Bar

In [7]:
def generate_volume_bars(hdf_file: str, bar_type: str, bar_size: np.int64, output_file: str)->None:
    """Build tick, volume or dollar bars from a trade file and save them as HDF5.

    Trades are consumed in file order and accumulated until the running
    measure reaches ``bar_size``: the number of trades for ``TickBar``, the
    summed ``quantity`` for ``VolumeBar``, or the summed
    ``quantity * price`` for ``DollarBar``. The accumulated trades are then
    handed to ``convert_to_bar`` and the accumulator is reset. Trades left
    over at the end of the file that never reach ``bar_size`` are not
    emitted as a bar.

    Args:
        hdf_file: Path of the input HDF5 file holding the trades.
        bar_type: Bar type, one of TickBar, VolumeBar or DollarBar.
        bar_size: Bar size, number of ticks, volume or dollar.
        output_file: Path of the HDF5 file to write; missing parent
            directories are created.

    Raises:
        ValueError: If ``bar_type`` is not one of the supported bar types.
    """
    # Validate with an exception rather than ``assert`` so the check still
    # runs when Python is started with -O.
    if bar_type not in ('TickBar', 'VolumeBar', 'DollarBar'):
        raise ValueError(f'Unknown bar_type: {bar_type}')

    df = pd.read_hdf(hdf_file)
    # NOTE(review): presumably converts nanosecond timestamps to
    # milliseconds -- confirm against the schema of the input files.
    df['timestamp']=df['timestamp'].astype(np.int64) // int(1e6)

    cur = 0
    trade_msges = []
    bars = []

    for _, row in tqdm(df.iterrows(), total=df.shape[0]):
        trade_msges.append(row)

        if bar_type == 'TickBar':
            cur += 1
        elif bar_type == 'VolumeBar':
            cur += row['quantity']
        else:  # 'DollarBar', guaranteed by the validation above
            cur += row['quantity'] * row['price']

        if cur >= bar_size:
            # trade_msges always holds at least the current row here.
            bars.append(convert_to_bar(bar_type, bar_size, pd.DataFrame(trade_msges)))
            # reset
            cur = 0
            trade_msges = []

    bars_df = pd.DataFrame(bars)
    # Drop the raw trades and intermediate list before writing the output.
    del df
    del bars
    gc.collect()

    # os.makedirs('') raises, so only create a directory when the output
    # path actually has a directory component.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    bars_df.to_hdf(
        output_file,
        # Key is the file name minus its trailing '.hdf5', with dots
        # replaced by double underscores.
        key=os.path.basename(output_file)[0:-len('.hdf5')].replace('.', '__'),
        mode='w',
        complevel=9)

In [8]:
# Build 64-trade tick bars for a single input file.
generate_volume_bars('/data/hdf5/OKEx.Spot.XMR_USDT.hdf5', 'TickBar', 64, '/data/bars/TickBar/64/TickBar.64.OKEx.Spot.XMR_USDT.hdf5')

HBox(children=(FloatProgress(value=0.0, max=413724.0), HTML(value='')))




In [9]:
def generate_multi(hdf5_files: List[str], bar_type: str, bar_sizes: List[int], output_dir: str)->None:
    """Generate bars for every combination of input file and bar size.

    Each (file, bar size) pair becomes one task in a dask bag, and each task
    calls ``generate_volume_bars``. The output path for a task is
    ``<output_dir>/<bar_size>/<bar_type>.<bar_size>.<input file name>``.

    Args:
        hdf5_files: Paths of the input HDF5 trade files.
        bar_type: Bar type passed through to ``generate_volume_bars``.
        bar_sizes: Bar sizes to generate for each input file.
        output_dir: Root directory for the generated bar files.
    """
    def _build_one(task):
        # A task is one (input path, bar size) pair.
        src_path, size = task
        out_name = f'{bar_type}.{size}.{os.path.basename(src_path)}'
        out_path = os.path.join(output_dir, str(size), out_name)
        return generate_volume_bars(src_path, bar_type, size, out_path)

    tasks = [(src_path, size) for src_path in hdf5_files for size in bar_sizes]
    with ProgressBar():
        bag = db.from_sequence(tasks)
        bag.map(_build_one).compute()

In [10]:
# Collect the input HDF5 files whose names match the XMR_USD* pattern.
hdf_files = glob.glob('/data/hdf5/*XMR_USD*.hdf5')

In [11]:
hdf_files

['/data/hdf5/OKEx.Swap.XMR_USDT.hdf5',
 '/data/hdf5/MXC.Spot.XMR_USDT.hdf5',
 '/data/hdf5/Kraken.Spot.XMR_USD.hdf5',
 '/data/hdf5/Bitfinex.Spot.XMR_USD.hdf5',
 '/data/hdf5/Huobi.Spot.XMR_USDT.hdf5',
 '/data/hdf5/OKEx.Spot.XMR_USDT.hdf5',
 '/data/hdf5/Binance.Swap.XMR_USDT.hdf5',
 '/data/hdf5/OKEx.Swap.XMR_USD.hdf5',
 '/data/hdf5/Binance.Spot.XMR_USDT.hdf5']

In [12]:
# Build tick bars at every listed bar size for every matched input file.
generate_multi(
    hdf_files,
    'TickBar',
    [4, 8, 16, 32, 64, 128, 50, 100, 200, 500, 1000],
    '/data/bars/TickBar',
)

[########################################] | 100% Completed | 16min 28.0s


## References

* [Tick, Volume, Dollar Volume Bars.ipynb](https://github.com/BlackArbsCEO/Adv_Fin_ML_Exercises/blob/master/notebooks/Tick%2C%20Volume%2C%20Dollar%20Volume%20Bars.ipynb)