# Tick Bar, Volume Bar and Dollar Bar

In [2]:
import gc
import glob
import os
from operator import itemgetter
from typing import Dict, List, Tuple

import dask.bag as db
import numpy as np
import pandas as pd
from dask.diagnostics import ProgressBar

In [3]:
# Enable tqdm's pandas integration (registers the `progress_apply`-style
# methods on pandas objects) using the notebook-flavoured progress bar.
from tqdm.notebook import tqdm
tqdm.pandas()

  from pandas import Panel


In [4]:
from utils import aggregate, aggregate_trade, convert_to_bar

In [5]:
# Quick sanity check of the `aggregate` helper from utils on a tiny series.
aggregate(pd.Series([3,2,1,4,5,6]))

{'open': 3, 'high': 6, 'low': 1, 'close': 6, 'mean': 3.5, 'median': 3.5}

## Tick Bar

In [6]:
def generate_volume_bars(input_csv_file: str, bar_type: str, bar_size: np.int64, output_csv_file: str)->None:
    """Build tick, volume or dollar bars from a CSV file of trades.

    The input file is streamed in chunks. Trades are accumulated until the
    running total reaches ``bar_size``; the accumulated trades are then handed
    to ``convert_to_bar`` and the running total is reset to zero. The running
    total is the number of trades for ``TickBar``, the summed ``quantity``
    column for ``VolumeBar`` and the summed ``quantity * price`` for
    ``DollarBar``.

    Trades left over at the end of the file that do not fill a complete bar
    are discarded.

    Args:
        input_csv_file: The input CSV file of trades. ``VolumeBar`` reads the
            ``quantity`` column; ``DollarBar`` reads ``quantity`` and ``price``.
        bar_type: Bar type, one of TickBar, VolumeBar or DollarBar.
        bar_size: Bar size, number of ticks, volume or dollar.
        output_csv_file: The output CSV file. Missing parent directories are
            created.

    Returns:
        None.

    Raises:
        ValueError: If ``bar_type`` is not one of the supported bar types.
    """
    # Validate up front with a real exception: `assert` is stripped under -O.
    if bar_type not in ('TickBar', 'VolumeBar', 'DollarBar'):
        raise ValueError(f'Unknown bar_type: {bar_type}')

    cur = 0
    trade_msges = []
    bars = []

    # Read in chunks so the whole trade history never has to fit in memory.
    for chunk in pd.read_csv(input_csv_file, chunksize=8192*32):
        for _, row in chunk.iterrows():
            trade_msges.append(row)

            if bar_type == 'TickBar':
                cur += 1
            elif bar_type == 'VolumeBar':
                cur += row['quantity']
            else:  # DollarBar (bar_type was validated above)
                cur += row['quantity'] * row['price']

            if cur >= bar_size:
                bar = convert_to_bar(bar_type, bar_size, pd.DataFrame(trade_msges))
                bars.append(bar)
                # Reset the accumulator; any overshoot beyond bar_size is dropped.
                cur = 0
                trade_msges = []

    bars_df = pd.DataFrame(bars)
    # os.makedirs('') raises FileNotFoundError, so only create the parent
    # directory when the output path actually has one.
    output_dir = os.path.dirname(output_csv_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    bars_df.to_csv(output_csv_file, index=False)
    # Optional: release memory eagerly, useful when many tasks run in parallel.
    del bars_df
    del bars
    gc.collect()

In [7]:
# Trial run on a single file: build 64-tick bars.
# NOTE(review): the input is the OKEx *Swap* CSV but the output file name says
# *Spot* — confirm which of the two is intended.
generate_volume_bars('/data/csv/OKEx.Swap.XMR_USDT.csv', 'TickBar', 64, '/data/bars/TickBar/64/TickBar.64.OKEx.Spot.XMR_USDT.csv')

In [9]:
def generate_tasks(csv_files: List[str], bar_type: str, bar_sizes: List[int], output_dir: str)->List[Tuple[str, str, int, str]]:
    """Build the argument tuples for every (input file, bar size) combination.

    Args:
        csv_files: Input CSV file paths.
        bar_type: Bar type, used both as an argument and as the output file
            name prefix.
        bar_sizes: Bar sizes to generate for each input file.
        output_dir: Root output directory; each bar size gets its own
            sub-directory named after the size.

    Returns:
        A list of ``(input_file, bar_type, bar_size, output_file)`` tuples,
        ordered by input file first and bar size second. The output file is
        ``<output_dir>/<bar_size>/<bar_type>.<bar_size>.<input basename>``.
    """
    tasks = []
    for csv_path in csv_files:
        for size in bar_sizes:
            out_name = f'{bar_type}.{size}.{os.path.basename(csv_path)}'
            out_path = os.path.join(output_dir, str(size), out_name)
            tasks.append((csv_path, bar_type, size, out_path))
    return tasks

In [16]:
def generate_parallell(tasks: List[Tuple[str, str, int, str]])->None:
    """Run ``generate_volume_bars`` for every task through a dask bag.

    Args:
        tasks: Tuples of ``(input_csv_file, bar_type, bar_size,
            output_csv_file)``, passed positionally to
            ``generate_volume_bars``.

    Returns:
        None. A dask progress bar is displayed while the tasks run.
    """
    def _run_task(task):
        # Forward the first four tuple fields as positional arguments.
        return generate_volume_bars(task[0], task[1], task[2], task[3])

    with ProgressBar():
        bag = db.from_sequence(tasks)
        bag.map(_run_task).compute()

In [18]:
# Collect every trade CSV whose name contains "XMR_USD"; the wildcard after it
# means both XMR_USD and XMR_USDT pairs are matched.
csv_files = glob.glob('/data/csv/*XMR_USD*.csv')

In [19]:
csv_files

['/data/csv/MXC.Spot.XMR_USDT.csv',
 '/data/csv/Binance.Swap.XMR_USDT.csv',
 '/data/csv/OKEx.Swap.XMR_USDT.csv',
 '/data/csv/Binance.Spot.XMR_USDT.csv',
 '/data/csv/Kraken.Spot.XMR_USD.csv',
 '/data/csv/Bitfinex.Spot.XMR_USD.csv',
 '/data/csv/OKEx.Spot.XMR_USDT.csv',
 '/data/csv/OKEx.Swap.XMR_USD.csv',
 '/data/csv/Huobi.Spot.XMR_USDT.csv']

In [20]:
# Build one TickBar task per (CSV file, bar size) pair; each bar size is
# written to its own sub-directory under /data/bars/TickBar.
tasks = generate_tasks(
    csv_files,
    'TickBar',
    [4, 8, 16, 32, 64, 128, 50, 100, 200, 500, 1000],
    '/data/bars/TickBar',
)

In [21]:
# Generate all bar files through dask, with a progress bar.
generate_parallell(tasks)

[########################################] | 100% Completed | 15min 29.2s


## References

* [Tick, Volume, Dollar Volume Bars.ipynb](https://github.com/BlackArbsCEO/Adv_Fin_ML_Exercises/blob/master/notebooks/Tick%2C%20Volume%2C%20Dollar%20Volume%20Bars.ipynb)