# itch_taq_trade_signs_load_test

#### Juan Camilo Henao Londono - 21.02.2019
#### AG Guhr - Universitaet Duisburg-Essen

In [1]:
# Modules

import numpy as np
import os

import gzip
import pandas as pd

import itch_data_tools

In [2]:
def itch_taq_trade_signs_load_test(ticker, year, month, day):
    """
    Obtain the time, trade signs, volumes and prices of the trades found in
    an ITCH file. These data are used to test the trade sign classification
    models.

    Every trade message ('E', 'F' or 'T') is matched by order number with the
    limit order ('B' or 'S') it executes against. The sign, price and (for
    'F' messages) volume of the trade are taken from that limit order. Trades
    without a matching limit order keep a sign of 0. Finally only the trades
    inside the market-time window are returned.
        :param ticker: string of the abbreviation of the stock to be analyzed
                       (i.e. 'AAPL')
        :param year: string of the year to be analyzed (i.e '2008')
        :param month: string of the month to be analyzed (i.e '07')
        :param day: string of the day to be analyzed (i.e '07')
        :return: tuple of four numpy arrays of equal length with the times,
                 signs (+1, -1 or 0 when not identified), volumes and prices
                 of the trades inside the market-time window
    """

    function_name = itch_taq_trade_signs_load_test.__name__
    t_step = '-'
    itch_data_tools.itch_function_header_print_data(function_name, ticker,
                                                    ticker, year, month, day,
                                                    t_step)

    # Load full data using cols with values time, order, type, shares and
    # price. The context manager guarantees the gzip handle is closed.
    file_path = '../../ITCH_{1}/{1}{2}{3}_{0}.csv.gz' \
        .format(ticker, year, month, day)
    with gzip.open(file_path, 'rt') as itch_file:
        data = pd.read_csv(itch_file, usecols=(0, 2, 3, 4, 5),
                           dtype={'Time': 'uint32', 'Order': 'uint64',
                                  'T': str, 'Shares': 'uint16',
                                  'Price': 'float64'})

    # Rescale the stored prices
    # NOTE(review): presumably the file stores prices in units of 1/10000 of
    # the currency - confirm against the data description.
    data['Price'] = data['Price'] / 10000

    # Select only trade orders. Visible ('E' and 'F') and hidden ('T')
    trade_data = data[data['T'].isin(['E', 'F', 'T'])]
    trade_data_time = trade_data['Time'].values
    trade_data_kind = trade_data['T'].values
    trade_data_volume = trade_data['Shares'].values
    trade_data_price = trade_data['Price'].values

    # Select only limit orders and reduce them to the ones that have the same
    # order number as a trade order
    limit_data = data[data['T'].isin(['B', 'S'])]
    limit_data = limit_data[limit_data['Order'].isin(trade_data['Order'])]
    limit_data_price = limit_data['Price'].values
    limit_is_sell = (limit_data['T'] == 'S').tolist()
    # The remaining volume of each limit order is updated while the trades
    # are processed. Plain Python ints are used so the pandas-backed arrays
    # are never written to and so the subtraction below cannot wrap around
    # as it would with uint16 values.
    limit_remaining = limit_data['Shares'].tolist()

    # Index of the limit orders by order number (rows kept in file order), so
    # each trade is matched with one dictionary lookup instead of a scan over
    # all the limit orders. Fully executed orders are removed from the index,
    # so a deleted order can never be matched again, not even by a trade
    # whose order number is 0.
    rows_by_order = {}
    for row, order in enumerate(limit_data['Order'].tolist()):
        rows_by_order.setdefault(order, []).append(row)

    # Arrays to store the info of the identified trades
    length_trades = len(trade_data)
    trade_signs = np.zeros(length_trades)
    trade_volumes = np.zeros(length_trades, dtype='uint16')
    trade_price = np.zeros(length_trades)

    trades = zip(trade_data['Order'].tolist(), trade_data_kind.tolist(),
                 trade_data_volume.tolist())

    for t_idx, (order, kind, volume) in enumerate(trades):

        # First (not yet fully executed) limit order with the same order
        # number as the trade order. Trades without one stay unidentified.
        rows = rows_by_order.get(order)
        if not rows:
            continue
        l_idx = rows[0]

        # Price of the trade (Limit data)
        trade_price[t_idx] = limit_data_price[l_idx]

        # Trade sign identification: +1 when the limit order is a sell order
        # ('S') and -1 when it is a buy order ('B')
        trade_signs[t_idx] = 1. if limit_is_sell[l_idx] else -1.

        # The volume depends on the trade type. If it is 'F' the value is
        # taken from the limit data and the order is deleted from the index.
        # Otherwise the value is taken from the trade data and the volume of
        # the limit order is reduced by the volume of the trade.
        if kind == 'F':

            trade_volumes[t_idx] = limit_remaining[l_idx]
            rows.pop(0)

        else:

            trade_volumes[t_idx] = volume
            diff_volumes = limit_remaining[l_idx] - volume

            # A non full execution must leave volume in the limit order
            assert diff_volumes > 0

            limit_remaining[l_idx] = diff_volumes

    # NOTE(review): the original code asserted here
    # len(trade_signs != 0) == len(trade_data_types != 5), which compares the
    # lengths of two boolean arrays of the same size and therefore was always
    # true. If the intent is "every visible trade was identified", compare
    # np.count_nonzero(trade_signs) with the number of visible trades after
    # confirming that it holds for the data.

    # To use the hidden trades, I change the values in the computed arrays
    # with the information of visible trades to have the hidden information.
    hidden_pos = trade_data_kind == 'T'
    trade_volumes[hidden_pos] = trade_data_volume[hidden_pos]
    trade_price[hidden_pos] = trade_data_price[hidden_pos]

    # Keep only the trades inside the market-time window.
    # NOTE(review): the division by 3600 and 1000 suggests Time is given in
    # milliseconds since midnight, so the limits are about 9:40 and 15:50.
    trade_hours = trade_data_time / 3600 / 1000
    market_time = (trade_hours >= 9.666666) & (trade_hours < 15.833333)

    trade_times_market = trade_data_time[market_time]
    trade_signs_market = trade_signs[market_time]
    trade_volumes_market = trade_volumes[market_time]
    trade_price_market = trade_price[market_time]

    return (trade_times_market, trade_signs_market, trade_volumes_market,
            trade_price_market)

In [3]:
# Stock and date used for the test run
ticker, year, month, day = 'AAPL', '2008', '01', '07'

# Only the trade signs are needed here; the other returned arrays are dropped
_, trade_signs, _, _ = itch_taq_trade_signs_load_test(
    ticker, year, month, day)

# A sign different from zero means the trade was matched with a limit order
identified_signs = trade_signs[trade_signs != 0]
print('Number of identified trades:', len(identified_signs))

ITCH data
itch_taq_trade_signs_load_test
Processing data for the stock AAPL the 2008.01.07
Time step: -ms
Number of identified trades: 120287
