# itch_taq_trade_signs_load_test

#### Juan Camilo Henao Londono - 21.02.2019
#### AG Guhr - Universitaet Duisburg-Essen

In [1]:
# Modules

import numpy as np
import os

import gzip

import itch_data_tools

In [2]:
def itch_taq_trade_signs_load_test(ticker, year, month, day):
    """
    Obtain the reference trade signs, prices, volumes and times from an ITCH
    file. These data are used to test the trade sign classification models.

    Every execution message ('E' or 'F') is matched through its order id with
    the buy or sell order it refers to. An execution against a sell order
    gets the trade sign +1 and an execution against a buy order gets the
    trade sign -1. The price, volume and time reported for each execution
    are the ones of the referenced order.
        :param ticker: string of the abbreviation of the stock to be analyzed
                       (i.e. 'AAPL')
        :param year: string of the year to be analyzed (i.e '2008')
        :param month: string of the month to be analyzed (i.e '07')
        :param day: string of the day to be analyzed (i.e '07')
        :return: tuple (price_signs, trade_signs, volume_signs, times_signs)
                 of numpy arrays restricted to the entries whose reference
                 time lies in the market trade hours (09:40 - 15:50)
        :raises KeyError: if an execution, cancel or delete message refers to
                          an order id that was not added before in the file
        :raises AssertionError: if one of the consistency checks at the end
                                of the function fails
    """

    function_name = itch_taq_trade_signs_load_test.__name__
    t_step = 'irrelevant '
    itch_data_tools.itch_function_header_print_data(function_name, ticker,
                                                    ticker, year, month, day,
                                                    t_step)

    # Load data
    # The context manager guarantees that the compressed file is closed as
    # soon as the data is read, even if the parsing fails

    with gzip.open('../../ITCH_{1}/{1}{2}{3}_{0}.csv.gz'
                   .format(ticker, year, month, day)) as itch_file:
        data = np.genfromtxt(itch_file, dtype='str', skip_header=1,
                             delimiter=',')

    # Lists of times, ids, types, volumes and prices
    # List of all the available information available in the data excluding
    # the last two columns

    # List of order types:
    # "B" = 1 - > Add buy order
    # "S" = 2 - > Add sell order
    # "E" = 3 - > Execute outstanding order in part
    # "C" = 4 - > Cancel outstanding order in part
    # "F" = 5 - > Execute outstanding order in full
    # "D" = 6 - > Delete outstanding order in full
    # "X" = 7 - > Bulk volume for the cross event
    # "T" = 8 - > Execute non-displayed order
    # Any other symbol is mapped to 0
    type_codes = {'B': 1, 'S': 2, 'E': 3, 'C': 4,
                  'F': 5, 'D': 6, 'X': 7, 'T': 8}

    times_ = np.array([int(mytime) for mytime in data[:, 0]])
    ids_ = np.array([int(myid) for myid in data[:, 2]])
    types_ = np.array([type_codes.get(mytype, 0) for mytype in data[:, 3]])
    volumes_ = np.array([int(myvolume) for myvolume in data[:, 4]])
    prices_ = np.array([int(myprice) for myprice in data[:, 5]])

    # The cross events ('X') and the non-displayed executions ('T') are
    # discarded
    visible = types_ < 7

    ids = ids_[visible]
    times = times_[visible]
    types = types_[visible]
    volumes = volumes_[visible]
    prices = prices_[visible]

    # Help lists with the data of the buy orders and sell orders

    added = types < 3

    hv_prices = prices[added]
    hv_types = types[added]
    hv_times = times[added]
    hv_volumes = volumes[added]

    # Dictionary that maps an order id to its index in the help lists and
    # counter with the number of added orders found so far
    newids = {}
    hv = 0

    trade_sign = 0 * types
    price_sign = 0 * types
    volume_sign = 0 * types
    time_sign = 0 * types

    # Fill the sign lists in the positions of the executions ('E', 'F')

    # For the data in the length of the ids list (all data)
    for iii, (order_id, order_type) in enumerate(zip(ids, types)):

        # If the data is a sell or buy order
        if order_type < 3:

            # Insert in the dictionary newids a key with the value of the id
            # and the value of hv (a counter) that is the index in hv_types
            newids[order_id] = hv

            # Increase the value of hv
            hv += 1
            continue

        # Every other message ('E', 'C', 'F', 'D') must refer to an order
        # that was added before. An unknown id raises a KeyError
        ref = newids[order_id]

        # Only the executions in part and in full generate a trade
        if order_type != 3 and order_type != 5:
            continue

        if hv_types[ref] == 2:
            # Execution against a sell order
            trade_sign[iii] = 1
        elif hv_types[ref] == 1:
            # Execution against a buy order
            trade_sign[iii] = -1
        else:
            continue

        # The price, volume and time are the ones of the referenced order
        # NOTE(review): the time used here is the time of the order and not
        # the time of the execution message - confirm this is intended
        price_sign[iii] = hv_prices[ref]
        volume_sign[iii] = hv_volumes[ref]
        time_sign[iii] = hv_times[ref]

    # Ordering the data in the open market time

    # This mask behaves as an and. Both conditions must be achieved, in this
    # case, the time must be in the market trade hours (09:40 - 15:50). The
    # times are converted from milliseconds to hours
    hours_sign = 1. * time_sign / 3600 / 1000
    day_times_ind = (hours_sign > 9.666666) & (hours_sign < 15.833333)

    price_signs = price_sign[day_times_ind]
    trade_signs = trade_sign[day_times_ind]
    volume_signs = volume_sign[day_times_ind]
    times_signs = time_sign[day_times_ind]

    # The number of executed outstanding orders in part and in full must be
    # the same as the number of identified trade signs. The comparison is
    # done before the market hours selection because the selection drops the
    # executions outside the trade hours
    num_executions = (np.count_nonzero(types == 3)
                      + np.count_nonzero(types == 5))
    assert num_executions == np.count_nonzero(trade_sign)
    # The length of the price, volume and time must be equal to the length of
    # the identified trade signs
    num_trades = np.count_nonzero(trade_signs)
    assert np.count_nonzero(price_signs) == num_trades
    assert np.count_nonzero(volume_signs) == num_trades
    assert np.count_nonzero(times_signs) == num_trades

    return (price_signs, trade_signs, volume_signs, times_signs)

In [3]:
# Stock and date used to run the test of the load function
ticker, year, month, day = 'AAPL', '2008', '01', '07'

# Only the trade signs (second element of the returned tuple) are needed
trade_signs = itch_taq_trade_signs_load_test(ticker, year, month, day)[1]

# Count the entries with a trade sign different from zero
num_identified_trades = len(trade_signs[trade_signs != 0])
print('Number of identified trades:', num_identified_trades)

ITCH data
itch_taq_trade_signs_load_test
Processing data for the stock AAPL the 2008.01.07
Time step: irrelevant ms
Number of identified trades: 118489
