# itch_trade_sign_classification_V2_exp

#### Juan Camilo Henao Londono - 01.03.2019
#### AG Guhr - Universitaet Duisburg-Essen

As they are some difference between my classification results and the results of S. Wang, I reviewed the trade sign code to confirm the good behavior. In the test, one principal value to corroborate is the number of identified trades that is already known. The version modified of the trade signs is the one used in the ITCH - TAQ code. In the future it will be important to divide the trade sign function in obtaining the trade signs and filling the time. The first function should return trade signs, prices, time and volume.

In [1]:
# Modules

import numpy as np
import os

import gzip

In [2]:
ticker = 'AAPL'
year = '2008'
month = '01'
day = '07'

In [3]:
"""
Obtain the reference trade signs, prices, volumes and time from an ITCH
file. These data is used to test the trade sign classification models.
    :param ticker: string of the abbreviation of the stock to be analized
                   (i.e. 'AAPL')
    :param year: string of the year to be analized (i.e '2008')
    :param month: string of the month to be analized (i.e '07')
    :param day: string of the day to be analized (i.e '07')
"""

print('ITCH data')
print('Processing data for the stock ' + ticker + ' the ' + year
      + '.' + month + '.' + day)

# Load data

data = np.genfromtxt(gzip.open('../ITCH_{1}/{1}{2}{3}_{0}.csv.gz'
                     .format(ticker, year, month, day)),
                     dtype='str', skip_header=1, delimiter=',')

# Arrays of times, ids, types, volumes and prices
# Arrays of all the available information in the data.
# To obtain the trade signs we only use the limit orders ('B' and
# 'S') to identify the time, id, volume and price of the trade.
# 'C', 'D' and 'T' are not used.

# Array of order types:
# "B" = 1 - > Add buy order
# "S" = 2 - > Add sell order
# "E" = 3 - > Execute outstanding order in part
# "C" = 0 - > Cancel outstanding order in part
# "F" = 4 - > Execute outstanding order in full
# "D" = 0 - > Delete outstanding order in full
# "X" = 0 - > Bulk volume for the cross event
# "T" = 0 - > Execute non-displayed order
times_ = np.array([int(mytime) for mytime in data[:, 0]])
ids_ = np.array([int(myid) for myid in data[:, 2]])
types_ = np.array([1 * (mytype == 'B') +
                   2 * (mytype == 'S') +
                   3 * (mytype == 'E') +
                   4 * (mytype == 'C') +
                   3 * (mytype == 'F') +
                   4 * (mytype == 'D') +
                   0 * (mytype == 'X') +
                   0 * (mytype == 'T') for mytype in data[:, 3]])
volumes_ = np.array([int(myvolume) for myvolume in data[:, 4]])
prices_ = np.array([int(myprice) for myprice in data[:, 5]])

ids = ids_[types_ != 0]
times = times_[types_ != 0]
types = types_[types_ != 0]
volumes = volumes_[types_ != 0]
prices = prices_[types_ != 0]

ITCH data
Processing data for the stock AAPL the 2008.01.07


In [9]:
# Reference arrays using the original values or the length of the original
# lists

times_ref = np.zeros(len(ids))
types_ref = np.zeros(len(ids))
volumes_ref = np.zeros(len(ids))
prices_ref = np.zeros(len(ids))
newids = {}
hv = 0

# Help arrays with the data of the buy orders and sell orders

hv_times = times[types < 3]
hv_types = types[types < 3]
hv_volumes = volumes[types < 3]
hv_prices = prices[types < 3]

# Arrays to store the results

trades_pos_len = np.sum(types == 3)
trade_sign = np.zeros(trades_pos_len)
price_sign = np.zeros(trades_pos_len)
volume_sign = np.zeros(trades_pos_len)
time_sign = np.zeros(trades_pos_len)
count = 0

In [10]:
# Fill the reference lists where the values of 'T' are 'E','C','F' or 'D'

# For the data in the length of the ids list (all data)
for iii in range(len(ids)):

    # If the data is a sell or buy order
    if (types[iii] < 3):

        # Insert in the dictionary newids a key with the valor of the id
        # and the value of hv (a counter) that is the index in hv_types
        newids[ids[iii]] = hv

        # Increase the value of hv
        hv += 1

    # If the data is not a sell or buy order
    elif (types[iii] == 3):

        # Fill the values of prices_ref with no prices ('E','C','F' or 'D')
        # with the type of the order
        types_ref[iii] = hv_types[newids[ids[iii]]]

        # Fill the values of prices_ref with no prices ('E','C','F' or 'D')
        # with the time of the order
        times_ref[iii] = hv_times[newids[ids[iii]]]

        # Fill the values of prices_ref with no prices ('E','C','F' or 'D')
        # with the volume of the order
        volumes_ref[iii] = hv_volumes[newids[ids[iii]]]
        
        # Fill the values of prices_ref with no prices ('E','C','F' or 'D')
        # with the price of the order
        prices_ref[iii] = hv_prices[newids[ids[iii]]]
        
        # A sell order is a trade triggered by a market order to buy
        if (hv_types[newids[ids[iii]]] == 2):

            trade_sign[count] = 1.
            price_sign[count] = prices_ref[iii]
            volume_sign[count] = volumes_ref[iii]
            time_sign[count] = times_ref[iii]
            count += 1

        # A buy order is a trade triggered by a market order to sell
        else:

            trade_sign[count] = - 1.
            price_sign[count] = prices_ref[iii]
            volume_sign[count] = volumes_ref[iii]
            time_sign[count] = times_ref[iii]
            count += 1
print(sum(types_ref != 0))
print(count)
assert count == len(trade_sign)

133606
133606


In [7]:
# Ordering the data in the open market time

# This line behaves as an or.the two arrays must achieve a condition, in
# this case, be in the market trade hours (09:40 - 15:50)
day_times_ind = (1. * time_sign / 3600 / 1000 > 9.666666) * \
                (1. * time_sign / 3600 / 1000 < 15.833333) > 0

price_signs = price_sign[day_times_ind]
trade_signs = trade_sign[day_times_ind]
volume_signs = volume_sign[day_times_ind]
times_signs = time_sign[day_times_ind]

day_time = (1. * times / 3600 / 1000 > 9.666666) * \
                (1. * times / 3600 / 1000 < 15.833333) > 0
types_day_time = types[day_time]
# The length of the executed oustanding order in part and in full must
# be the same as the length of the identified trade signs
print(np.sum(types_day_time == 3))
print(len(trade_signs[trade_signs != 0]))

assert (np.sum(types_day_time == 3)
        == len(trade_signs[trade_signs != 0]))
# The length of the price, volume and time must be equal to the length of
# the identified trade signs
assert (len(price_signs[price_signs != 0])
        == len(trade_signs[trade_signs != 0]))
assert (len(volume_signs[volume_signs != 0])
        == len(trade_signs[trade_signs != 0]))
assert (len(times_signs[times_signs != 0])
        == len(trade_signs[trade_signs != 0]))

#return (price_signs, trade_signs, volume_signs, times_signs)

120287
118489


AssertionError: 

In [None]:
#_, trade_signs, _, _ = itch_taq_trade_signs_load_test(ticker, year, month, day)

print('Results:')
print('Expected number of identified trades: 120287')
print('Number of identified trades:         ' , len(trade_signs[trade_signs != 0]))
print('Difference: ', 120287 - len(trade_signs[trade_signs != 0]))
print('Percentage: ', round((120287 - len(trade_signs[trade_signs != 0])) / 120287 * 100, 2), '%')