# itch_trade_sign_classification_V1_exp

#### Juan Camilo Henao Londono - 28.02.2019
#### AG Guhr - Universitaet Duisburg-Essen

Test to find the number of identified trades and the number of matches in the ITCH data using the two models used by S. Wang. The models are given as equations (2) and (3) in this [paper](https://arxiv.org/pdf/1603.01580.pdf).

#### THIS IS NOT THE FINAL VERSION OF THE IMPLEMENTATION

In [1]:
# Modules

import numpy as np
import os
from matplotlib import pyplot as plt
%matplotlib inline

import gzip
import pickle

# NOTE(review): __tau__ is not referenced by any cell visible in this
# notebook -- confirm whether it is still needed.
__tau__ = 1000

In [2]:
def itch_trade_signs_test(ticker, year, month, day):
    """Extract per-event trade signs and prices from one day of ITCH data.

    Reads ``../ITCH_<year>/<year><month><day>_<ticker>.csv.gz`` (comma
    separated, one header row). Column 0 is used as the time, column 2 as
    the order id, column 3 as the message-type letter and column 5 as the
    price.

    Message-type letters and the numeric codes used internally:

        "B" = 1 -> Add buy order
        "S" = 2 -> Add sell order
        "E" = 3 -> Execute outstanding order in part
        "C" = 4 -> Cancel outstanding order in part
        "F" = 5 -> Execute outstanding order in full
        "D" = 6 -> Delete outstanding order in full
        "X" = 7 -> Bulk volume for the cross event
        "T" = 8 -> Execute non-displayed order

    Rows with code 7 or 8 are discarded. Any other letter gets code 0 and,
    because 0 < 3, is stored like an add order without a side; executions
    against such an order produce no trade sign.

    Parameters
    ----------
    ticker, year, month, day : str
        Pieces of the file name, inserted verbatim into the path.

    Returns
    -------
    tuple of numpy.ndarray
        ``(price_signs, trade_signs, times_signs)``, restricted to events
        whose time lies strictly between 9.666666 h and 15.833333 h (the
        filter divides the time by 3600 * 1000, i.e. treats it as
        milliseconds). For an execution ("E"/"F") ``trade_signs`` is +1
        when the executed order was a sell and -1 when it was a buy, and
        ``price_signs`` is the price of that order. All other events carry
        0 in both arrays.

    Raises
    ------
    KeyError
        If an execution refers to an order id that was never added.
    """
    # Letter -> numeric code; unknown letters fall back to 0.
    type_codes = {'B': 1, 'S': 2, 'E': 3, 'C': 4,
                  'F': 5, 'D': 6, 'X': 7, 'T': 8}

    # Load data. The context manager closes the gzip handle once
    # genfromtxt has consumed it.
    path = '../ITCH_{1}/{1}{2}{3}_{0}.csv.gz'.format(ticker, year, month, day)
    with gzip.open(path) as handle:
        data = np.genfromtxt(handle, dtype='str', skip_header=1,
                             delimiter=',')

    times_ = np.array([int(value) for value in data[:, 0]])
    ids_ = np.array([int(value) for value in data[:, 2]])
    types_ = np.array([type_codes.get(letter, 0)
                       for letter in data[:, 3].tolist()])
    prices_ = np.array([int(value) for value in data[:, 5]])

    # Drop cross events and non-displayed executions (codes 7 and 8).
    keep = types_ < 7
    ids = ids_[keep]
    times = times_[keep]
    types = types_[keep]
    prices = prices_[keep]

    trade_sign = np.zeros_like(types)
    price_sign = np.zeros_like(types)

    # Order id -> (type code, price) of the most recent add for that id.
    # Executions carry no side or price of their own, so both are taken
    # from the order they refer to.
    added_orders = {}

    events = zip(ids.tolist(), types.tolist(), prices.tolist())
    for row, (order_id, code, price) in enumerate(events):

        if code < 3:
            added_orders[order_id] = (code, price)

        elif code in (3, 5):
            side, order_price = added_orders[order_id]

            if side == 2:
                # A resting sell order was executed.
                trade_sign[row] = 1
                price_sign[row] = order_price

            elif side == 1:
                # A resting buy order was executed.
                trade_sign[row] = -1
                price_sign[row] = order_price

        # Cancels and deletes (codes 4 and 6) are not trades: both
        # outputs stay 0 for them.

    # Keep only the events inside the trading hours. Both conditions must
    # hold (logical AND).
    hours = times / 3600 / 1000
    day_times_ind = (hours > 9.666666) & (hours < 15.833333)

    price_signs = price_sign[day_times_ind]
    trade_signs = trade_sign[day_times_ind]
    times_signs = times[day_times_ind]

    return (price_signs, trade_signs, times_signs)

In [4]:
# Classify one ticker/day and cache the three resulting arrays on disk.
ticker = 'AAPL'
year = '2008'
month = '01'
day = '07'
price, trades, time = itch_trade_signs_test(ticker, year, month, day)

# The context manager makes sure the pickle file is flushed and closed.
with open('trade_classification.pickle', 'wb') as pickle_file:
    pickle.dump((price, trades, time), pickle_file)

In [5]:
# Report the number of events returned for each of the three arrays.
for label, n_rows in (('Length Price: ', len(price)),
                      ('Length Trades: ', len(trades)),
                      ('Length Time: ', len(time))):
    print(label, n_rows)
print()

Length Price:  1516144
Length Trades:  1516144
Length Time:  1516144



In [6]:
# Count the events that carry a trade sign / a price (non-zero entries).
print('Length Trades != 0: ', int((trades != 0).sum()))
print('Length Price  != 0: ', int((price != 0).sum()))

Length Trades != 0:  120287
Length Price  != 0:  120287


In [7]:
# Keep only the events that are actual trades (non-zero trade sign); these
# are the inputs for the theoretical value.
is_trade = trades != 0
price_no_0 = price[is_trade]
trades_no_0 = trades[is_trade]
time_no_0 = time[is_trade]

# Sorted array of the distinct timestamps at which at least one trade occurs.
time_no_0_set = np.unique(time_no_0)

In [8]:
# Number of distinct timestamps that contain at least one trade.
print('Length Time (set): ', time_no_0_set.size)

Length Time (set):  83411


In [9]:
# Theoretical trade sign per timestamp: the sign of the sum of the signs of
# all trades that share the timestamp.

# time_no_0_set is sorted and holds every value present in time_no_0, so
# searchsorted gives, for each trade, the position of its timestamp.
slot_of_trade = np.searchsorted(time_no_0_set, time_no_0)

# One pass over the trades instead of one full boolean mask per timestamp.
summed_signs = np.bincount(slot_of_trade, weights=trades_no_0,
                           minlength=len(time_no_0_set))

trades_teo = np.sign(summed_signs)

In the experimental case, we only know the following arrays:
```Python
    time_no_0
    time_no_0_set
    price_no_0
```
The idea is to reconstruct the trade signs.

In [10]:
# Experimental trade sign per timestamp, rebuilt from prices and times only
# (trades_no_0 is not used here).
trades_exp = np.zeros(len(time_no_0_set))

# count: position of the next unprocessed trade in time_no_0 / price_no_0.
# The while loop below stops at the first trade whose time differs from
# t_val, so it relies on trades with equal times being contiguous in
# time_no_0 -- TODO confirm the input is time-ordered.
count = 0
# last: 0 at the start, afterwards the last entry of sign_sum from the most
# recent timestamp that went through the multi-trade branch.
last = 0

for t_idx, t_val in enumerate(time_no_0_set):
    
    # Exactly one trade at this timestamp.
    if (len(time_no_0[time_no_0 == t_val]) == 1):
        
        # NOTE(review): this takes the sign of the price itself, not of a
        # price change; for positive prices it is always +1. `last` is not
        # updated in this branch. Confirm this is intended.
        trades_exp[t_idx] = np.sign(price_no_0[count])
        count += 1
    
    else:
    
        # One sign slot per trade at this timestamp.
        sign_sum = np.zeros(len(time_no_0[time_no_0 == t_val]))
        idx = 0

        while (count < len(time_no_0) and time_no_0[count] == t_val):

            # Price change with respect to the previous trade. For
            # count == 0 the index -1 wraps to the last element, but then
            # idx == 0 and diff is not used.
            diff = price_no_0[count] - price_no_0[count - 1]

            if (idx == 0):

                # NOTE(review): `last` holds a sign (or the initial 0), yet
                # it is subtracted from a price here -- confirm whether the
                # previous price was meant instead.
                sign_sum[0] = np.sign(price_no_0[count] - last)
                count += 1
                idx += 1

            elif (diff):

                # Price moved: the sign of the change is the trade sign.
                sign_sum[idx] = np.sign(diff)
                count += 1
                idx += 1

            else:

                # Price unchanged: reuse the sign of the previous trade.
                sign_sum[idx] = sign_sum[idx - 1]
                count += 1
                idx += 1

        # Sign for the timestamp: sign of the summed individual signs.
        trades_exp[t_idx] = np.sign(np.sum(sign_sum))
        last = sign_sum[-1]

In [11]:
# Share of timestamps where the reconstructed sign equals the theoretical one.
matches = trades_teo == trades_exp
percentage = round(sum(matches) / len(trades_teo) * 100, 2)
print('Percentage : ', percentage, '%')

Percentage :  51.82 %


In [12]:
# Number of timestamps where theoretical and experimental signs agree.
sum(trades_teo == trades_exp)

43225

In [13]:
# Total number of timestamps compared.
len(trades_teo)

83411