# itch_trade_signs_data

#### Juan Camilo Henao Londono - 07.02.2019
#### AG Guhr - Universitaet Duisburg-Essen

The plot of the function's results can be seen in the following [link]().

In [1]:
# Import modules

import numpy as np
import os

import gzip
import pickle

import itch_data_tools

# NOTE(review): __tau__ is not referenced by any code visible in this
# notebook; presumably a time-lag constant shared with related modules --
# confirm its purpose (and units) or remove it
__tau__ = 1000

### List of order types: 

* "B" = 1 -- Add buy order
* "S" = 2 -- Add sell order
* "E" = 3 -- Execute outstanding order in part
* "C" = 4 -- Cancel outstanding order in part
* "F" = 5 -- Execute outstanding order in full
* "D" = 6 -- Delete outstanding order in full
* "X" = 7 -- Bulk volume for the cross event
* "T" = 8 -- Execute non-displayed order

In [2]:
def itch_trade_signs_data(ticker, year, month, day, t_step):
    """
    Obtain the trade signs from the ITCH data and save them in a pickle file.

    The trade sign of an execution ('E' or 'F') is taken from the side of the
    order that is executed: +1 if the outstanding order was a sell order and
    -1 if it was a buy order. Every other event has trade sign 0.

    The result covers the whole time range from the opening of the market at
    9h30 (34 200 000 ms) to the closing at 16h (57 600 000 ms) with one entry
    per millisecond (23.4 million data). When several events share the same
    millisecond, the entry is the sign of the sum of their trade signs.
    Milliseconds in which nothing happens are filled with zeros, indicating
    that there were neither a buy nor a sell. Only events strictly inside the
    interval (9h30, 16h) are taken into account.

    The events in the data file are expected to reference only order ids that
    were added earlier in the same file; an execution of an unknown id raises
    a KeyError.

        :param ticker: string of the abbreviation of the stock to be analyzed
         (i.e. 'AAPL')
        :param year: string of the year to be analyzed (i.e '2016')
        :param month: string of the month to be analyzed (i.e '07')
        :param day: string of the day to be analyzed (i.e '07')
        :param t_step: time step in the data in ms. It is only forwarded to
         the header print and to the save function
        :return: None. The array of trade signs is saved with
         itch_data_tools.itch_save_data
    """

    function_name = itch_trade_signs_data.__name__
    itch_data_tools.itch_function_header_print_data(function_name, ticker,
                                                    ticker, year, month, day,
                                                    t_step)

    # Load data
    # The compressed file is opened in a context manager so the file handle
    # is closed as soon as the data is parsed

    with gzip.open('../../ITCH_{1}/{1}{2}{3}_{0}.csv.gz'
                   .format(ticker, year, month, day)) as data_file:
        data = np.genfromtxt(data_file, dtype='str', skip_header=1,
                             delimiter=',')

    # Lists of times, ids and types
    # Column 0 holds the time in ms, column 2 the order id and column 3 the
    # order type

    # List of order types:
    # "B" = 1 - > Add buy order
    # "S" = 2 - > Add sell order
    # "E" = 3 - > Execute outstanding order in part
    # "C" = 4 - > Cancel outstanding order in part
    # "F" = 5 - > Execute outstanding order in full
    # "D" = 6 - > Delete outstanding order in full
    # "X" = 7 - > Bulk volume for the cross event
    # "T" = 8 - > Execute non-displayed order
    # Any other symbol is mapped to 0
    type_codes = {'B': 1, 'S': 2, 'E': 3, 'C': 4,
                  'F': 5, 'D': 6, 'X': 7, 'T': 8}

    times_ = np.array([int(mytime) for mytime in data[:, 0]])
    ids_ = np.array([int(myid) for myid in data[:, 2]])
    types_ = np.array([type_codes.get(mytype, 0) for mytype in data[:, 3]])

    # Discard the cross events ('X') and the non-displayed executions ('T')
    # as they do not refer to an order of the visible order book

    keep = types_ < 7
    ids = ids_[keep]
    times = times_[keep]
    types = types_[keep]

    # Trade sign of every event

    trade_sign = np.zeros_like(types)

    # Side (type) of the last order added with each id
    order_side = {}

    for idx, (order_id, order_type) in enumerate(zip(ids.tolist(),
                                                     types.tolist())):

        # If the data is a sell or buy order remember its side. The event
        # itself is not a trade, so its trade sign stays zero
        if (order_type < 3):

            order_side[order_id] = order_type

        # If the data is an execution, the trade sign is given by the side
        # of the executed order: a sell order is hit by a buyer (+1) and a
        # buy order is hit by a seller (-1)
        elif (order_type == 3 or order_type == 5):

            side = order_side[order_id]

            if (side == 2):

                trade_sign[idx] = 1

            elif (side == 1):

                trade_sign[idx] = -1

        # Cancellations and deletions are not trades: trade sign stays zero

    # Ordering the data in the open market time

    # 34 200 000 ms = 9h30 - 57 600 000 ms = 16h
    open_ms = 34200000
    close_ms = 57600000

    # Logical and of the two conditions: the events must be strictly inside
    # the market trade hours
    day_times_ind = (times > open_ms) & (times < close_ms)

    trade_signs = trade_sign[day_times_ind]
    times_signs = times[day_times_ind]

    # Completing the full time entrances

    # As there can be several values for the same millisecond, the trade
    # signs of each millisecond are summed and the sign of the sum is used.
    # The sum per millisecond is computed in a single vectorized pass over
    # the events instead of looping over the 23.4 million milliseconds

    n_ms = close_ms - open_ms + 1
    net_sign = np.bincount(times_signs - open_ms, weights=trade_signs,
                           minlength=n_ms)
    trade_signs_complete_most = np.sign(net_sign)

    # Saving data

    itch_data_tools.itch_save_data(function_name, trade_signs_complete_most,
                                   ticker, ticker, year, month, day, t_step)

    return None

In [3]:
# Test: compute and save the trade signs of one stock for each listed day
ticker, year, month = 'MSFT', '2016', '03'
t_step = '1'
days = ['07', '08', '09', '10', '11']

for day in days:
    itch_trade_signs_data(ticker=ticker, year=year, month=month, day=day,
                          t_step=t_step)

ITCH data
itch_trade_signs_data
Processing data for the stock MSFT the 2016.03.07
Time step: 1ms


FileNotFoundError: [Errno 2] No such file or directory: '../itch_data_2016/itch_trade_signs_data_1ms/'