# TAQ trade signs data

En la implementación de los trade signs, divido la tarea en dos partes. Primero obtengo los resultados para todos los eventos en un día y luego, con otra función, los calculo para todos los segundos del día.

### Trade signs data por evento por día



In [1]:
# Modules

import numpy as np
import pandas as pd
import pickle

In [None]:
def taq_trade_signs_event_data_numpy(ticker, year, month, day):
    """Computes the trade signs of every event.

    Using the daily TAQ data computes the trade sign of every trade event in
    a day. The trade signs are computed using the equation (1) of the
    `paper <https://arxiv.org/pdf/1603.01580.pdf>`_.
    As the trade signs are not directly given by the TAQ data, they must be
    inferred from the trade prices: the sign of an event is the sign of the
    price change with respect to the previous event and, when the price does
    not change, the sign of the previous event is reused.

    NOTE(review): earlier documentation stated that the returned values cover
    the time range from 9h40 to 15h50. Nothing in this function filters by
    time, so that presumably depends on the content of the pickle file --
    confirm.

    :param ticker: string of the abbreviation of the stock to be analyzed
        (i.e. 'AAPL').
    :param year: string of the year to be analyzed (i.e '2016').
    :param month: string of the month to be analyzed (i.e '07').
    :param day: string of the day to be analyzed (i.e '07').
    :return: tuple -- ``(time_t, ask_t, identified_trades)``. The first two
        are the arrays loaded from the pickle file, the third one is a numpy
        array with one sign (+1. or -1.) per event.
    :raises FileNotFoundError: if the pickle file of the requested day does
        not exist.
    :raises AssertionError: if a trade price is zero or a trade sign could
        not be identified.
    """

    # Use this function's own name for the header (the previous code read
    # the name from a different function object).
    function_name = taq_trade_signs_event_data_numpy.__name__
    taq_data_tools_article_reproduction \
        .taq_function_header_print_data(function_name, ticker, ticker, year,
                                        month, day)

    # Load data. The context manager guarantees the file handle is closed.
    # The third element stored in the pickle is not needed here.
    # NOTE: pickle files must come from a trusted source.
    file_path = (
        '../../taq_data/pickle_dayly_data_{1}/TAQ_{0}_trades_{1}{2}{3}.pickle'
        .format(ticker, year, month, day))
    with open(file_path, 'rb') as pickle_file:
        time_t, ask_t, _ = pickle.load(pickle_file)

    # All the trades must have a price different to zero
    assert not np.sum(ask_t == 0)

    # Trades identified using equation (1)
    identified_trades = np.zeros(len(time_t))
    # Seed for the first event: at t_idx == 0 the index t_idx - 1 is -1, so
    # an unchanged price falls back to this value.
    identified_trades[-1] = 1

    # Implementation of equation (1). Sign of the price change between
    # consecutive trades.
    # NOTE(review): for t_idx == 0 the price is compared with ask_t[-1] (the
    # last event of the day) because of the negative index. Kept as is to
    # preserve the existing results -- confirm this is intended.
    for t_idx in range(len(time_t)):

        diff = ask_t[t_idx] - ask_t[t_idx - 1]

        if diff:
            identified_trades[t_idx] = np.sign(diff)

        else:
            # No price change: reuse the sign of the previous event
            identified_trades[t_idx] = identified_trades[t_idx - 1]

    # All the identified trades must be different to zero
    assert not np.sum(identified_trades == 0)

    return (time_t, ask_t, identified_trades)

### Trade signs data por segundo por día



In [None]:
def taq_trade_signs_time_data(ticker, date):
    """Computes the trade sign of every second of a day.

    The per-event trade signs given by taq_trade_signs_event_data are
    aggregated second by second using the equation (2) of the
    `paper <https://arxiv.org/pdf/1603.01580.pdf>`_: the trade sign of a
    second is the sign of the sum of the signs of the events in that second.
    The result covers the time values 34801 to 57000 (presumably seconds
    after midnight, i.e. 9h40 to 15h50 -- confirm against the TAQ data).

    Seconds without events get a trade sign of zero (neither a buy nor a
    sell), keep a price of zero and have their entry in the time array set to
    zero. For the other seconds the price is the one of the last event in the
    second.

    The result is also stored with taq_save_data before being returned.

    :param ticker: string of the abbreviation of the stock to be analyzed
     (i.e. 'AAPL').
    :param date: string with the date of the data to be extracted
     (i.e. '2008-01-02').
    :return: tuple -- ``(full_time, price_signs, trade_signs)`` as numpy
     arrays, or None (after printing 'No data') if a FileNotFoundError is
     raised while computing or saving the data.
    """

    date_parts = date.split('-')
    year, month, day = date_parts[0], date_parts[1], date_parts[2]

    function_name = taq_trade_signs_time_data.__name__
    taq_data_tools_article_reproduction.taq_function_header_print_data(
        function_name, ticker, ticker, year, month, day)

    try:
        # Trade signs of all the events of the day
        events = taq_trade_signs_event_data(ticker, year, month, day)
        time_t, ask_t, identified_trades = events

        # Reproducing S. Wang values. In her results the time interval for the
        # trade signs is [34801, 57000]
        full_time = np.arange(34801, 57001)
        n_seconds = len(full_time)

        trade_signs = np.zeros(n_seconds)
        price_signs = np.zeros(n_seconds)

        # Implementation of equation (2). Trade sign in each second
        for sec_idx, second in enumerate(full_time):

            # Events that happened in the interval [second, second + 1)
            in_second = (time_t >= second) * (time_t < second + 1)

            signs_sum = np.sum(identified_trades[in_second])
            trade_signs[sec_idx] = int(np.sign(signs_sum))

            try:
                # Price of the last event in the second
                price_signs[sec_idx] = ask_t[in_second][-1]
            except IndexError:
                # No events in this second: the time entry is set to zero
                full_time[sec_idx] = 0

        result = (full_time, price_signs, trade_signs)

        # Saving data
        taq_data_tools_article_reproduction.taq_save_data(
            function_name, result, ticker, ticker, year, month, day)

        return result

    except FileNotFoundError as e:
        print('No data')
        print(e)
        print()
        return None