# taq_cross_response_transactions_test

#### Juan Camilo Henao Londono - 21.05.2019
#### AG Guhr - Universitaet Duisburg-Essen

In [1]:
# Modules

import numpy as np
import os
import pickle

# Module-level constant. It is not referenced by any of the cells visible in
# this notebook; presumably the maximum time lag (tau) used by the response
# function calculations - TODO confirm.
__tau__ = 1000

In [2]:
def taq_trade_signs_all_transactions_data(ticker, year, month, day, model='juan'):
    """
    Obtain the trade signs from the TAQ data for all the transactions.

    The trade signs are calculated using the equation (1) of
    https://arxiv.org/pdf/1603.01580.pdf. As the trade signs are not directly
    given by the TAQ data, they must be inferred from the trade prices: the
    sign of a trade is the sign of the price change with respect to the
    previous trade, and a trade without price change inherits the sign of
    the previous trade. For further calculations we use the whole time range
    from the opening of the market at 9h40 to the closing at 15h50 in seconds
    (22200 seconds).
        :param ticker: string of the abbreviation of the stock to be analyzed
         (i.e. 'AAPL')
        :param year: string of the year to be analyzed (i.e '2016')
        :param month: string of the month to be analyzed (i.e '07')
        :param day: string of the day to be analyzed (i.e '07')
        :param model: string that selects which boundary second is dropped
         from the data: 'juan' drops the trades at second 57000 and 'wang'
         drops the trades at second 34800 (default 'juan')
        :return: tuple (time_t, ask_t, identified_trades) with the filtered
         trade times, the filtered trade prices and the trade sign of every
         trade (+1. or -1.)
        :raises ValueError: if model is neither 'juan' nor 'wang'
        :raises AssertionError: if a trade has a price equal to zero or a
         trade sign could not be identified
    """

    # Reproducing S. Wang values. In her results the time interval for the
    # trade signs is [34801, 57000]. The model is validated before touching
    # the disk so an unknown value fails with a clear message.
    if model == 'juan':
        excluded_time = 57000
    elif model == 'wang':
        excluded_time = 34800
    else:
        raise ValueError(
            "model must be 'juan' or 'wang', got {!r}".format(model))

    # Load data
    # NOTE: pickle can execute arbitrary code when loading, so only files
    # generated by this project must be read here.
    file_name = ('../TAQ_2008/TAQ_py/TAQ_{}_trades_{}{}{}.pickle'
                 .format(ticker, year, month, day))
    with open(file_name, 'rb') as trades_file:
        time_t, ask_t = pickle.load(trades_file)

    condition = time_t != excluded_time
    time_t = time_t[condition]
    ask_t = ask_t[condition]

    # All the trades must have a price different to zero
    assert not np.any(ask_t == 0), 'There are trades with price zero'

    # Trades identified using equation (1)
    num_trades = len(time_t)
    identified_trades = np.zeros(num_trades)

    # The last position is used as the seed for the first trade: index -1 is
    # what the first iteration reads as its "previous" trade. Nothing to seed
    # when no trades are left after the filter.
    if num_trades:
        identified_trades[-1] = 1

    # Implementation of equation (1). Sign of the price change between
    # consecutive trades
    # NOTE(review): for the first trade the "previous" price is ask_t[-1]
    # (the last trade of the day), because index -1 wraps around. This is
    # the historical behavior and it is kept to not change the results.
    for t_idx in range(num_trades):

        diff = ask_t[t_idx] - ask_t[t_idx - 1]

        if diff:

            identified_trades[t_idx] = np.sign(diff)

        else:

            # No price change: inherit the sign of the previous trade
            identified_trades[t_idx] = identified_trades[t_idx - 1]

    # All the identified trades must be different to zero
    assert not np.any(identified_trades == 0), \
        'There are trades without an identified sign'

    return (time_t, ask_t, identified_trades)

In [3]:
def taq_midpoint_all_transactions_data(ticker, year, month, day):
    """
    Obtain the midpoint price from the TAQ data for all the transactions.

    For further calculations we use the full time range from 9h40 to 15h50 in
    seconds (22200 seconds). Quotes with an ask equal to zero and quotes at
    second 57000 are discarded.
        :param ticker: string of the abbreviation of the stock to be analyzed
                       (i.e. 'AAPL')
        :param year: string of the year to be analyzed (i.e '2008')
        :param month: string of the month to be analyzed (i.e '07')
        :param day: string of the day to be analyzed (i.e '07')
        :return: tuple (time_q, bid_q, ask_q, midpoint, spread) with the
                 quote times, the best bids, the best asks, the midpoint
                 prices and the spreads, in that order
    """

    # Load data
    # TAQ data gives directly the quotes data in every second that there is
    # a change in the quotes
    # NOTE: pickle can execute arbitrary code when loading, so only files
    # generated by this project must be read here.
    file_name = ('../TAQ_2008/TAQ_py/TAQ_{}_quotes_{}{}{}.pickle'
                 .format(ticker, year, month, day))
    with open(file_name, 'rb') as quotes_file:
        time_q_, bid_q_, ask_q_ = pickle.load(quotes_file)

    # Some files are corrupted, so there are some zero ask values that do
    # not make sense.
    # Reproducing S. Wang values. In her results the time interval for the
    # midpoint is [34800, 56999]
    condition = (ask_q_ != 0.) & (time_q_ != 57000)
    time_q = time_q_[condition]
    bid_q = bid_q_[condition]
    ask_q = ask_q_[condition]

    assert len(bid_q) == len(ask_q)

    midpoint = (bid_q + ask_q) / 2
    spread = ask_q - bid_q

    return time_q, bid_q, ask_q, midpoint, spread

In [15]:
# Stock pair and trading day used for the comparison
ticker_i, ticker_j = 'AAPL', 'MSFT'
year, month, day = '2008', '01', '07'

# Midpoint prices of the first stock and trade signs of the second stock;
# only the time stamps and the main series of each loader are kept
(time_m, _, _, midpoint, _) = taq_midpoint_all_transactions_data(
    ticker_i, year, month, day)
(time_t, _, identified_trades) = taq_trade_signs_all_transactions_data(
    ticker_j, year, month, day)

In [16]:
# Number of quote entries followed by the number of trade entries,
# one value per line
print(len(midpoint), len(identified_trades), sep='\n')

574108
84124


In [17]:
# Time stamps of the quotes followed by the time stamps of the trades,
# one array per line
print(time_m, time_t, sep='\n')

[34800 34800 34800 ... 56999 56999 56999]
[34800 34800 34800 ... 56999 56999 56999]


In [18]:
# Ratio between the number of coinciding time stamps and the number of quotes.
# NOTE(review): the sizes printed by the earlier cell differ (574108 quotes
# vs 84124 trades), so `time_m == time_t` cannot be evaluated element by
# element; the 0.0 shown below presumably comes from NumPy collapsing the
# failed comparison into a single False, and newer NumPy versions may raise
# an error instead. TODO confirm the intended comparison (e.g. a membership
# test such as np.isin) before relying on this value.
print(np.sum(time_m == time_t) / len(time_m))

0.0


  """Entry point for launching an IPython kernel.
