In [4]:
import numpy as np
from matplotlib import pyplot as plt
import gzip

import pickle

%matplotlib inline

In [None]:
def trade_sign_reshape(trade_sign, time_t_step):
    """Aggregate a trade-sign series into one value per time bin.

    ``trade_sign`` is split into ``len(time_t_step)`` consecutive groups of
    equal size and every group is reduced to a single number.

    Parameters
    ----------
    trade_sign : array_like
        One-dimensional sequence of trade signs. Its length must be an exact
        multiple of ``len(time_t_step)``; otherwise ``np.reshape`` raises
        ``ValueError``.
    time_t_step : sized
        Sequence with one entry per time bin. Only its length is used.

    Returns
    -------
    tuple of numpy.ndarray
        ``(trade_sign_j_sec_avg, trade_sign_j_sec_nr, trade_sign_j_sec_sum)``

        * ``trade_sign_j_sec_avg``: sign of the sum of each bin
          (1 if the sum is positive, -1 if negative, 0 if it is zero).
        * ``trade_sign_j_sec_nr``: sum of the absolute values in each bin;
          it is zero only when every entry of the bin is zero.
        * ``trade_sign_j_sec_sum``: plain sum of each bin.
    """
    # One row per time bin; the number of columns is inferred.
    grouped = np.reshape(trade_sign, (len(time_t_step), -1))

    trade_sign_j_sec_sum = np.sum(grouped, axis=1)

    # Reassign the trade sign: a positive sum gives 1 and a negative sum
    # gives -1 (boolean arithmetic keeps an integer dtype; a zero sum stays 0).
    trade_sign_j_sec_avg = (1 * (trade_sign_j_sec_sum > 0)
                            - 1 * (trade_sign_j_sec_sum < 0))

    # The sum of absolute values tells apart "no trade at all in the bin"
    # from "buys and sells cancelled each other out".
    trade_sign_j_sec_nr = np.sum(np.absolute(grouped), axis=1)

    return (trade_sign_j_sec_avg, trade_sign_j_sec_nr, trade_sign_j_sec_sum)

In [None]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is closed as soon as the object has been read.
    """
    # NOTE(review): pickle can execute arbitrary code while loading; only use
    # this on data files produced by this project.
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)


def cross_response_data(ticker_i, ticker_j, day, tau_val, t_step):
    """Compute the cross response between the midpoint of one stock and the
    trade signs of another for time lags ``1 .. tau_val - 1``.

    Parameters
    ----------
    ticker_i : str
        Ticker whose midpoint price file is loaded.
    ticker_j : str
        Ticker whose trade sign file is loaded.
    day : str
        Day of the month as it appears in the file names (e.g. ``'08'``).
    tau_val : int
        Length of the returned response array; lags ``1`` to ``tau_val - 1``
        are evaluated.
    t_step : int
        Sampling stride applied to the midpoint and time arrays, and size of
        the bins used to aggregate the trade signs (printed as ms).

    Returns
    -------
    tuple
        ``(cross_response_tau, trade_sign_j_sec_avg, trade_sign_j_sec_nr,
        trade_sign_j_sec_sum, time, trade_sign_j)`` where
        ``cross_response_tau[0]`` is left at zero, the three
        ``trade_sign_j_sec_*`` arrays come from ``trade_sign_reshape`` and
        ``time`` / ``trade_sign_j`` are the arrays as loaded from disk.
    """
    print('Cross response function data')
    print('Processing data for the stock i ' + ticker_i + ' and stock j ' +
          ticker_j + ' the day ' + day + ' March, 2016')
    print('Time step: ', t_step, 'ms')

    # Load data
    midpoint_i = _load_pickle(
        '../Cross_response_individual_stock/Data/midpoint_data/midpoint_201603{}_{}.pickl'
        .format(day, ticker_i))
    trade_sign_j = _load_pickle(
        '../Cross_response_individual_stock/Data/trade_signs_data/trade_signs_most_201603{}_{}.pickl'
        .format(day, ticker_j))
    time = _load_pickle(
        '../Cross_response_individual_stock/Data/midpoint_data/time.pickl')

    # Array of the average of each tau. 10^3 s used by Wang
    cross_response_tau = np.zeros(tau_val)

    # Keep one midpoint value and one time stamp every t_step entries
    midpoint_i_sec = midpoint_i[::t_step]
    time_t_step = time[::t_step]

    # Trade signs aggregated over the same bins
    (trade_sign_j_sec_avg,
     trade_sign_j_sec_nr,
     trade_sign_j_sec_sum) = trade_sign_reshape(trade_sign_j, time_t_step)

    # Only bins holding at least one non-zero trade sign enter the average.
    # The mask and the selected signs do not depend on tau, so they are
    # computed once outside the loop.
    has_trades = trade_sign_j_sec_nr != 0
    active_signs = trade_sign_j_sec_avg[has_trades]

    # Calculating the midpoint log return and the cross response function
    for tau in range(1, tau_val):

        # Log return between each midpoint and the one tau bins later; the
        # last tau bins have no future value and are padded with zeros so
        # the array keeps the length of the trade sign arrays.
        log_return_i_sec = np.append(
            np.log(midpoint_i_sec[tau:] / midpoint_i_sec[:-tau]),
            np.zeros(tau))

        cross_response_tau[tau] = np.mean(
            log_return_i_sec[has_trades] * active_signs)

    return (cross_response_tau, trade_sign_j_sec_avg, trade_sign_j_sec_nr, trade_sign_j_sec_sum, time, trade_sign_j)

In [2]:
# Run configuration: both tickers are the same stock, so the "cross"
# response computed below is the response of the stock to its own trades.
ticker_i, ticker_j = 'AAPL', 'AAPL'
day = '08'
# Number of time lags and sampling step handed to cross_response_data
tau_val, t_step = 1000, 1000
# Number of aggregated bins shown in the zoomed trade sign plot
n = 50

In [None]:
# Compute the response and keep the intermediate arrays for the cells below
(cross_response_tau, trade_sign_j_sec_avg, trade_sign_j_sec_nr,
 trade_sign_j_sec_sum, time, trade_sign_j) = cross_response_data(
    ticker_i, ticker_j, day, tau_val, t_step)

# Response against the lag index on a logarithmic x axis
fig, ax = plt.subplots(figsize=(16, 9))
ax.semilogx(cross_response_tau, '-g',
            label='Stock i {} - Stock j {} - Day {}'
            .format(ticker_i, ticker_j, day))
ax.set_xlabel(r'Time lag $[\tau]$')
ax.set_ylabel(r'Cross response $ R_{ij} (\tau) $')
ax.legend(loc='best')
ax.set_title('Cross response - ticker i {} ticker j {} - {}ms'
             .format(ticker_i, ticker_j, t_step))
# Scientific notation on the y axis
ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
ax.grid(True)
fig.tight_layout()

In [None]:
# Zoom on the first n aggregated bins.
# NOTE(review): the "/ 3600 / 1000" factor converts to hours only if `time`
# is in milliseconds - confirm against the pickled time array.
raw_window = slice(None, n * t_step)           # every original sample
bin_starts = slice(None, n * t_step, t_step)   # first sample of each bin

time_t_step = time[raw_window] / 3600 / 1000  # time of the original data
sec = time[bin_starts] / 3600 / 1000
# Aggregated values are drawn 1/7200 to the right of the start of their bin
time_reshape = time[bin_starts] / 3600 / 1000 + 1/7200

trade_sign_orig = trade_sign_j[raw_window]
trade_sum = trade_sign_j_sec_sum[:n]
# Zero baseline with one entry per bin start
zero = np.zeros(len(sec))

In [None]:
# Original trade signs, their per-bin sums and a tick at every bin start
fig, ax = plt.subplots(figsize=(16, 9))
ax.plot(time_t_step, trade_sign_orig)
ax.plot(time_reshape, trade_sum, '*-')
ax.plot(sec, zero, '|')
ax.grid(True)
fig.savefig('../../a.svg', format='svg', dpi=1200)

In [6]:
# Run configuration for the second pass: same stock on both sides, with a
# smaller sampling step than the first pass.
ticker_i, ticker_j = 'AAPL', 'AAPL'
# Ticker used to build the ITCH file name further down
ticker = 'AAPL'
day = '08'
# Number of time lags and sampling step handed to cross_response_data
tau_val, t_step = 1000, 100
# Number of aggregated bins shown in the zoomed trade sign plot
n = 50

In [None]:
# Compute the response and keep the intermediate arrays for the cells below
(cross_response_tau, trade_sign_j_sec_avg, trade_sign_j_sec_nr,
 trade_sign_j_sec_sum, time, trade_sign_j) = cross_response_data(
    ticker_i, ticker_j, day, tau_val, t_step)

# Response against the lag index on a logarithmic x axis
fig, ax = plt.subplots(figsize=(16, 9))
ax.semilogx(cross_response_tau, '-g',
            label='Stock i {} - Stock j {} - Day {}'
            .format(ticker_i, ticker_j, day))
ax.set_xlabel(r'Time lag $[\tau]$')
ax.set_ylabel(r'Cross response $ R_{ij} (\tau) $')
ax.legend(loc='best')
ax.set_title('Cross response - ticker i {} ticker j {} - {}ms'
             .format(ticker_i, ticker_j, t_step))
# Scientific notation on the y axis
ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
ax.grid(True)
fig.tight_layout()

In [None]:
# Zoom on the first n aggregated bins.
# Times are divided by 3600 * 1000 so the x axis is in hours.
# NOTE(review): this assumes `time` is expressed in milliseconds - confirm
# against the pickled time array.
time_t_step = time[:n * t_step] / 3600 / 1000  # time of the original data
# First time stamp of every bin
sec = time[:n * t_step:t_step] / 3600 / 1000
# Draw every aggregated value at the centre of its bin: shift the bin start
# by half a step, expressed in the same units as the axis.
time_reshape = sec + t_step / 2 / 3600 / 1000
trade_sign_orig = trade_sign_j[:n * t_step]
trade_sum = trade_sign_j_sec_sum[:n]
# Zero baseline with one entry per bin start
zero = np.zeros(len(sec))

In [None]:
# Original trade signs, their per-bin sums and a tick at every bin start
fig, ax = plt.subplots(figsize=(16, 9))
ax.plot(time_t_step, trade_sign_orig)
ax.plot(time_reshape, trade_sum, '*-')
ax.plot(sec, zero, '|')
ax.grid(True)
fig.savefig('../../a.svg', format='svg', dpi=1200)

# Test of the trade signs function generator

In [8]:
# Load data

# The compressed file is opened in a context manager so the handle is closed
# as soon as the table has been parsed. Every field is read as a string and
# the header row is skipped.
with gzip.open('../ITCH_2016/201603{}_{}.csv.gz'
               .format(day, ticker)) as itch_file:
    data = np.genfromtxt(itch_file,
                         dtype='str', skip_header=1, delimiter=',')

# Lists of times, ids, types, volumes and prices
# List of all the available information available in the data excluding
# the last two columns

# List of order types:
# "B" = 1 - > Add buy order
# "S" = 2 - > Add sell order
# "E" = 3 - > Execute outstanding order in part
# "C" = 4 - > Cancel outstanding order in part
# "F" = 5 - > Execute outstanding order in full
# "D" = 6 - > Delete outstanding order in full
# "X" = 7 - > Bulk volume for the cross event
# "T" = 8 - > Execute non-displayed order
order_type_codes = {'B': 1, 'S': 2, 'E': 3, 'C': 4,
                    'F': 5, 'D': 6, 'X': 7, 'T': 8}

# Column 0 holds the time, column 2 the order id and column 3 the type
times_ = np.array([int(mytime) for mytime in data[:, 0]])
ids_ = np.array([int(myid) for myid in data[:, 2]])
# Any letter that is not listed above is encoded as 0
types_ = np.array([order_type_codes.get(mytype, 0)
                   for mytype in data[:, 3]])

# Drop the cross ("X") and non-displayed ("T") events
keep_event = types_ < 7
ids = ids_[keep_event]
times = times_[keep_event]
types = types_[keep_event]

In [None]:
# Reference arrays
# Zero-filled arrays with the same length and dtype as the event arrays.
# For every event that points back to an earlier add order they receive the
# type and the time of that add order.

types_ref = np.zeros_like(types)
times_ref = np.zeros_like(times)

# Maps an order id to the position of its add order in the helper arrays
newids = {}
# Number of add orders seen so far, which is also the position the next add
# order occupies in the helper arrays
hv = 0

# Helper arrays holding only the add (buy / sell) orders

is_add_order = types < 3
hv_types = types[is_add_order]
hv_times = times[is_add_order]

trade_sign = np.zeros_like(types)

# Walk through every event in file order
for iii, (order_id, order_type) in enumerate(zip(ids, types)):

    # Add buy / add sell order: remember where it sits in the helper arrays.
    # A limit order is not a trade, so its trade sign is explicitly zero.
    if order_type < 3:
        newids[order_id] = hv
        hv += 1
        trade_sign[iii] = 0
        continue

    # Every other event refers to an order added earlier: look it up once
    # and copy its type and time into the reference arrays.
    ref_pos = newids[order_id]
    ref_type = hv_types[ref_pos]
    types_ref[iii] = ref_type
    times_ref[iii] = hv_times[ref_pos]

    if order_type == 3 or order_type == 5:
        # Execution, in part ('E') or in full ('F'): the sign depends on the
        # side of the order that was executed. An executed sell order (2)
        # gives +1 and an executed buy order (1) gives -1.
        if ref_type == 2:
            trade_sign[iii] = 1
        elif ref_type == 1:
            trade_sign[iii] = -1
    else:
        # Remaining events (cancel 'C', delete 'D') are not trades
        trade_sign[iii] = 0

# Keeping only the data in the open market time

# The two masks are combined element-wise as a logical AND: an event is kept
# only when its time is later than 9.5 h and earlier than 16 h. Times are in
# milliseconds, hence the division by 3600 and 1000.
hours = 1. * times / 3600 / 1000
day_times_ind = (hours > 9.5) & (hours < 16)

trade_signs = trade_sign[day_times_ind]
times_signs = times[day_times_ind]

# Completing the full time entrances

# 34 200 000 ms = 9h30 - 57 600 000 ms = 16h
# One entry per millisecond of the trading session
full_time = np.arange(34200000, 57600000)

# As there can be several values for the same millisecond, we use the most
# used trade value of each millisecond in the full time array as it
# behaves quite similar as the original input.
#
# np.bincount adds up the trade signs that share a millisecond (the offset
# from the first millisecond is the bin index) and leaves 0.0 where nothing
# happened. The sign of that sum is the majority sign: 1.0, -1.0, or 0.0
# when buys and sells cancel out. Unlike a sequential merge over full_time,
# this does not depend on times_signs being sorted.
signed_sum_per_ms = np.bincount(times_signs - full_time[0],
                                weights=trade_signs,
                                minlength=len(full_time))

trade_signs_complete_most = np.sign(signed_sum_per_ms)