# itch_wang_res_comparison

#### Juan Camilo Henao Londono - 02.05.2019
#### AG Guhr - Universitaet Duisburg-Essen

In [1]:
import gzip
import numpy as np
import pandas as pd
import pickle

import itch_taq_trade_sign_classification_test_basic as itch_sign_clas

## Trade sign comparison with the full data

Check whether the original data are the same in both results.

In [2]:
# Load the pair (time_t, ask_t) stored in the TAQ pickle file. The file is
# opened in a `with` block so the handle is closed as soon as the data has
# been read.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only use it on files produced by a trusted source.
with open(
        '../TAQ_2008/TAQ_py/TAQ_AAPL_trades_20080102.pickle',
        'rb') as taq_file:
    time_t, ask_t = pickle.load(taq_file)

# Sanity check: no entry of ask_t may be equal to zero
assert not np.sum(ask_t == 0)

In [3]:
# Container for the trade signs obtained with equation (1): one float entry
# per element of time_t, all zero at the start
identified_trades = np.zeros(len(time_t), dtype=float)
# The last entry is preset to 1
identified_trades[len(identified_trades) - 1] = 1.0

In [4]:
# Display the price array that was loaded from the pickle file
ask_t

array([1989900, 1989900, 1990000, ..., 1947400, 1947400, 1947700])

In [5]:
# Implementation of equation (1). The sign of a trade is the sign of the
# price change with respect to the previous trade; if the price did not
# change, the trade inherits the sign of the previous trade.

# The first trade has no previous trade. Using index t_idx - 1 at
# t_idx = 0 would wrap around to the last element of ask_t and compare two
# trades that are not consecutive, so the first trade is handled explicitly
# and receives the starting sign +1.
if len(identified_trades):
    identified_trades[0] = 1

for t_idx in range(1, len(time_t)):

    diff = ask_t[t_idx] - ask_t[t_idx - 1]

    if diff:
        # The price moved: take the direction of the move
        identified_trades[t_idx] = np.sign(diff)
    else:
        # The price did not move: repeat the previous sign
        identified_trades[t_idx] = identified_trades[t_idx - 1]

In [6]:
# Sanity check: no entry of identified_trades may still be zero
assert not (identified_trades == 0).any()

In [7]:
# Collect time, price and trade sign of every trade in one DataFrame
d_trade_sign_full = dict(Time=time_t, Price=ask_t, Trade=identified_trades)
trade_sign_juan_full = pd.DataFrame(d_trade_sign_full)

In [8]:
# Read the text file with the results to compare against. The file is
# parsed with a three-space separator and without a header row, so the
# column names are assigned afterwards.
trade_sign_wang_full = pd.read_csv(
    'AAPLprices_full_time.txt',
    header=None,
    sep='   ',
    engine='python',
)
trade_sign_wang_full.columns = ['Time', 'Price', 'Trade']

In [9]:
# Comparison: for every column, the fraction of rows in which both tables
# hold the same value (mean of the element-wise equality)

trade_time_comp_full = (
    trade_sign_wang_full['Time'] == trade_sign_juan_full['Time']).mean()
trade_price_comp_full = (
    trade_sign_wang_full['Price'] == trade_sign_juan_full['Price']).mean()
trade_trade_comp_full = (
    trade_sign_wang_full['Trade'] == trade_sign_juan_full['Trade']).mean()

In [10]:
# Report the three fractions as percentages with two decimals
print('The similarity of the full time is {:.2f}%'
      .format(100 * trade_time_comp_full))
print('The similarity of the full prices is {:.2f}%'
      .format(100 * trade_price_comp_full))
print('The similarity of the full trades is {:.2f}%'
      .format(100 * trade_trade_comp_full))

The similarity of the full time is 100.00%
The similarity of the full prices is 52.82%
The similarity of the full trades is 62.41%


In [11]:
# Display the table that was read from 'AAPLprices_full_time.txt'
trade_sign_wang_full

Unnamed: 0,Time,Price,Trade
0,34801,1990100,1
1,34801,1990000,-1
2,34801,1989900,-1
3,34801,1989900,-1
4,34801,1990100,1
5,34801,1990000,-1
6,34802,1990400,1
7,34802,1990400,1
8,34802,1990500,1
9,34802,1990500,1


In [12]:
# Display the table built from time_t, ask_t and identified_trades
trade_sign_juan_full

Unnamed: 0,Time,Price,Trade
0,34801,1989900,1.0
1,34801,1989900,1.0
2,34801,1990000,1.0
3,34801,1990000,1.0
4,34801,1990100,1.0
5,34801,1990100,1.0
6,34802,1990100,1.0
7,34802,1990200,1.0
8,34802,1990200,1.0
9,34802,1990200,1.0


## Trade sign comparison with only one data point per second

Compare the trade sign for the first day of the TAQ data.

In [13]:
# One entry per integer time value in the half-open range [34801, 57001)
# NOTE(review): presumably seconds after midnight -- confirm
full_time = np.arange(34801, 57001)
trade_signs = np.zeros(full_time.shape)
price_signs = np.zeros(full_time.shape)

# Implementation of equation (2). Trade sign in each second
for t_idx, t_val in enumerate(full_time):

    # Mask of the trades whose time lies in [t_val, t_val + 1)
    in_second = (time_t >= t_val) & (time_t < t_val + 1)

    # Sign of the sum of the individual trade signs in this second
    # (zero when the signs cancel out or when there is no trade)
    trade_signs[t_idx] = np.sign(np.sum(identified_trades[in_second]))

    if in_second.any():
        # Keep the price of the last trade of the second
        price_signs[t_idx] = ask_t[in_second][-1]
    else:
        # No trade in this second: the price stays at zero and the time
        # entry is set to zero as well
        full_time[t_idx] = 0

In [14]:
# Collect the per-second time, price and trade sign in one DataFrame
d_trade_sign_perse = dict(
    Time=full_time, Price=price_signs, Trade=trade_signs)
trade_sign_juan_perse = pd.DataFrame(d_trade_sign_perse)

In [15]:
# Read the per-second results to compare against. The file is parsed with
# a three-space separator and without a header row; only the columns at
# positions 1, 2 and 3 are kept and then named.
trade_sign_wang_perse = pd.read_csv(
    'AAPLtrade_signs.txt',
    header=None,
    usecols=(1, 2, 3),
    sep='   ',
    engine='python',
)
trade_sign_wang_perse.columns = ['Time', 'Price', 'Trade']

In [16]:
# Comparison: for every column, the fraction of rows in which both
# per-second tables hold the same value (mean of the element-wise equality)

trade_time_comp_perse = (
    trade_sign_wang_perse['Time'] == trade_sign_juan_perse['Time']).mean()
trade_price_comp_perse = (
    trade_sign_wang_perse['Price'] == trade_sign_juan_perse['Price']).mean()
trade_trade_comp_perse = (
    trade_sign_wang_perse['Trade'] == trade_sign_juan_perse['Trade']).mean()

In [17]:
# Report the three per-second fractions as percentages with two decimals
print('The similarity of the time per second is {:.2f}%'
      .format(100 * trade_time_comp_perse))
print('The similarity of the price per second is {:.2f}%'
      .format(100 * trade_price_comp_perse))
print('The similarity of the trades per second is {:.2f}%'
      .format(100 * trade_trade_comp_perse))

The similarity of the time per second is 100.00%
The similarity of the price per second is 73.47%
The similarity of the trades per second is 80.31%
