# taq_wang_comparison

#### Juan Camilo Henao Londono - 02.05.2019
#### AG Guhr - Universitaet Duisburg-Essen

In [1]:
import gzip
import numpy as np
import pandas as pd
import pickle

import taq_data_tools_basic
import taq_data_analysis_basic

## Trade sign comparison

Compare S. Wang's trade sign data with the values computed by my implementation.

In [2]:
def trade_sign_comparison(ticker, year, month, day):
    """Print how closely my trade sign data matches the S. Wang reference.

    Two data sets are compared column by column using exact equality:

    * the "all transactions" data returned by
      ``taq_data_analysis_basic.taq_trade_signs_all_transactions_data``
      against the file ``'<ticker>prices_all_transactions.txt'``;
    * the "full time" data returned by
      ``taq_data_analysis_basic.taq_trade_signs_full_time_data`` against the
      file ``'<ticker>trade_signs.txt'``.

    Both reference files are opened with a path relative to the current
    working directory, have no header row and use three spaces as the
    column separator. The result is printed as the percentage of rows that
    match in each column; nothing is returned.

    :param ticker: ticker symbol, used as prefix of the reference file
     names and passed to the analysis functions (i.e. 'AAPL').
    :param year: year passed to the analysis functions and used to build
     the 'Date' column as '<year>-<month>-<day>' (i.e. '2008').
    :param month: month, used like ``year`` (i.e. '01').
    :param day: day, used like ``year`` (i.e. '02').
    :raises AssertionError: if my all-transactions data and the reference
     file do not have the same number of rows.
    :raises ValueError: if my full time data and the reference file do not
     have the same number of rows.
    :return: None.
    """

    def _similarity_percent(reference, computed):
        # Percentage of rows in which both columns hold exactly equal values
        return np.sum(reference == computed) / len(reference) * 100

    # Load data
    # All transactions data
    time_t, ask_t, identified_trades = (
        taq_data_analysis_basic.taq_trade_signs_all_transactions_data(
            ticker, year, month, day))

    # Rows whose time equals 34800 are left out of the comparison
    # NOTE(review): presumably 09:40:00 in seconds after midnight and absent
    # from the reference file - confirm
    condition = time_t != 34800

    # The scalar 'Date' string is repeated by pandas for every row
    d_trade_sign_all = {'Date': '{}-{}-{}'.format(year, month, day),
                        'Time': time_t[condition],
                        'Price': ask_t[condition],
                        'Trade': identified_trades[condition]}
    trade_sign_juan_all = pd.DataFrame(data=d_trade_sign_all)

    trade_sign_wang_all = pd.read_csv(
        '{}prices_all_transactions.txt'.format(ticker), sep='   ',
        header=None, engine='python')
    trade_sign_wang_all.columns = ['Date', 'Time', 'Price', 'Trade']

    # Explicit raise instead of ``assert`` so the check is not removed when
    # Python runs with -O; the exception type is unchanged
    if len(trade_sign_juan_all) != len(trade_sign_wang_all):
        raise AssertionError(
            'All transactions data for {} has {} rows but the reference '
            'file has {}'.format(ticker, len(trade_sign_juan_all),
                                 len(trade_sign_wang_all)))

    # Full time data
    full_time, price_signs, trade_signs = (
        taq_data_analysis_basic.taq_trade_signs_full_time_data(
            ticker, year, month, day))

    d_trade_sign_perse = {'Time': full_time, 'Price': price_signs,
                          'Trade': trade_signs}
    trade_sign_juan_perse = pd.DataFrame(data=d_trade_sign_perse)

    # The first column of the reference file is skipped
    # NOTE(review): presumably the date column - confirm
    trade_sign_wang_perse = pd.read_csv(
        '{}trade_signs.txt'.format(ticker), sep='   ', usecols=(1, 2, 3),
        header=None, engine='python')
    trade_sign_wang_perse.columns = ['Time', 'Price', 'Trade']

    # Comparison of all the transactions
    trade_date_comp_all = _similarity_percent(
        trade_sign_wang_all['Date'], trade_sign_juan_all['Date'])
    trade_time_comp_all = _similarity_percent(
        trade_sign_wang_all['Time'], trade_sign_juan_all['Time'])
    trade_price_comp_all = _similarity_percent(
        trade_sign_wang_all['Price'], trade_sign_juan_all['Price'])
    trade_trade_comp_all = _similarity_percent(
        trade_sign_wang_all['Trade'], trade_sign_juan_all['Trade'])

    print()
    print(ticker)
    print()
    print('Comparison of all the transactions')
    print('The similarity of all the transaction dates is {:.2f}%'
          .format(trade_date_comp_all))
    print('The similarity of all the transaction times is {:.2f}%'
          .format(trade_time_comp_all))
    print('The similarity of all the transaction prices is {:.2f}%'
          .format(trade_price_comp_all))
    print('The similarity of all the transaction trades signs is {:.2f}%'
          .format(trade_trade_comp_all))
    print()

    # Comparison of the full time values
    # pandas refuses to compare Series of different length with an error
    # about labels; fail at the same point with a message that names the
    # actual problem
    if len(trade_sign_juan_perse) != len(trade_sign_wang_perse):
        raise ValueError(
            'Full time data for {} has {} rows but the reference file '
            'has {}'.format(ticker, len(trade_sign_juan_perse),
                            len(trade_sign_wang_perse)))

    trade_time_comp_perse = _similarity_percent(
        trade_sign_wang_perse['Time'], trade_sign_juan_perse['Time'])
    trade_price_comp_perse = _similarity_percent(
        trade_sign_wang_perse['Price'], trade_sign_juan_perse['Price'])
    trade_trade_comp_perse = _similarity_percent(
        trade_sign_wang_perse['Trade'], trade_sign_juan_perse['Trade'])

    print('Comparison of the full time values')
    print('The similarity of the time per second is {:.2f}%'
          .format(trade_time_comp_perse))
    print('The similarity of the price per second is {:.2f}%'
          .format(trade_price_comp_perse))
    print('The similarity of the trades per second is {:.2f}%'
          .format(trade_trade_comp_perse))
    print()

In [3]:
# Run the trade sign comparison for every ticker on the same date
tickers = ['AAPL', 'MSFT']
year, month, day = '2008', '01', '02'

for ticker in tickers:
    trade_sign_comparison(ticker, year, month, day)

TAQ data
taq_trade_signs_all_transactions_data
Processing data for the stock AAPL the 2008.01.02
TAQ data
taq_trade_signs_full_time_data
Processing data for the stock AAPL the 2008.01.02
TAQ data
taq_trade_signs_all_transactions_data
Processing data for the stock AAPL the 2008.01.02

AAPL

Comparison of all the transactions
The similarity of all the transaction dates is 100.00%
The similarity of all the transaction times is 100.00%
The similarity of all the transaction prices is 100.00%
The similarity of all the transaction trades signs is 100.00%

Comparison of the full time values
The similarity of the time per second is 100.00%
The similarity of the price per second is 100.00%
The similarity of the trades per second is 100.00%

TAQ data
taq_trade_signs_all_transactions_data
Processing data for the stock MSFT the 2008.01.02
TAQ data
taq_trade_signs_full_time_data
Processing data for the stock MSFT the 2008.01.02
TAQ data
taq_trade_signs_all_transactions_data
Processing data for the s

## Midpoint price comparison

Compare S. Wang's midpoint price data with the values computed by my implementation.

In [4]:
def midpoint_comparison(ticker, year, month, day):
    """Print how closely my midpoint data matches the S. Wang reference.

    The "all transactions" data from
    ``taq_data_analysis_basic.taq_midpoint_all_transactions_data`` is
    compared with the file ``'<ticker>midpoint_all_transactions.txt'`` and
    the "full time" data from
    ``taq_data_analysis_basic.taq_midpoint_full_time_data`` is compared with
    the file ``'<ticker>midpoint.txt'``. Every column is compared with exact
    equality and the percentage of matching rows is printed.

    :param ticker: ticker symbol, used as prefix of the reference file
     names and passed to the analysis functions (i.e. 'AAPL').
    :param year: year passed to the analysis functions and used to build
     the 'Date' column as '<year>-<month>-<day>' (i.e. '2008').
    :param month: month, used like ``year`` (i.e. '01').
    :param day: day, used like ``year`` (i.e. '02').
    :raises AssertionError: if one of my data sets and its reference file
     do not have the same number of rows.
    :return: None.
    """

    # Column name -> report line, in the order the report is printed
    report_all = (
        ('Date', 'The similarity of all the transaction dates is {:.2f}%'),
        ('Time', 'The similarity of all the transaction times is {:.2f}%'),
        ('Bid', 'The similarity of all the transaction bids is {:.2f}%'),
        ('Ask', 'The similarity of all the transaction asks is {:.2f}%'),
        ('Midpoint',
         'The similarity of all the transaction midpoints is {:.2f}%'),
        ('Spread',
         'The similarity of all the transaction spreads is {:.2f}%'),
    )

    # All transactions data: my values
    (time_q, bid_q, ask_q, midpoint,
     spread) = taq_data_analysis_basic.taq_midpoint_all_transactions_data(
        ticker, year, month, day)

    # Rows whose time equals 57000 are left out of the comparison
    # NOTE(review): presumably 15:50:00 in seconds after midnight - confirm
    keep = time_q != 57000

    # The scalar 'Date' string is repeated by pandas for every row
    mine_all = pd.DataFrame(data={
        'Date': '{}-{}-{}'.format(year, month, day),
        'Time': time_q[keep],
        'Bid': bid_q[keep],
        'Ask': ask_q[keep],
        'Midpoint': midpoint[keep],
        'Spread': spread[keep],
    })

    # All transactions data: reference values
    wang_all = pd.read_csv(
        '{}midpoint_all_transactions.txt'.format(ticker),
        sep='   ', header=None, engine='python')
    wang_all.columns = ['Date', 'Time', 'Bid', 'Ask', 'Midpoint', 'Spread']

    assert len(mine_all) == len(wang_all)

    # Full time data: my values
    mine_perse = pd.DataFrame(data={
        'Midpoint': taq_data_analysis_basic.taq_midpoint_full_time_data(
            ticker, year, month, day),
    })

    # Full time data: reference values, only the column at position 2 of
    # the file is read
    wang_perse = pd.read_csv(
        '{}midpoint.txt'.format(ticker),
        sep='   ', usecols=[2], header=None, engine='python')
    wang_perse.columns = ['Midpoint']

    # Show both sizes before checking that they agree
    for frame in (mine_perse, wang_perse):
        print(len(frame))

    assert len(mine_perse) == len(wang_perse)

    # Comparison transactions: compute every ratio before printing anything
    ratios_all = [
        np.sum(wang_all[column] == mine_all[column]) / len(wang_all[column])
        for column, _ in report_all
    ]

    print()
    print(ticker)
    print()
    print('Comparison of all the transactions')
    for (_, message), ratio in zip(report_all, ratios_all):
        print(message.format(ratio * 100))
    print()

    # Comparison full time
    matches_perse = np.sum(wang_perse['Midpoint'] == mine_perse['Midpoint'])
    ratio_perse = matches_perse / len(wang_perse['Midpoint'])

    print('Comparison of the full time values')
    print('The similarity of midpoints is {:.2f}%'.format(ratio_perse * 100))
    print()

In [5]:
# Run the midpoint comparison for every ticker on the same date
tickers = ['AAPL', 'MSFT']
year, month, day = '2008', '01', '02'

for ticker in tickers:
    midpoint_comparison(ticker, year, month, day)

TAQ data
taq_midpoint_all_transactions_data
Processing data for the stock AAPL the 2008.01.02
TAQ data
taq_midpoint_full_time_data
Processing data for the stock AAPL the 2008.01.02
TAQ data
taq_midpoint_all_transactions_data
Processing data for the stock AAPL the 2008.01.02
22200
22200

AAPL

Comparison of all the transactions
The similarity of all the transaction dates is 100.00%
The similarity of all the transaction times is 100.00%
The similarity of all the transaction bids is 100.00%
The similarity of all the transaction asks is 100.00%
The similarity of all the transaction midpoints is 100.00%
The similarity of all the transaction spreads is 100.00%

Comparison of the full time values
The similarity of midpoints is 100.00%

TAQ data
taq_midpoint_all_transactions_data
Processing data for the stock MSFT the 2008.01.02
TAQ data
taq_midpoint_full_time_data
Processing data for the stock MSFT the 2008.01.02
TAQ data
taq_midpoint_all_transactions_data
Processing data for the stock MSFT t