# Test to fin the number of identified trades and number of matches

In [1]:
# Modules

import numpy as np
import os
from matplotlib import pyplot as plt
%matplotlib inline

import gzip
import pickle

__tau__ = 1000

In [2]:
price, trades, time = pickle.load(open('trade_classification.pickle', 'rb'))

In [3]:
# trades with values different to zero to obtain the theoretical value
price_no_0 = price[trades!= 0]
trades_no_0 = trades[trades!= 0]
time_no_0 = time[trades!= 0]
time_no_0_set = np.array(list(sorted(set(time_no_0))))

In [4]:
print('Number of identified trades:', len(price_no_0))

Number of identified trades: 120287


In [5]:
identified_trades = np.zeros(len(time_no_0))

count = 0

for t_idx, t_val in enumerate(time_no_0_set):
    
    while (count < len(time_no_0) and time_no_0[count] == t_val):
            
        diff = price_no_0[count] - price_no_0[count - 1]

        if (diff):

            identified_trades[count] = np.sign(diff)
            count += 1

        else:

            identified_trades[count] = identified_trades[count - 1]
            count += 1        

In [6]:
print('Accuracy of the classification:', round(sum(trades_no_0 == identified_trades) / len(trades_no_0) * 100, 2), '%')
print('The number of identified trades is', len(trades_no_0))
print('The number of matches is', sum(trades_no_0 == identified_trades))

Accuracy of the classification: 83.03 %
The number of identified trades is 120287
The number of matches is 99871


In [7]:
trades_exp_ms = np.zeros(len(time_no_0_set))

for t_idx, t_val in enumerate(time_no_0_set):
    
    trades_same_t = identified_trades[time_no_0 == t_val]
    sign = np.sign(np.sum(trades_same_t))
    trades_exp_ms[t_idx] = sign

In [8]:
# Theoric

trades_teo_ms = np.zeros(len(time_no_0_set))

for t_idx, t_val in enumerate(time_no_0_set):
    
    trades_same_t = trades_no_0[time_no_0 == t_val]
    sign = np.sign(np.sum(trades_same_t))
    trades_teo_ms[t_idx] = sign

In [9]:
print('Accuracy of the classification:', round(sum(trades_teo_ms == trades_exp_ms) / len(trades_teo_ms) * 100, 2), '%')
print('The number of identified trades signs is', len(trades_teo_ms))
print('The number of matches is', sum(trades_teo_ms == trades_exp_ms))

Accuracy of the classification: 81.51 %
The number of identified trades signs is 83411
The number of matches is 67988


In [13]:
full_time = np.array(range(34800, 57000))
trades_teo_s_0 = 0. * full_time
trades_exp_s_0 = 0. * full_time

for t_idx, t_val in enumerate(full_time):
    trades_teo_s_0[t_idx] = np.sign(np.sum(trades_teo_ms[(time_no_0_set > t_val * 1000) & (time_no_0_set < (t_val + 1) * 1000)]))
    trades_exp_s_0[t_idx] = np.sign(np.sum(trades_exp_ms[(time_no_0_set > t_val * 1000) & (time_no_0_set < (t_val + 1) * 1000)]))

trades_teo_s = trades_teo_s_0[trades_teo_s_0 != 0]
trades_exp_s = trades_exp_s_0[trades_teo_s_0 != 0]

In [14]:
len(trades_teo_s) == len(trades_exp_s)

True

In [15]:
print('Reducing the trades to 1 per second:')
print('Accuracy of the classification:', round(sum(trades_teo_s == trades_exp_s) / len(trades_teo_s) * 100, 2), '%')
print('Number of identified trades signs:', len(trades_teo_s))
print('Number of matches:', sum(trades_teo_s == trades_exp_s))
print()

Reducing the trades to 1 per second:
Accuracy of the classification: 78.05 %
Number of identified trades signs: 15591
Number of matches: 12169

