# Classification of trade signs

In [1]:
# Import modules

from matplotlib import pyplot as plt
import numpy as np
import gzip
import pickle
import multiprocessing
from itertools import product
import datetime

%matplotlib inline

### List of order types: 

* "B" = 1 -- Add buy order
* "S" = 2 -- Add sell order
* "E" = 3 -- Execute outstanding order in part
* "C" = 4 -- Cancel outstanding order in part
* "F" = 5 -- Execute outstanding order in full
* "D" = 6 -- Delete outstanding order in full
* "X" = 7 -- Bulk volume for the cross event
* "T" = 8 -- Execute non-displayed order

In [2]:
# List of stocks and days

# Ticker symbols, ten per row, kept in the same order as before.
tickers = '''
    AAL   AAPL  ADBE  ADI   ADP   ADSK  AKAM  ALXN  AMAT  AMGN
    AMZN  ATVI  AVGO  BBBY  BIDU  BIIB  BMRN  CA    CELG  CERN
    CHKP  CHRW  CHTR  CMCSA COST  CSCO  CTSH  CTXS  DISCA DISH
    DLTR  EA    EBAY  EQIX  ESRX  EXPD  FAST  FB    FISV  FOXA
    GILD  GOOG  GRMN  HSIC  ILMN  INTC  INTU  ISRG  JD    KHC
    KLAC  LBTYA LLTC  LMCA  LRCX  LVNTA MAR   MAT   MDLZ  MNST
    MSFT  MU    MYL   NFLX  NTAP  NVDA  NXPI  ORLY  PAYX  PCAR
    PCLN  QCOM  REGN  ROST  SBAC  SBUX  SIRI  SNDK  SPLS  SRCL
    STX   SYMC  TRIP  TSCO  TSLA  TXN   VIAB  VIP   VOD   VRSK
    VRTX  WDC   WFM   WYNN  XLNX  YHOO
'''.split()

# Two-digit day strings '07' ... '11', as used in the data file names.
days = ['%02d' % day_number for day_number in range(7, 12)]

In [9]:
def trade_signs(ticker, day):
    '''
    Obtain the trade signs of one stock for one day of the ITCH 2016 data and save them in pickle files.

    The file '../../ITCH_2016/201603<day>_<ticker>.csv.gz' is read skipping its first row. Column 0 is used as
    the time stamp (handled as milliseconds since midnight), column 2 as the order id and column 3 as the order
    type letter. Events of type 'X' and 'T' are discarded. Every remaining event receives a sign:

    * +1 -- execution ('E' or 'F') of an order that was added as a buy order ('B')
    * -1 -- execution ('E' or 'F') of an order that was added as a sell order ('S')
    *  0 -- any other event (order additions, cancellations and deletions)

    Three files are written in '../Data/':

    * trade_signs_201603<day>_<ticker>.pickl -- signs of the events strictly between 9h30 and 16h
    * trade_signs_test<day>_<ticker>.pickl -- signs of all the events (no time filter)
    * trade_signs_time_test<day>_<ticker>.pickl -- time stamps of all the events (no time filter)

    ticker -- String of the abbreviation of the stock to be analyzed (i.e. 'AAPL')
    day -- String of the day to be analyzed (i.e '07')

    return None

    Raises KeyError when an execution refers to an order id that was not added earlier in the file.
    '''

    # Numeric codes of the order type letters. Letters that are not listed get the code 0 and are therefore
    # handled like order additions without a side.
    order_codes = {'B': 1, 'S': 2, 'E': 3, 'C': 4, 'F': 5, 'D': 6, 'X': 7, 'T': 8}

    print('Trade signs data')
    print('Processing data for the stock', ticker, 'the day', day + ' March, 2016')

    # Load data. The compressed file is closed as soon as the table is in memory.
    with gzip.open('../../ITCH_2016/201603%s_%s.csv.gz' % (day, ticker)) as source:
        data = np.genfromtxt(source, dtype='str', skip_header=1, delimiter=',')

    # A file with a single data row is returned as a 1-D array; keep the (rows, columns) layout
    data = np.atleast_2d(data)

    # Columns used in the classification: time, order id and order type
    times_all = np.array([int(stamp) for stamp in data[:, 0]])
    ids_all = np.array([int(order_id) for order_id in data[:, 2]])
    types_all = np.array([order_codes.get(str(letter), 0) for letter in data[:, 3]])

    # Discard the cross events ('X') and the executions of non-displayed orders ('T')
    displayed = types_all < 7
    ids = ids_all[displayed]
    times = times_all[displayed]
    types = types_all[displayed]

    # Side (order type code) of every added order, indexed by its id. A later addition with the same id
    # replaces the previous one.
    side_by_id = {}
    sign_of_side = {1: 1, 2: -1}

    trade_sign = 0 * types

    # NOTE(review): the sign follows the side of the resting order that is executed. If the intended
    # convention is the side of the incoming order, the signs have to be flipped -- confirm.
    for position, (order_id, event) in enumerate(zip(ids.tolist(), types.tolist())):

        if event < 3:                                       # Order addition: remember its side

            side_by_id[order_id] = event

        elif event == 3 or event == 5:                      # Execution in part or in full

            trade_sign[position] = sign_of_side.get(side_by_id[order_id], 0)

        # Cancellations and deletions keep the sign 0

    # Keep the events in the open market time. Both conditions must hold (logical and): the time has to be
    # later than 9h30 and earlier than 16h.
    hours = times / 3600 / 1000
    in_session = (hours > 9.5) & (hours < 16)
    session_signs = trade_sign[in_session]

    # Saving data. Only the signs are filtered by time; the saved time stamps are the unfiltered ones.
    outputs = (
        ('../Data/trade_signs_201603%s_%s.pickl', session_signs),
        ('../Data/trade_signs_test%s_%s.pickl', trade_sign),
        ('../Data/trade_signs_time_test%s_%s.pickl', times),
    )

    for pattern, payload in outputs:

        with open(pattern % (day, ticker), 'wb') as target:
            pickle.dump(payload, target)

In [10]:
# Run the extraction for a single stock and day (ticker 'AAPL', day '07')
trade_signs('AAPL', '07')

Trade signs data
Processing data for the stock AAPL the day 07 March, 2016


In [4]:
# Run trade_signs for every (ticker, day) pair on 8 worker processes.
# starmap blocks until all the tasks are done and the pool is terminated when the with block ends,
# so no explicit close()/join() is needed afterwards.
with multiprocessing.Pool(processes = 8) as pool:
    pool.starmap(trade_signs, product(tickers, days))

Processing data for the stock AKAM the day 07 March, 2016
Processing data for the stock AAL the day 07 March, 2016
Processing data for the stock BIIB the day 07 March, 2016
Processing data for the stock CELG the day 07 March, 2016
Processing data for the stock AMGN the day 07 March, 2016
Processing data for the stock ADI the day 07 March, 2016
Processing data for the stock AVGO the day 07 March, 2016
Processing data for the stock CHRW the day 07 March, 2016
Processing data for the stock BIIB the day 08 March, 2016
Processing data for the stock ADI the day 08 March, 2016
Processing data for the stock CHRW the day 08 March, 2016
Processing data for the stock AKAM the day 08 March, 2016
Processing data for the stock AMGN the day 08 March, 2016
Processing data for the stock CELG the day 08 March, 2016
Processing data for the stock BIIB the day 09 March, 2016
Processing data for the stock CHRW the day 09 March, 2016
Processing data for the stock AVGO the day 08 March, 2016
Processing data f

Processing data for the stock DISCA the day 09 March, 2016
Processing data for the stock ESRX the day 08 March, 2016
Processing data for the stock EA the day 07 March, 2016
Processing data for the stock FB the day 07 March, 2016
Processing data for the stock CSCO the day 10 March, 2016
Processing data for the stock FOXA the day 07 March, 2016
Processing data for the stock DISCA the day 10 March, 2016
Processing data for the stock EA the day 08 March, 2016
Processing data for the stock ESRX the day 09 March, 2016
Processing data for the stock AAPL the day 09 March, 2016
Processing data for the stock FOXA the day 08 March, 2016
Processing data for the stock CMCSA the day 11 March, 2016
Processing data for the stock EA the day 09 March, 2016
Processing data for the stock ESRX the day 10 March, 2016
Processing data for the stock DISCA the day 11 March, 2016
Processing data for the stock CSCO the day 11 March, 2016
Processing data for the stock EA the day 10 March, 2016
Processing data for 

Processing data for the stock MDLZ the day 10 March, 2016
Processing data for the stock ISRG the day 10 March, 2016
Processing data for the stock ADBE the day 10 March, 2016
Processing data for the stock ISRG the day 11 March, 2016
Processing data for the stock GOOG the day 10 March, 2016
Processing data for the stock NXPI the day 07 March, 2016
Processing data for the stock LMCA the day 08 March, 2016
Processing data for the stock KLAC the day 11 March, 2016
Processing data for the stock NXPI the day 08 March, 2016
Processing data for the stock LMCA the day 09 March, 2016
Processing data for the stock ADBE the day 11 March, 2016
Processing data for the stock PCAR the day 07 March, 2016
Processing data for the stock MDLZ the day 11 March, 2016
Processing data for the stock LMCA the day 10 March, 2016
Processing data for the stock NFLX the day 08 March, 2016
Processing data for the stock GOOG the day 11 March, 2016
Processing data for the stock REGN the day 07 March, 2016
Processing dat

Processing data for the stock WDC the day 08 March, 2016
Processing data for the stock WYNN the day 11 March, 2016
Processing data for the stock VOD the day 10 March, 2016
Processing data for the stock TXN the day 10 March, 2016
Processing data for the stock XLNX the day 07 March, 2016
Processing data for the stock WDC the day 09 March, 2016
Processing data for the stock NVDA the day 10 March, 2016
Processing data for the stock XLNX the day 08 March, 2016
Processing data for the stock WDC the day 10 March, 2016
Processing data for the stock MU the day 07 March, 2016
Processing data for the stock TXN the day 11 March, 2016
Processing data for the stock XLNX the day 09 March, 2016
Processing data for the stock VOD the day 11 March, 2016
Processing data for the stock XLNX the day 10 March, 2016
Processing data for the stock NVDA the day 11 March, 2016
Processing data for the stock SIRI the day 09 March, 2016
Processing data for the stock WDC the day 11 March, 2016
Processing data for the 