# taq_data_extract

#### Juan Camilo Henao Londono - 26.03.2019
#### AG Guhr - Universitaet Duisburg-Essen

In [1]:
# Modules

import numpy as np
import os
import pandas as pd

import pickle

import taq_data_tools

# NOTE(review): not referenced by any cell visible in this notebook —
# presumably a time-lag setting shared with other TAQ scripts; confirm before
# relying on it or removing it
__tau__ = 1000

In [2]:
def _taq_read_day_rows(filename, date, n_fields,
                       time_min=34801, time_max=57000):
    """
    Collect the rows of a whitespace-separated TAQ file that belong to one
    day and whose time lies inside a closed window.
        :param filename: string with the path of the file to be read
        :param date: string of the date to keep, exactly as written in the
                     first field of each line (i.e. '2008-01-02')
        :param n_fields: number of leading fields kept from every matching
                         line
        :param time_min: smallest value of the second field to keep
                         (inclusive)
        :param time_max: largest value of the second field to keep
                         (inclusive)
        :return: list with one list of strings per matching line
    """
    rows = []

    with open(filename) as f_data:
        for line in f_data:
            fields = line.split()
            # Blank lines have no fields. Testing the date first also
            # discards any non-data line before its second field is parsed
            # as a number
            if not fields or fields[0] != date:
                continue
            # Compare the time numerically: compared as strings, a value
            # such as '3500' sorts between '34801' and '57000' and would be
            # wrongly accepted
            if time_min <= int(fields[1]) <= time_max:
                rows.append(fields[:n_fields])

    return rows


def taq_data_extract(ticker, year, month, day):
    """
    Extract the trades and quotes (TAQ) data for a day from the files with
    the full information of a year, and save the result in pickle files.

    Only the lines whose time is between 34801 and 57000 (both included) are
    kept.
        :param ticker: string of the abbreviation of the stock to be analyzed
                       (i.e. 'AAPL')
        :param year: string of the year to be analyzed (i.e '2008')
        :param month: string of the month to be analyzed (i.e '07')
        :param day: string of the day to be analyzed (i.e '07')
        :return: tuple (time_q, bid_q, ask_q, time_t, ask_t) of integer numpy
                 arrays. The first three come from the quotes file (second,
                 third and fourth field of each line) and the last two from
                 the trades file (second and third field of each line)
        :raises AssertionError: if the quotes file or the trades file has no
                                line for the requested day inside the time
                                window
    """
    function_name = taq_data_extract.__name__
    # NOTE(review): ticker is passed twice on purpose of the original code;
    # presumably the helper expects two tickers - confirm against
    # taq_data_tools
    taq_data_tools.taq_function_header_print_data(function_name, ticker,
                                                  ticker, year, month, day)

    # Load data
    date = '{}-{}-{}'.format(year, month, day)
    quotes_filename = '../../TAQ_{1}/Data/{0}_{1}_NASDAQ_quotes.csv' \
                      .format(ticker, year)
    trades_filename = '../../TAQ_{1}/Data/{0}_{1}_NASDAQ_trades.csv' \
                      .format(ticker, year)

    # Quotes lines keep date, time, bid and ask
    quotes_day_list = _taq_read_day_rows(quotes_filename, date, 4)
    assert len(quotes_day_list) != 0, \
        'No quotes found for {} in {}'.format(date, quotes_filename)

    # Trades lines keep date, time and ask
    trades_day_list = _taq_read_day_rows(trades_filename, date, 3)
    assert len(trades_day_list) != 0, \
        'No trades found for {} in {}'.format(date, trades_filename)

    # Data to arrays
    time_q = np.array([int(row[1]) for row in quotes_day_list], dtype=int)
    bid_q = np.array([int(row[2]) for row in quotes_day_list], dtype=int)
    ask_q = np.array([int(row[3]) for row in quotes_day_list], dtype=int)

    time_t = np.array([int(row[1]) for row in trades_day_list], dtype=int)
    ask_t = np.array([int(row[2]) for row in trades_day_list], dtype=int)

    # Saving data
    save_folder = '../../TAQ_{}/TAQ_py/'.format(year)

    if (not os.path.isdir(save_folder)):

        try:

            os.mkdir(save_folder)
            print('Folder to save data created')

        except FileExistsError:

            # The folder appeared between the check and the creation
            print('Folder exists. The folder was not created')

    # The files are opened in with blocks so they are flushed and closed
    # as soon as the data is written
    with open('../../TAQ_{1}/TAQ_py/TAQ_{0}_quotes_{1}{2}{3}.pickle'
              .format(ticker, year, month, day), 'wb') as f_quotes:
        pickle.dump((time_q, bid_q, ask_q), f_quotes)

    with open('../../TAQ_{1}/TAQ_py/TAQ_{0}_trades_{1}{2}{3}.pickle'
              .format(ticker, year, month, day), 'wb') as f_trades:
        pickle.dump((time_t, ask_t), f_trades)

    print('Data Saved')
    print()

    return (time_q, bid_q, ask_q, time_t, ask_t)

In [3]:
# Example run: extract a single trading day for one stock. The returned
# arrays are the last expression, so the notebook displays them
ticker, year, month, day = 'AAPL', '2008', '01', '02'
taq_data_extract(ticker, year, month, day)

TAQ data
taq_data_extract
Processing data for the stock AAPL the 2008.01.02
Data Saved



(array([34801, 34801, 34801, ..., 57000, 57000, 57000]),
 array([1989600, 1989600, 1989600, ..., 1947300, 1947300, 1947300]),
 array([1990100, 1990100, 1990400, ..., 1947600, 1947600, 1947600]),
 array([34801, 34801, 34801, ..., 57000, 57000, 57000]),
 array([1990100, 1990000, 1989900, ..., 1947400, 1947400, 1947400]))