# taq_data_extract

#### Juan Camilo Henao Londono - 26.03.2019
#### AG Guhr - Universitaet Duisburg-Essen

In [1]:
# Modules

import numpy as np
import os

import pickle
import dask.dataframe as dd

import taq_data_tools

# NOTE(review): __tau__ is not referenced in the visible part of this
# notebook; presumably a shared time-lag/scale setting - confirm before removing.
__tau__ = 1000

In [2]:
def _taq_load_month(kind, ticker, year, month, columns):
    """
    Read one yearly TAQ CSV file with dask, index it by date and return the
    computed rows that belong to the requested month.
        :param kind: string inserted in the file name to pick the file to
                     load (i.e. 'quotes' or 'trades')
        :param ticker: string of the abbreviation of the stock (i.e. 'AAPL')
        :param year: string of the year to be analyzed (i.e. '2008')
        :param month: string of the month to be analyzed (i.e. '07')
        :param columns: list with the names assigned to the first
                        len(columns) columns of the file; must contain 'Date'
        :return: computed data frame indexed by 'Date' with the rows of the
                 requested month
    """
    data = dd.read_csv('../../TAQ_{1}/Data/{0}_{1}_NASDAQ_{2}.csv'
                       .format(ticker, year, kind),
                       usecols=range(len(columns)),
                       sep=' ',
                       names=columns,
                       parse_dates=['Date']).set_index('Date')

    # Partial-string selection of the month. Use .loc explicitly: selecting
    # rows with frame['YYYY-MM'] is ambiguous with column selection and is
    # no longer supported by recent pandas versions.
    data = data.loc['{}-{}'.format(year, month)]

    return data.compute()


def taq_data_extract(ticker, year, month):
    """
    Extract the trades and quotes (TAQ) data for a month from a CSV file with
    the full information of a year.
        :param ticker: string of the abbreviation of the stock to be analyzed
                       (i.e. 'AAPL')
        :param year: string of the year to be analyzed (i.e. '2008')
        :param month: string of the month to be analyzed (i.e. '07')
        :return: tuple (data_quotes, data_trades). Both are indexed by 'Date';
                 the quotes have the columns 'Time', 'Bid' and 'Ask' and the
                 trades have the columns 'Time' and 'Ask'
    """
    function_name = taq_data_extract.__name__
    taq_data_tools.taq_function_header_print_data(function_name, ticker,
                                                  ticker, year, month, '-')

    # Load data
    data_quotes = _taq_load_month('quotes', ticker, year, month,
                                  ['Date', 'Time', 'Bid', 'Ask'])

    # NOTE(review): the third column of the trades file is labeled 'Ask';
    # presumably it holds the trade price. The name is kept so existing
    # callers that read this column keep working - confirm before renaming.
    data_trades = _taq_load_month('trades', ticker, year, month,
                                  ['Date', 'Time', 'Ask'])

    return (data_quotes, data_trades)

In [3]:
# Example run: extract the quotes and trades of AAPL for January 2008.
# The returned tuple is the last expression, so the notebook displays it.
ticker, year, month = 'AAPL', '2008', '01'
taq_data_extract(ticker, year, month)

TAQ data
taq_data_extract
Processing data for the stock AAPL the 2008.01.-


(                Time     Bid      Ask
 Date                                 
 2008-01-02  07:00:07    0.01     0.00
 2008-01-02  07:00:07    0.01  2000.00
 2008-01-02  07:01:38  199.09  2000.00
 2008-01-02  07:02:00  199.09   471.00
 2008-01-02  07:02:09  199.70   471.00
 2008-01-02  07:02:09  199.70   199.96
 2008-01-02  07:03:03  199.70   199.96
 2008-01-02  07:03:40  199.70   199.96
 2008-01-02  07:03:40  199.70   199.96
 2008-01-02  07:04:02  199.09   199.96
 2008-01-02  07:04:22  199.09   199.96
 2008-01-02  07:04:22  199.09   199.96
 2008-01-02  07:04:23  199.09   199.96
 2008-01-02  07:04:23  199.09   199.91
 2008-01-02  07:04:30  199.09   199.90
 2008-01-02  07:04:37  199.10   199.90
 2008-01-02  07:05:28  199.10   199.91
 2008-01-02  07:05:28  199.10   199.91
 2008-01-02  07:06:01  199.67   199.91
 2008-01-02  07:06:14  199.67   199.91
 2008-01-02  07:08:22  199.10   199.91
 2008-01-02  07:08:22  199.50   199.91
 2008-01-02  07:10:08  199.61   199.91
 2008-01-02  07:10:16  19