# TAQ data load

In [1]:
import numpy as np
import pandas as pd
import dask.dataframe as dd
from dask.multiprocessing import get
import pickle

In [2]:
def _read_taq_csv(path, columns):
    """Read the leading ``len(columns)`` columns of a space-separated TAQ file.

    The column names are supplied explicitly through ``columns``; the 'Date'
    column is parsed as dates and then used as the index of the returned
    dask DataFrame.
    """
    frame = dd.read_csv(path,
                        sep=' ',
                        names=columns,
                        usecols=range(len(columns)),
                        parse_dates=['Date'])
    return frame.set_index('Date')


# Quotes carry a bid and an ask per timestamp; trades carry a single price
# column which this notebook labels 'Ask'.
data_quotes = _read_taq_csv('../TAQ_2008/Data/APPL_2008_NASDAQ_quotes.csv',
                            ['Date', 'Time', 'Bid', 'Ask'])

data_trades = _read_taq_csv('../TAQ_2008/Data/APPL_2008_NASDAQ_trades.csv',
                            ['Date', 'Time', 'Ask'])

In [3]:
def get_sec(time_str):
    """Convert a clock time string into seconds elapsed since midnight.

    Parameters
    ----------
    time_str : str
        Time of day written as ``'HH:MM:SS'`` (three colon-separated
        integer fields).

    Returns
    -------
    int
        ``hours * 3600 + minutes * 60 + seconds``.

    Raises
    ------
    ValueError
        If the string does not split into exactly three integer fields.
    """
    hours, minutes, seconds = map(int, time_str.split(':'))
    return 3600 * hours + 60 * minutes + seconds
# Sanity check: print the seconds-since-midnight value of the two clock times
# (09:40:00 and 15:50:00) that bound the intraday window used further down.
for clock_time in ('09:40:00', '15:50:00'):
    print(get_sec(clock_time))

34800
57000


In [4]:
# The data for the week of 2008-04-07 to 2008-04-11 are saved to pickle files to test S. Wang's paper.

In [6]:
# Day-of-month strings for the trading days 2008-04-07 .. 2008-04-11.
dates = ['07', '08', '09', '10', '11']

# Intraday window that is kept, in seconds since midnight
# (get_sec('09:40:00') == 34800 and get_sec('15:50:00') == 57000).
window_open = get_sec('09:40:00')
window_close = get_sec('15:50:00')

print('Processing data')

for d in dates:

    day = '2008-04-' + d

    # Select the rows of one day from the Date index. ``.loc`` makes the
    # row-wise (partial date string) selection explicit instead of relying on
    # ``frame[...]`` falling back from column lookup to row lookup.
    data_q = data_quotes.loc[day]
    data_t = data_trades.loc[day]

    # Replace the 'HH:MM:SS' strings with integer seconds since midnight.
    # ``assign`` returns a new frame rather than mutating the selection.
    data_q = data_q.assign(Time=data_q['Time'].apply(get_sec, meta='int'))
    data_t = data_t.assign(Time=data_t['Time'].apply(get_sec, meta='int'))

    # Keep only the observations inside the intraday window (bounds inclusive).
    data_q = data_q.loc[(data_q['Time'] >= window_open) & (data_q['Time'] <= window_close)]
    data_t = data_t.loc[(data_t['Time'] >= window_open) & (data_t['Time'] <= window_close)]

    # Evaluate each dask graph exactly once. Converting every column with
    # np.array() directly on the dask objects would re-run the whole pipeline
    # once per column.
    quotes_day = data_q.compute()
    trades_day = data_t.compute()

    print('Saving data 2008-04-' + d)

    print('Quotes')
    print('Time, bid and ask')
    # ``with`` guarantees the file is flushed and closed even if dump() raises.
    with open('../TAQ_2008/TAQ_py/TAQ_quotes200804{}.pickl'.format(d), 'wb') as quotes_file:
        pickle.dump((np.array(quotes_day['Time']),
                     np.array(quotes_day['Bid']),
                     np.array(quotes_day['Ask'])),
                    quotes_file)
    print('Trades')
    print('Time and ask')
    with open('../TAQ_2008/TAQ_py/TAQ_trades_time_200804{}.pickl'.format(d), 'wb') as trades_file:
        pickle.dump((np.array(trades_day['Time']),
                     np.array(trades_day['Ask'])),
                    trades_file)

Processing data
Saving data 2008-04-07
Quotes
Time, bid and ask
Trades
Time and ask
Saving data 2008-04-08
Quotes
Time, bid and ask
Trades
Time and ask
Saving data 2008-04-09
Quotes
Time, bid and ask
Trades
Time and ask
Saving data 2008-04-10
Quotes
Time, bid and ask
Trades
Time and ask
Saving data 2008-04-11
Quotes
Time, bid and ask
Trades
Time and ask
