In [1]:
import gzip
import pandas as pd
import time
import os
from utils import *
from orderbook import OrderBook
from features import FeatureGenerator

## Create OrderBook and Spectrums

In [2]:
def make_orderbooks(order_log: list):
    """
    Replay an order log, keeping one order book per instrument up to date and
    recording the spectrum and the VWAPs after every processed row.

    Parameters
    ----------
    order_log : list
        Order-log rows in replay order. Every row is indexed by the keys
        'SECCODE', 'TIME', 'BUYSELL', 'ACTION', 'ORDERNO', 'VOLUME', 'PRICE'
        and 'NO'. Each processed row's SECCODE must be present in both
        ``SECCODES`` and ``feature_seccodes``; otherwise the order book /
        spectrum lookup raises ``KeyError``.

    Returns
    -------
    order_books : dict
        ``OrderBook`` per seccode of ``SECCODES``, in its state after the replay.
    df_spec : pandas.DataFrame
        One record per processed row with columns 'SECCODE', 'TIMESTAMP'
        (the row's 'TIME') and 'BID_ASK' (normalized bid values followed by
        normalized ask values, as a single list).
    df_vwap : pandas.DataFrame
        Same columns as ``df_spec``; 'BID_ASK' holds the normalized bid VWAPs
        followed by the normalized ask VWAPs.
    elapsed : float
        Seconds spent replaying the log and building both dataframes
        (creation of the order books and spectrums is not included).
    """
    # One order book for each known seccode.
    order_books = {secc: OrderBook(secc) for secc in SECCODES}

    # One spectrum (feature generator) for each seccode we compute features for.
    spectrums = {
        secc: FeatureGenerator(seccode=secc, px_step=instruments_info[secc]['PRICE_STEP'])
        for secc in feature_seccodes
    }

    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments the way time.time() differences can.
    start = time.perf_counter()

    list_spec = []
    list_vwap = []
    col_names = ['SECCODE', 'TIMESTAMP', 'BID_ASK']

    for row_log in order_log:
        seccode = row_log['SECCODE']

        # Ignore rows whose TIME is at or past the instrument's SCHEDULE value
        # (presumably the end of the trading session -- confirm in utils).
        if instruments_info[seccode]['SCHEDULE'] <= row_log['TIME']:
            continue

        is_ask = row_log['BUYSELL'] == 'S'
        action = row_log['ACTION']

        order_book = order_books[seccode]
        spectrum = spectrums[seccode]

        # The order book is always updated first: the spectrum update receives
        # the book in its post-event state.
        if action == Action.POST:
            order_book.add_entry(entry=row_log, ask=is_ask)
            spectrum.update_post(order_book=order_book, new_price=row_log['PRICE'],
                                 volume=row_log['VOLUME'], ask=is_ask)

        elif action == Action.REVOKE:
            order_book.revoke(orderno=row_log['ORDERNO'], volume=row_log['VOLUME'],
                              ask=is_ask, row_numb=row_log['NO'])
            spectrum.update_revoke(order_book=order_book, new_price=row_log['PRICE'],
                                   volume=row_log['VOLUME'], ask=is_ask)

        elif action == Action.MATCH:
            order_book.match(orderno=row_log['ORDERNO'], volume=row_log['VOLUME'],
                             ask=is_ask, row_numb=row_log['NO'])
            spectrum.update_match(order_book=order_book, new_price=row_log['PRICE'],
                                  volume=row_log['VOLUME'], ask=is_ask)

        # A snapshot is recorded for every processed row, including rows whose
        # ACTION matched none of the branches above.

        # Spectrum snapshot: bids followed by asks. Only the first list has to
        # be copied; the second one is merely read by extend().
        values = spectrum.bids_normalized.copy()
        values.extend(spectrum.asks_normalized)
        list_spec.append([seccode, row_log['TIME'], values])

        # VWAP snapshot: bid VWAPs followed by ask VWAPs. list(...) already
        # builds a fresh list, so no extra copy is needed.
        vwaps = list(spectrum.VWAP_bids_normalized.values())
        vwaps.extend(spectrum.VWAP_asks_normalized.values())
        list_vwap.append([seccode, row_log['TIME'], vwaps])

    # Build both dataframes in one go from the collected records.
    df_spec = pd.DataFrame(list_spec, columns=col_names)
    df_vwap = pd.DataFrame(list_vwap, columns=col_names)

    elapsed = time.perf_counter() - start

    return order_books, df_spec, df_vwap, elapsed

## Single File

In [3]:
# Reading
# Directory that holds the order-log files. Set the ORDERLOG_DIR environment
# variable to point the notebook at another location without editing this cell;
# the default is the location the notebook was written against.
# Alternative data location used previously: r"D:\Data\MOEX-FX\2018-03\\"
WORKING_DIR = os.environ.get(
    'ORDERLOG_DIR',
    r'D:\Innopolis University\2021 Spring Semester\Data Mining\data-mining\\',
)

# os.path.join accepts a directory with or without a trailing separator.
orderlog_filename = os.path.join(WORKING_DIR, 'OrderLog20180330.txt')

order_log = read_orderlog(orderlog_filename)

# Preprocessing
# NOTE(review): the (iterable, predicate) argument order is the reverse of the
# builtin filter(function, iterable); presumably `filter` here is a helper
# brought in by `from utils import *` that keeps the rows whose SECCODE is in
# feature_seccodes -- confirm.
order_log = filter(order_log, lambda row: row['SECCODE'] in feature_seccodes)
order_log = preprocess_orderlog(order_log)

# Make orderbooks, spectrum, and vwaps
# exec_time is the fourth value returned by make_orderbooks (elapsed seconds).
order_books, df_spec, df_vwap, exec_time = make_orderbooks(order_log)