In [1]:
import gzip
import pandas as pd
import time
import os
from utils import *
from orderbook import OrderBook
from features import FeatureGenerator

In [2]:
def make_orderbooks(order_log: list):
    """
    Function for making orderbooks for each spectrum
    
    Return orderbooks, dataframe with spectrums and dataframe with VWAPs
    """
    # creating order book for each seccode
    order_books = dict()
    for secc in SECCODES:
        order_books[secc] = OrderBook(secc)

    # creating spectrum for each seccode
    spectrums = dict()
    for secc in SECCODES:
        spectrums[secc] = FeatureGenerator(seccode=secc)

    start = time.time()

    list_spec = []
    list_vwap = []
    col_names = ['SECCODE', 'TIMESTAMP', 'BID_ASK']

    for row_log in order_log:
        if instruments_info[row_log['SECCODE']]['SCHEDULE'] <= row_log['TIME']:
            continue

        is_ask = row_log['BUYSELL'] == 'S'

        order_book = order_books[row_log['SECCODE']]
        spectrum = spectrums[row_log['SECCODE']]

        # handle post
        if row_log['ACTION'] == Action.POST:
            order_book.add_entry(entry=row_log, 
                                 ask=is_ask)
            spectrum.update_post(order_book=order_book, new_price=row_log['PRICE'], 
                                 volume=row_log['VOLUME'], ask=is_ask)

        # handle revoke
        elif row_log['ACTION'] == Action.REVOKE:
            order_book.revoke(orderno=row_log['ORDERNO'], volume=row_log['VOLUME'], 
                              ask=is_ask, row_numb=row_log['NO'])
            spectrum.update_revoke(order_book=order_book, new_price=row_log['PRICE'], 
                                 volume=row_log['VOLUME'], ask=is_ask)

        elif row_log['ACTION'] == Action.MATCH:
            order_book.match(orderno=row_log['ORDERNO'], volume=row_log['VOLUME'], 
                              ask=is_ask, row_numb=row_log['NO'])
            spectrum.update_match(order_book=order_book, new_price=row_log['PRICE'], 
                                 volume=row_log['VOLUME'], ask=is_ask)

        print(order_book.bids, '-----', order_book.asks, '=====', sep='\n')
        # для каждой новой row считаем спектрум добавляем в df
        values = spectrum.bids_normalized.copy()
        values.extend(spectrum.asks_normalized.copy())
        d_values = [ row_log['SECCODE'], row_log['TIME'], values ]
        list_spec.append(d_values)
        
        # для каждой новой row считаем VWAPs и добавляем в df
        vwaps = list(spectrum.VWAP_bids.values()).copy()
        vwaps.extend(list(spectrum.VWAP_asks.values()).copy())
        d_vwaps = [ row_log['SECCODE'], row_log['TIME'], vwaps ]
        list_vwap.append(d_vwaps)

        # Add bid-ask spread
        bid_ask_spread = spectrum.bid_ask_spread
    
    # Saving spectrum
    df_spec = pd.DataFrame(list_spec, columns=col_names)
    # Saving VWAPs
    df_vwap = pd.DataFrame(list_vwap, columns=col_names)

    end = time.time()

    return order_books, df_spec, df_vwap, bid_ask_spread, end - start

In [3]:
# Reading
WORKING_DIR = r"D:\Data\MOEX-FX\2018-03\\"
WORKING_DIR = r'D:\Innopolis University\2021 Spring Semester\Data Mining\data-mining\\'

orderlog_filename = WORKING_DIR + 'OrderLog20180330.txt'

order_log = read_orderlog(orderlog_filename)[:1000]

# Preprocessing
order_log = preprocess_orderlog(order_log)

# Make orderbooks, spectrum, and vwaps
order_books, df_spec, df_vwap, exec_time = make_orderbooks(order_log)
print(exec_time)

{}
-----
{1: {'ORDERNO': 1, 'SECCODE': 'USD000UTSTOM', 'PRICE': 57.61, 'VOLUME': 1000000}}
=====
{}
-----
{2: {'ORDERNO': 2, 'SECCODE': 'USD000000TOD', 'PRICE': 57.595, 'VOLUME': 505000}}
=====
{}
-----
{3: {'ORDERNO': 3, 'SECCODE': 'EUR_RUB__TOM', 'PRICE': 70.975, 'VOLUME': 808000}}
=====
{}
-----
{3: {'ORDERNO': 3, 'SECCODE': 'EUR_RUB__TOM', 'PRICE': 70.975, 'VOLUME': 808000}, 4: {'ORDERNO': 4, 'SECCODE': 'EUR_RUB__TOM', 'PRICE': 70.6525, 'VOLUME': 15000}}
=====
{6: {'ORDERNO': 6, 'SECCODE': 'EUR_RUB__TOM', 'PRICE': 70.47, 'VOLUME': 1000000}}
-----
{3: {'ORDERNO': 3, 'SECCODE': 'EUR_RUB__TOM', 'PRICE': 70.975, 'VOLUME': 808000}, 4: {'ORDERNO': 4, 'SECCODE': 'EUR_RUB__TOM', 'PRICE': 70.6525, 'VOLUME': 15000}}
=====
{7: {'ORDERNO': 7, 'SECCODE': 'USD000UTSTOM', 'PRICE': 57.18, 'VOLUME': 40000}}
-----
{1: {'ORDERNO': 1, 'SECCODE': 'USD000UTSTOM', 'PRICE': 57.61, 'VOLUME': 1000000}}
=====
{7: {'ORDERNO': 7, 'SECCODE': 'USD000UTSTOM', 'PRICE': 57.18, 'VOLUME': 40000}}
-----
{1: {'ORDERNO'

{49: {'ORDERNO': 49, 'SECCODE': 'USD000000TOD', 'PRICE': 57.805, 'VOLUME': 1000}, 77: {'ORDERNO': 77, 'SECCODE': 'USD000000TOD', 'PRICE': 57.88, 'VOLUME': 1000000}, 154: {'ORDERNO': 154, 'SECCODE': 'USD000000TOD', 'PRICE': 57.445, 'VOLUME': 16000}, 171: {'ORDERNO': 171, 'SECCODE': 'USD000000TOD', 'PRICE': 57.58, 'VOLUME': 20000}, 172: {'ORDERNO': 172, 'SECCODE': 'USD000000TOD', 'PRICE': 58.5, 'VOLUME': 1000}, 177: {'ORDERNO': 177, 'SECCODE': 'USD000000TOD', 'PRICE': 57.85, 'VOLUME': 20000}, 179: {'ORDERNO': 179, 'SECCODE': 'USD000000TOD', 'PRICE': 57.48, 'VOLUME': 60000}, 184: {'ORDERNO': 184, 'SECCODE': 'USD000000TOD', 'PRICE': 57.845, 'VOLUME': 505000}, 185: {'ORDERNO': 185, 'SECCODE': 'USD000000TOD', 'PRICE': 57.8425, 'VOLUME': 100000}, 188: {'ORDERNO': 188, 'SECCODE': 'USD000000TOD', 'PRICE': 57.44, 'VOLUME': 50000}}
=====
{54: {'ORDERNO': 54, 'SECCODE': 'USD000000TOD', 'PRICE': 56.6575, 'VOLUME': 1000}, 70: {'ORDERNO': 70, 'SECCODE': 'USD000000TOD', 'PRICE': 57.01, 'VOLUME': 10000

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [15]:
df_vwap.iloc[:50, 2]

0     [0, 0, 0, 0, 0, 0, 57.61, 57.61, 57.61, 57.61,...
1     [0, 0, 0, 0, 0, 0, 57.595, 57.595, 57.595, 57....
2     [0, 0, 0, 0, 0, 0, 70.975, 70.975, 70.975, 70....
3     [0, 0, 0, 0, 0, 0, 70.975, 70.975, 70.975, 70....
4     [70.47, 70.47, 70.47, 70.47, 70.47, 70.47, 70....
5     [57.18, 57.18, 57.18, 57.18, 57.18, 57.18, 57....
6     [57.18, 57.18, 57.18, 57.18, 57.18, 57.18, 57....
7     [57.18, 57.18, 57.18, 57.18, 57.18, 57.18, 57....
8     [57.18, 57.18, 57.18, 57.18, 57.18, 57.18, 57....
9     [57.18, 57.18, 57.18, 57.18, 57.18, 57.18, 57....
10    [57.18, 57.18, 57.18, 57.18, 57.18, 57.18, 57....
11    [57.18, 57.18, 57.18, 57.18, 57.18, 57.18, 57....
12    [57.18, 57.17586776859504, 57.17586776859504, ...
13    [57.18, 57.17689440993789, 57.17689440993789, ...
14    [57.18, 57.17689440993789, 57.17689440993789, ...
15    [70.47, 70.47, 70.47, 70.47, 70.4661012715713,...
16    [57.18, 57.1775, 57.17751243781095, 57.1775124...
17    [70.47, 70.47, 70.47, 70.47, 70.4661012715