In [1]:
import gzip
import pandas as pd
import time
import os
from utils import *
from orderbook import OrderBook
from features import FeatureGenerator

In [2]:
def make_orderbooks(order_log: list):
    """
    Replay an order log and collect per-event features for every tracked seccode.

    One OrderBook and one FeatureGenerator ("spectrum") are created for each
    entry of ``feature_seccodes``; the rows of ``order_log`` are then applied
    to them in the order given.

    Parameters
    ----------
    order_log : list
        Order-log rows. Every row is indexed by 'SECCODE', 'TIME', 'BUYSELL',
        'PRICE', 'ACTION', 'VOLUME', 'ORDERNO' and 'NO'.

    Returns
    -------
    tuple
        ``(order_books, df_spec, df_vwap, df_bid_ask_spread, elapsed)`` where

        * ``order_books`` is a dict mapping seccode to its OrderBook,
        * ``df_spec`` has columns SECCODE, TIMESTAMP, BID_ASK (the normalized
          bids followed by the normalized asks),
        * ``df_vwap`` has columns SECCODE, TIMESTAMP, BID, ASK,
        * ``df_bid_ask_spread`` has columns SECCODE, TIMESTAMP, SPREAD,
        * ``elapsed`` is the ``time.time()`` difference, in seconds, measured
          around the replay loop and the DataFrame construction (building the
          books and feature generators is not timed).
    """
    # one order book per seccode
    order_books = {secc: OrderBook(secc) for secc in feature_seccodes}

    # one feature generator (spectrum) per seccode
    spectrums = {
        secc: FeatureGenerator(seccode=secc, px_step=instruments_info[secc]['PRICE_STEP'])
        for secc in feature_seccodes
    }

    started_at = time.time()

    spec_rows, vwap_rows, spread_rows = [], [], []

    for row_log in order_log:
        seccode = row_log['SECCODE']

        # rows whose TIME is at or past the instrument's SCHEDULE value are dropped
        if instruments_info[seccode]['SCHEDULE'] <= row_log['TIME']:
            continue

        is_ask = row_log['BUYSELL'] == 'S'
        order_book = order_books[seccode]
        spectrum = spectrums[seccode]
        price = row_log['PRICE']

        # CATCHING AGGRESSORS
        if price == 0:
            continue
        # NOTE(review): as written, a sell is skipped when priced at or above
        # best_bid and a buy when priced at or below best_ask -- confirm this is
        # the intended aggressor test. The `> 0` / `< int(1e19)` guards presumably
        # mean "this side of the book is not empty"; verify against FeatureGenerator.
        if is_ask:
            skip_row = spectrum.best_bid > 0 and price >= spectrum.best_bid
        else:
            skip_row = spectrum.best_ask < int(1e19) and price <= spectrum.best_ask
        if skip_row:
            continue

        # NOTE(review): this flag is only ever raised by the POST branch, so the
        # spectrum updates guarded by it in the REVOKE and MATCH branches never
        # run and only POST rows produce feature rows. Confirm that is intended.
        spectrum_updated = False
        action = row_log['ACTION']

        if action == Action.POST:
            # new order: book first, then the features derived from it
            order_book.add_entry(entry=row_log, ask=is_ask)
            spectrum.update_post(
                order_book=order_book,
                new_price=row_log['PRICE'],
                volume=row_log['VOLUME'],
                ask=is_ask,
            )
            spectrum_updated = True
        elif action == Action.REVOKE:
            # cancelled order
            order_book.revoke(
                orderno=row_log['ORDERNO'],
                volume=row_log['VOLUME'],
                ask=is_ask,
                row_numb=row_log['NO'],
            )
            if spectrum_updated:
                spectrum.update_revoke(
                    order_book=order_book,
                    new_price=row_log['PRICE'],
                    volume=row_log['VOLUME'],
                    ask=is_ask,
                )
        elif action == Action.MATCH:
            # executed order
            order_book.match(
                orderno=row_log['ORDERNO'],
                volume=row_log['VOLUME'],
                ask=is_ask,
                row_numb=row_log['NO'],
            )
            if spectrum_updated:
                spectrum.update_match(
                    order_book=order_book,
                    new_price=row_log['PRICE'],
                    volume=row_log['VOLUME'],
                    ask=is_ask,
                )

        if not spectrum_updated:
            continue

        # snapshot the spectrum for this row: bids first, asks appended after them
        bid_ask = spectrum.bids_normalized.copy()
        bid_ask.extend(spectrum.asks_normalized)
        spec_rows.append([row_log['SECCODE'], row_log['TIME'], bid_ask])

        # snapshot the VWAPs for this row
        vwap_rows.append([
            row_log['SECCODE'],
            row_log['TIME'],
            list(spectrum.VWAP_bids.values()),
            list(spectrum.VWAP_asks.values()),
        ])

        # snapshot the bid-ask spread for this row
        spread_rows.append([row_log['SECCODE'], row_log['TIME'], spectrum.bid_ask_spread])

    # turn the collected rows into DataFrames: spectrum, VWAPs, bid-ask spread
    df_spec = pd.DataFrame(spec_rows, columns=['SECCODE', 'TIMESTAMP', 'BID_ASK'])
    df_vwap = pd.DataFrame(vwap_rows, columns=['SECCODE', 'TIMESTAMP', 'BID', 'ASK'])
    df_bid_ask_spread = pd.DataFrame(spread_rows, columns=['SECCODE', 'TIMESTAMP', 'SPREAD'])

    elapsed = time.time() - started_at

    return order_books, df_spec, df_vwap, df_bid_ask_spread, elapsed

## Single Day

In [None]:
# Reading
# Folder holding the order log. Another location used with this notebook:
#   D:\Data\MOEX-FX\2018-03\
# A raw string cannot end in a single backslash, hence the doubled one.
WORKING_DIR = r'D:\Innopolis University\2021 Spring Semester\Data Mining\data-mining\\'

orderlog_filename = WORKING_DIR + 'OrderLog20180330.txt'

order_log = read_orderlog(orderlog_filename)

# Preprocessing: keep only the seccodes we build features for
# NOTE(review): `filter` is called as (rows, predicate), the reverse of the
# builtin's argument order -- presumably a helper pulled in by
# `from utils import *` shadows the builtin; confirm.
order_log = filter(order_log, lambda row: row['SECCODE'] in feature_seccodes)
order_log = preprocess_orderlog(order_log)

# Make orderbooks, spectrum, VWAPs and bid-ask spread (exec_time is in seconds)
order_books, df_spec, df_vwap, df_spread, exec_time = make_orderbooks(order_log)

In [None]:
def _ensure_dir(path: str) -> None:
    """Create ``path`` together with any missing parents, or report that it is already there."""
    if os.path.isdir(path):
        print(f"{path} dir already exists. It may be overwritten.")
    else:
        # Genuine failures (permissions, a file in the way, ...) propagate
        # instead of being reported as "already exists".
        os.makedirs(path)


def generate_spectrums(moex_dir: str, months: list, save_dir: str) -> None:
    """
    Build and save spectrum, VWAP and bid-ask-spread CSVs for every order log
    found under the given months.

    For each ``month`` the folder ``<moex_dir>/<month>`` is scanned; every file
    whose name contains 'orderlog' (case-insensitive) is read, reduced to the
    rows of ``feature_seccodes``, preprocessed and passed to ``make_orderbooks``.
    The three resulting DataFrames are split per seccode, sorted by TIMESTAMP
    and written to ``<save_dir>/spectrums/<month>/``.

    Parameters
    ----------
    moex_dir : str
        Folder containing one sub-folder per month.
    months : list
        Names of the month sub-folders to process, in order.
    save_dir : str
        Folder under which the ``spectrums`` output tree is created
        (missing folders are created as needed).

    Raises
    ------
    OSError
        If an output folder cannot be created or a month folder cannot be listed.
    """
    spectrums_dir = os.path.join(save_dir, 'spectrums')
    _ensure_dir(spectrums_dir)

    for month in months:
        working_dir = os.path.join(moex_dir, month)
        save_to = os.path.join(spectrums_dir, month)
        _ensure_dir(save_to)

        print(f"processing {month}")
        for filename in os.listdir(working_dir):
            # only order logs are of interest
            if 'orderlog' not in filename.lower():
                continue
            print(f"processing {filename}")

            # reading
            order_log = read_orderlog(os.path.join(working_dir, filename))
            # preprocessing
            # NOTE(review): `filter` takes (rows, predicate) here, the reverse of
            # the builtin -- presumably a helper from `utils`; confirm.
            order_log = filter(order_log, lambda row: row['SECCODE'] in feature_seccodes)
            order_log = preprocess_orderlog(order_log)

            # make_orderbooks builds its own order books and feature generators,
            # so nothing has to be prepared for it here
            _, df_spec, df_vwap, df_spread, _ = make_orderbooks(order_log)
            order_log = None  # let the raw rows be reclaimed before writing

            # lower-cased file name without its extension
            stem = os.path.splitext(filename.lower())[0]
            # output-name tag -> frame; the spectrum files carry the 'tradelog' tag
            outputs = (
                ('tradelog', df_spec),
                ('vwap', df_vwap),
                ('spread', df_spread),
            )
            for secc in feature_seccodes:
                for tag, frame in outputs:
                    save_name = stem.replace('orderlog', tag) + '_' + secc + '.csv'
                    per_secc = frame[frame['SECCODE'] == secc].sort_values(by=['TIMESTAMP'])
                    per_secc.to_csv(os.path.join(save_to, save_name))

            # drop the day's frames before the next file is loaded
            outputs = per_secc = frame = None
            df_spec = df_vwap = df_spread = None

        print('\n')

## All Files

In [None]:
# Each month is listed once: repeating a month would re-process all of its
# order logs only to overwrite the files it has just written.
# NOTE(review): if a third month was intended (e.g. '2018-05'), add it to the list.
generate_spectrums(moex_dir=r"D:\Downloads\data", months=['2018-03', '2018-04'], save_dir='.')