In [1]:
import csv
import gzip
import math
import numpy as np
import pandas as pd
import time

In [2]:
# Logs

order_log = []
trade_log = []

class Action:
    REVOKE = 0
    POST = 1
    MATCH = 2

SECCODES = ['USD000000TOD', 'USD000UTSTOM', 'EUR_RUB__TOD', 'EUR_RUB__TOM', 'EURUSD000TOD', 'EURUSD000TOM']

instruments_info = {'USD000000TOD': {'SCHEDULE': 174500000000, 'PRICE_STEP': 0.0025, 'INDEX':0},
                    'USD000UTSTOM': {'SCHEDULE': 235000000000, 'PRICE_STEP': 0.0025, 'INDEX':1},
                    'EUR_RUB__TOD': {'SCHEDULE': 150000000000, 'PRICE_STEP': 0.0025, 'INDEX':2},
                    'EUR_RUB__TOM': {'SCHEDULE': 235000000000, 'PRICE_STEP': 0.0025, 'INDEX':3},
                    'EURUSD000TOM': {'SCHEDULE': 235000000000, 'PRICE_STEP': 0.00001, 'INDEX':4},
                    'EURUSD000TOD': {'SCHEDULE': 150000000000, 'PRICE_STEP': 0.00001, 'INDEX':5}}

In [3]:
# Reading
WORKING_DIR = r"D:\Data\MOEX-FX\2018-03\\"

orderlog_filename = WORKING_DIR + 'OrderLog20180301.txt'
tradelog_filename = WORKING_DIR + 'TradeLog20180301.txt'



# # OrderLog = "OrderLog20180301"
# with gzip.open(WORKING_DIR+'OrderLog20180302.txt.gz', 'rb') as f:
#     reader = csv.DictReader(f)
#     for row in reader:
#         order_log.append(row)

# with gzip.open(WORKING_DIR+'TradeLog20180302.txt.gz', 'rb') as f:
#     reader = csv.DictReader(f)
#     for row in reader:
#         trade_log.append(row)


# Order log
reader = csv.DictReader(open(orderlog_filename))
for row in reader:
    order_log.append(row)

# Trade log
# reader = csv.DictReader(open(tradelog_filename))
# for row in reader:
#     trade_log.append(row)

In [4]:
order_log[:3]

[{'NO': '1',
  'SECCODE': 'EUR_RUB__TOD',
  'BUYSELL': 'B',
  'TIME': '100000050299',
  'ORDERNO': '1',
  'ACTION': '1',
  'PRICE': '68.61',
  'VOLUME': '1000000',
  'TRADENO': '',
  'TRADEPRICE': ''},
 {'NO': '2',
  'SECCODE': 'USD000000TOD',
  'BUYSELL': 'S',
  'TIME': '100000050316',
  'ORDERNO': '2',
  'ACTION': '1',
  'PRICE': '56.6',
  'VOLUME': '1000000',
  'TRADENO': '',
  'TRADEPRICE': ''},
 {'NO': '3',
  'SECCODE': 'EUR_RUB__TOD',
  'BUYSELL': 'S',
  'TIME': '100000050325',
  'ORDERNO': '3',
  'ACTION': '1',
  'PRICE': '69.065',
  'VOLUME': '1000000',
  'TRADENO': '',
  'TRADEPRICE': ''}]

In [5]:
len(order_log)

6619412

In [6]:
# trade_log[:3]

In [7]:
def reformat_orderlog(log):
    """
    Change the column types (in place)
    """
    types_dict = {
        'NO'        : int,
        'SECCODE'   : lambda x: x,
        'BUYSELL'   : lambda x: x,
        'TIME'      : int,
        'ORDERNO'   : int,
        'ACTION'    : int,
        'PRICE'     : float,
        'VOLUME'    : int,
        'TRADENO'   : lambda x: float(x) if x != '' else np.nan,
        'TRADEPRICE': lambda x: float(x) if x != '' else np.nan
    }
    
    for row in log:
        for col in row:
            row[col] = types_dict[col](row[col])


def reformat_tradelog(log):
    """
    Change the column types (in place)
    """
    types_dict = {
        'TRADENO'    : int,
        'SECCODE'    : lambda x: x,
        'TIME'       : int,
        'BUYORDERNO' : int,
        'SELLORDERNO': int,
        'PRICE'      : float,
        'VOLUME'     : int
    }
    
    for row in log:
        for col in row:
            row[col] = types_dict[col](row[col])


# Reformat order log
start = time.time()

reformat_orderlog(order_log)
# reformat_tradelog(trade_log)

end = time.time()
print(end - start)

17.76948857307434


In [8]:
order_log[:3]

[{'NO': 1,
  'SECCODE': 'EUR_RUB__TOD',
  'BUYSELL': 'B',
  'TIME': 100000050299,
  'ORDERNO': 1,
  'ACTION': 1,
  'PRICE': 68.61,
  'VOLUME': 1000000,
  'TRADENO': nan,
  'TRADEPRICE': nan},
 {'NO': 2,
  'SECCODE': 'USD000000TOD',
  'BUYSELL': 'S',
  'TIME': 100000050316,
  'ORDERNO': 2,
  'ACTION': 1,
  'PRICE': 56.6,
  'VOLUME': 1000000,
  'TRADENO': nan,
  'TRADEPRICE': nan},
 {'NO': 3,
  'SECCODE': 'EUR_RUB__TOD',
  'BUYSELL': 'S',
  'TIME': 100000050325,
  'ORDERNO': 3,
  'ACTION': 1,
  'PRICE': 69.065,
  'VOLUME': 1000000,
  'TRADENO': nan,
  'TRADEPRICE': nan}]

In [9]:
# trade_log[:3]

In [10]:
def filter(df, predicate):
    """
    Filter out rows that satisfy a predicate
    """
    if not df:
        return []
    
    return [ row for row in df if predicate(row) ]


# Filtering orders
order_log = filter(order_log, lambda row:
                      row['SECCODE'] in SECCODES and
                      row['TIME'] < 2350 * 1E8)

# dic_trade = filter(trade_log, lambda row:
#                       row['SECCODE'] in SECCODES);


# Sort actions by the following order: post, match, revoke
prep_dic = { Action.POST: 0, Action.MATCH: 1, Action.REVOKE: 2 }
unprep_dic = { v: k for k, v in prep_dic.items() } # Inverse `prep_dic`


def apply(df, f):
    """
    Apply function to df
    """
    return [ f(row) for row in df ]


def sort(df, cols):
    """
    Sort df by columns
    """ 
    if not df:
        return []
    
    return sorted(df, key = lambda row: [row[col] for col in cols])


def harvard(row):
    """
    this function was written by 2 harvard professors 
    """
    for col in row:
        if col == 'ACTION':
            row[col] = prep_dic[row[col]]
            
    return row


def harvard_inverse(row):
    """
    the same as harvard()
    """
    for col in row:
        if col == 'ACTION':
            row[col] = unprep_dic[row[col]]
            
    return row


# harvard professor 2
order_log = apply(order_log, harvard)
order_log = sort(order_log, ['TIME', 'ACTION'])
order_log = apply(order_log, harvard_inverse)

In [11]:
order_log[:3]

[{'NO': 1,
  'SECCODE': 'EUR_RUB__TOD',
  'BUYSELL': 'B',
  'TIME': 100000050299,
  'ORDERNO': 1,
  'ACTION': 1,
  'PRICE': 68.61,
  'VOLUME': 1000000,
  'TRADENO': nan,
  'TRADEPRICE': nan},
 {'NO': 2,
  'SECCODE': 'USD000000TOD',
  'BUYSELL': 'S',
  'TIME': 100000050316,
  'ORDERNO': 2,
  'ACTION': 1,
  'PRICE': 56.6,
  'VOLUME': 1000000,
  'TRADENO': nan,
  'TRADEPRICE': nan},
 {'NO': 3,
  'SECCODE': 'EUR_RUB__TOD',
  'BUYSELL': 'S',
  'TIME': 100000050325,
  'ORDERNO': 3,
  'ACTION': 1,
  'PRICE': 69.065,
  'VOLUME': 1000000,
  'TRADENO': nan,
  'TRADEPRICE': nan}]

In [12]:
class OrderBook:
    total_mistakes = 0
    
    def __init__(self, seccode: str):
        self.seccode = seccode
        self.asks = dict()
        self.bids = dict()
        
    def __repr__(self):
        return self.seccode
    
    @staticmethod
    def print_error(error: str, row_numb: int) -> None:
        """
        error: description of the error
        row_numb: number of the entry in OrderLog which resulted the error
        
        prints the error to screen 
        """
        print('-' * 40)
        print(f'in row: {row_numb}')
        print(f"ERROR: {error}")
        print('-' * 40)
        print()
        OrderBook.total_mistakes += 1
        
    def order_exists(self, orderno: int, ask: bool) -> bool:
        """
        checks if the order with given orderno exists
        """
        
        dic = self.asks if ask else self.bids
        
        return orderno in dic
        
    
    def add_entry(self, entry, ask: bool):
        """
        adds an entry to ask or bid side of the OB
        """
        # what will be kept in the order book
        columns = ['ORDERNO', 'SECCODE', 'PRICE', 'VOLUME']
        
        key = entry['ORDERNO']
        values = dict()
        for col in columns:
            values[col] = entry[col]
            
        dic = self.asks if ask else self.bids
        
        dic[key] = values
        
        
    def revoke(self, orderno: int, volume:int, ask:bool, row_numb: int):
        """
        revokes given volume from order with the give orderno
        """
        
        if 52067 == orderno:
            print("OUTSIDE")
        
        if self.order_exists(orderno=orderno, ask=ask):
            dic = self.asks if ask else self.bids
            # acquiring the corresponding order
            order = dic[orderno]
            
            # check if revoking volume is not greater than the current one
            if order['VOLUME'] >= volume:
                # removing the order
                if order['VOLUME'] == volume:
                    if 52067 == orderno:
                        print("PERFECT MATCH")
                    del dic[orderno]
                # reducing volume of the order
                else:
                    order['VOLUME'] -= volume
            else:
                if 52067 == orderno:
                    print("INSIDE ERROR")
                OrderBook.print_error(error="Cannot revoke more than there is",
                           row_numb = row_numb)
                # removing the order to avoid negative numbers
                del dic[orderno]
            
        else:
            OrderBook.print_error(error=f"Record with the given ORDERNO={orderno} doesn't exist",
                       row_numb = row_numb)
        
    
    def match(self, orderno: int, volume: int, ask: bool, row_numb: int):
        """
        handling match
        """
        # discuss with leva and ruphina
        # for one order match is the same as revoke
        self.revoke(orderno=orderno, volume=volume, ask=ask, row_numb=row_numb)
        
        
        

In [16]:
class Spectrum:
    def __init__(self, seccode):
        self.seccode = seccode
        self.best_ask = 10000000000000000000
        self.best_bid = -1
        self.bids = [0] * 10
        self.asks = [0] * 10
        self.bids_normalized = [0] * 10
        self.asks_normalized = [0] * 10
        
    @staticmethod
    def distance_idx(dif, step):
        return math.floor(dif/(step*5))
    
    def normalize(self):
        """
        calculates normalized version of spectrums
        """
        
        total_bids = sum(self.bids)
        total_asks = sum(self.asks)
        
        if total_bids > 0:
            self.bids_normalized = [bid / total_bids for bid in self.bids]
        
        if total_asks > 0:
            self.asks_normalized = [ask / total_asks for ask in self.asks]
            
            
    
    def change_bids(self, price: float, volume: int, step: float, add: bool):
        dif = self.best_bid - price
        
        if dif <= step * 49:
            if add:
                self.bids[9 - Spectrum.distance_idx(dif, step)] += volume
            else:
                self.bids[9 - Spectrum.distance_idx(dif, step)] = \
                    max(0, self.bids[9 - Spectrum.distance_idx(dif, step)] - volume)
            
    def change_asks(self, price: float, volume: int, step:float, add: bool):
        dif = price - self.best_ask
        
        if dif <= step * 49:
            if add:
                self.asks[Spectrum.distance_idx(dif, step)] += volume
            else:
                self.asks[Spectrum.distance_idx(dif, step)] = \
                    max(0, self.asks[Spectrum.distance_idx(dif, step)] - volume)
        
    def update_post(self, order_book: OrderBook, new_price: float, volume: int, ask: bool):
        step = instruments_info[self.seccode]['PRICE_STEP']
            
        if not ask:
            if new_price > self.best_bid:
                
                self.best_bid = new_price
                self.bids = [0] * 10
                
                for bid in order_book.bids.values():
                    self.change_bids(price=bid['PRICE'], volume=bid['VOLUME'], step=step, add=True)
                
                
            elif new_price == self.best_bid:
                self.bids[9] += volume
                    
            else:
                self.change_bids(price=new_price, volume=volume, step=step, add=True)
        
        else:
            if new_price < self.best_ask:
                
                self.best_ask = new_price
                self.asks = [0] * 10
                
                for ask in order_book.asks.values():
                    self.change_asks(price=ask['PRICE'], volume=ask['VOLUME'], step=step, add=True)
            
            elif new_price == self.best_ask:
                self.asks[0] += volume
            
            else:
                self.change_asks(price=new_price, volume=volume, step=step, add=True)
                
        
        self.normalize()
                
    
    def update_revoke(self, order_book: OrderBook, new_price: float, volume: int, ask: bool):
        step = instruments_info[self.seccode]['PRICE_STEP']
        
        if not ask:
            if new_price == self.best_bid:
                if volume >= self.bids[9]:
                    
                    self.best_bid = -1
                    
                    for bid in order_book.bids.values():
                        if bid['PRICE'] > self.best_bid:
                            self.best_bid = bid['PRICE']
                            
                    self.bids = [0] * 10
                    
                    for bid in order_book.bids.values():
                        self.change_bids(price=bid['PRICE'], volume=bid['VOLUME'], step=step, add=True)
                else:
                    self.bids[9] -= volume
                
            elif new_price < self.best_bid:
                self.change_bids(price=new_price, volume=volume, step=step, add=False)
                
        else:
            if new_price == self.best_ask:
                if volume >= self.asks[0]:
                    
                    self.best_ask = 10000000000000000000
                    
                    for ask in order_book.asks.values():
                        if ask['PRICE'] < self.best_ask:
                            self.best_ask = ask['PRICE']
                    
                    self.asks = [0] * 10
                    
                    for ask in order_book.asks.values():
                        self.change_asks(price=ask['PRICE'], volume=ask['VOLUME'], step=step, add=True)
                        
                else:
                    self.asks[0] -= volume
                
            elif new_price > self.best_ask:
                self.change_asks(price=new_price, volume=volume, step=step, add=False)
                
        self.normalize()
                
                    
                    
    def update_match(self, order_book: OrderBook, new_price: float, volume: int, ask: bool):
        
        self.update_revoke(order_book, new_price, volume, ask)
                
                    
        
        

In [17]:
# creating order book for each seccode
order_books = dict()
for secc in SECCODES:
    order_books[secc] = OrderBook(secc)

# creating spectrum for each seccode
spectrums = dict()
for secc in SECCODES:
    spectrums[secc] = Spectrum(seccode=secc)

start = time.time()


k = 0
list_spec = []
col_names = ['SECCODE', 'TIMESTAMP', 'BID_ASK']


for row_log in order_log:
    is_ask = row_log['BUYSELL'] == 'S'
    
    order_book = order_books[row_log['SECCODE']]
    spectrum = spectrums[row_log['SECCODE']]
    
    # handle post
    if row_log['ACTION'] == Action.POST:
        order_book.add_entry(entry=row_log, 
                             ask=is_ask)
        spectrum.update_post(order_book=order_book, new_price=row_log['PRICE'], 
                             volume=row_log['VOLUME'], ask=is_ask)
    
    # handle revoke
    elif row_log['ACTION'] == Action.REVOKE:
        order_book.revoke(orderno=row_log['ORDERNO'], volume=row_log['VOLUME'], 
                          ask=is_ask, row_numb=row_log['NO'])
        spectrum.update_revoke(order_book=order_book, new_price=row_log['PRICE'], 
                             volume=row_log['VOLUME'], ask=is_ask)
    
    elif row_log['ACTION'] == Action.MATCH:
        order_book.match(orderno=row_log['ORDERNO'], volume=row_log['VOLUME'], 
                          ask=is_ask, row_numb=row_log['NO'])
        spectrum.update_match(order_book=order_book, new_price=row_log['PRICE'], 
                             volume=row_log['VOLUME'], ask=is_ask)
        
    
        
    
    # для каждой новой row считаем спектрум и добавляем в файлик
    values = spectrum.bids_normalized.copy()
    values.extend(spectrum.asks_normalized.copy())
    d = [ row_log['SECCODE'], row_log['TIME'], values ]
    list_spec.append(d)
#     k += 1
#     if k == 1000:
#         df_spec = pd.DataFrame(list_spec, columns=col_names)
#         df_spec.to_csv('spectrums-'+str(k//1000)+'.csv')

# TODO: do spectrum opearation only if no error during order book update

# saving spectrum
df_spec = pd.DataFrame(list_spec, columns=col_names)
df_spec.to_csv('spectrum.csv')

end = time.time()

print(end - start)

----------------------------------------
in row: 103309
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 103344
ERROR: Cannot revoke more than there is
----------------------------------------

OUTSIDE
PERFECT MATCH
----------------------------------------
in row: 103458
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 105116
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 106942
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 106943
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 107564
ERROR: Cannot revoke more than there is
----------------------------------------

------------

----------------------------------------
in row: 284911
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 285331
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 286419
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 289766
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 297237
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 298931
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 311772
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------

----------------------------------------
in row: 694812
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 705991
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 705992
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 705998
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 706007
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 706014
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 706020
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------

----------------------------------------
in row: 1104638
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1105624
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1105774
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1118051
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1120676
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1120693
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1120708
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

----------------------------------------
in row: 1553012
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1558569
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1566263
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1571145
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1573729
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1573901
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1577661
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

----------------------------------------
in row: 1888555
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1888594
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1888676
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1888688
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1888809
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1888859
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 1888922
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

----------------------------------------
in row: 2006901
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2007120
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2008162
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2008351
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2011997
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2014943
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2023539
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

----------------------------------------
in row: 2247366
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2276464
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2327814
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2327834
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2335511
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2367058
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2367541
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

----------------------------------------
in row: 2854812
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2854823
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2864236
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2864914
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2864916
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2870303
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 2873651
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

----------------------------------------
in row: 3172658
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 3172698
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 3172718
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 3174171
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 3174252
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 3174912
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 3174938
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

----------------------------------------
in row: 3622053
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 3622449
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 3623225
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 3623415
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 3656976
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 3719410
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 3719458
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

----------------------------------------
in row: 4014701
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4017932
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4018005
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4025686
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4025721
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4034315
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4038632
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

----------------------------------------
in row: 4387653
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4387858
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4388029
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4392179
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4392180
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4392194
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4392331
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

----------------------------------------
in row: 4585961
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4586103
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4586337
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4586790
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4588363
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4588942
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4594046
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

----------------------------------------
in row: 4714150
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4714163
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4718484
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4718545
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4718650
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4718711
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 4720489
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

----------------------------------------
in row: 5121564
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 5124252
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 5258384
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 5259356
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 5272617
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 5272784
ERROR: Cannot revoke more than there is
----------------------------------------

----------------------------------------
in row: 5272935
ERROR: Cannot revoke more than there is
----------------------------------------

---------------------------

## Kolmogorov-Smirnov Test

`Notice` that code uses the file produced on the previous step

In [60]:
df_spec = pd.read_csv('spectrum.csv')

df_spec.head()

Unnamed: 0.1,Unnamed: 0,SECCODE,TIMESTAMP,BID_ASK
0,0,EUR_RUB__TOD,100000050299,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,1,USD000000TOD,100000050316,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 0.0, 0.0, ..."
2,2,EUR_RUB__TOD,100000050325,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,3,EUR_RUB__TOM,100000050353,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,4,USD000000TOD,100000050361,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [61]:
# separating spectrums by their seccodes
spectrums_by_seccode = dict()
for secc in SECCODES:
    spectrums_by_seccode[secc] = df_spec[df_spec['SECCODE'] == secc].copy()
    spectrums_by_seccode[secc].sort_values(by=['TIMESTAMP'], inplace=True)

# release memory
df_spec = None

In [63]:
# class for storing time and count means of spectrums
class PdfSpec:
    def __init__(self, seccode: str):
        seccode = seccode
        self.bids_count = [0] * 10
        self.bids_time = [0] * 10
        self.asks_count = [0] * 10
        self.asks_time = [0] * 10
        self.n = 0
    
    
    def calc_avgs(self, df_spectrum: pd.DataFrame):
        """
        calculates averages by count and by time
        pd: DataFrame with the following columns ['NO', 'SECCODE', 'TIMESTAMP', 'BID_ASK']
        """
        prev_time = df_spectrum.iloc[0]['TIMESTAMP']
        total_count = len(df_spectrum)
        total_time = 0
        
        for _, _, timestamp, spectrum_str in df_spectrum.values:
            spectrum = [ float(el) for el in spectrum_str[1:-1].split(',')]
            # calculating avg by count
            for i in range(len(self.bids_count)):
                self.bids_count[i] += spectrum[i]
                self.asks_count[i] += spectrum[len(self.bids_count) + i]
            
            # calculating avg by time
            w = timestamp - prev_time
            for i in range(len(self.bids_time)):
                self.bids_time[i] += (spectrum[i] * w)
                self.asks_time[i] += (spectrum[len(self.bids_time) + i] * w)
            total_time += w
        
        self.bids_count = [bid / total_count for bid in self.bids_count]
        self.asks_count = [ask / total_count for ask in self.asks_count]
        
        self.bids_time = [bid / total_time for bid in self.bids_time]
        self.asks_time = [ask / total_time for ask in self.asks_time]
    
    @staticmethod
    def calc_cdf(lst):
        """
        calculates CDF
        
        lst: a list of numbers (pdf)
        """
        
        s = 0
        cdf = [0] * len(lst)
        for index, numb in enumerate(lst):
            cdf[index] = s + numb
            s += numb
        
        return cdf
            
        

        
start = time.time()               
            
# pdfs of spectrums separated by seccode
pdfs_spec = dict()
for secc in SECCODES:
    pdfs_spec[secc] = PdfSpec(seccode=secc)
    
# calculating averages
for secc in SECCODES:
    pdfs_spec[secc].calc_avgs(spectrums_by_seccode[secc])
    # just to make sure that sums are approximately equtal to 1
    print(sum(pdfs_spec[secc].bids_count), sum(pdfs_spec[secc].asks_time))

# release memory
spectrums_by_seccode = None

print(time.time() - start)

0.9999988863410828 1.0000000000000153
0.9999993639653042 1.0000000000000007
0.9999999999999947 1.0000000000000089
1.0000000000001537 0.9999999999999946
1.0000000000000004 0.9999999999999998
0.9999986448433577 0.9999999999999991
352.8968765735626


In [71]:
from scipy.stats import kstest

for secc in SECCODES:
    print(f"Performing KS-test for {secc}")
    pval = kstest(PdfSpec.calc_cdf(pdfs_spec[secc].bids_count),
          PdfSpec.calc_cdf(pdfs_spec[secc].asks_count)).pvalue
    print(f"\tbid count vs ask count - pval = {pval}")
    pval = kstest(PdfSpec.calc_cdf(pdfs_spec[secc].bids_time),
          PdfSpec.calc_cdf(pdfs_spec[secc].asks_time)).pvalue
    print(f"\tbid time vs ask time - pval = {pval}")
    pval = kstest(PdfSpec.calc_cdf(pdfs_spec[secc].bids_time),
          PdfSpec.calc_cdf(pdfs_spec[secc].bids_count)).pvalue
    print(f"\tbid time vs bid count - pval = {pval}")
    pval = kstest(PdfSpec.calc_cdf(pdfs_spec[secc].asks_time),
          PdfSpec.calc_cdf(pdfs_spec[secc].asks_count)).pvalue
    print(f"\task time vs ask count - pval = {pval}")
    

Performing KS-test for USD000000TOD
	bid count vs ask count - pval = 0.7869297884777761
	bid time vs ask time - pval = 0.7869297884777761
	bid time vs bid count - pval = 1.0
	ask time vs ask count - pval = 1.0
Performing KS-test for USD000UTSTOM
	bid count vs ask count - pval = 0.9944575548290717
	bid time vs ask time - pval = 1.0
	bid time vs bid count - pval = 1.0
	ask time vs ask count - pval = 0.9944575548290717
Performing KS-test for EUR_RUB__TOD
	bid count vs ask count - pval = 1.0
	bid time vs ask time - pval = 0.9944575548290717
	bid time vs bid count - pval = 1.0
	ask time vs ask count - pval = 1.0
Performing KS-test for EUR_RUB__TOM
	bid count vs ask count - pval = 0.9944575548290717
	bid time vs ask time - pval = 0.7869297884777761
	bid time vs bid count - pval = 1.0
	ask time vs ask count - pval = 1.0
Performing KS-test for EURUSD000TOD
	bid count vs ask count - pval = 0.00021650176448938054
	bid time vs ask time - pval = 0.00021650176448938054
	bid time vs bid count - pval