In [4]:
from psycopg2 import connect as cnct
from sklearn.linear_model import LinearRegression as lrn
from datetime import datetime, timedelta

import copy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# 0. Объявление глобальных переменных

In [5]:
# Connection parameters for the PostgreSQL database that stores the order logs.
DB_NAME = 'orderlogs'
DB_SETTINGS = {
    'dbname': DB_NAME,
    'user': 'postgres',
    # The password can be supplied through the PGPASSWORD environment variable;
    # when it is not set the value stays empty, exactly as before.
    'password': os.environ.get('PGPASSWORD', ''),
    'host': 'localhost',
    'port': 5432
}

# psycopg2's `with connection:` only commits/rolls back the transaction and
# leaves the connection open, so the connection is closed explicitly.
conn = cnct(**DB_SETTINGS)
try:
    with conn, conn.cursor() as cur:
        cur.execute("SELECT table_name FROM information_schema.tables WHERE table_schema='public'")
        # Names of all tables in the public schema (used below as trading dates).
        TABLE_NAMES = sorted(row[0] for row in cur.fetchall())
finally:
    conn.close()

# Instrument codes taken from the 'Торговый код' column of the specification workbook.
SECCODES = sorted(pd.read_excel(
    os.path.join(
        os.path.abspath(''),
        'data_results/selection/volume_instruments_specification.xlsx'))['Торговый код'])

# 1. Построение функции издержек по Перольду

## 1.1. Построение книги лимитированных заявок

In [6]:
class LOB:
    '''Builds and plots a limit order book snapshot for a single instrument.

    Order-log rows are read from the PostgreSQL database described by the
    module-level DB_SETTINGS; the table that is queried is named after the date.
    '''
    def __init__(self, ticker):
        '''
        :ticker: the instrument code matched against the "SECCODE" column
        '''
        self._ticker = ticker

    def table_build(self, date, time):
        '''This method builds the limit order book table on the specified date and time

        For every order the volume of ACTION = 1 rows is added and the volume of
        ACTION = 0 and ACTION = 2 rows is subtracted; orders whose remaining volume
        is positive are then aggregated per side and price level.

        :date: the date in int or str format (for example, 20190603); it is the table name
        :time: the time in int format (for example, 121059123456)
        :return: the pandas.DataFrame object with columns BUYSELL, PRICE, VOLUME,
                 ordered by BUYSELL descending and then PRICE descending
        '''
        # Local import: only `connect` is imported at file level.
        from psycopg2 import sql

        query = '''\
        SELECT 
            tt."BUYSELL", tt."PRICE", sum("VOLUME") as "VOLUME"
        FROM
            (SELECT 
                t."BUYSELL", t."ORDERNO", max(t."PRICE") as "PRICE", sum("VOLUME") as "VOLUME"
            FROM 
                (SELECT
                    "BUYSELL", "ORDERNO", "ACTION", max("PRICE") as "PRICE",
                    CASE "ACTION" WHEN 1 THEN sum("VOLUME")
                                  WHEN 2 THEN sum(-"VOLUME")
                                  WHEN 0 THEN sum(-"VOLUME")
                    END as "VOLUME"
                FROM 
                    {table}
                WHERE
                    "SECCODE" = %s AND "TIME" <= %s AND "PRICE" != 0
                GROUP BY
                    "BUYSELL", 
                    "ORDERNO",
                    "ACTION") t
            GROUP BY
                "BUYSELL",
                "ORDERNO"
            HAVING
                sum("VOLUME") > 0) tt
        GROUP BY
            "BUYSELL",
            "PRICE"
        ORDER BY
            "BUYSELL" DESC, 
            "PRICE" DESC;
        '''
        # The table name is quoted as an identifier; the ticker and the time are
        # passed as bound parameters instead of being pasted into the SQL text.
        query = sql.SQL(" ".join(query.split())).format(
            table=sql.Identifier(str(date)))
        params = (str(self._ticker), int(time))

        # `with connection` in psycopg2 does not close the connection, hence the
        # explicit close in `finally`.
        conn = cnct(**DB_SETTINGS)
        try:
            with conn, conn.cursor() as cur:
                cur.execute(query, params)
                rows = cur.fetchall()
        finally:
            conn.close()

        return pd.DataFrame(rows, columns=['BUYSELL', 'PRICE', 'VOLUME'])

    @staticmethod
    def _format_moment(date, time):
        '''Formats int date/time values as 'YYYY-MM-DD: HH:MM:SS.ffffff' for the plot title.'''
        date_str = str(date)
        # Times before 10:00 have only 11 digits as ints; restore the leading zero.
        time_str = str(time).zfill(12)
        return (date_str[:4] + '-' + date_str[4:6] + '-' + date_str[6:] +
                ': ' +
                time_str[:2] + ':' + time_str[2:4] + ':' + time_str[4:6] + '.' + time_str[6:])

    def lob_plot(self, date, time, buylevels=20, selllevels=20, path=None):
        '''This method builds the limit order book table and plots it on the specified date and time

        :date: the date in int format (for example, 20190603)
        :time: the time in int format (for example, 121059123456)
        :buylevels: the number of price levels of BID side, int
        :selllevels: the number of price levels of ASK side, int
        :path: where to save the plot (relative to the working directory);
               if None the plot is shown instead of being saved
        :return: None
        '''
        lob = self.table_build(date, time)
        buy = (lob.loc[lob['BUYSELL'] == 'B', ['PRICE', 'VOLUME']]).reset_index(drop=True)
        sell = (lob.loc[lob['BUYSELL'] == 'S', ['PRICE', 'VOLUME']]).reset_index(drop=True)

        bid = np.max(buy.PRICE)
        ask = np.min(sell.PRICE)

        # Both sides arrive sorted by price descending, so the best bids are the
        # first rows and the best asks are the last rows. head/tail (unlike the
        # slice [-0:]) select nothing when zero levels are requested.
        best_bids = buy.head(buylevels).reset_index(drop=True)
        best_asks = sell.tail(selllevels).reset_index(drop=True)

        plt.figure()
        plt.barh(best_asks.PRICE, best_asks.VOLUME,
                 height=0.01, label='SELL', color='r')
        # Bid volumes are drawn to the left of zero.
        plt.barh(best_bids.PRICE, -best_bids.VOLUME,
                 height=0.01, label='BUY', color='g')
        plt.xticks(rotation=15)

        plt.xlabel('VOLUME')
        plt.ylabel('PRICE')
        plt.title('LOB for ' + self._ticker + ' at ' + self._format_moment(date, time))
        plt.legend(['ASK = ' + str(ask),
                    'BID = ' + str(bid)])
        plt.grid()

        if path is None:
            plt.show()
        else:
            plt.savefig(os.path.join(os.path.abspath(''), path), bbox_inches='tight')
            plt.close()

In [7]:
# Example: plot ten price levels on each side of the GAZP book and save the figure.
lob = LOB('GAZP')
lob.lob_plot(
    20190620,
    115612000000,
    buylevels=10,
    selllevels=10,
    path='./data_results/lob_building/lob_example.pdf',
)

## 1.2. Построение квадратичной функции издержек

\begin{equation}
\theta(p, v) = \sum_{i=1}^n (p - p_i) \cdot v_i,
\end{equation}

где
$p$ — прокси текущей рыночной цены (середина спреда между лучшими ценами покупки и продажи), $p_i$ — цена $i$-го уровня книги, $v_i$ — объём на $i$-м уровне: положительный при продаже актива, отрицательный при покупке. Суммирование ведётся по $n$ ценовым уровням книги.

In [5]:
class Perold(LOB):
    '''Empirical Perold transaction-cost curve computed from a limit order book snapshot.
    '''
    def __init__(self, ticker, date, time):
        '''Loads the order book snapshot and derives the best bid, best ask and their midpoint.

        :ticker: the string of fin. instrument's name ('SBER', for example)
        :date: the date in int (or str) format (for example, 20190603)
        :time: the time in int format (for example, 111050000000)
        '''
        super().__init__(ticker)
        self._date = date
        self._time = time
        self._lob = self.table_build(date, time)

        bid_rows = self._lob['BUYSELL'] == 'B'
        ask_rows = self._lob['BUYSELL'] == 'S'
        self._bid = np.max(self._lob.loc[bid_rows, 'PRICE'])
        self._ask = np.min(self._lob.loc[ask_rows, 'PRICE'])
        self._halfbidask = (self._bid + self._ask) / 2

        # Filled in by empirical_cost_function_build().
        self._tr_costs = None

    def _side_cost_curve(self, side, ascending):
        '''Returns cumulative VOLUME and cumulative COSTS for one side of the book.

        :side: 'B' or 'S', matched against the BUYSELL column
        :ascending: price sort direction used before accumulating
        '''
        levels = (self._lob.loc[self._lob['BUYSELL'] == side, ['PRICE', 'VOLUME']]
                  .sort_values(by='PRICE', ascending=ascending)
                  .reset_index(drop=True))
        # Cost of a level: distance of its price from the midpoint times its volume.
        level_costs = np.abs(levels['PRICE'] - self._halfbidask) * levels['VOLUME']
        return pd.DataFrame({'VOLUME': levels['VOLUME'].cumsum(),
                             'COSTS': level_costs.cumsum()})

    def empirical_cost_function_build(self):
        '''Builds, stores and returns the cost table with the fields VOLUME and COSTS.

        Rows derived from the 'B' side keep positive cumulative volumes, rows derived
        from the 'S' side get negated cumulative volumes; the result is sorted by VOLUME.
        '''
        # The stored snapshot itself gets its VOLUME column cast to int.
        self._lob['VOLUME'] = self._lob['VOLUME'].astype(int)

        from_bids = self._side_cost_curve('B', ascending=False)
        from_asks = self._side_cost_curve('S', ascending=True)
        from_asks['VOLUME'] = -from_asks['VOLUME']

        combined = pd.concat((from_asks, from_bids))
        self._tr_costs = combined.sort_values(by='VOLUME').reset_index(drop=True)

        return self._tr_costs

    def plot_empirical_function(self, path=None):
        '''Plots COSTS against VOLUME from the previously built cost table.

        :path: relative path where the plot is saved (for example ./graph.pdf);
               if None the plot is shown instead of being saved
        '''
        if self._tr_costs is None:
            # Nothing to draw until empirical_cost_function_build() has been called.
            print('Just build the cost table at once')
            return

        plt.figure()
        plt.plot(self._tr_costs['VOLUME'], self._tr_costs['COSTS'])
        plt.xticks(rotation=15)
        plt.title('Empirical Function of Transaction Costs')
        plt.xlabel('VOLUME, positive for sell, negative for buy')
        plt.ylabel('TRANSACTION COSTS')

        if path is not None:
            plt.savefig(os.path.join(os.path.abspath(''), path), bbox_inches='tight')
            plt.close()
        else:
            plt.show()

In [6]:
# Example: build the cost table for SBER and keep the non-negative-volume rows.
lob = Perold(ticker='SBER', date=20190603, time=125959123123)
costs = lob.empirical_cost_function_build()
costs = costs[costs['VOLUME'] >= 0].reset_index(drop=True)

lob.plot_empirical_function(path='./data_results/costs_building/costs_example.pdf')

# 2. Ликвидность

Под простотой торговли мы понимаем издержки, которые несет инвестор в случае игры на рынке. Таким образом, оценка ликвидности неотрывно связана с выставленной позицией по активу. 

Будем оценивать ликвидность без потери общности на выход из позиции (на продажу актива). 

Функция трансакционных издержек Перольда похожа на полином высокой степени, выпуклый вокруг нулевой точки. Поэтому будем моделировать издержки степенной функцией объёма:

$$
\theta(\cdot) = \alpha V^{\beta}
$$

Или
$$
\ln\theta(\cdot) = \ln\alpha + \beta \ln V
$$

In [7]:
class Liquidity(Perold):
    '''This class implements the logic for α and β estimation in the model
    costs = α · V^β, fitted as a linear regression of ln(costs) on ln(V).
    '''
    def __init__(self, ticker, date, time):
        '''Builds the cost table and splits it into buy and sell parts.

        :ticker: the string of fin. instrument's name ('SBER', for example)
        :date: the date in int (or str) format (for example, 20190603)
        :time: the time in int format (for example, 111050000000)
        '''
        super().__init__(ticker, date, time)

        self.empirical_cost_function_build()

        # Negative volumes in the cost table belong to the buy side; they are
        # flipped to positive so that both tables hold positive volumes.
        self._buycosts = self._tr_costs.loc[self._tr_costs['VOLUME'] < 0].reset_index(drop=True)
        self._buycosts['VOLUME'] = -self._buycosts['VOLUME']
        self._sellcosts = self._tr_costs.loc[self._tr_costs['VOLUME'] > 0].reset_index(drop=True)

        self._buycosts = self._buycosts.sort_values(by='VOLUME')
        self._sellcosts = self._sellcosts.sort_values(by='VOLUME')

    def calculate_alpha_beta(self, side='B'):
        '''This method implements the logic for the α and β estimation

        :side: the char 'B' or 'S' - buy or sell
        :return: the tuple (α, β)
        :raises ValueError: if side is not 'B' or 'S', or if the chosen side has
                            no point with positive volume and positive costs
        '''
        if side == 'B':
            table = self._buycosts
        elif side == 'S':
            table = self._sellcosts
        else:
            raise ValueError('You should give the char B or S')

        # Explicit float arrays: np.log cannot handle object-dtype columns.
        V = np.asarray(table['VOLUME'], dtype=float)
        costs = np.asarray(table['COSTS'], dtype=float)

        # The logarithm is defined only for strictly positive values. Zero costs
        # (a level priced exactly at the midpoint) and NaN costs (one side of the
        # book is empty) are dropped instead of feeding -inf/NaN to the regression.
        usable = (V > 0) & (costs > 0)
        if not usable.any():
            raise ValueError('No points with positive volume and costs to fit for side ' + side)

        reg = lrn()
        reg.fit(np.log(V[usable]).reshape(-1, 1), np.log(costs[usable]))
        a = np.exp(reg.intercept_)  # the intercept estimates ln(α)
        b = reg.coef_[0]

        return a, b

In [8]:
# Example: estimate (α, β) of the sell-side cost curve for SBER at 11:00:00.
liq = Liquidity('SBER', 20190603, 110000000000)

liq.calculate_alpha_beta('S')

(0.000167399655002501, 1.6464666420042098)

In [9]:
class Times:
    '''Generates a grid of intraday times between a start and an end time.

    Times are ints whose decimal digits read HHMMSS followed by the microseconds
    (for example, 125959000000).
    '''
    def __init__(self, start, end):
        '''
        :start: the first time of the grid in int format
        :end: the last time of the grid in int format
        '''
        self._start = start
        self._end = end

    @staticmethod
    def time_convertion(time):
        '''This function converts time in int format like 125959000000 to datetime.datetime

        The first two digits are read as hours, the next two as minutes, the next
        two as seconds and all remaining digits as an integer number of microseconds.
        The calendar date of the result is a fixed placeholder (2019-06-01).

        :time: time in int format
        :return: datetime.datetime
        '''
        year = 2019
        month = 6
        day = 1

        digits = str(time)
        hours = int(digits[:2])
        minutes = int(digits[2:4])
        seconds = int(digits[4:6])
        # A value with no digits after HHMMSS means zero microseconds.
        microseconds = int(digits[6:] or 0)

        return datetime(year, month, day, hours, minutes, seconds, microseconds)

    @staticmethod
    def time_convertion_int(time):
        '''This method converts the time of format datetime.datetime into int format

        :time: datetime.datetime (only the time-of-day fields are used)
        :return: int built from the digits HHMMSSffffff
        '''
        return int(f'{time.hour:02}{time.minute:02}{time.second:02}{time.microsecond:06}')

    def times_map_calculation(self, interval=1):
        '''This method calculates the times in int format using the interval in minutes between them

        The grid starts at the start time and advances by the interval; the last
        step is clamped so that the grid never goes past the end time.

        :interval: the step in minutes, must be positive
        :return: the list of times in int format
        :raises ValueError: if interval is not positive (the grid would never reach the end)
        '''
        if interval <= 0:
            raise ValueError('interval must be a positive number of minutes')

        start = Times.time_convertion(self._start)
        end = Times.time_convertion(self._end)
        step = timedelta(minutes=interval)

        moments = [start]
        current = start
        while current < end:
            current = min(current + step, end)
            moments.append(current)

        return [Times.time_convertion_int(moment) for moment in moments]
        

In [10]:
class BuildLiquidityEstimates:
    '''Estimates the liquidity parameters (α, β) of one instrument on one date
    over a grid of intraday times and stores them as a CSV file.
    '''
    def __init__(self, ticker, date, time_start, time_end, time_interval):
        '''
        :ticker: the instrument code
        :date: the date passed on to Liquidity
        :time_start: the first time of the grid in int format
        :time_end: the last time of the grid in int format
        :time_interval: the grid step in minutes
        '''
        self._ticker = ticker
        self._date = date
        self._time_start = time_start
        self._time_end = time_end
        self._time_interval = time_interval

    def liquidity_parameters_estimate(self):
        '''Fits (α, β) for the buy and the sell side at every time of the grid.

        The table is also written to
        ./data_results/liquidity_time_series/<ticker>_<date>.csv under the working directory.

        :return: pandas.DataFrame with columns TIME, Alpha_B, Beta_B, Alpha_S, Beta_S
        '''
        grid = Times(start=self._time_start,
                     end=self._time_end).times_map_calculation(interval=self._time_interval)

        per_side = {'B': [], 'S': []}
        for moment in grid:
            snapshot = Liquidity(ticker=self._ticker, date=self._date, time=moment)
            print(self._ticker, self._date, moment)
            for side in ('B', 'S'):
                alpha, beta = snapshot.calculate_alpha_beta(side=side)
                per_side[side].append((moment, alpha, beta))

        column_names = ['TIME', 'Alpha', 'Beta']
        buy_frame = pd.DataFrame(per_side['B'], columns=column_names)
        sell_frame = pd.DataFrame(per_side['S'], columns=column_names)

        estimates = buy_frame.merge(sell_frame, how='outer', on='TIME', suffixes=['_B', '_S'])

        target = os.path.join(os.path.abspath(''),
                              f'./data_results/liquidity_time_series/{self._ticker}_{self._date}.csv')
        estimates.to_csv(target)

        return estimates

In [14]:
# Estimate the liquidity parameters for every instrument on every date,
# from 10:00:00 to 18:00:00 with a 30-minute step.
for seccode in SECCODES:
    for date in TABLE_NAMES:
        try:
            build = BuildLiquidityEstimates(ticker=seccode, date=date,
                                            time_start=100000000000,
                                            time_end=180000000000,
                                            time_interval=30)
            df = build.liquidity_parameters_estimate()
        except Exception as exc:
            # Best effort: report the failed pair and move on to the next one.
            # Catching Exception (not a bare except) keeps Ctrl+C / kernel
            # interrupt working during this long run.
            print(f'SKIPPED {seccode} {date}: {type(exc).__name__}: {exc}')

FEES 20190603 100000000000
FEES 20190603 103000000000
FEES 20190603 110000000000
FEES 20190603 113000000000
FEES 20190603 120000000000
FEES 20190603 123000000000
FEES 20190603 130000000000
FEES 20190603 133000000000
FEES 20190603 140000000000
FEES 20190603 143000000000
FEES 20190603 150000000000
FEES 20190603 153000000000
FEES 20190603 160000000000
FEES 20190603 163000000000
FEES 20190603 170000000000
FEES 20190603 173000000000
FEES 20190603 180000000000
FEES 20190604 100000000000
FEES 20190604 103000000000
FEES 20190604 110000000000
FEES 20190604 113000000000
FEES 20190604 120000000000
FEES 20190604 123000000000
FEES 20190604 130000000000
FEES 20190604 133000000000
FEES 20190604 140000000000
FEES 20190604 143000000000
FEES 20190604 150000000000
FEES 20190604 153000000000
FEES 20190604 160000000000
FEES 20190604 163000000000
FEES 20190604 170000000000
FEES 20190604 173000000000
FEES 20190604 180000000000
FEES 20190605 100000000000
FEES 20190605 103000000000
FEES 20190605 110000000000
F

FEES 20190627 173000000000
FEES 20190627 180000000000
FEES 20190628 100000000000
FEES 20190628 103000000000
FEES 20190628 110000000000
FEES 20190628 113000000000
FEES 20190628 120000000000
FEES 20190628 123000000000
FEES 20190628 130000000000
FEES 20190628 133000000000
FEES 20190628 140000000000
FEES 20190628 143000000000
FEES 20190628 150000000000
FEES 20190628 153000000000
FEES 20190628 160000000000
FEES 20190628 163000000000
FEES 20190628 170000000000
FEES 20190628 173000000000
FEES 20190628 180000000000
HYDR 20190603 100000000000
HYDR 20190603 103000000000
HYDR 20190603 110000000000
HYDR 20190603 113000000000
HYDR 20190603 120000000000
HYDR 20190603 123000000000
HYDR 20190603 130000000000
HYDR 20190603 133000000000
HYDR 20190603 140000000000
HYDR 20190603 143000000000
HYDR 20190603 150000000000
HYDR 20190603 153000000000
HYDR 20190603 160000000000
HYDR 20190603 163000000000
HYDR 20190603 170000000000
HYDR 20190603 173000000000
HYDR 20190603 180000000000
HYDR 20190604 100000000000
H

HYDR 20190626 163000000000
HYDR 20190626 170000000000
HYDR 20190626 173000000000
HYDR 20190626 180000000000
HYDR 20190627 100000000000
HYDR 20190627 103000000000
HYDR 20190627 110000000000
HYDR 20190627 113000000000
HYDR 20190627 120000000000
HYDR 20190627 123000000000
HYDR 20190627 130000000000
HYDR 20190627 133000000000
HYDR 20190627 140000000000
HYDR 20190627 143000000000
HYDR 20190627 150000000000
HYDR 20190627 153000000000
HYDR 20190627 160000000000
HYDR 20190627 163000000000
HYDR 20190627 170000000000
HYDR 20190627 173000000000
HYDR 20190627 180000000000
HYDR 20190628 100000000000
HYDR 20190628 103000000000
HYDR 20190628 110000000000
HYDR 20190628 113000000000
HYDR 20190628 120000000000
HYDR 20190628 123000000000
HYDR 20190628 130000000000
HYDR 20190628 133000000000
HYDR 20190628 140000000000
HYDR 20190628 143000000000
HYDR 20190628 150000000000
HYDR 20190628 153000000000
HYDR 20190628 160000000000
HYDR 20190628 163000000000
HYDR 20190628 170000000000
HYDR 20190628 173000000000
H