In [None]:
from psycopg2 import connect as cnct
from statsmodels.regression import linear_model
from sklearn.metrics import mean_squared_error as mse
from scipy.optimize import minimize, basinhopping, Bounds, LinearConstraint

import datetime
import statsmodels.api as sm
import copy
import os
import numpy as np
import pandas as pd 
import matplotlib
import matplotlib.pyplot as plt
import pyflux as pf
import seaborn

# Select the PGF backend so that figures can be saved as .pgf files (see the
# savefig calls further down) and typeset by LaTeX.
# NOTE(review): matplotlib.pyplot is already imported above this call; confirm the
# installed matplotlib version still switches the backend at this point.
matplotlib.use("pgf")
matplotlib.rcParams.update({
    # LaTeX engine used by the PGF backend.
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    # Render all text through LaTeX.
    'text.usetex': True,
    # Do not set up fonts from rc parameters in the PGF preamble.
    'pgf.rcfonts': False,
})

# 0. Глобальные настройки

In [None]:
# Connection parameters of the PostgreSQL database that stores the order logs.
# NOTE(review): credentials are hard-coded; consider reading them from the environment.
DB_SETTINGS = {
    'dbname'   : 'orderlogs',
    'port'     :  5432,
    'user'     : 'postgres',
    'host'     : 'localhost',
    'password' : ''
}

# The same connection expressed as an SQLAlchemy-style URL.
ENGINE_SETTINGS = 'postgresql://postgres@localhost:5432/orderlogs'

# psycopg2's "with connection" block only commits / rolls back the transaction,
# it does NOT close the connection, so the connection is closed explicitly here.
conn = cnct(**DB_SETTINGS)
try:
    with conn:
        cur = conn.cursor()
        cur.execute("SELECT table_name FROM information_schema.tables WHERE table_schema='public'")
        # Each fetched row is a 1-tuple holding the table name.
        TABLE_NAMES = sorted(row[0] for row in cur.fetchall())
        cur.close()
finally:
    conn.close()

# Tables whose name has exactly 8 characters (dates such as 20190603) hold the order logs,
# longer names are treated as "chains" tables.
TABLE_NAMES_ORDERLOGS = [name for name in TABLE_NAMES if len(name) == 8]
TABLE_NAMES_CHAINS = [name for name in TABLE_NAMES if len(name) > 8]
# The ticker is whatever follows the first 9 characters of a chains table name;
# sorted so that the list order is reproducible between runs.
TICKERS = sorted({name[9:] for name in TABLE_NAMES_CHAINS})
# Directory the notebook is executed from; all relative data paths hang off it.
PATH = os.path.abspath('')

# 1. Проблема ликвидации

## 1.1. Реализация

### 1.1.0. Получение оценок параметров

#### 1.1.0.1. Получение оценок для функции издержек

In [None]:
class LOB:
    '''Limit order book (LOB) of one instrument, rebuilt from the order-log table of a day.

    The book is an aggregated table with one row per side and price level. It is
    cached in ``self._lob`` by :meth:`table_build`.
    '''
    def __init__(self, ticker, date=None, time=None):
        '''
        :ticker: the instrument code used in the "SECCODE" column ('SBER', for example)
        :date: the date in int format (for example, 20190603); it is also the table name
        :time: the time in int format (for example, 121059123456)
        '''
        self._ticker = ticker
        self._date = date
        self._time = time

        self._lob = None

    def table_build(self, date=None, time=None):
        '''This method builds the limit order book table on the specified date and time

        When both arguments are omitted the date and time given to the constructor are used.

        :date: the date in int format (for example, 20190603)
        :time: the time in int format (for example, 121059123456)
        :return: the pandas.DataFrame object with columns BUYSELL, PRICE, VOLUME,
            ordered by BUYSELL descending and PRICE descending
        :raises ValueError: if only one of date/time is given, or none is available at all
        '''
        if date is None and time is None:
            date = self._date
            time = self._time
        elif date is None or time is None:
            raise ValueError('You must specify both date and time')
        if date is None or time is None:
            # Neither the call nor the constructor supplied the moment of the snapshot.
            raise ValueError('You must specify both date and time')

        # The table name is an identifier and cannot be bound as a query parameter,
        # so it is embedded with its double quotes escaped.
        table_name = str(date).replace('"', '""')

        # Inner query: signed volume per order and ACTION (ACTION 1 adds volume,
        # ACTION 0 and 2 subtract it -- presumably placement vs. cancellation/trade,
        # confirm against the order-log specification).
        # Middle query: residual volume per order, keeping only orders with volume left.
        # Outer query: residual volume aggregated per side and price level.
        query = '''\
        SELECT
            tt."BUYSELL", tt."PRICE", sum("VOLUME") as "VOLUME"
        FROM
            (SELECT
                t."BUYSELL", t."ORDERNO", max(t."PRICE") as "PRICE", sum("VOLUME") as "VOLUME"
            FROM
                (SELECT
                    "BUYSELL", "ORDERNO", "ACTION", max("PRICE") as "PRICE",
                    CASE "ACTION" WHEN 1 THEN sum("VOLUME")
                                  WHEN 2 THEN sum(-"VOLUME")
                                  WHEN 0 THEN sum(-"VOLUME")
                    END as "VOLUME"
                FROM
                    "{}"
                WHERE
                    "SECCODE" = %s AND "TIME" <= %s AND "PRICE" != 0
                GROUP BY
                    "BUYSELL",
                    "ORDERNO",
                    "ACTION") t
            GROUP BY
                "BUYSELL",
                "ORDERNO"
            HAVING
                sum("VOLUME") > 0) tt
        GROUP BY
            "BUYSELL",
            "PRICE"
        ORDER BY
            "BUYSELL" DESC,
            "PRICE" DESC;
        '''.format(table_name)
        query = " ".join(query.split())

        # "with connection" only ends the transaction in psycopg2, so the connection
        # is closed explicitly to avoid leaking one connection per snapshot.
        conn = cnct(**DB_SETTINGS)
        try:
            with conn:
                with conn.cursor() as cur:
                    # Values are bound by the driver instead of being formatted into the SQL.
                    cur.execute(query, (str(self._ticker), int(time)))
                    rows = cur.fetchall()
        finally:
            conn.close()

        limit_order_book = pd.DataFrame(rows, columns=['BUYSELL', 'PRICE', 'VOLUME'])
        self._lob = limit_order_book

        return limit_order_book

    def lob_plot(self, date, time, buylevels=20, selllevels=20, path=None):
        '''This method builds the limit order book table and plot it on the specified date and time

        :date: the date in int format (for example, 20190603)
        :time: the time in int format (for example, 121059123456)
        :buylevels: the number of price levels of BID side, int
        :selllevels: the number of price levels of ASK side, int
        :path: whether you want to save the plot, set the path
        :return: None, the figure is shown or saved
        '''

        lob = self.table_build(date, time)
        buy = (lob.loc[lob['BUYSELL'] == 'B', ['PRICE', 'VOLUME']]).reset_index(drop=True)
        sell = (lob.loc[lob['BUYSELL'] == 'S', ['PRICE', 'VOLUME']]).reset_index(drop=True)

        bid = np.max(buy.PRICE)
        ask = np.min(sell.PRICE)
        plt.figure()
        # Prices are sorted in descending order, so the best asks are the LAST
        # rows of the sell side and the best bids are the FIRST rows of the buy side.
        plt.barh(sell.PRICE[-selllevels:].reset_index(drop=True),
                 sell.VOLUME[-selllevels:].reset_index(drop=True),
                 height=0.01, label = 'SELL', color = 'r')
        # Bid volumes are drawn to the left of zero.
        plt.barh(buy.PRICE[:buylevels].reset_index(drop=True),
                 -buy.VOLUME[:buylevels].reset_index(drop=True),
                 height=0.01, label = 'BUY', color = 'g')
        plt.xticks(rotation=15)

        plt.xlabel('VOLUME')
        plt.ylabel('PRICE')
        s_date = str(date)
        s_time = str(time)
        plt.title('LOB for ' + self._ticker +
                  ' at ' + s_date[:4] + '-' + s_date[4:6] + '-' + s_date[6:] +
                  ': ' +
                  s_time[:2] + ':' + s_time[2:4] + ':' + s_time[4:6] + '.' + s_time[6:])
        plt.legend(['ASK = ' + str(ask),
                    'BID = ' + str(bid)])
        plt.grid()

        if path is None:
            plt.show()
        else:
            plt.savefig(os.path.join(os.path.abspath(''), path), bbox_inches='tight')
            plt.close()

    def price_calculate(self):
        '''This method returns the mid price (the average of the best ask and the best bid)
        of the most recently built book

        :raises ValueError: if the book has not been built yet
        '''
        if self._lob is None:
            raise ValueError('The limit order book is not built yet, call table_build() first')

        ask = np.min(self._lob.loc[self._lob['BUYSELL'] == 'S', 'PRICE'])
        bid = np.max(self._lob.loc[self._lob['BUYSELL'] == 'B', 'PRICE'])

        return (ask + bid) / 2

In [None]:
class Perold(LOB):
    '''Perold transaction costs computed from a limit order book snapshot.

    The snapshot is taken in the constructor; the cost table is produced on demand by
    :meth:`empirical_cost_function_build`.
    '''
    def __init__(self, ticker, date, time):
        '''
        :ticker: the string of fin. instrument's name ('SBER', for example)
        :date: the date in int (or str) format (for example, 20190603)
        :time: the time in int format (for example, 111050000000)
        '''
        super().__init__(ticker)
        self._date = date
        self._time = time
        self._lob = self.table_build(date, time)

        is_bid = self._lob['BUYSELL'] == 'B'
        is_ask = self._lob['BUYSELL'] == 'S'
        self._bid = np.max(self._lob.loc[is_bid, 'PRICE'])
        self._ask = np.min(self._lob.loc[is_ask, 'PRICE'])
        # Mid price: the reference price p of the cost function.
        self._halfbidask = (self._bid + self._ask) / 2

        self._tr_costs = None

    def _side_costs(self, side, ascending):
        '''Cumulative volume and cumulative cost of walking one side of the book.

        :side: 'B' or 'S', the value of the BUYSELL column to select
        :ascending: the sort direction of the price levels (best level first)
        :return: pd.DataFrame with the columns VOLUME and COSTS
        '''
        levels = self._lob.loc[self._lob['BUYSELL'] == side, ['PRICE', 'VOLUME']]
        levels = levels.sort_values(by='PRICE', ascending=ascending).reset_index(drop=True)

        walked = copy.deepcopy(levels)
        distance = np.abs(walked['PRICE'] - self._halfbidask)
        walked['COSTS'] = np.cumsum(distance * walked['VOLUME'])
        walked['VOLUME'] = np.cumsum(walked['VOLUME'])

        return walked[['VOLUME', 'COSTS']]

    def empirical_cost_function_build(self):
        '''This method builds the pd.DataFrame object that consists the following fields:
        VOLUME, COSTS according to the Perold's function θ(·)= Σ(p_i - p)V_i

        Selling walks the bid side (positive VOLUME), buying walks the ask side
        (negative VOLUME). The table is stored in ``self._tr_costs`` and returned.
        '''
        # The VOLUME column of the stored book is cast in place.
        self._lob['VOLUME'] = self._lob['VOLUME'].astype(int)

        selling = self._side_costs('B', ascending=False)

        buying = self._side_costs('S', ascending=True)
        buying['VOLUME'] = -buying['VOLUME']

        combined = pd.concat((buying, selling))
        self._tr_costs = combined.sort_values(by='VOLUME').reset_index(drop=True)

        return self._tr_costs

    def plot_empirical_function(self, path=None):
        '''This method plots the empirical function of costs and saves it into the given path
        (if None, the figure is shown and not saved)

        :path: the string value (relative path where to save the plot), for example (./graph.pdf)
        '''
        if self._tr_costs is None:
            print('Just build the cost table at once')
            return

        plt.figure()
        plt.plot(self._tr_costs['VOLUME'], self._tr_costs['COSTS'])
        plt.xticks(rotation=15)
        plt.title('Empirical Function of Transaction Costs')
        plt.xlabel('VOLUME, positive for sell, negative for buy')
        plt.ylabel('TRANSACTION COSTS')

        if path is not None:
            plt.savefig(os.path.join(os.path.abspath(''), path), bbox_inches='tight')
            plt.close()
        else:
            plt.show()

In [None]:
class Liquidity(Perold):
    '''This class implements the logic for α and β estimation

    The empirical cost table is built in the constructor, so the estimation can be
    requested right after the object is created.
    '''
    def __init__(self, ticker, date, time):
        '''
        :ticker: the string of fin. instrument's name ('SBER', for example)
        :date: the date in int (or str) format (for example, 20190603)
        :time: the time in int format (for example, 111050000000)
        '''
        super().__init__(ticker, date, time)
        self.empirical_cost_function_build()

    def calculate_costs_function_parameters(self, method='squared'):
        '''This method implements the logic for the α and β estimation

        :param: method is the linear model for transaction costs:
            squared: costs regressed on volume and volume squared (no intercept)
        :return: the tuple (estimated parameters, covariance matrix of the estimates)
        :raises ValueError: if the method is not supported
        '''
        if method != 'squared':
            # Previously an unknown method fell through and returned None, which only
            # surfaced later as an unpacking error at the call site.
            raise ValueError("Unknown method '{}', the only supported one is 'squared'".format(method))

        V = np.array(self._tr_costs.VOLUME)
        costs = np.array(self._tr_costs.COSTS)

        y = costs
        # Design matrix without a constant column: costs = a * V + b * V^2.
        X = np.stack([V, V**2], axis=1)

        model = linear_model.OLS(y, X)
        results = model.fit()

        return results.params, results.cov_params()

#### 1.1.0.2. Получение оценок для волатильности

Составление необходимых временных точек

In [None]:
def time_convertion(time: int, date=20190603) -> datetime.datetime:
    '''Convert the HHMMSSffffff time and the YYYYMMDD date into a datetime object.

    :param time: the time in int format (for example, 121059123456)
    :param date: the date in int format (for example, 20190603)
    :return: datetime.datetime of the given moment
    '''
    # An int loses its leading zeros (09:50:23 is 95023000000, only 11 digits),
    # so the string is left-padded to the full 12 digits before it is sliced.
    s_time = str(time).zfill(12)
    s_date = str(date)

    hours = int(s_time[:2])
    minutes = int(s_time[2:4])
    seconds = int(s_time[4:6])
    microseconds = int(s_time[6:])

    year = int(s_date[:4])
    month = int(s_date[4:6])
    day = int(s_date[6:])

    time_datetime = datetime.datetime(year=year, month=month, day=day,
                                      hour=hours, minute=minutes, second=seconds,
                                      microsecond=microseconds)

    return time_datetime

def time_convertion_back(time):
    '''Convert a datetime-like object back into the HHMMSSffffff int format.

    Only the hour, minute, second and microsecond attributes are read; the date part
    is dropped. The result is an int, so leading zeros are not kept.
    '''
    digits = '{:02}{:02}{:02}{:06}'.format(time.hour,
                                           time.minute,
                                           time.second,
                                           time.microsecond)

    return int(digits)

In [None]:
def time_creation(start: int, end: int, delta=5) -> list:
    '''Build the grid of moments from start to end (both inclusive) with the given step.

    :param start: the first moment in int format (for example, 110000000000)
    :param end: the last moment in int format (for example, 130000000000)
    :param delta: the step in seconds, must be positive
    :return: the list of moments in the same int format
    :raises ValueError: if delta is not positive
    '''
    if delta <= 0:
        # A zero or negative step would never reach the end of the interval.
        raise ValueError('delta must be a positive number of seconds')

    step = datetime.timedelta(seconds=delta)
    current = time_convertion(start)
    last = time_convertion(end)

    moments = []
    while current <= last:
        moments.append(time_convertion_back(current))
        current += step

    return moments

Получение временных рядов

In [None]:
def price_series(start, end, ticker='SBER', date=20190603, delta=5):
    '''Build the series of mid prices on the time grid from start to end.

    :param start: the first moment in int format (for example, 110000000000)
    :param end: the last moment in int format
    :param ticker: the instrument code
    :param date: the date in int format (for example, 20190603)
    :param delta: the step of the grid in seconds
    :return: pd.DataFrame with the columns Time and Price
    '''
    moments = time_creation(start, end, delta=delta)

    mid_prices = []
    for moment in moments:
        # Progress output: one order book is rebuilt per moment.
        print(moment)
        book = LOB(ticker=ticker, date=date, time=moment)
        book.table_build()
        mid_prices.append(book.price_calculate())

    pairs = np.array(list(zip(moments, mid_prices)))

    return pd.DataFrame(pairs, columns=['Time', 'Price'])

Получение прогнозов параметров волатильности

In [None]:
def vola_forecast(*, filename, time_multiplier, number_iterations):
    '''This function calculates the volatility forecast for N liquidation steps using the
    <filename> csv file with the time series of the fin. instrument's prices.

    A GARCH(1, 1) model is fitted to the log-returns of the prices, the forecast is made
    for number_iterations * time_multiplier steps ahead and then summed in consecutive
    groups of time_multiplier steps.

    :param filename: str (the name of the file, inside the time_series_for_volatility
        directory, that holds the time series of prices)
    :param time_multiplier: the number of price steps in one liquidation step: if the
        prices are sampled every 5 seconds and the liquidation occurs every minute then
        time_multiplier is 12
    :param number_iterations: the number of liquidation iterations
    :return: np.array with one aggregated forecast per group
    '''
    path_to_filename = os.path.join(PATH, 'time_series_for_volatility/')
    path_to_filename = os.path.join(path_to_filename, filename)
    df = pd.read_csv(path_to_filename, index_col=0)

    price = df.Price

    # Log-returns; the first element is NaN because of the shift and is dropped below.
    dprice = np.log(price) - np.log(price.shift(1))
    
    dprice = pd.DataFrame(dprice.dropna().reset_index(drop=True))
    
    model = pf.GARCH(dprice, p=1, q=1)
    model.fit()
    
    forecast_initial = model.predict(h=number_iterations*time_multiplier)
    
    # Sum the forecast in consecutive chunks of time_multiplier elements.
    # NOTE(review): if time_multiplier is 0, j never advances and this loop does not terminate.
    # NOTE(review): the shape of each chunk sum depends on what model.predict returns
    # (presumably a one-column DataFrame, which could make the result two-dimensional) --
    # confirm that the returned array is one-dimensional.
    j = 0
    forecast_for_liquidation = []
    while j < len(forecast_initial):
        forecast_for_liquidation.append(np.sum(forecast_initial[j:j+time_multiplier]))
        j += time_multiplier
    
    forecast_for_liquidation = np.array(forecast_for_liquidation)
    
    return forecast_for_liquidation

### 1.1.1. Решение оптимизационной задачи методом доверительных областей

In [None]:
class Optimizer:
    '''This class solves the optimization problem according to the master's thesis

    The minimised functional of the per-step volumes v = (v_1, ..., v_N) is

        lambda * sqrt(F(v)) + sum_k E[costs(v_k)],
        F(v) = sum_k sigma_k^2 * (v_k + ... + v_N)^2 + sum_k Var[costs(v_k)],

    subject to v_k >= 0 and sum_k v_k = volume.
    '''
    def __init__(self, *, lambda_parameter, volume_to_liquidate, number_iterations):
        '''
        :param lambda_parameter: the weight of the risk term
        :param volume_to_liquidate: the total volume that has to be liquidated
        :param number_iterations: the number of liquidation steps N
        '''
        self._lambda = lambda_parameter
        self._volume = volume_to_liquidate
        self._N = number_iterations

    @staticmethod
    def _expected_costs(v, params):
        '''Expected costs of trading the volume v: a*v + b*v^2'''
        return params[0] * v + params[1] * v**2

    @staticmethod
    def _expected_costs_d(v, params):
        '''First derivative of the expected costs with respect to v'''
        return params[0] + 2 * params[1] * v

    @staticmethod
    def _expected_costs_dd(v, params):
        '''Second derivative of the expected costs (a constant, v is not used)'''
        return 2 * params[1]

    @staticmethod
    def _var_costs(v, params, cov_params):
        '''Variance of a*v + b*v^2 given the covariance matrix of the estimates (a, b)'''
        var_b1 = cov_params[0, 0]
        var_b2 = cov_params[1, 1]
        cov_b1b2 = cov_params[0, 1]

        var = v**2 * var_b1 + v**4 * var_b2 + 2 * cov_b1b2 * v**3
        return var

    @staticmethod
    def _var_costs_d(v, params, cov_params):
        '''First derivative of the costs variance with respect to v'''
        var_b1 = cov_params[0, 0]
        var_b2 = cov_params[1, 1]
        cov_b1b2 = cov_params[0, 1]

        var = 2 * v * var_b1 + 4 * v**3 * var_b2 + 6 * cov_b1b2 * v**2
        return var

    @staticmethod
    def _var_costs_dd(v, params, cov_params):
        '''Second derivative of the costs variance with respect to v'''
        var_b1 = cov_params[0, 0]
        var_b2 = cov_params[1, 1]
        cov_b1b2 = cov_params[0, 1]

        var = 2 * var_b1 + 12 * v**2 * var_b2 + 12 * cov_b1b2 * v
        return var

    def _total_variance(self, v, sigmas_sq, params_costs, cov_params_costs):
        '''F(v): the variance under the square root of the functional'''
        # remaining[k] = v_k + ... + v_N, the position still held at step k.
        remaining = np.cumsum(v[::-1])[::-1]
        return (np.sum(sigmas_sq * remaining**2) +
                np.sum(self._var_costs(v, params_costs, cov_params_costs)))

    def _total_variance_grad(self, v, sigmas_sq, params_costs, cov_params_costs):
        '''Gradient of F(v): dF/dv_i = Var'(v_i) + 2 * sum_{k <= i} sigma_k^2 * remaining_k'''
        remaining = np.cumsum(v[::-1])[::-1]
        # v_i enters remaining_k for every k <= i, hence the INCLUSIVE cumulative sum.
        return (self._var_costs_d(v, params_costs, cov_params_costs) +
                2 * np.cumsum(sigmas_sq * remaining))

    def _objective(self, v, sigmas_sq, params_costs, cov_params_costs):
        '''Value of the minimised functional at v'''
        v = np.asarray(v, dtype=float)
        risk = np.sqrt(self._total_variance(v, sigmas_sq, params_costs, cov_params_costs))
        return self._lambda * risk + np.sum(self._expected_costs(v, params_costs))

    def _objective_jac(self, v, sigmas_sq, params_costs, cov_params_costs):
        '''Gradient of the functional: lambda * dF/dv_i / (2 * sqrt(F)) + E'[costs(v_i)]'''
        v = np.asarray(v, dtype=float)
        g = np.sqrt(self._total_variance(v, sigmas_sq, params_costs, cov_params_costs))
        f_grad = self._total_variance_grad(v, sigmas_sq, params_costs, cov_params_costs)
        return (self._lambda / 2) * f_grad / g + self._expected_costs_d(v, params_costs)

    def _objective_hess(self, v, sigmas_sq, params_costs, cov_params_costs):
        '''Hessian of the functional.

        With g = sqrt(F), F_i = dF/dv_i and F_ij = d2F/dv_i dv_j:
            H_ij = lambda / 2 * (F_ij / g - F_i * F_j / (2 * g^3)) + E''[costs] * [i == j],
            F_ij = Var''(v_i) * [i == j] + 2 * sum_{k <= min(i, j)} sigma_k^2.
        '''
        v = np.asarray(v, dtype=float)
        n = len(v)
        g = np.sqrt(self._total_variance(v, sigmas_sq, params_costs, cov_params_costs))
        f_grad = self._total_variance_grad(v, sigmas_sq, params_costs, cov_params_costs)

        steps = np.arange(n)
        # Element (i, j) is the inclusive cumulative sum of sigma^2 up to min(i, j).
        f_hess = 2 * np.cumsum(sigmas_sq)[np.minimum.outer(steps, steps)]
        f_hess = f_hess + np.diag(self._var_costs_dd(v, params_costs, cov_params_costs) * np.ones(n))

        hess = (self._lambda / 2) * (f_hess / g - np.outer(f_grad, f_grad) / (2 * g**3))
        hess = hess + np.eye(n) * self._expected_costs_dd(v, params_costs)

        return hess

    def _solve(self, *, sigmas_sq, params_costs, cov_params_costs, x0=None):
        '''Run the global optimisation and return the raw scipy result.

        :param sigmas_sq: the vector of N variance forecasts, one per liquidation step
        :param params_costs: the estimates (a, b) of the costs function
        :param cov_params_costs: the 2x2 covariance matrix of (a, b)
        :param x0: the starting point, the equal split of the volume by default
        :return: the result object of scipy.optimize.basinhopping
        :raises ValueError: if the number of forecasts differs from N
        '''
        # A column vector would silently broadcast against v into an N x N matrix,
        # so the forecasts are flattened first.
        s2 = np.asarray(sigmas_sq, dtype=float).ravel()
        if len(s2) != self._N:
            raise ValueError('You must specify the vector of parameters ' +
                             'for sigma of the length of number iterations')
        params = np.asarray(params_costs)
        cov = np.asarray(cov_params_costs)

        if x0 is None:
            x0 = np.array([self._volume / self._N] * self._N)
        bounds = Bounds([0] * self._N, [np.inf] * self._N)
        # All steps together must liquidate exactly the requested volume.
        linear_constraint = LinearConstraint([1] * self._N, self._volume, self._volume)

        def functional(v):
            return self._objective(v, s2, params, cov)

        def functional_jac(v):
            return self._objective_jac(v, s2, params, cov)

        def functional_hess(v):
            return self._objective_hess(v, s2, params, cov)

        minimizer_kwargs = {'method':'trust-constr', 'jac':functional_jac, 'hess':functional_hess,
                            'constraints':[linear_constraint], 'bounds':bounds}
        res = basinhopping(functional, x0, minimizer_kwargs=minimizer_kwargs)

        return res

    def solve(self, *, sigmas_sq, params_costs, cov_params_costs, x0=None, return_left=False):
        '''Solve the liquidation problem.

        :param sigmas_sq: the vector of N variance forecasts, one per liquidation step
        :param params_costs: the estimates (a, b) of the costs function
        :param cov_params_costs: the 2x2 covariance matrix of (a, b)
        :param x0: the starting point, the equal split of the volume by default
        :param return_left: False - return the volumes traded at each step (length N),
            True - return the position left before the first and after each step (length N + 1)
        '''
        result = self._solve(sigmas_sq=sigmas_sq,
                             params_costs=params_costs,
                             cov_params_costs=cov_params_costs,
                             x0=x0)

        strategy = result.x
        left_position = self._volume - np.cumsum(strategy)
        left_position = np.concatenate([np.array([self._volume]), left_position])

        if return_left:
            return left_position
        return strategy

# 2. Решение для всех поставленных задач

In [None]:
def problem_solution(*, ticker, date_of_liquidation, time_of_liquidation, 
                     number_of_iteration, duration_of_iteration, lambda_param, volume,
                     hour_interval=1, delta_prices=5, return_left=False):
    '''This function solves the optimal liquidation problem at all

    The price series needed for the volatility forecast is cached as a csv file in the
    time_series_for_volatility directory. When a cache for an earlier moment of the same
    day exists, only the missing tail of the series is computed and the window is shifted.

    :param ticker: str value of ticker (for example, SBER)
    :param date_of_liquidation: int value of the date of liquidation (for example, 20190603)
    :param time_of_liquidation: int value of the time of liquidation (for example, 115023000000)
    :param number_of_iteration: int value of number of iterations of liquidation strategy (for example, 15)
    :param duration_of_iteration: int value of duration in seconds that each liquidation step occurs (for example,
        60 means that each 1 minute)
    :param lambda_param: lambda parameter of optimization problem
    :param volume: int value of sum volume that is needed to liquidate (for example, 10000)
    :param hour_interval: int value of backward time interval for prices calculation that are need for 
        volatility estimation (for example, 1 means that the interval is 105023000000 - 115023000000)
    :param delta_prices: the interval of price calculation in seconds (for example, 5 means each 5 seconds)
    :param return_left: boolean parameter (False - return strategy of liquidation, 
        True - return the left position after each step of liquidation)
    :raises ValueError: if duration_of_iteration is shorter than delta_prices
    '''
    time_multiplier = duration_of_iteration // delta_prices
    if time_multiplier < 1:
        # A zero multiplier would make the aggregation of the forecast loop forever.
        raise ValueError('duration_of_iteration must be at least delta_prices')

    liq = Liquidity(ticker=ticker, date=date_of_liquidation, time=time_of_liquidation)
    params, cov_params = liq.calculate_costs_function_parameters()

    path_to_prices = os.path.join(PATH, 'time_series_for_volatility')
    os.makedirs(path_to_prices, exist_ok=True)
    filename = (ticker + '_' + str(date_of_liquidation) + '_' + str(time_of_liquidation) + '_' + 
                str(hour_interval) + '_' + str(delta_prices) + '.csv')

    if not os.path.isfile(os.path.join(path_to_prices, filename)):
        # Caches of the same ticker, date, window length and step that end EARLIER than
        # the requested moment: {end time as int: file name}. Later caches are skipped,
        # otherwise the series would contain prices from the future.
        earlier_caches = {}
        for name in os.listdir(path_to_prices):
            if name[-3:] != 'csv':
                continue
            parts = name[:-4].split('_')
            if len(parts) != 5 or not parts[2].isdigit():
                continue
            if (parts[0] == ticker and parts[1] == str(date_of_liquidation) and
                    parts[3] == str(hour_interval) and parts[4] == str(delta_prices) and
                    int(parts[2]) < int(time_of_liquidation)):
                earlier_caches[int(parts[2])] = name

        if not earlier_caches:
            # The HHMMSSffffff format makes one hour equal to 10**10.
            time_series = price_series(time_of_liquidation - hour_interval * 10**10, 
                                       time_of_liquidation, ticker=ticker,
                                       delta=delta_prices, date=date_of_liquidation)
        else:
            # Times are compared as numbers, not as strings.
            last_time = max(earlier_caches)
            time_series_last = pd.read_csv(os.path.join(path_to_prices, earlier_caches[last_time]),
                                           index_col=0)

            start = time_convertion(last_time) + datetime.timedelta(seconds=delta_prices)
            start = time_convertion_back(start)
            if start <= time_of_liquidation:
                time_series_new = price_series(int(start), 
                                               time_of_liquidation, ticker=ticker,
                                               delta=delta_prices, date=date_of_liquidation)
                time_series = pd.concat((time_series_last, time_series_new), axis=0).reset_index(drop=True)
            else:
                # The cache ends less than one step before the requested moment.
                time_series = time_series_last.reset_index(drop=True)
            # Keep the window as long as the cached one.
            length = len(time_series_last)
            time_series = (time_series.iloc[-length:, :]).reset_index(drop=True)

        time_series.to_csv(os.path.join(path_to_prices, filename))

    sigmas_sq_to_opt = vola_forecast(filename=filename, 
                                     time_multiplier=time_multiplier, 
                                     number_iterations=number_of_iteration)

    opt = Optimizer(lambda_parameter=lambda_param, 
                    volume_to_liquidate=volume, 
                    number_iterations=number_of_iteration)

    strategy = opt.solve(sigmas_sq=sigmas_sq_to_opt, 
                         params_costs=params, 
                         cov_params_costs=cov_params,
                         return_left=return_left)

    return strategy

In [None]:
# Seed numpy's global random generator before the stochastic optimisation runs.
# NOTE(review): presumably this pins the random steps of basinhopping -- confirm for the
# installed scipy version.
np.random.seed(1234)
# Values of the lambda parameter to compare.
lams = [0.1, 1, 5, 100]

# Collected results: one (left position, lambda) tuple per lambda.
sols = []
for lam in lams:
    print(lam)
    PARAMS_TASK = {
        'ticker': 'SBER', 
        'date_of_liquidation': 20190603, 
        'time_of_liquidation': 114000000000, 
        'number_of_iteration': 15, 
        'duration_of_iteration': 60, 
        'lambda_param': lam, 
        'volume': 10000,
        'return_left': True
    }
    # The trailing ", lam" makes sol a tuple: (solution, lambda).
    sol = problem_solution(**PARAMS_TASK), lam
    print(np.round(sol[0]))
    
    sols.append(sol)

In [None]:
figure, ax = plt.subplots(1, 1, sharey=False)

# One shade of grey per solution, from the darkest to the lightest.
colors = ['#000000', '#606060', '#888888', '#D3D3D3']
for j, solution in enumerate(sols):
    # solution is the (left position, lambda) tuple.
    ax.plot(solution[0], color=colors[j])

ax.set_ylabel('Volume')
ax.set_xlabel('Number of iteration')

ax.legend([r'$\lambda = {}$'.format(solution[1]) for solution in sols])
figure.set_size_inches(w=6, h=4)

# Saved as .pgf to be included into the LaTeX report.
figure_path = os.path.join(PATH, 'figures_to_report/optimization_demo.pgf')
plt.savefig(figure_path, bbox_inches='tight')

In [None]:
# Seed numpy's global random generator before the stochastic optimisation runs.
np.random.seed(1234)
# Numbers of liquidation steps to compare (lambda is fixed to 5 below).
iters = [3, 5, 7, 15]
# Collected results: one (left position, number of steps) tuple per run.
# This rebinds the name sols used by the lambda sweep above.
sols = []
for iter_ in iters:
    print(iter_)
    PARAMS_TASK = {
        'ticker': 'SBER',
        'date_of_liquidation': 20190603,
        'time_of_liquidation': 114000000000,
        'number_of_iteration': iter_,
        'duration_of_iteration': 60,
        'lambda_param': 5,
        'volume': 10000,
        'return_left': True
    }
    # The trailing ", iter_" makes sol a tuple: (solution, number of steps).
    sol = problem_solution(**PARAMS_TASK), iter_
    print(np.round(sol[0]))
    
    sols.append(sol)

In [None]:
figure, ax = plt.subplots(1, 1, sharey=False)

# One shade of grey per solution, from the darkest to the lightest.
colors = ['#000000', '#303030', '#888888', '#D3D3D3']
for j, solution in enumerate(sols):
    # solution is the (left position, number of steps) tuple.
    ax.plot(solution[0], color=colors[j])

ax.set_ylabel('Volume')
ax.set_xlabel('Number of iteration')

ax.legend([r'$N = {}$'.format(solution[1]) for solution in sols])
figure.set_size_inches(w=6, h=4)

# Saved as .pgf to be included into the LaTeX report.
figure_path = os.path.join(PATH, 'figures_to_report/optimization_demo_iters.pgf')
plt.savefig(figure_path, bbox_inches='tight')

# 3. Проверка "оптимальности" ликвидации

## 3.1. Получение средних значений

In [None]:
def count_all_volume(*, ticker):
    '''Sum the VOLUME of the rows with ACTION = 2 of the given ticker for every order-log table.

    :param ticker: str value of ticker (for example, SBER)
    :return: dict of three equally long lists: 'date' (the table name as int), 'ticker'
        and 'volume' (0 when the table has no matching rows)
    '''
    result_all = {
        'date': [],
        'ticker': [],
        'volume': []
    }

    # One connection serves all the tables; "with connection" only ends the transaction
    # in psycopg2, so the connection is closed explicitly.
    conn = cnct(**DB_SETTINGS)
    try:
        with conn:
            with conn.cursor() as cur:
                for table_name in TABLE_NAMES_ORDERLOGS:
                    # The table name is an identifier and cannot be bound as a parameter,
                    # so it is embedded with its double quotes escaped; the ticker is bound.
                    query = ('SELECT SUM("VOLUME") FROM "{}" WHERE '
                             '"ACTION" = 2 and "SECCODE" = %s;').format(table_name.replace('"', '""'))
                    cur.execute(query, (ticker,))
                    total = cur.fetchone()[0]

                    result_all['date'].append(int(table_name))
                    result_all['ticker'].append(ticker)
                    # SUM over zero rows is NULL (None in Python).
                    result_all['volume'].append(0 if total is None else int(total))
    finally:
        conn.close()

    return result_all

In [None]:
def all_volumes_by_day(*, ticker):
    '''Return the table of the volumes of the ticker for each day, cached as a csv file.

    The table is read from volumes_each_day/<ticker>_traded_volumes.csv when the file
    exists; otherwise it is computed with count_all_volume and written to that file.

    :param ticker: str value of ticker (for example, SBER)
    :return: pd.DataFrame with the columns date, ticker and volume
    '''
    cache_file = os.path.join(PATH,
                              'volumes_each_day/{}_traded_volumes.csv'.format(ticker))

    if os.path.isfile(cache_file):
        return pd.read_csv(cache_file, index_col=0)

    volumes = pd.DataFrame.from_dict(count_all_volume(ticker=ticker))
    volumes.to_csv(cache_file)

    return volumes

## 3.2. Подстановка и графики

In [None]:
class LiquidationValue:
    '''This class implements the logic for backward estimation of liquidation value of 
    liquidation strategy

    Three strategies are compared on the historical bid side of the order book:
    'optimal' (the given one), 'equal' (the same volume at every step) and 'first'
    (the whole volume at the first step).
    '''
    def __init__(self, *, optimal_strategy, 
                 ticker, date_of_liquidation, time_of_liquidation, 
                 duration_of_iteration=60):
        '''Init the object to calculate liquidation value

        :param optimal_strategy: np.array that represents the liquidation strategy 
            (left_position variant: the position before the first and after each step)
        :param ticker: the ticker of liquidation
        :param date_of_liquidation: the date of liquidation
        :param time_of_liquidation: the time of liquidation
        :duration_of_iteration: the duration of each step of liquidation in seconds
        '''
        self._ticker = ticker
        self._date = date_of_liquidation
        self._time = time_of_liquidation
        self._duration = duration_of_iteration

        # Left positions -> whole volumes traded at each step.
        left_optimal = np.round(optimal_strategy)
        self._volume = left_optimal[0]
        self._N = len(left_optimal) - 1
        self._optimal = (left_optimal[:-1] - left_optimal[1:]).astype(int)

        # The equal split goes through the same "round the left position, then
        # difference" procedure so that the rounded volumes still sum to the total.
        left_equal = self._volume - np.cumsum(np.array([self._volume / 
                                                        self._N] * self._N))
        left_equal = np.round(np.concatenate([np.array([self._volume]), left_equal]))
        self._equal = (left_equal[:-1] - left_equal[1:]).astype(int)

        self._first = np.concatenate([np.array([self._volume]), 
                                      np.array([0] * (self._N - 1))])

        # Filled lazily by _calculate_times / _calculate_lobs.
        self._times = None
        self._lobs = None
        self._initial_price = None

    def _calculate_times(self):
        '''Compute (once) the N moments of liquidation: the first one is one step after
        the time of liquidation, the following ones are one step apart.'''
        if self._times is None:
            start_time = time_convertion(self._time) + datetime.timedelta(seconds=self._duration)
            end_time = start_time + datetime.timedelta(seconds=(self._N-1)*self._duration)

            start_time = time_convertion_back(start_time)
            end_time = time_convertion_back(end_time)

            self._times = time_creation(start_time, end_time, delta=self._duration)

    def _calculate_lobs(self):
        '''Load (once) the bid side of the order book at every moment of liquidation and
        remember the mid price of the first moment.'''
        if self._lobs is None:
            self._calculate_times()

            self._lobs = []
            for time in self._times:
                lob = LOB(ticker=self._ticker, date=self._date, time=time)
                lob.table_build()
                if time == self._times[0]:
                    self._initial_price = lob.price_calculate()

                # The book is ordered by price descending, so the best bid is the first row.
                table = lob._lob.loc[lob._lob['BUYSELL'] == 'B', ['PRICE', 
                                                                  'VOLUME']].reset_index(drop=True)

                table['PRICE'] = table['PRICE'].astype(float)
                table['VOLUME'] = table['VOLUME'].astype(int)

                self._lobs.append(table)

    def _calculate_value(self, strategy='optimal'):
        '''This method calculates the liquidation value according to the given strategy

        At every step the volume is sold against the bid side of the book, best price
        first. The result is NaN when a book is missing, empty or not deep enough.

        :param strategy: str ('optimal', 'equal' or 'first')
        :raises ValueError: if the type of strategy is unknown
        '''
        if strategy == 'optimal':
            strategy = self._optimal
        elif strategy == 'equal':
            strategy = self._equal
        elif strategy == 'first':
            strategy = self._first
        else:
            raise ValueError('You must specify the type of strategy correctly')

        self._calculate_lobs()

        liq_values = []
        try:
            for step in range(len(strategy)):
                liquidate_volume_onestep = strategy[step]
                book = self._lobs[step]

                if liquidate_volume_onestep <= int(book.VOLUME.iloc[0]):
                    # The best bid alone absorbs the whole volume of the step.
                    liq_value = liquidate_volume_onestep * float(book.PRICE.iloc[0])

                else:
                    table = copy.deepcopy(book)
                    table['VOLUME_CUM'] = np.cumsum(table['VOLUME'])
                    # Levels that are consumed entirely ...
                    table_less = table.loc[table['VOLUME_CUM'] < liquidate_volume_onestep]
                    liq_value = np.sum(table_less.VOLUME * table_less.PRICE)

                    # ... and the first level that is consumed only partially.
                    price_more = table.loc[table['VOLUME_CUM'] >= liquidate_volume_onestep, 
                                           'PRICE'].iloc[0]
                    left_volume = liquidate_volume_onestep - np.sum(table_less['VOLUME'])
                    liq_value += price_more * left_volume

                liq_values.append(liq_value)
        except (KeyError, IndexError):
            # Only the lookups that fail on a missing, empty or too shallow book are
            # turned into NaN; any other error (or an interrupt) propagates.
            liq_values = np.nan

        return np.sum(liq_values)

    def calculate_liquidation_value(self):
        '''Return the dict with the time, the date and the liquidation value of each of
        the three strategies (keys 'time', 'date', 'optimal', 'equal', 'first').'''
        liquidation_values = {
            'time': self._time,
            'date': self._date
        }
        for strategy_type in ['optimal', 'equal', 'first']:
            liquidation_values[strategy_type] = self._calculate_value(strategy=strategy_type)

        return liquidation_values

In [None]:
# Per-day volumes of SBER (read from the csv cache when it exists, otherwise computed
# from the database); the bare name on the last line displays the table.
data_volume = all_volumes_by_day(ticker='SBER')
data_volume

In [None]:
# Back-test: for two dates and for every 5 minutes between 11:00 and 13:00 solve the
# liquidation problem and compare the liquidation value of the optimal strategy with the
# 'equal' and 'first' baselines.
np.random.seed(1234)
# Collected results, one list element per (date, time) pair.
result = {
    'date': [],
    'time': [],
    'optimal': [],
    'equal': [],
    'first': []
}
number_iters = 20
duration_iters = 30
for date in [20190624, 20190603]:
    # NOTE(review): 164844820 and 53628360 are presumably the daily volumes of SBER on
    # these dates (cf. data_volume), and 8.75 * 6 a scaling of the day to the
    # liquidation horizon -- neither is explained here, confirm and move to constants.
    if date == 20190603:
        volume_paste = int(np.round(164844820 / (8.75 * (6)), 0))
    elif date == 20190624:
        volume_paste = int(np.round(53628360 / (8.75 * (6)), 0))
    else:
        raise(ValueError('Something went wrong'))
    
    
    
    # Moments of liquidation: 11:00:00 ... 13:00:00 with the step of 5 minutes.
    for time in time_creation(start=110000000000, 
                              end=130000000000, 
                              delta=5*60):

        PARAMS_TASK = {
            'ticker': 'SBER',
            'date_of_liquidation': date,
            'time_of_liquidation': time,
            'number_of_iteration': number_iters,
            'duration_of_iteration': duration_iters,
            'lambda_param': 1,
            'volume': volume_paste,
            'return_left': True
        }
        sol = problem_solution(**PARAMS_TASK)

        # With return_left=True the solution is the left position after each step,
        # which is the form LiquidationValue expects.
        strategy = sol
        
        liq_value = LiquidationValue(optimal_strategy=strategy,
                                     ticker='SBER',
                                     date_of_liquidation=date,
                                     time_of_liquidation=time, 
                                     duration_of_iteration=duration_iters)

        value = liq_value.calculate_liquidation_value()

        for key in value.keys():
            result[key].append(value[key])
        
        # Progress output: everything collected so far plus the column optimal - equal.
        print(pd.concat([pd.DataFrame.from_dict(result), (pd.DataFrame.from_dict(result)['optimal'] -
                                                          pd.DataFrame.from_dict(result)['equal']) ], 
                        axis=1))
    

# Final table of liquidation values for all the dates and moments.
data_diff = pd.DataFrame.from_dict(result)

In [None]:
# Uncomment the next line to persist the table of liquidation values:
# data_diff.to_csv(os.path.join(PATH, 'results_from_check/data_diff.csv'))
# The bare name on the last line displays the table.
data_diff

In [None]:
# Independent copies of the results, one per date of liquidation.
is_june_03 = data_diff['date'] == 20190603
data03 = copy.deepcopy(data_diff).loc[is_june_03]

is_june_24 = data_diff['date'] == 20190624
data24 = copy.deepcopy(data_diff).loc[is_june_24]