### Done
* Generate only +1 and -1 labels by setting alpha = 0
* Clean the labels and ffill the 0s resulting from cleaning
* Plot and compare dirty, cleaned and ffilled labels
<br />
<br />
### To Do
* Update `get_strategy_pnl` in `func_tools` to generate cleaned, ffilled labels and profit
* Use the updated `get_strategy_pnl` to optimize k_plus and k_minus

In [None]:
%load_ext autoreload
%autoreload 2

import multiprocessing
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly_express as px

from func_tools import get_labels, plot_labels_line, get_strategy_pnl

In [None]:
# --- cache locations ---
experiments_folder = 'Experiments'
cache_folder = f'{experiments_folder}/cache'

# --- data selection (these values end up in the cached file names) ---
pair = 'USDT_BTC'
lob_depth = 10
date_start = '2020_04_04'
date_end = '2021_01_03'
frequency = timedelta(seconds=10)
frequency_seconds = frequency // timedelta(seconds=1)  # whole seconds, as an int
norm_type = 'dyn_z_score'
roll = 7200 * 6  # NOTE(review): presumably the rolling-normalisation window; unit not visible here

# --- labelling ---
k_plus = 15
k_minus = 15
alpha = 0  # Zero to allow only for 1 and -1 labels

# --- costs ---
trading_fee = 0.000712

In [None]:
# Cached inputs are keyed by pair, book depth, sampling frequency and date range.
input_file_name = f'{pair}--{lob_depth}lev--{frequency_seconds}sec--{date_start}--{date_end}.csv.gz'
normalized_train_file = f'{cache_folder}/{pair}/TRAIN--{norm_type}-{roll}--{input_file_name}'
top_ob_train_file = f'{cache_folder}/{pair}/TRAIN_TOP--{input_file_name}'

train_dyn_df = pd.read_csv(normalized_train_file)  # import standardized data
top_ob_train = pd.read_csv(top_ob_train_file)  # import original data
mid_px_train = top_ob_train['Mid_Price']

# get dynamic mid price for get label
# .copy() turns the Level == 0 subset into an independent frame, so adding the
# Mid_Price column below does not raise pandas' SettingWithCopyWarning.
# NOTE(review): the subset keeps the row labels of train_dyn_df, which may not line
# up with top_ob_train's index - confirm how func_tools aligns prices and labels.
top_ob_train_dyn = train_dyn_df[train_dyn_df['Level'] == 0].copy()
top_ob_train_dyn['Mid_Price'] = (top_ob_train_dyn['Ask_Price'] + top_ob_train_dyn['Bid_Price']) / 2
mid_px_train_dyn = top_ob_train_dyn['Mid_Price']


In [None]:
# Because of the rolling computation the label series starts and ends with "0" or NAs;
# the trailing "0" are handled by the forward fill applied further down.
labels = get_labels(
    mid_px_train_dyn,
    k_plus,
    k_minus,
    alpha,
    long_only=False,
)

In [None]:
# Dirty labels: the raw get_labels output over the 20,000 rows ending at row 1,626,046.
plot_window = slice(1626046 - 20000, 1626046)
plot_labels_line(mid_px_train[plot_window], labels[plot_window], title='Dirty')

In [None]:
# Profit of the strategy on the cleaned labels ("cl"); return_df=True also hands back
# the frame whose 'px' and 'cleaned_labels' columns are plotted below.
profit_cl, df_profit_cl = get_strategy_pnl(
    mid_px_train,
    labels,
    trading_fee=trading_fee,
    min_profit=0.0020,
    plotting=False,
    return_df=True,
)

# Cleaned labels over the same 20,000-row window used for the other label plots.
plot_window = slice(1626046 - 20000, 1626046)
plot_labels_line(df_profit_cl['px'][plot_window], df_profit_cl['cleaned_labels'][plot_window], title='Cleaned')

In [None]:
# Create new column where each 0 left by the cleaning carries the last non-zero label forward.
# Series.replace(0, method='ffill') is avoided because the `method` keyword of replace()
# is deprecated since pandas 2.1; mask + ffill expresses the same intent explicitly.
cleaned = df_profit_cl['cleaned_labels']
carried = cleaned.mask(cleaned == 0).ffill()
# Leading zeros have nothing before them to carry forward, so they keep their original value.
# Masking introduces NaN, so an integer label column comes back as float.
df_profit_cl['ffilled_labels'] = carried.fillna(cleaned)

# Ffilled labels
plot_window = slice(1626046 - 20000, 1626046)
plot_labels_line(df_profit_cl['px'][plot_window], df_profit_cl['ffilled_labels'][plot_window], title='Fwd Filled Cleaned')

## Labels Optimization
<br />
<br />
### Brute force grid search
#### single process

In [None]:
# Search space: alpha is pinned to 0 (the wider alternative would be
# np.arange(0, 0.04, 1/1000)); both horizons sweep the integers 1..199.
alphas_range = 0
k_plus_range = np.arange(1, 200)
k_minus_range = np.arange(1, 200)

# Stack the three coordinate grids, then flatten them into one row per
# (alpha, k_plus, k_minus) combination.
mesh = np.stack(np.meshgrid(alphas_range, k_plus_range, k_minus_range))
combinations = mesh.transpose().reshape(-1, mesh.shape[0])
print(combinations.shape)

In [None]:
%%time
min_profit = 0.0020
plotting = False
gs_labels = []

for comb in combinations:    
    alpha = comb[0]
    k_plus = int(comb[1])
    k_minus = int(comb[2])
    labels = get_labels(mid_px_test_dyn, k_plus, k_minus, alpha, long_only=False)
    profit = get_strategy_pnl(px_ts, labels, trading_fee=trading_fee, min_profit=min_profit, plotting=plotting, return_df=False)
    #gs_labels = np.vstack((gs_labels, np.array([alpha, k_plus, k_minus, profit])))
    gs_labels.append(np.array([alpha, k_plus, k_minus, profit]))

In [None]:
# Collect the grid-search rows into a frame, one row per tested combination.
gs_columns = ['alpha', 'k_plus', 'k_minus', 'profit']
gs_df = pd.DataFrame(data=gs_labels, columns=gs_columns)

In [None]:
# prepare data for surface plot: one k_minus x k_plus grid of profits per alpha level.
# np.isclose replaces exact float equality, which can silently miss alpha values
# that were generated with float steps (e.g. 0.039 from np.arange).
# NOTE(review): with alphas_range = 0 in the search-space cell, the 0.039 slice is empty.
gs_df_0 = gs_df[np.isclose(gs_df['alpha'], 0)]
gs_df_0_pivot = gs_df_0.pivot(values='profit', index='k_minus', columns='k_plus')

gs_df_039 = gs_df[np.isclose(gs_df['alpha'], 0.039)]
gs_df_039_pivot = gs_df_039.pivot(values='profit', index='k_minus', columns='k_plus')

In [None]:
# 3d plot of k_plus, k_minus and profit. Each surface shows one level of alpha.
# The pivots use k_minus as index and k_plus as columns; a Surface maps the rows of z
# to y and the columns of z to x, so both traces pass x=columns (k_plus) and
# y=index (k_minus), and the axis titles follow the same orientation.
fig = go.Figure(data=[
    go.Surface(z=gs_df_0_pivot.values, x=gs_df_0_pivot.columns.values, y=gs_df_0_pivot.index.values, name='alpha=0'),
    go.Surface(z=gs_df_039_pivot.values, x=gs_df_039_pivot.columns.values, y=gs_df_039_pivot.index.values, name='alpha=0.039', showscale=False),
])

fig.update_layout(title='<b>Labels profitability</b>', autosize=False,
                  width=1000, height=600,
                  scene=dict(
                      xaxis_title='k_plus',
                      yaxis_title='k_minus',
                      zaxis_title='profit'),
                  margin=dict(l=35, r=30, b=25, t=60))

fig.show()

#### multi-process

In [None]:
def opt_func(alpha, k_plus, k_minus, gs_labels):
    """Evaluate one (alpha, k_plus, k_minus) combination and record the result.

    k_plus and k_minus are cast to int before labelling. The row
    [alpha, k_plus, k_minus, profit] is appended to gs_labels, which is the
    shared list handed out by the multiprocess manager, and the profit is
    also returned to the caller.
    """
    candidate_labels = get_labels(mid_px_test_dyn, int(k_plus), int(k_minus), alpha, long_only=False)
    profit = get_strategy_pnl(
        px_ts,
        candidate_labels,
        trading_fee=0.000712,
        min_profit=0.0020,
        plotting=False,
        return_df=False,
    )
    result_row = np.array([alpha, k_plus, k_minus, profit])
    gs_labels.append(result_row)
    return profit

In [None]:
# Shared list proxy: every worker appends its [alpha, k_plus, k_minus, profit] row to it.
manager = multiprocessing.Manager()
gs_labels = manager.list()

inputs = list(map(list, combinations))
inputs_l = [params + [gs_labels] for params in inputs]  # append proxy list "column" wise

# starmap blocks until every task has finished and already returns a list, so no extra
# list() / close() / join() is needed; leaving the with-block shuts the pool down.
with multiprocessing.Pool(processes=12) as p:
    res = p.starmap(opt_func, inputs_l)
print(res)

In [None]:
# Copy the rows out of the manager's list proxy before building the results frame.
gs_rows = list(gs_labels)
gs_df = pd.DataFrame(gs_rows, columns=['alpha', 'k_plus', 'k_minus', 'profit'])

#### Bayesian opt

In [None]:
from bayes_opt import BayesianOptimization
from bayes_opt import UtilityFunction

In [None]:
def bay_opt_func(k_plus, k_minus, alpha):
    """Objective for the Bayesian search: strategy profit for one labelling setup.

    k_plus and k_minus are truncated with int() before being passed to
    get_labels; alpha is passed through unchanged.
    """
    k_plus_int, k_minus_int = int(k_plus), int(k_minus)
    candidate_labels = get_labels(mid_px_test_dyn, k_plus_int, k_minus_int, alpha, long_only=False)
    return get_strategy_pnl(
        px_ts,
        candidate_labels,
        trading_fee=0.000712,
        min_profit=0.0020,
        plotting=False,
        return_df=False,
    )

In [None]:
# Search box for the optimizer: each parameter name maps to its (lower, upper) bound.
label_pbounds = {
    "k_plus": (1, 200),
    "k_minus": (1, 200),
    "alpha": (0, 0.5),
}
label_opt = BayesianOptimization(f=bay_opt_func, pbounds=label_pbounds, verbose=2, random_state=7)

In [None]:
# Run the search with init_points=20 and n_iter=3.
label_opt.maximize(init_points=20, n_iter=3)