In [1]:
import random
import numpy as np
import tensorflow as tf
import mlflow.keras

from ml_investing_wne import config
from ml_investing_wne.data_engineering.load_data import get_hist_data
from ml_investing_wne.data_engineering.prepare_dataset import prepare_processed_dataset
from ml_investing_wne.train_test_val_split import train_test_val_split
from ml_investing_wne.helper import get_ml_flow_experiment_name, get_callbacks, \
    get_final_model_path, evaluate_model
from ml_investing_wne.models import model_factory
from ml_investing_wne.utils import get_logger
from ml_investing_wne.data_engineering.crypto_factory import CryptoFactory
from ml_investing_wne.data_engineering.prepare_dataset import prepare_processed_dataset
random.seed(config.seed)
np.random.seed(config.seed)
tf.random.set_seed(config.seed)

logger = get_logger()



In [2]:

if config.RUN_TYPE == 'forex':
    if config.provider == 'hist_data':
        df = get_hist_data(currency=config.currency)
    else:
        logger.error('not implemented')
elif config.RUN_TYPE == 'crypto':
    crypto = CryptoFactory(config.provider, config.currency)
    # crypto.generate_volumebars(frequency=2000)
    crypto.time_aggregation(freq='60min')
    # df = crypto.df_time_aggregated
    # df = crypto.df_volume_bars
    crypto.run_3_barriers()
    df = crypto.df_3_barriers
    logger.info(f'df head: {df.head()}')
df = prepare_processed_dataset(df=df, add_target=False)


2022-12-07 16:53:41,685 - ml_investing_wne.data_engineering.crypto_factory - head of raw dataset:                 q        p       s              t           d  side  \
3513833  0.183071  5015.92  btcusd  1554296882802  2019-04-03     0   
3513834  0.001000  5014.88  btcusd  1554296895833  2019-04-03     1   
3513835  0.057873  5015.92  btcusd  1554296897405  2019-04-03     0   
3513836  0.012127  5015.92  btcusd  1554296897501  2019-04-03     0   
3513837  0.009431  5014.91  btcusd  1554296900647  2019-04-03     1   

             buy_id     sell_id                datetime  
3513833  3068695470  3068694863 2019-04-03 13:08:02.802  
3513834  3068694353  3068696446 2019-04-03 13:08:15.833  
3513835  3068696567  3068695586 2019-04-03 13:08:17.405  
3513836  3068696567  3068695628 2019-04-03 13:08:17.501  
3513837  3068696503  3068696746 2019-04-03 13:08:20.647  
2022-12-07 16:55:04,854 - root - number of ties: 269, share: 0.012f
2022-12-07 16:55:19,065 - root - df head:                  

In [3]:
logger.info(f' df shape before merge wiith 3 barriers additional info is {df.shape}')
df = df.merge(crypto.df_3_barriers_additional_info[['datetime', 'time_step']], on='datetime', how='inner')
logger.info(f' df shape after merge wiith 3 barriers additional info is {df.shape}')

2022-12-07 16:55:20,471 - root -  df shape before merge wiith 3 barriers additional info is (22508, 41)
2022-12-07 16:55:20,511 - root -  df shape after merge wiith 3 barriers additional info is (22508, 43)


In [4]:
X, y, X_val, y_val, X_test, y_test, y_cat, y_val_cat, y_test_cat, _ = train_test_val_split(df, 
                         nb_classes=config.nb_classes, freq=config.freq,
                         seq_len=config.seq_len, steps_ahead=config.steps_ahead,
                         train_end=config.train_end, val_end=config.val_end,
                         test_end=config.test_end, binarize_target=False, time_step=False)

Feature names unseen at fit time:
- level_0
Feature names seen at fit time, yet now missing:
- index

Feature names unseen at fit time:
- level_0
Feature names seen at fit time, yet now missing:
- index

2022-12-07 16:55:56,183 - ml_investing_wne.train_test_val_split - first sequence begins: 2019-04-05 00:00:00
2022-12-07 16:55:56,184 - ml_investing_wne.train_test_val_split - first sequence ends: 2019-04-05 23:00:00
2022-12-07 16:55:56,277 - ml_investing_wne.train_test_val_split - last sequence begins: 2021-03-30 00:00:00
2022-12-07 16:55:56,279 - ml_investing_wne.train_test_val_split - last sequence ends: 2021-03-30 23:00:00
2022-12-07 16:55:56,352 - ml_investing_wne.train_test_val_split - first sequence begins: 2021-03-30 01:00:00
2022-12-07 16:55:56,353 - ml_investing_wne.train_test_val_split - first sequence ends: 2021-03-31 00:00:00
2022-12-07 16:55:56,365 - ml_investing_wne.train_test_val_split - last sequence begins: 2021-06-30 00:00:00
2022-12-07 16:55:56,366 - ml_investing_wne

In [5]:
X.shape

(16724, 24, 42)

In [6]:
mlflow.tensorflow.autolog()
mlflow.set_experiment(experiment_name=get_ml_flow_experiment_name())
callbacks = get_callbacks()
model = model_factory(X)
history = model.fit(X, y_cat, batch_size=config.batch, epochs=15, verbose=2,
                    validation_data=(X_val, y_val_cat), callbacks=callbacks)
model.save(get_final_model_path())

2022-12-07 16:56:10.011550: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022/12/07 16:56:10 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '048462c36e0643d5af991dfa4bc9f55e', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow
2022-12-07 16:56:10.566231: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2022-12-07 16:56:10.566251: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2022-12-07 16:56:10.566376: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2022-12-07 16:56:10.730818: I tensorflow/compiler/mlir/ml

Epoch 1/15


2022-12-07 16:56:13.431945: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2022-12-07 16:56:13.431968: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2022-12-07 16:56:13.464961: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2022-12-07 16:56:13.474090: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2022-12-07 16:56:13.483492: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: /var/folders/fy/kbq5nlv945s7pr1wvh78h5840000gn/T/tmpbzw2tlkm/train/plugins/profile/2022_12_07_16_56_13

2022-12-07 16:56:13.488443: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to /var/folders/fy/kbq5nlv945s7pr1wvh78h5840000gn/T/tmpbzw2tlkm/train/plugins/profile/2022_12_07_16_56_13/macC02X74LNJG5J.trace.json.gz
2022-12-07 16:56:13.509017: I tensorflow/core/profiler/rpc/client/sav

262/262 - 13s - loss: 0.6922 - accuracy: 0.5688 - val_loss: 0.6982 - val_accuracy: 0.5507

Epoch 00001: val_accuracy improved from -inf to 0.55072, saving model to /Users/i0495036/Documents/sandbox/ml_investing_wne/ml_investing_wne/src/ml_investing_wne/models/resnet_Bitstamp_BTCUSD_60min_1.h5
Epoch 2/15
262/262 - 8s - loss: 0.6499 - accuracy: 0.6232 - val_loss: 0.7349 - val_accuracy: 0.5122

Epoch 00002: val_accuracy did not improve from 0.55072
Epoch 3/15
262/262 - 7s - loss: 0.6161 - accuracy: 0.6590 - val_loss: 0.7660 - val_accuracy: 0.5457

Epoch 00003: val_accuracy did not improve from 0.55072
Epoch 4/15
262/262 - 7s - loss: 0.5798 - accuracy: 0.6939 - val_loss: 0.7946 - val_accuracy: 0.5380

Epoch 00004: val_accuracy did not improve from 0.55072
Epoch 5/15
262/262 - 7s - loss: 0.5349 - accuracy: 0.7276 - val_loss: 0.8694 - val_accuracy: 0.4977

Epoch 00005: val_accuracy did not improve from 0.55072
Epoch 6/15
262/262 - 7s - loss: 0.4977 - accuracy: 0.7581 - val_loss: 0.8574 - val

2022-12-07 16:58:36.991733: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /var/folders/fy/kbq5nlv945s7pr1wvh78h5840000gn/T/tmpfej5va63/model/data/model/assets


2022-12-07 16:58:40,749 - tensorflow - Assets written to: /var/folders/fy/kbq5nlv945s7pr1wvh78h5840000gn/T/tmpfej5va63/model/data/model/assets


INFO:tensorflow:Assets written to: /Users/i0495036/Documents/sandbox/ml_investing_wne/ml_investing_wne/src/ml_investing_wne/models/production/resnet_Bitstamp_BTCUSD_60min_1_24/assets


2022-12-07 16:58:58,658 - tensorflow - Assets written to: /Users/i0495036/Documents/sandbox/ml_investing_wne/ml_investing_wne/src/ml_investing_wne/models/production/resnet_Bitstamp_BTCUSD_60min_1_24/assets


In [7]:
df['cost'] = (config.pips / 10000) / df['close']

In [8]:
import joblib
def load_test_dates():

    name = f'test_{config.currency}_{config.freq}.save'

    start_date = joblib.load(os.path.join(config.package_directory, 'models',
                                        f'first_sequence_ends_{name}'))
    end_date = joblib.load(os.path.join(config.package_directory, 'models',
                                        f'last_sequence_ends_{name}'))

    return start_date, end_date

In [9]:
test_loss, test_acc = model.evaluate(X_test, y_test_cat)
logger.info('Test accuracy : %.4f', test_acc)
logger.info('Test loss : %.4f', test_loss)
mlflow.log_metric("test_acc", test_acc)
mlflow.log_metric("test_loss", test_loss)
mlflow.log_metric("test_loss", test_loss)
mlflow.set_tag('currency', config.currency)
mlflow.set_tag('frequency', config.freq)
mlflow.set_tag('steps_ahead', config.steps_ahead)
mlflow.log_metric('y_distribution', y.mean())
mlflow.log_metric('y_val_distribution', y_val.mean())
mlflow.log_metric('y_test_distribution', y_test.mean())
mlflow.log_metric('cost', config.pips)
mlflow.log_metric('seq_len', config.seq_len)
y_pred = model.predict(X_test)
y_pred_class = y_pred.argmax(axis=-1)
start_date, end_date = load_test_dates()
lower_bounds = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
upper_bounds = [1 - lower for lower in lower_bounds]



2022-12-07 16:59:04,275 - root - Test accuracy : 0.5436
2022-12-07 16:59:04,277 - root - Test loss : 1.2532


In [8]:
df.head()

Unnamed: 0,datetime,open,close,high,low,y_pred,SMA_3,EMA_3,VAR_3,SMA_5,...,BBB_5_2.0,BBP_5_2.0,roc_1,hour,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos,cost
0,2019-04-05 00:00:00,4911.14,4944.02,4964.68,4911.14,1,4911.03,4920.684999,995.9491,4902.774,...,2.644449,0.81813,1.007351,0,4,0.0,1.0,-0.866025,-0.5,0.0
1,2019-04-05 01:00:00,4941.19,4914.04,4950.36,4909.6,1,4922.0,4917.362499,372.9628,4914.92,...,1.705424,0.489501,0.993936,1,4,0.269797,0.962917,-0.866025,-0.5,0.0
2,2019-04-05 02:00:00,4911.47,4924.97,4929.59,4893.62,1,4927.676667,4921.16625,230.194633,4914.42,...,1.683017,0.627553,1.002224,2,4,0.519584,0.854419,-0.866025,-0.5,0.0
3,2019-04-05 03:00:00,4926.97,4890.92,4929.43,4883.83,1,4909.976667,4906.043125,302.233633,4916.378,...,1.43835,0.13999,0.993086,3,4,0.730836,0.682553,-0.866025,-0.5,0.0
4,2019-04-05 04:00:00,4893.68,4946.53,4958.88,4893.18,1,4920.806667,4926.286562,786.118033,4924.096,...,1.665888,0.773486,1.01137,4,4,0.887885,0.460065,-0.866025,-0.5,0.0


In [9]:
prediction = df.loc[(df.datetime >= start_date) & (df.datetime <= end_date)]
prediction

Unnamed: 0,datetime,open,close,high,low,y_pred,SMA_3,EMA_3,VAR_3,SMA_5,...,BBB_5_2.0,BBP_5_2.0,roc_1,hour,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos,cost
18735,2021-07-01 00:00:00,35059.71,34722.86,35071.34,34722.86,0,34876.606667,34820.805209,24742.590621,34805.520,...,1.691118,0.359566,0.991028,0,3,0.000000e+00,1.000000,1.224647e-16,-1.0,0.0
18736,2021-07-01 01:00:00,34742.43,34957.77,35039.0,34742.43,0,34905.953333,34889.287605,26720.849421,34877.732,...,1.274744,0.680022,1.006765,1,3,2.697968e-01,0.962917,1.224647e-16,-1.0,0.0
18737,2021-07-01 02:00:00,34933.7,34314.85,34940.87,34224.27,0,34665.160000,34602.068802,105833.499087,34780.488,...,2.934141,0.04372,0.981609,2,3,5.195840e-01,0.854419,1.224647e-16,-1.0,0.0
18738,2021-07-01 03:00:00,34323.34,34130.0,34419.21,34130.0,0,34467.540000,34366.034401,188786.470287,34632.542,...,4.103620,0.146393,0.994613,3,3,7.308360e-01,0.682553,1.224647e-16,-1.0,0.0
18739,2021-07-01 04:00:00,34116.87,34342.53,34343.58,34053.76,0,34262.460000,34354.282201,13350.784287,34493.602,...,3.498151,0.374799,1.006227,4,3,8.878852e-01,0.460065,1.224647e-16,-1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22233,2021-11-25 19:00:00,59170.62,58915.91,59309.25,58723.51,0,59097.673333,59020.969800,25738.502241,59095.446,...,0.706772,0.070148,0.995915,19,3,-8.878852e-01,0.460065,1.224647e-16,-1.0,0.0
22234,2021-11-25 20:00:00,58910.1,58917.7,59149.98,58864.54,0,58997.060000,58969.334900,19323.396108,59052.818,...,0.833842,0.225597,1.00003,20,3,-7.308360e-01,0.682553,1.224647e-16,-1.0,0.0
22235,2021-11-25 21:00:00,58895.83,58863.09,59109.68,58757.34,0,58898.900000,58916.212450,962.568108,59014.762,...,0.979998,0.237747,0.999073,21,3,-5.195840e-01,0.854419,1.224647e-16,-1.0,0.0
22236,2021-11-25 22:00:00,58866.49,58937.88,58952.38,58586.64,0,58906.223333,58927.046225,1497.171441,58958.430,...,0.696096,0.449928,1.001271,22,3,-2.697968e-01,0.962917,1.224647e-16,-1.0,0.0


In [10]:
crypto.df_3_barriers_additional_info.reset_index(inplace=True)

In [11]:
prediction = prediction.merge(crypto.df_3_barriers_additional_info, on='datetime', how='left')
prediction['y_pred'] = prediction['prc_change']

In [12]:
prediction['y_pred'] = prediction['prc_change']

In [13]:
upper_bound = 0.6
lower_bound = 0.4


# recreate target as continous variable
# df['y_pred'] = df['close'].shift(-config.steps_ahead) / df['close'] - 1
# new_start = config.val_end + config.seq_len * datetime.timedelta(minutes=int(''.join(filter(str.isdigit, config.freq))))

if config.provider == 'hist_data':
    prediction['datetime_local'] = prediction['datetime'].dt.tz_localize('US/Eastern').dt.tz_convert(
        'Europe/London').dt.tz_localize(None)
else:
    prediction['datetime_local'] = prediction['datetime']
prediction['hour_local'] = prediction['datetime_local'].dt.time
prediction['prediction'] = y_pred[:, 1]
conditions = [
    (prediction['prediction'] <= lower_bound),
    (prediction['prediction'] > lower_bound) & (prediction['prediction'] <= upper_bound),
    (prediction['prediction'] > upper_bound)
]
values = [0, 0.5, 1]
prediction['trade'] = np.select(conditions, values)

prediction.reset_index(inplace=True)
# drop last row for which we don't have a label - this works only for one step ahead prediction
#prediction.drop(prediction.tail(1).index, inplace=True)
 

In [16]:
prediction.loc[1046]

index                                  1046
datetime                2021-08-13 20:00:00
open                                46467.5
close                              47604.28
high                               47929.18
low                                46434.79
y_pred                             0.001233
SMA_3                          46866.363333
EMA_3                          47048.488187
VAR_3                         409279.093228
SMA_5                             46737.638
EMA_5                          46856.989847
VAR_5                         236007.349875
SMA_10                             46582.25
EMA_10                         46588.083012
VAR_10                        142578.692605
SMA_13                         46506.082308
EMA_13                         46450.170706
VAR_13                        128800.101933
SMA_20                           46126.8435
EMA_20                         46186.182523
VAR_20                        422734.722031
MACD_12_26_9                    

In [5]:
import datetime

In [6]:
crypto.df_3_barriers[crypto.df_3_barriers.index>datetime.datetime(2021,8,14,4,0,0)]

Unnamed: 0_level_0,open,close,high,low,y_pred
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-08-14 05:00:00,47492.5,47570.06,47607.07,47456.61,1
2021-08-14 06:00:00,47555.3,47550.0,47703.49,47532.9,1
2021-08-14 07:00:00,47550.0,47663.03,47763.88,47526.0,1
2021-08-14 08:00:00,47661.99,47638.67,47739.43,47560.47,1
2021-08-14 09:00:00,47660.18,47000.0,48190.0,46317.05,0
...,...,...,...,...,...
2021-11-26 07:00:00,57585.33,56997.67,57672.26,56661.09,
2021-11-26 08:00:00,56960.23,55251.91,56960.23,54377.58,
2021-11-26 09:00:00,55218.27,54917.08,55514.82,54619.36,
2021-11-26 10:00:00,54800.0,54412.14,54950.0,54356.43,


In [36]:
crypto.df_3_barriers.y_pred.value_counts()

0    11907
1    11042
Name: y_pred, dtype: int64

In [9]:
df.loc[df['datetime']>datetime.datetime(2021,8,14,1,0,0)]

Unnamed: 0,datetime,open,close,high,low,y_pred,SMA_3,EMA_3,VAR_3,SMA_5,...,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0,roc_1,hour,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos
20013,2021-08-14 02:00:00,47609.86,47563.72,47661.84,47428.17,0,47605.340000,47595.933253,1982.990795,47655.730,...,47868.198356,0.891680,0.283474,0.999238,2,5,5.195840e-01,0.854419,-8.660254e-01,0.5
20014,2021-08-14 03:00:00,47546.99,47585.89,47625.0,47444.32,1,47583.203333,47590.911626,334.473229,47652.498,...,47868.594935,0.906970,0.345884,1.000466,3,5,7.308360e-01,0.682553,-8.660254e-01,0.5
20015,2021-08-14 04:00:00,47556.17,47487.9,47702.14,47471.02,1,47545.836667,47539.405813,2640.370229,47577.962,...,47685.251005,0.451003,0.080283,0.997941,4,5,8.878852e-01,0.460065,-8.660254e-01,0.5
20016,2021-08-14 05:00:00,47492.5,47570.06,47607.07,47456.61,1,47547.950000,47554.732907,2767.149095,47561.514,...,47639.346040,0.327290,0.5549,1.00173,5,5,9.790841e-01,0.203456,-8.660254e-01,0.5
20017,2021-08-14 06:00:00,47555.3,47550.0,47703.49,47532.9,1,47535.986667,47552.366453,1834.846529,47551.514,...,47619.181913,0.284609,0.488813,0.999578,6,5,9.976688e-01,-0.068242,-8.660254e-01,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22503,2021-11-25 20:00:00,58910.1,58917.7,59149.98,58864.54,0,58997.060000,58969.334900,19323.396108,59052.818,...,59299.021470,0.833842,0.225597,1.00003,20,3,-7.308360e-01,0.682553,1.224647e-16,-1.0
22504,2021-11-25 21:00:00,58895.83,58863.09,59109.68,58757.34,0,58898.900000,58916.212450,962.568108,59014.762,...,59303.933644,0.979998,0.237747,0.999073,21,3,-5.195840e-01,0.854419,1.224647e-16,-1.0
22505,2021-11-25 22:00:00,58866.49,58937.88,58952.38,58586.64,0,58906.223333,58927.046225,1497.171441,58958.430,...,59163.633589,0.696096,0.449928,1.001271,22,3,-2.697968e-01,0.962917,1.224647e-16,-1.0
22506,2021-11-25 23:00:00,58945.17,58990.14,59364.85,58842.0,0,58930.370000,58958.593112,4077.725708,58924.944,...,59006.811525,0.277871,0.89818,1.000887,23,3,-2.449294e-16,1.000000,1.224647e-16,-1.0


In [15]:
# INITIALIZE PORTFOLIO
budget = 100
transaction = None
triple_barrier = True
i = 0# ITERATE OVER PREDICTIONS
# cost is added once as it represents spread
while i < prediction.shape[0]:
    print(i)
    if prediction.loc[i, 'trade'] == 1:
        # add transaction cost if position changes
        if transaction != 'buy':
            budget = budget * (1 - prediction.loc[i, 'cost'])
        transaction = 'buy'
        budget = budget + budget * prediction.loc[i, 'y_pred']
        prediction.loc[i, 'budget'] = budget
        prediction.loc[i, 'transaction'] = transaction
        i = i + config.steps_ahead
    elif prediction.loc[i, 'trade'] == 0:
        # add transaction cost if position changes
        if transaction != 'sell':
            budget = budget * (1 - prediction.loc[i, 'cost'])
        transaction = 'sell'
        budget = budget + budget * (-prediction.loc[i, 'y_pred'])
        prediction.loc[i, 'budget'] = budget
        prediction.loc[i, 'transaction'] = transaction
        i = i + config.steps_ahead
    elif prediction.loc[i, 'trade'] == 0.5:
        if transaction in ['buy', 'sell']:
            # budget = budget * (1 - prediction.loc[i, 'cost']) # spread is included once in transaction costs
            transaction = None
        prediction.loc[i, 'budget'] = budget
        prediction.loc[i, 'transaction'] = transaction
        if triple_barrier:
            next_date = prediction.loc[i ,'barrier_touched_date']
            i = prediction.loc[prediction['datetime']==next_date].index[0]
        else:    
            i = i + 1 # SUMMARIZE RESULTS

hits = prediction.loc[((prediction['transaction'] == 'buy') & (prediction['y_pred'] > 0)) |
                      ((prediction['transaction'] == 'sell') & (prediction['y_pred'] < 0))].shape[0]
transactions = prediction.loc[prediction['transaction'].isin(['buy', 'sell'])].shape[0]
try:
    hits_ratio = hits / transactions
except ZeroDivisionError:
    hits_ratio = 0
share_of_time_active = round(prediction.loc[prediction['transaction'].isin(['buy', 'sell'])].shape[0] * \
                             config.steps_ahead / prediction.shape[0], 2)
logger.info('''share_of_time_active for bounds %.2f-%.2f is %.2f and hit ratio is %.4f''',
            lower_bound, upper_bound, share_of_time_active, hits_ratio)
logger.info('Portfolio result:  %.2f', budget)

0
3
6
17
19
24
25
29
31
34
37
48
55
56
57
68
70
77
78
79
80
81
92
93
94
96
98
107
114
124
130
141
143
148
159
165
167
168
175
186
192
194
205
213
216
218
229
232
233
244
249
254
260
264
267
273
275
279
281
284
291
293
298
301
305
309
311
313
319
324
325
326
327
338
343
344
347
351
353
358
360
364
368
370
374
380
382
386
390
391
392
393
394
398
405
406
412
419
422
427
428
430
438
439
441
452
455
456
467
478
481
486
492
493
494
500
511
513
514
525
531
540
544
546
547
558
564
567
572
576
580
585
586
589
591
594
595
596
597
598
599
600
601
612
614
615
616
622
630
631
632
633
634
635
636
641
642
646
655
656
667
678
686
691
700
711
712
713
715
721
732
736
737
739
743
744
750
756
757
760
761
763
768
770
781
782
783
784
786
788
790
798
801
804
815
818
820
822
825
828
839
840
841
842
843
844
845
846
847
848
849
850
852
853
854
855
856
857
858
859
860
861
864
873
875
886
894
898
903
904
905
907
912
914
918
920
930
938
939
941
951
954
965
967
970
973
984
986
992
999
1000
1007
1014
1016
1026
1028


IndexError: index 0 is out of bounds for axis 0 with size 0

In [15]:
prediction

Unnamed: 0,index,datetime,open,close,high,low,y_pred,SMA_3,EMA_3,VAR_3,...,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos,cost,datetime_local,hour_local,prediction,trade
0,18678,2021-07-01 00:00:00,35059.71,34722.86,35071.34,34722.86,0,34876.606667,34820.805209,24742.590621,...,3,0.000000e+00,1.000000,1.224647e-16,-1.0,0.0,2021-07-01 00:00:00,00:00:00,0.437496,0.5
1,18679,2021-07-01 01:00:00,34742.43,34957.77,35039.0,34742.43,0,34905.953333,34889.287605,26720.849421,...,3,2.697968e-01,0.962917,1.224647e-16,-1.0,0.0,2021-07-01 01:00:00,01:00:00,0.372683,0.0
2,18680,2021-07-01 02:00:00,34933.7,34314.85,34940.87,34224.27,1,34665.160000,34602.068802,105833.499087,...,3,5.195840e-01,0.854419,1.224647e-16,-1.0,0.0,2021-07-01 02:00:00,02:00:00,0.546845,0.5
3,18681,2021-07-01 03:00:00,34323.34,34130.0,34419.21,34130.0,0,34467.540000,34366.034401,188786.470287,...,3,7.308360e-01,0.682553,1.224647e-16,-1.0,0.0,2021-07-01 03:00:00,03:00:00,0.589272,0.5
4,18682,2021-07-01 04:00:00,34116.87,34342.53,34343.58,34053.76,0,34262.460000,34354.282201,13350.784287,...,3,8.878852e-01,0.460065,1.224647e-16,-1.0,0.0,2021-07-01 04:00:00,04:00:00,0.539866,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3494,22172,2021-11-25 19:00:00,59170.62,58915.91,59309.25,58723.51,0,59097.673333,59020.969800,25738.502241,...,3,-8.878852e-01,0.460065,1.224647e-16,-1.0,0.0,2021-11-25 19:00:00,19:00:00,0.469894,0.5
3495,22173,2021-11-25 20:00:00,58910.1,58917.7,59149.98,58864.54,0,58997.060000,58969.334900,19323.396108,...,3,-7.308360e-01,0.682553,1.224647e-16,-1.0,0.0,2021-11-25 20:00:00,20:00:00,0.467702,0.5
3496,22174,2021-11-25 21:00:00,58895.83,58863.09,59109.68,58757.34,0,58898.900000,58916.212450,962.568108,...,3,-5.195840e-01,0.854419,1.224647e-16,-1.0,0.0,2021-11-25 21:00:00,21:00:00,0.455801,0.5
3497,22175,2021-11-25 22:00:00,58866.49,58937.88,58952.38,58586.64,0,58906.223333,58927.046225,1497.171441,...,3,-2.697968e-01,0.962917,1.224647e-16,-1.0,0.0,2021-11-25 22:00:00,22:00:00,0.449707,0.5


In [29]:
prediction['y_pred_2'] = [1 if y > 0 else 0 for y in prediction['y_pred']]

In [16]:
prediction.groupby(['y_pred','trade'])['close'].count()

y_pred  trade
0       0.0        82
        0.5      1416
        1.0       317
1       0.0        54
        0.5      1200
        1.0       430
Name: close, dtype: int64

In [24]:
(1316+525)/prediction.shape[0]

0.526150328665333

In [17]:
(82+430)/(82+317+54+430)

0.579841449603624

In [18]:
prediction.y_pred.mean()

0.4812803658188054