In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install talib-binary
!pip install tqdm
!pip install backtrader
!pip install shap
!pip install linetimer 

In [None]:
%load_ext tensorboard
%load_ext autoreload
%autoreload 2

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import pdb
import xgboost as xgb
import sklearn as skl
import sklearn.multioutput as skmo
import sklearn.metrics as metrics
import sklearn.utils as utils
import datetime
import time
import plotly.graph_objects as go
import keras
import talib as ta
import matplotlib.pyplot as plt
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
import tqdm
import os
import backtrader as bt
from datetime import datetime
import shap
from keras.utils.np_utils import to_categorical  
import random
from tqdm.contrib import tzip

%matplotlib inline 

In [None]:
import os
os.chdir("/content/drive/MyDrive/Colab Notebooks/ML trading/ml_trading_lib")

In [None]:
from ml_trading_lib.dataframe_preparator import DataframePreparator
from ml_trading_lib.dataframe_splitter import DataframeSplitter
from ml_trading_lib.price_normalizator import PriceNormalizator
from ml_trading_lib.before_modeling_preparation import BeforeModelingPreparation
from ml_trading_lib.modeling import Modeling, MyCallback, SaveCallback, ImprovedCCE
from ml_trading_lib.post_modeling_analysis import PostModelingAnalysis
from ml_trading_lib.backtrader_strategy import MyStrategy1

In [None]:
config = {}

In [None]:
# Populate the shared `config` dict in one call; key order matches the
# original sequence of assignments.
config.update({
    # Account and fee settings.
    'capital': 5000,
    'profit': 5,
    'taker_fee': 0.0016,
    'maker_fee': 0.0016,
    'opening_fee': 0.0002,
    'rollover_fee': 0.0002,

    # 24 * 60 / 5 = 288 five-minute intervals in a day.
    'five_minutes_in_day': 288,

    # Dataset sizes and window lengths.
    'train_size': 110000,
    'test_size': 12002,
    'bars': 50,
    'last_x_bars': 15,

    'last_x_bars_for_level1': 15,
    'last_x_bars_for_level2': 30,
})

# Derived value: test_size divided by intervals per day, truncated to an int.
config['validation_days'] = int(config['test_size'] / config['five_minutes_in_day'])

In [None]:
config['columns'] = DataframePreparator.get_cols()

In [None]:
# Add the extra feature columns in place, but only when they are missing, so
# re-running this cell does not append duplicates and silently change
# len(config['columns']) (which the feature reshapes depend on).
for extra_column in ('volume', 'num_trades'):
    if extra_column not in config['columns']:
        config['columns'].append(extra_column)

In [None]:
config['pairs'] = 10
config['pairs']

10

# GCP config

In [None]:
!gcloud auth login
!gcloud auth application-default login

In [None]:
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/content/.config/application_default_credentials.json'

In [None]:
!export GOOGLE_APPLICATION_CREDENTIALS=/content/.config/application_default_credentials.json

In [None]:
filename = 'gs://trading/test/example.tfrecords'

# Load data from disk and upload it to GCP

In [None]:
def load_data(path, train=True):
    """Load and concatenate the per-directory feature/target arrays under `path`.

    Every immediate sub-directory of `path` is expected to contain three NumPy
    files: `f{suffix}.npy`, `f2{suffix}.npy` and `t{suffix}.npy`, where the
    suffix is empty for training data and `_valid` otherwise.

    Args:
        path: Root directory whose direct sub-directories hold the arrays.
        train: When True load `f.npy`/`f2.npy`/`t.npy`; when False load the
            `*_valid.npy` files.

    Returns:
        Tuple `(f, f2, t)` of arrays concatenated along axis 0. `f` is reshaped
        per directory to `(n, 10, len(config['columns']), 5, 1)`. If `path`
        has no sub-directories the result is `(None, None, None)`.
    """
    # Only the first level of os.walk is used: deeper directories would be
    # joined to `path` with the wrong parent and fail to load.
    # NOTE(review): listing order is filesystem-dependent; sort here if a
    # stable sample order is required.
    _, subdirs, _ = next(os.walk(path), (path, [], []))
    paths = [f'{path}/{subdir}' for subdir in subdirs]

    suffix = '' if train else '_valid'
    n_columns = len(config['columns'])

    # Collect the pieces and concatenate once at the end, instead of
    # re-copying the growing arrays on every iteration.
    f_parts, f2_parts, t_parts = [], [], []
    for df_path in tqdm.tqdm(paths):
        f_l = np.load(f'{df_path}/f{suffix}.npy')
        f2_l = np.load(f'{df_path}/f2{suffix}.npy')
        t_l = np.load(f'{df_path}/t{suffix}.npy')

        f_parts.append(f_l.reshape((f_l.shape[0], 10, n_columns, 5, 1)))
        f2_parts.append(f2_l)
        t_parts.append(t_l)

    if not f_parts:
        # Nothing found: keep the original "all None" result.
        return None, None, None

    return (
        np.concatenate(f_parts, axis=0),
        np.concatenate(f2_parts, axis=0),
        np.concatenate(t_parts, axis=0),
    )

In [None]:
f, f2, t = load_data('/content/drive/MyDrive/Colab Notebooks/ML trading/generator_data')

100%|██████████| 10/10 [01:32<00:00,  9.25s/it]


In [None]:
f_valid, f2_valid, t_valid = load_data('/content/drive/MyDrive/Colab Notebooks/ML trading/generator_data', False)

100%|██████████| 10/10 [00:17<00:00,  1.75s/it]


In [None]:
# Balanced class weights for the three target classes.
# `classes` is passed as an ndarray: recent scikit-learn releases validate the
# argument type and reject a plain Python list.
# NOTE(review): the weights are derived from the validation labels (t_valid);
# confirm that this, rather than the training labels, is intended.
class_labels = np.array([0, 1, 2])
balanced_weights = utils.class_weight.compute_class_weight('balanced', classes=class_labels, y=t_valid)
class_weights = {int(label): weight for label, weight in zip(class_labels, balanced_weights)}
class_weights

{0: 0.4566319997859597, 1: 2.402324505841427, 2: 2.5394294945372615}

In [None]:
# NOTE(review): this rebinds the training arrays loaded above to the validation
# arrays, so the cells below reshape and serialize the *validation* data into
# `filename`. Presumably this cell is toggled by hand (together with `filename`)
# to produce the train and validation TFRecord files in separate passes -
# confirm, because a plain "Run All" never writes the training set.
f, f2, t = f_valid, f2_valid, t_valid

In [None]:
# Flatten every sample to one row; -1 lets NumPy derive the row length from
# the array itself instead of hard-coding 1050.
f_re = f.reshape((f.shape[0], -1))

In [None]:
features_len = f_re[0].shape[0]
features_len

1050

In [None]:
features2_len = f2.shape[1]
features2_len

58

In [None]:
def get_example_object(feature, feature2, label):
  """Serialize one sample into a tf.train.Example byte string.

  Args:
    feature: Iterable of floats stored under the 'feature' key.
    feature2: Iterable of floats stored under the 'feature2' key.
    label: Single integer stored under the 'label' key.

  Returns:
    The serialized tf.train.Example (bytes).
  """
  payload = {
      'feature': tf.train.Feature(float_list=tf.train.FloatList(value=feature)),
      'feature2': tf.train.Feature(float_list=tf.train.FloatList(value=feature2)),
      'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
  }

  record = tf.train.Example(features=tf.train.Features(feature=payload))
  return record.SerializeToString()

In [None]:
with tf.io.TFRecordWriter(filename) as writer:
  for feature, feature2, label in tzip(f_re, f2, t):
    example = get_example_object(feature, feature2, label)

    writer.write(example)

  0%|          | 0/119470 [00:00<?, ?it/s]

# Prepare TFRecord

In [None]:
def extract_fn(data_record):
  """Decode one serialized tf.train.Example into an (inputs, targets) pair.

  Args:
    data_record: Scalar string tensor holding a serialized example with the
      'feature', 'feature2' and 'label' keys.

  Returns:
    Tuple of two dicts: inputs with 'feature' reshaped to (10, 21, 5, 1) and
    'feature2' of length `features2_len`, and targets with 'label' one-hot
    encoded over 3 classes.
  """
  input_spec = {
      'feature': tf.io.FixedLenFeature([features_len], tf.float32),
      'feature2': tf.io.FixedLenFeature([features2_len], tf.float32),
  }
  target_spec = {'label': tf.io.FixedLenFeature([], tf.int64)}

  inputs = tf.io.parse_single_example(data_record, input_spec)
  parsed_target = tf.io.parse_single_example(data_record, target_spec)

  # Undo the flattening applied before writing the records.
  inputs['feature'] = tf.reshape(inputs['feature'], tf.stack([10, 21, 5, 1]))

  targets = {'label': tf.one_hot(parsed_target['label'], 3)}
  return inputs, targets

In [None]:
# Training input pipeline: read records, parse them in parallel, batch, and
# prefetch so reading/parsing overlaps with the training step.
# Element order is unchanged (parallel map is deterministic by default).
dataset = tf.data.TFRecordDataset([filename])
dataset = dataset.map(extract_fn, num_parallel_calls=tf.data.AUTOTUNE)
dataset = dataset.batch(3000)
dataset = dataset.prefetch(tf.data.AUTOTUNE)

In [None]:
valid_filename = 'gs://trading/test/example_valid.tfrecords'

In [None]:
# Validation input pipeline: read records, parse them in parallel, batch, and
# prefetch so reading/parsing overlaps with evaluation.
# Element order is unchanged (parallel map is deterministic by default).
dataset_valid = tf.data.TFRecordDataset([valid_filename])
dataset_valid = dataset_valid.map(extract_fn, num_parallel_calls=tf.data.AUTOTUNE)
dataset_valid = dataset_valid.batch(3000)
dataset_valid = dataset_valid.prefetch(tf.data.AUTOTUNE)

# Init TPU

In [None]:
def _build_tpu_strategy(cluster_resolver):
    """Create a TPUStrategy for `cluster_resolver` and print its master/replica count."""
    # tf.distribute.TPUStrategy replaces the deprecated experimental alias.
    strategy = tf.distribute.TPUStrategy(cluster_resolver)
    print("Running on TPU ", cluster_resolver.master())
    print("REPLICAS: ", strategy.num_replicas_in_sync)
    return strategy


def connect_to_tpu(tpu_address: str = None):
    """Connect to a TPU and return `(cluster_resolver, strategy)`.

    Args:
        tpu_address: Address/name of the TPU. When given, the resolver is
            created for it explicitly, the cluster is connected (unless the
            address is "" or "local") and the TPU system is initialized.
            When None, `TPUClusterResolver.connect()` is used to auto-detect
            the TPU.

    Returns:
        `(cluster_resolver, TPUStrategy)` on success. In the auto-detect case,
        if connecting fails, `(None, MirroredStrategy)` is returned instead.
    """
    if tpu_address is not None:  # When using GCP
        cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            tpu=tpu_address)
        if tpu_address not in ("", "local"):
            tf.config.experimental_connect_to_cluster(cluster_resolver)
        tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
        return cluster_resolver, _build_tpu_strategy(cluster_resolver)

    # When using Colab or Kaggle
    try:
        cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
        return cluster_resolver, _build_tpu_strategy(cluster_resolver)
    except Exception as exc:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate, and show why the TPU could not be used.
        print("WARNING: No TPU detected.")
        print(f"Reason: {exc!r}")
        return None, tf.distribute.MirroredStrategy()

In [None]:
_, strategy = connect_to_tpu()



Running on TPU  grpc://10.114.32.178:8470
REPLICAS:  8


# Training

In [None]:
sample_weights = utils.class_weight.compute_sample_weight(class_weights, t_valid)
sample_weights

array([0.456632, 0.456632, 0.456632, ..., 0.456632, 0.456632, 0.456632])

In [None]:
model = Modeling.get_convlstm(config['bars'], len(config['columns']), strategy)

In [None]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 feature (InputLayer)           [(None, 10, 21, 5,   0           []                               
                                1)]                                                               
                                                                                                  
 conv_lstm2d_4 (ConvLSTM2D)     (None, 10, 21, 5, 1  198656      ['feature[0][0]']                
                                28)                                                               
                                                                                                  
 dropout_5 (Dropout)            (None, 10, 21, 5, 1  0           ['conv_lstm2d_4[0][0]']          
                                28)                                                         

In [None]:
model_path = 'gs://trading/models/model'

In [None]:
# model = Modeling.load_model(model_path, strategy, True)
model

<keras.engine.functional.Functional at 0x7fe0c540b890>

In [None]:
callback = MyCallback([8,9], f_valid, f2_valid, t_valid, config)

In [None]:
nn_name = 'convlstm_10_21_5_1'
log_path = 'gs://trading/logs'

In [None]:
dt_now = datetime.now().strftime("%Y_%m_%d-%H:%M:%S")
logdir = os.path.join(log_path, f'{nn_name}_{dt_now}')
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)
file_writer = tf.summary.create_file_writer(logdir + "/metrics")
file_writer.set_as_default()

In [None]:
save_callback = SaveCallback(model_path, True)

In [None]:
%tensorboard --logdir 'gs://trading/logs/'

In [None]:
history = model.fit(dataset, validation_data=dataset_valid, epochs=300, callbacks=[callback, tensorboard_callback, save_callback])

# Unit tests

## Calculate amount

In [None]:
assert DataframeSplitter.long_calculate_amount(buy_price=100, take_profit_sell_price=200, profit=100, taker_fee=0, maker_fee=0) == 1

In [None]:
assert DataframeSplitter.long_calculate_amount(buy_price=100, take_profit_sell_price=200, profit=1000, taker_fee=0, maker_fee=0) == 10

In [None]:
assert DataframeSplitter.short_calculate_amount(sell_price=200, take_profit_buy_price=100, profit=100, taker_fee=0, maker_fee=0) == 1

In [None]:
assert DataframeSplitter.short_calculate_amount(sell_price=200, take_profit_buy_price=100, profit=1000, taker_fee=0, maker_fee=0) == 10

In [None]:
# Worked example with non-zero fees, used by the amount assertions below.
buy_price, sell_price = 100, 200
taker_fee, maker_fee = 0.2, 0.3
amount = 50

# Net profit: sale proceeds minus the maker fee on the sale, minus the
# purchase cost plus the taker fee on the purchase.
profit = (
    (amount * sell_price)
    - (amount * sell_price * maker_fee)
    - ((amount * buy_price) + (amount * buy_price * taker_fee))
)

In [None]:
# Compare with a tolerance: the amount comes out of float fee arithmetic, so
# exact equality can fail on harmless rounding differences.
assert np.isclose(DataframeSplitter.long_calculate_amount(buy_price, sell_price, profit, taker_fee, maker_fee), amount, rtol=1e-9)

In [None]:
# Compare with a tolerance: the amount comes out of float fee arithmetic, so
# exact equality can fail on harmless rounding differences.
assert np.isclose(DataframeSplitter.short_calculate_amount(sell_price, buy_price, profit, taker_fee, maker_fee), amount, rtol=1e-9)

## Calculate stop loss

In [None]:
buy_price = 200
sell_price = 340
taker_fee = 0.2
maker_fee = 0.3
amount = 50

loss = (amount * sell_price) - (amount * sell_price * maker_fee) - ((amount * buy_price) + (amount * buy_price * taker_fee))
loss

-100.0

In [None]:
# Compare with a tolerance: the stop-loss price is the result of float fee
# arithmetic, so exact equality is fragile.
assert np.isclose(DataframeSplitter.long_calculate_stop_loss(buy_price, amount, -loss, taker_fee, maker_fee), sell_price, rtol=1e-9)

In [None]:
buy_price = 124.61538461538461
sell_price = 200
taker_fee = 0.2
maker_fee = 0.3
amount = 50

loss = (amount * sell_price) - (amount * sell_price * taker_fee) - ((amount * buy_price) + (amount * buy_price * maker_fee))
loss

-100.0

In [None]:
# Compare with a tolerance: the expected price (124.615...) is not exactly
# representable as a float, so exact equality is fragile.
assert np.isclose(DataframeSplitter.short_calculate_stop_loss(sell_price, amount, -loss, taker_fee, maker_fee), buy_price, rtol=1e-9)

## Normalize/unnormalize price 

In [None]:
min_p = 10
max_p = 500
price = 350
# A normalize -> unnormalize round trip goes through float arithmetic, so
# check the result with a tolerance instead of exact equality.
assert np.isclose(PriceNormalizator.unnormalize_price(PriceNormalizator.normalize_price(price, min_p, max_p), min_p, max_p), price, rtol=1e-9)

## features_target_split

In [None]:
df = pd.DataFrame({'open': [10.0, 10, 10, 10, 10], 'high': [1000.0, 100, 5, 10, 5000], 'low': [0, 0, 2, 5, 0], 'close': [11.0, 11, 11, 11, 11]})
assert DataframeSplitter.get_gain(df, current_bar = 4, last_x_bars = 3) == 100

## get_predictions_with_threshold

In [None]:
probs = [
       [0, 0, 1],
       [0, 0.75, 0.25],
       [1, 0, 0],
       [0, 0.5, 0.5]
]
t_valid = [2, 1, 0, 0]
assert PostModelingAnalysis.get_predictions_with_threshold(probs, 0.7) == t_valid

## get_win_rate

In [None]:
assert PostModelingAnalysis.get_win_rate(probs, t_valid, 0.7) == (1.0, 2)

In [None]:
probs = [
       [0, 0.8, 0.2],
       [0, 0.75, 0],
       [1, 0, 0],
       [0, 0.5, 0.5]
]

In [None]:
assert PostModelingAnalysis.get_win_rate(probs, t_valid, 0.7) == (0.5, 1)