In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from utils.data_preprocess import *
from models import *

In [2]:
data = pd.read_csv('./data/ETHBTC-5m-data.csv')
data = data_split(data, 3)[0]

In [3]:
WINDOW_SIZE = 24

In [4]:
raw_data = DataLabeling(data, WINDOW_SIZE)
train_df, val_df, test_df = train_val_test_split(raw_data.labelled_data, train_size=0.6, val_size=0.2, test_size=0.2)
Data = DataPreprocess(train_df, val_df, test_df, window_size=WINDOW_SIZE, label_size=1, label_columns=['Label'], shift=0, batch_size=8)

In [5]:
train_data = Data.train.prefetch(tf.data.AUTOTUNE)
val_data = Data.val.prefetch(tf.data.AUTOTUNE)

In [6]:
strong_model = CDT_1D_model(WINDOW_SIZE, 5).model

In [7]:
strong_model.fit(train_data,
                 epochs=1000,
                 validation_data=val_data,
                 steps_per_epoch=len(train_data),
                 validation_steps=int(0.15 * len(val_data)),
                 callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10),
                            tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.2)])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000

In [16]:
strong_model.evaluate(Data.test)



[0.43261733651161194, 0.8864508867263794]

In [18]:
weak_model = MLP_model(WINDOW_SIZE, 5).model
weak_model.fit(train_data,
                 epochs=1000,
                 validation_data=val_data,
                 steps_per_epoch=len(train_data),
                 validation_steps=int(0.15 * len(val_data)),
                 callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10),
                            tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.2),
                            create_model_checkpoint(model_name=weak_model.name, save_path='data_without_TIs')])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000


<keras.callbacks.History at 0x18f894e6a08>

In [19]:
weak_model.evaluate(Data.test)



[0.432639479637146, 0.8864508867263794]

In [1]:
from tensorflow.keras import Sequential, layers, Model, Input
def model():
    input = Input(shape=[24, 5])
    x = layers.LSTM(300, return_sequences=True)(input)
    x = layers.LSTM(200, return_sequences=True)(x)
    x = layers.LSTM(100, return_sequences=True)(x)
    x = layers.LSTM(50, return_sequences=True)(x)
    x = layers.LSTM(20)(x)
    x = layers.Dense(500, activation='relu')(x)
    x = layers.Dropout(0.7)(x)
    output = layers.Dense(3, activation='softmax')(x)
    model = Model(input, output)
    
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [7]:
test = model()
test.fit(train_data, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1d0372116c8>

In [36]:
class DataLabeling:
    """
    This class will label the data with `Buy` `Sell` or `Hold` based on the dynamic threshold of log return.
    Threshold:
        if next period close price >= current close price * (1 + alpha * Volatility of last hour), tag it 'up' label
        elif next period close price <= current close price * (1 - alpha * Volatility of last hour), tag it 'down' label
        else tag it with 'flat'
    Base on the three categories, we can detect the trend of the price movement.
    When the tags change from 'down' to 'up' or 'flat' to 'up', it will enter a long trade with a tag of 'Buy',
    and the tags change from 'up' to 'down' or 'flat' to 'down', it will enter a short trade with a tag of 'Sell',
    otherwise, it will do nothing with a tag of 'Hold'.
    
    Also, it will automatically add features of technical indicators for you.
    The TIs are based on TA-lib
    """
    def __init__(self, data, window_size, alpha=0.55):
        # initialize data and parameters
        self.data = data.set_index('Timestamp').loc[:, ['Open', 'High', 'Low', 'Close', 'Volume']]
        self.data = self.data.set_index('Timestamp')
        self.data.index = pd.to_datetime(self.data.index)
        
        self.__alpha = alpha
        self.__window_size = window_size
    
    def __make_label(self, data):
        # Setup a Threshold for Buy, Sell, Hold Label
        data['STD'] = data.Close.rolling(self.__window_size).std()
        data['Next_Close'] = data.Close.shift(-self.__window_size)
        data = data.fillna(0)
        data = data.assign(Trend=data.apply(self.__func_2, axis=1))
        # data['Trend'] = np.where(data.Next_Close >= data.Close*(1+self.__alpha*data.STD), 1,
        #                               np.where(data.Next_Close <= data.Close*(1-self.__alpha*data.STD), 2, 0))
        data['Previous_Trend'] = data.Trend.shift(fill_value=0)
        # A cursed method
        # for i in range(len(data)):
        #     if data['Trend'][i] == 0:
        #         data['Label'] = 'Hold'
                
        #     elif data['Trend'][i] == 1:
        #         if data['Previous_Trend'][i] == 0:
        #             data['Label'] = 'Buy'
        #         elif data['Previous_Trend'][i] == -1:
        #             data['Label'] = 'Buy'
        #         else:
        #             data['Label'] = 'Hold'
            
        #     elif data['Trend'][i] == -1:
        #         if data['Previous_Trend'][i] == 0:
        #             data['Label'] = 'Sell'
        #         elif data['Previous_Trend'][i] == -1:
        #             data['Label'] = 'Hold'
        #         else:
        #             data['Label'] = 'Sell'
        
        # data['Label'] = np.where(data.Trend > data.Previous_Trend, 'Buy',
        #                          np.where(data.Trend < data.Previous_Trend, 'Sell', 'Hold'))
        # There will be problems using this function under some situations,  like Trend is 0 and Previous_Trend is -1, so the label will be `Buy`,
        # however, if the next Trend is -1, it says that the price is keeping going down, so you actually should not buy at that time.
        data = data.assign(Label=data.apply(self.__func, axis=1))
        data = data.dropna().drop(['Next_Close','STD','Trend', 'Previous_Trend'], axis=1)
        
        # Normalized the data
        scaler = MinMaxScaler()
        data.iloc[:, :-1] = scaler.fit_transform(data.iloc[:, :-1])
        return data
    
    def __func(self, df):
        if (df['Trend'] == 0) or (df['Trend'] == 1 and df['Previous_Trend'] == 1) or (df['Trend'] == 2 and df['Previous_Trend'] == 2):
            return 0
        elif (df['Trend'] == 1) and (df['Previous_Trend'] == 0 or 2):
            return 1
        elif (df['Trend'] == 2) and (df['Previous_Trend'] == 0 or 1):
            return 2
    
    def __func_2(self, df):
        if df.Next_Close >= df.Close*(1+self.__alpha*df.STD):
            return 1
        elif df.Next_Close <= df.Close*(1-self.__alpha*df.STD):
            return 2
        else:
            return 0
    
    @property
    def labelled_data(self):
        return self.__make_label(self.data)

In [37]:
from backtesting import Backtest, Strategy
from backtesting.lib import SignalStrategy

In [2]:
class Threshold(Strategy):
    
    window_size = 24
    alpha = 0.55
    
    def init(self):
        super().init()
        close = pd.Series(self.data.Close)
        next_close = close.shift(fill_value=0)
        # trend = 
        
    
    def next(self):
        if (self.data['Trend'] == 0) or (self.data['Trend'] == 1 and self.data['Previous_Trend'] == 1) or (self.data['Trend'] == 2 and self.data['Previous_Trend'] == 2):
            pass
        elif (self.data['Trend'] == 1) and (self.data['Previous_Trend'] == 0 or 2):
            self.buy()
        elif (self.data['Trend'] == 2) and (self.data['Previous_Trend'] == 0 or 1):
            self.sell()
    
    def __func_2(self, df):
        if df.Next_Close >= df.Close*(1+self.__alpha*df.STD):
            return 1
        elif df.Next_Close <= df.Close*(1-self.__alpha*df.STD):
            return 2
        else:
            return 0

In [30]:
import pandas as pd
df = pd.read_csv('data\ETHBTC-5m-data.csv').iloc[:, :6]
df = df.set_index('Timestamp')
df.index = pd.to_datetime(df.index)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-07-14 04:00:00,0.080000,0.080000,0.080000,0.080000,0.7260
2017-07-14 04:05:00,0.080000,0.080001,0.080000,0.080001,3.3470
2017-07-14 04:10:00,0.080001,0.086400,0.080001,0.086400,4.6790
2017-07-14 04:15:00,0.085289,0.085620,0.085128,0.085128,53.4310
2017-07-14 04:20:00,0.085274,0.086000,0.085274,0.086000,5.5760
...,...,...,...,...,...
2021-11-10 12:55:00,0.070895,0.070950,0.070834,0.070935,196.3651
2021-11-10 13:00:00,0.070934,0.070958,0.070842,0.070907,193.0658
2021-11-10 13:05:00,0.070908,0.071010,0.070894,0.070997,143.4881
2021-11-10 13:10:00,0.070998,0.071052,0.070967,0.071047,106.6444


In [31]:
test = Backtest(df, Threshold, cash=100, commission=0.0005)

In [32]:
result = test.run()

AttributeError: '_Array' object has no attribute 'rolling'

In [34]:
a = df.Close.values

In [35]:
a.rolling()

AttributeError: 'numpy.ndarray' object has no attribute 'rolling'