# Import Libraries

In [18]:
from indicators import SMA, BBANDS
from historical_data import get_historical_data
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error as mae

In [19]:
data = get_historical_data('ETHUSD', 15, '1/1/2023', '06/01/2023', BTPY_format=True)

In [20]:

def feature_eng(data):
    close = data.Close.values
    sma10 = SMA(data.Close, 10)
    sma20 = SMA(data.Close, 20)
    sma50 = SMA(data.Close, 50)
    sma100 = SMA(data.Close, 100)
    upper, lower = BBANDS(data, 20, 2)

    # Price features
    data['X_SMA10'] = (close - sma10) / close
    data['X_SMA20'] = (close - sma20) / close
    data['X_SMA50'] = (close - sma50) / close
    data['X_SMA100'] = (close - sma100) / close

    data['X_DELTA_SMA10'] = (sma10 - sma20) / close
    data['X_DELTA_SMA20'] = (sma20 - sma50) / close
    data['X_DELTA_SMA50'] = (sma50 - sma100) / close

    # indicator features
    data['X_MOM'] = data.Close.pct_change(periods=2)
    data['X_BB_UPPER'] = (upper - close) / close
    data['X_BB_LOWER'] = (lower - close) / close
    data['X_BB_WIDTH'] = (upper - lower) / close
    # data['X_Sentiment'] = some sentiment analysis

    # datetime features
    data['X_day'] = data.index.dayofweek
    data['X_hour'] = data.index.hour

    data = data.dropna().astype(float)

    return data

In [21]:
def get_X(data):
    """Return model with designed features"""
    return data.filter(like='X').values

def get_y(data):
    """Return dependent variable y"""
    y = data.Close.pct_change(48).shift(-48)
    y[y.between(-.004, .004)] = 0
    y[y > 0] = 1
    y[y < 0] = -1
    return y

def get_clean_Xy(df):
    """Return (X, y) cleaned of NaN values"""
    X = get_X(df)
    y = get_y(df).values
    isnan = np.isnan(y)
    X = X[~isnan]
    y = y[~isnan]
    return X, y

In [22]:
fe_data_df = feature_eng(data)
X, y = get_clean_Xy(fe_data_df)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=12)

clf = KNeighborsClassifier(7) # 7 groups
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print('Classification accuracy:', np.mean(y_test == y_pred))
print('MAE', mae(y_test, y_pred) )

Classification accuracy: 0.5156054931335831
MAE 0.6762380357885975


In [23]:
from backtesting import Backtest, Strategy

N_TRAIN = 400

class MLTrainONceStrategy(Strategy):
    price_delta = .004 # 0.4%

    def init(self):
        # init out model, a KNN classifier
        self.clf = KNeighborsClassifier(7)

        # Train the classifier in advance on the first N_TRAIN example
        df = self.data.df.iloc[:N_TRAIN]
        df = feature_eng(df)
        X, y = get_clean_Xy(df)
        self.clf.fit(X, y)

        # plot y for inspection
        self.I(get_y, self.data.df, name='y_true')

        # prepare empty, all-NaN forecast indicator
        self.forcasts = self.I(lambda: np.repeat(np.nan, len(self.data)), name='forecast')

    def next(self):
        # skip the training, in-sample data
        if len(self.data) < N_TRAIN:
            return

        # Proceed only with out-of-sample data. Prepare variables
        high, low, close = self.data.High, self.data.Low, self.data.Close
        current_time = self.data.index[-1]

        # forcast the next movement
        X = get_X(self.data.df.iloc[-1:])
        forecast = self.clf.predict(X)[0]

        # update the plotted 'forecast' indicator
        self.forcasts[-1] = forecast

        # if forecast is upwards and no long position exists
        # place a long order for 20% of available equity.
        # same parameters are followed for short.
        # Set target take-profit and stop-loss prices to be one
        # price_delta away from current closing price
        upper, lower = close[-1] * (1 + np.r_[1, -1]*self.price_delta)

        if forecast == 1 and not self.position.is_long:
            self.buy(size=.2, tp=upper, sl=lower)
        elif forecast == -1 and not self.position.is_short:
            self.sell(size=.2, tp=lower, sl=upper)

        # additionally, set aggressive stop-loss on trades are open
        # for more than two days
        for trade in self.trades:
            if current_time - trade.entry_time > pd.Timedelta('2 days'):
                if trade.is_long:
                    trade.sl = max(trade.sl, low)
                else:
                    trade.sl = min(trade.sl, high)


In [24]:
bt = Backtest(data, MLTrainONceStrategy, commission=.0002, margin=0.05)
bt.run()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['X_SMA10'] = (close - sma10) / close
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['X_SMA20'] = (close - sma20) / close
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['X_SMA50'] = (close - sma50) / close
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l

Start                     2023-01-01 00:00:00
End                       2023-06-01 23:45:00
Duration                    151 days 23:45:00
Exposure Time [%]                   68.939238
Equity Final [$]                   843.539834
Equity Peak [$]                   11679.72322
Return [%]                         -91.564602
Buy & Hold Return [%]               56.044858
Return (Ann.) [%]                  -99.736221
Volatility (Ann.) [%]                0.453674
Sharpe Ratio                              0.0
Sortino Ratio                             0.0
Calmar Ratio                              0.0
Max. Drawdown [%]                   -93.98965
Avg. Drawdown [%]                   -7.542752
Max. Drawdown Duration      138 days 12:45:00
Avg. Drawdown Duration        8 days 03:56:00
# Trades                                 2344
Win Rate [%]                        49.018771
Best Trade [%]                       0.477404
Worst Trade [%]                     -0.517178
Avg. Trade [%]                    

In [26]:
bt.plot(resample=False)

  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  fig = gridplot(
  fig = gridplot(
