In [2]:
# Data Handling
import pandas as pd
import numpy as np

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns 
import plotly.express as px
import plotly.graph_objs as go
import matplotlib.ticker as mtick
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

# Financial Data Analysis
import yfinance as yf
import ta

# Machine Learning 
from sklearn.metrics import roc_auc_score, roc_curve, auc

# Models
from catboost import CatBoostClassifier

# Hiding warnings 
import warnings
warnings.filterwarnings("ignore")



### Data preparation 

In [None]:
from abc import ABC, abstractmethod
class AbstractStrategy(ABC):

    @abstractmethod
    def isSell(self, index) -> bool:
        pass
    
    @abstractmethod
    def isBuy(self, index) -> bool:
        pass

In [14]:
class CatBoostStrategy(AbstractStrategy):

    def __init__(self, df: pd.DataFrame, model_path: str) -> None:
        self.df = df
        self.model_path = model_path

        self.model = self.__load_model()

        self.df = self.__do_tramsformation(self.df)
        self.df = self.__populate_signals(self.df)
    
    def __load_model(self):
        model = CatBoostClassifier(random_state = 42, verbose = False)
        model.load_model(self.model_path)
        return model

    def __do_tramsformation(self, df):

        # Adding Simple Moving Averages
        df['sma5'] = ta.trend.sma_indicator(df['close'],window = 5)
        df['sma10'] = ta.trend.sma_indicator(df['close'],window = 10)
        df['sma15'] = ta.trend.sma_indicator(df['close'],window = 15)
        df['sma20'] = ta.trend.sma_indicator(df['close'],window = 20)
        df['sma30'] = ta.trend.sma_indicator(df['close'],window = 30)
        df['sma50'] = ta.trend.sma_indicator(df['close'],window = 50)
        df['sma80'] = ta.trend.sma_indicator(df['close'],window = 80)
        df['sma100'] = ta.trend.sma_indicator(df['close'],window = 100)
        df['sma200'] = ta.trend.sma_indicator(df['close'],window = 200)
    
        # Adding Price to Simple Moving Averages ratios

        df['sma5_ratio'] = df['close'] / df['sma5']
        df['sma10_ratio'] = df['close'] / df['sma10']
        df['sma20_ratio'] = df['close'] / df['sma20']
        df['sma30_ratio'] = df['close'] / df['sma30'] 
        df['sma50_ratio'] = df['close'] / df['sma50']
        df['sma80_ratio'] = df['close'] / df['sma80']
        df['sma100_ratio'] = df['close'] / df['sma100']
        df['sma200_ratio'] = df['close'] / df['sma200']

        # Adding RSI, CCI, Bollinger Bands, and OBV

        df['rsi'] = ta.momentum.RSIIndicator(df['close']).rsi()
        df['cci'] = ta.trend.cci(df['high'], df['low'], df['close'], window=20, constant=0.015)
        bb_indicator = ta.volatility.BollingerBands(df['close'])
        df['bb_high'] = bb_indicator.bollinger_hband()
        df['bb_low'] = bb_indicator.bollinger_lband()
        df['obv'] = ta.volume.OnBalanceVolumeIndicator(close=df['close'], volume=df['volume']).on_balance_volume()

        # Adding features derived from the indicators above

        df['rsi_overbought'] = (df['rsi'] >= 70).astype(int)
        df['rsi_oversold'] = (df['rsi'] <= 30).astype(int)
        df['above_bb_high'] = (df['close'] >= df['bb_high']).astype(int)
        df['below_bb_low'] = (df['close'] <= df['bb_low']).astype(int)
        df['obv_divergence_10_days'] = df['obv'].diff().rolling(10).sum() - df['close'].diff().rolling(10).sum()
        df['obv_divergence_20_days'] = df['obv'].diff().rolling(20).sum() - df['close'].diff().rolling(20).sum()
        df['cci_high'] = (df['cci'] >= 120).astype(int)
        df['cci_low'] = (df['cci'] <= -120).astype(int)
        df['sma5 > sma10'] = (df['sma5'] > df['sma10']).astype(int)
        df['sma10 > sma15'] = (df['sma10'] > df['sma15']).astype(int)
        df['sma15 > sma20'] = (df['sma15'] > df['sma20']).astype(int)
        df['sma20 > sma30'] = (df['sma20'] > df['sma30']).astype(int)
        df['sma30 > sma50'] = (df['sma30'] > df['sma50']).astype(int)
        df['sma50 > sma80'] = (df['sma50'] > df['sma80']).astype(int)
        df['sma80 > sma100'] = (df['sma80'] > df['sma100']).astype(int)
        df['sma100 > sma200'] = (df['sma100'] > df['sma200']).astype(int)

        # Removing NaN values from the dataframe 
        df.dropna(inplace = True)

        return df
    
    def __populate_signals(self, df):
       
        y_pred = self.model.predict_proba(self.df)[:,1] # what is ?

        sign = np.zeros_like(y_pred) # Creating an array with 0s in the same length as y_pred

        # Short selling signal 
        sign[((y_pred >= 0.3477) & (y_pred < 0.5087)) | ((y_pred > 0.5189) & (y_pred < 0.5195))] = -1 

        # Buying signal     
        sign[((y_pred >= 0.5087) & (y_pred <= 0.5189)) | 
            ((y_pred >= 0.5195) & (y_pred <= 0.5252))] = 1

        df['buy_signal'] = sign

        return df

    def isSell(self, index):
        return   bool(self.df.loc[index]['buy_signal'])
    
    def isBuy(self, index):
        return   bool(self.df.loc[index]['buy_signal'])
    
    def isStop(self, index, position):
        return self.df.loc[index]['low'] < position['stop_loss']

In [48]:
strategy = CatBoostStrategy(df=df, model_path='./btc_classifier.cbm')

In [6]:
# balance = 1000
# position = None
# asset = 'BTC'

In [None]:
class BackTestCatBoostStrategy:

    def __init__(self, data_path: pd.DataFrame, model_path: str, balance = 1000,  asset = 'BTC', fee = 0.001):
        
        self.balance = balance
        self.asset = asset
        self.fee = fee
        

        self.df = self.__load_data(data_path)
        self.df = self.__transform_data(self.df)
        
        self.strategy = CatBoostStrategy(df=self.df, model_path=model_path)

        self.trades = []
        self.days = []

        
    def __transform_data(self, df):
        
        df['date'] = pd.to_datetime(df['date'], unit='ms')
        df = self.df.drop_duplicates(subset=['date'], keep='first')
        df = df.set_index(df["date"])
        df['close_shift'] = df['close'].shift(1)
        df['return'] = (df['close']/df['close_shift'] - 1) * 100

        return df

    def __load_data(self, data_path):
        return pd.read_csv(data_path)

   
    def run_backtest(self):

        position = None
        previous_day = -1

        for index, row in self.df.iterrows():
            current_day = index.day

            if  previous_day !=  current_day:
                temp_balance = self.balance
                if position:
                    close_prise = row['close']
                    trade_result = (close_prise - position['open_price']) /  position['open_price']
                    close_size = (position['open_size'] + position['open_size'] * trade_result)
                    fees = close_size * self.fee
                    close_size = close_size - fees
                    temp_balance = temp_balance + (close_size - position['open_size'])
                
                self.days.append({
                    'day': index.date(),
                    'balance': temp_balance,
                    'price': row['close']
                })

            if not position and self.strategy.isBuy(index):
                open_price = row['close']
                open_usd_size = self.balance
                fees = open_usd_size * self.fee
                
                open_usd_size = open_usd_size - fees
                self.balance = self.balance - fees

                stop_loss = open_price - (open_price * 0.01)

                position = {
                    'open_price': open_price,
                    'open_size': open_usd_size,
                    'open_date': index,
                    'open_fee': fees,
                    'open_reason': 'Market Buy',
                    'open_balance': self.balance,
                    'stop_loss': stop_loss
                }


                print(f'{index}  Buy for {open_usd_size} of {self.asset} at {open_price}' )

            elif position and self.strategy.isSell(index):
                close_prise = row['close']
                trade_result = (close_prise - position['open_price']) / position['open_price']
                close_size = position['open_price'] + position['open_price'] * trade_result
                fees = close_size * self.fee
                close_size = close_size - fees
                self.balance = self.balance + close_size - position['open_size']  

                self.trades.append(
                    {
                        'open_date': position['open_date'],
                        'close_date': index,
                        'open_price': position['open_price'],
                        'close_price': close_prise,
                        'open_size': position['open_size'],
                        'close_size': close_size,
                        "open_fee": position["open_fee"],
                        "close_fee": fees,
                        "open_reason": position["open_reason"],
                        "close_reason": "Market Sell",
                        "open_balance": position["open_balance"],
                        "close_balance": self.balance,

                    }
                )

                position = None

                print(f'{index}  Sell for {open_usd_size} of {self.asset} at {open_price}' )

            elif position and self.strategy.isStop(index, position):
                close_prise = position['stop_loss']
                trade_result = (close_prise - position['open_price']) / position['open_price']
                close_size = (position['open_size'] + position['open_size'] * trade_result)
                fees = close_size * self.fee
                close_size = close_size - fees
                self.balance = self.balance + close_size - position['open_size']  
                self.trades.append({
                    'open_date': position['open_date'],
                    'close_date': index,
                    'open_price': position['open_price'],
                    'close_price': close_prise,
                    'open_size': position['open_size'],
                    'close_size': close_size,
                    'open_fee': position['open_fee'],
                    'close_fee': fees,
                    'open_reason': position['open_reason'],
                    'close_reason': 'Stop Loss',
                    'open_balance': position['open_balance'],
                    'close_balance': self.balance,
                })

        print(f'Final balance : {self.balance}')

    def backtest_analysis(self):

        df_trades = pd.DataFrame(self.trades)
        df_days = pd.DataFrame(self.days)

        if not self.trades:
            raise Exception('No trades fount')
            
        if not self.days:
            raise Exception('No days found')
            
        # Returns and trade result
        df_days['evolution'] = df_days['balance'].diff()
        df_days['daily_return'] = df_days['evolution'] /  df_days['balance'].shift(1)

        df_trades['trade_result'] = df_trades['close_size'] - df_trades['open_size']
        df_trades['trade_result_pct'] = df_trades['trade_result']  /  df_trades['open_size']
        df_trades['trades_duration'] = df_trades['close_date'] -  df_trades['open_date'] 

        #Drawdowns and performance
        df_days['balance_ath'] = df_days['balance'].cummax()
        df_days['drawdown'] = df_days['balance_ath'] - df_days['balance']
        df_days['drawdown_pct'] = df_days['drawdown'] / df_days['balance_ath']

        total_trades = len(df_trades)
        total_days = len(df_days)

        #Good trades
        good_trades = df_trades.loc[df_trades['trade_result'] > 0]
        total_good_trades = len(good_trades)
        avg_profit_good_trades = good_trades['trade_result_pct'].mean()
        mean_good_trades_duration = good_trades['trades_duration'].mean()
        global_win_rate = total_good_trades / total_trades

        #Bad trades
        bad_trades = df_trades.loc[df_trades['trade_result'] < 0]
        total_bad_trades = len(bad_trades)
        avg_profit_bad_trades = bad_trades['trade_result_pct'].mean()
        mean_bad_trades_duration = bad_trades['trades_duration'].mean()

        max_days_drawdown = df_days['drawdown_pct'].max()
        initial_balance = df_days.iloc[0]['balance']
        final_balance = df_days.iloc[-1]['balance']

        balance_evolution = (final_balance - initial_balance) / initial_balance
        mean_trades_duration = df_trades['trades_duration'].mean()
        avg_profit = df_trades['trade_result_pct'].mean()
        mean_trades_per_days = total_trades / total_days

        best_trade = df_trades.loc[df_trades['trade_result_pct'].idxmax()]
        worst_trade = df_trades.loc[df_trades['trade_result_pct'].idxmin()]

        #Summary report
        print(f'Period : [{df_days.iloc[0]["day"]}] -> [{df_days.iloc[-1]["day"]}]')
        print(f'Initial balance : {round(initial_balance)} $')

        print('-- General information--')
        print(f'Final balance : {round(final_balance)} $')
        print(f'Performance: {round(balance_evolution * 100, 2)}')
        print(f'Worst Drawdown: {round(max_days_drawdown*100, 2)}')
        print(f'Total trades on the period: {total_trades}')
        print(f'Average profit: {round(avg_profit*100, 2)}')
        print(f'Global Win rate: {round(global_win_rate, 2)}')

        print('-- Trades information--')
        print(f'Mean trades per day: {round(mean_trades_per_days, 2)}')
        print(f'Mean trades duration {mean_trades_duration}')

        print(f"Best trades: +{round(best_trade['trade_result_pct']*100, 2)} % the {best_trade['open_date']} -> {best_trade['close_date']}")
        print(f"Worst trades: {round(worst_trade['trade_result_pct']*100, 2)} % the {worst_trade['open_date']} -> {worst_trade['close_date']}")
        print(f"Total Good trades on the period: {total_good_trades}")
        print(f"Total Bad trades on the period: {total_bad_trades}")
        print(f"Average Good Trades result: {round(avg_profit_good_trades*100, 2)} %")
        print(f"Average Bad Trades result: {round(avg_profit_bad_trades*100, 2)} %")
        print(f"Mean Good Trades Duration: {mean_good_trades_duration}")
        print(f"Mean Bad Trades Duration: {mean_bad_trades_duration}")

        print("\n--- Trades reasons ---")
        print(df_trades["open_reason"].value_counts().to_string())
        print(df_trades["close_reason"].value_counts().to_string())

        sharpe_ratio = (365**(0.5) * df_days['daily_return'].mean())/df_days['daily_return'].std()
        print(f"Sharpe Ratio: {round(sharpe_ratio,2)}")

        self.df_days = df_days
        self.df_trades = df_trades

    def plot_equity_vs_asset(self):
        df_days = self.df_days.copy()
        df_days = df_days.set_index('day')
        fig, ax_left = plt.subplots(figsize=(15, 20), nrows=4, ncols=1)

        ax_left[0].title.set_text("Profit and Loss curve")
        ax_left[0].plot(df_days['balance'], color='royalblue', lw=1)
        ax_left[0].fill_between(df_days['balance'].index, df_days['balance'], alpha=0.2, color='royalblue')
        ax_left[0].axhline(y=df_days.iloc[0]['balance'], color='black', alpha=0.3)
        ax_left[0].legend(['Balance evolution'], loc ="upper left")

        ax_left[1].title.set_text("Asset evolution")
        ax_left[1].plot(df_days['price'], color='sandybrown', lw=1)
        ax_left[1].fill_between(df_days['price'].index, df_days['price'], alpha=0.2, color='sandybrown')
        ax_left[1].axhline(y=df_days.iloc[0]['price'], color='black', alpha=0.3)
        ax_left[1].legend(['Asset evolution'], loc ="upper left")

        ax_left[2].title.set_text("Drawdown curve")
        ax_left[2].plot(-df_days['drawdown_pct']*100, color='indianred', lw=1)
        ax_left[2].fill_between(df_days['drawdown_pct'].index, -df_days['drawdown_pct']*100, alpha=0.2, color='indianred')
        ax_left[2].axhline(y=0, color='black', alpha=0.3)
        ax_left[2].legend(['Drawdown in %'], loc ="lower left")

        ax_right = ax_left[3].twinx()

        ax_left[3].title.set_text("P&L VS Asset (not on the same scale)")
        ax_left[3].plot(df_days['balance'], color='royalblue', lw=1)
        ax_right.plot(df_days['price'], color='sandybrown', lw=1)
        ax_left[3].legend(['Wallet evolution (equity)'], loc ="lower right")
        ax_right.legend(['Asset evolution'], loc ="upper left")

        plt.show()
bt = BackTestCatBoostStrategy(data_path='data/BTC-USDT.csv', model_path='./btc_classifier.cbm', fee=0.01)
bt.run_backtest()
bt.backtest_analysis()
bt.plot_equity_vs_asset()


2017-08-25 13:00:00  Buy for 990.0 of BTC at 4312.0
2017-08-25 15:00:00  Sell for 990.0 of BTC at 4312.0
2017-08-25 18:00:00  Buy for 4306.912236 of BTC at 4449.44
2017-08-25 19:00:00  Sell for 4306.912236 of BTC at 4449.44
2017-08-25 22:00:00  Buy for 4221.457316999999 of BTC at 4303.37
2017-08-26 03:00:00  Sell for 4221.457316999999 of BTC at 4303.37
2017-08-26 04:00:00  Buy for 4212.410994 of BTC at 4297.94
2017-08-26 05:00:00  Sell for 4212.410994 of BTC at 4297.94
2017-08-26 10:00:00  Buy for 4210.264574999999 of BTC at 4292.8
2017-08-26 11:00:00  Sell for 4210.264574999999 of BTC at 4292.8
2017-08-26 18:00:00  Buy for 4203.374472 of BTC at 4272.0
2017-08-26 19:00:00  Sell for 4203.374472 of BTC at 4272.0
2017-08-26 21:00:00  Buy for 4208.078952 of BTC at 4313.98
2017-08-26 23:00:00  Sell for 4208.078952 of BTC at 4313.98
2017-08-27 00:00:00  Buy for 4251.124944 of BTC at 4354.0
2017-08-27 01:00:00  Sell for 4251.124944 of BTC at 4354.0
2017-08-27 04:00:00  Buy for 4292.1715319999

AttributeError: 'BackTestCatBoostStrategy' object has no attribute 'df_days'