In [2]:
# Data Handling
import pandas as pd
import numpy as np

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns 
import plotly.express as px
import plotly.graph_objs as go
import matplotlib.ticker as mtick
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

# Financial Data Analysis
import yfinance as yf
import ta

# Machine Learning 
from sklearn.metrics import roc_auc_score, roc_curve, auc

# Models
from catboost import CatBoostClassifier

# Hiding warnings 
import warnings
warnings.filterwarnings("ignore")



### Data preparation 

In [3]:
from abc import ABC, abstractmethod
class AbstractStrategy(ABC):
    """Interface every trading strategy has to implement.

    A strategy is asked, for one row label at a time, whether it signals a
    sell or a buy. Concrete subclasses must override both methods; the class
    itself cannot be instantiated.
    """

    @abstractmethod
    def isSell(self, index) -> bool:
        """Return True when the strategy signals a sell for row ``index``."""

    @abstractmethod
    def isBuy(self, index) -> bool:
        """Return True when the strategy signals a buy for row ``index``."""

In [5]:
class CatBoostStrategy(AbstractStrategy):
    """Trading strategy driven by a saved CatBoost classifier.

    On construction the strategy loads the model from ``model_path``, adds
    technical-indicator columns to ``df`` and stores one signal per remaining
    row in the ``buy_signal`` column: ``1`` (buy), ``-1`` (sell / short) or
    ``0`` (no action).

    Note:
        ``df`` is modified in place: indicator columns are added and every row
        that contains a NaN is dropped. A caller that keeps a reference to the
        same frame therefore sees exactly the rows (and index labels) the
        strategy has signals for.

    Args:
        df: Price data; the code reads the ``close``, ``high``, ``low`` and
            ``volume`` columns.
        model_path: Path of the saved CatBoost model file.
    """

    # Windows (in rows) of the simple moving averages added as columns.
    _SMA_WINDOWS = (5, 10, 15, 20, 30, 50, 80, 100, 200)
    # Windows that also get a close / SMA ratio column (there is no sma15_ratio).
    _SMA_RATIO_WINDOWS = (5, 10, 20, 30, 50, 80, 100, 200)

    def __init__(self, df: pd.DataFrame, model_path: str) -> None:
        self.df = df
        self.model_path = model_path

        self.model = self.__load_model()

        self.df = self.__do_transformation(self.df)
        self.df = self.__populate_signals(self.df)

    def __load_model(self):
        """Create a classifier and load the saved model from ``self.model_path``."""
        model = CatBoostClassifier(random_state = 42, verbose = False)
        model.load_model(self.model_path)
        return model

    def __do_transformation(self, df):
        """Add the indicator columns to ``df`` in place and drop rows with NaN.

        Columns are created in a fixed order (SMAs, ratios, indicators, derived
        flags) so the resulting frame layout stays stable.
        """

        # Adding Simple Moving Averages
        for window in self._SMA_WINDOWS:
            df[f'sma{window}'] = ta.trend.sma_indicator(df['close'], window = window)

        # Adding Price to Simple Moving Averages ratios
        for window in self._SMA_RATIO_WINDOWS:
            df[f'sma{window}_ratio'] = df['close'] / df[f'sma{window}']

        # Adding RSI, CCI, Bollinger Bands, and OBV
        df['rsi'] = ta.momentum.RSIIndicator(df['close']).rsi()
        df['cci'] = ta.trend.cci(df['high'], df['low'], df['close'], window=20, constant=0.015)
        bb_indicator = ta.volatility.BollingerBands(df['close'])
        df['bb_high'] = bb_indicator.bollinger_hband()
        df['bb_low'] = bb_indicator.bollinger_lband()
        df['obv'] = ta.volume.OnBalanceVolumeIndicator(close=df['close'], volume=df['volume']).on_balance_volume()

        # Adding features derived from the indicators above
        df['rsi_overbought'] = (df['rsi'] >= 70).astype(int)
        df['rsi_oversold'] = (df['rsi'] <= 30).astype(int)
        df['above_bb_high'] = (df['close'] >= df['bb_high']).astype(int)
        df['below_bb_low'] = (df['close'] <= df['bb_low']).astype(int)
        df['obv_divergence_10_days'] = df['obv'].diff().rolling(10).sum() - df['close'].diff().rolling(10).sum()
        df['obv_divergence_20_days'] = df['obv'].diff().rolling(20).sum() - df['close'].diff().rolling(20).sum()
        df['cci_high'] = (df['cci'] >= 120).astype(int)
        df['cci_low'] = (df['cci'] <= -120).astype(int)

        # Flag, for each pair of neighbouring windows, whether the shorter SMA
        # is above the longer one (sma5 > sma10, sma10 > sma15, ...).
        for fast, slow in zip(self._SMA_WINDOWS, self._SMA_WINDOWS[1:]):
            df[f'sma{fast} > sma{slow}'] = (df[f'sma{fast}'] > df[f'sma{slow}']).astype(int)

        # Removing NaN values from the dataframe (in place, see class docstring)
        df.dropna(inplace = True)

        return df

    def __populate_signals(self, df):
        """Score every row with the model and write the ``buy_signal`` column."""

        # predict_proba returns one column per class; column 1 is the predicted
        # probability of class 1.
        # NOTE(review): the whole frame is passed to the model, including any
        # non-feature columns the caller added - confirm this matches the
        # feature layout the model was trained with.
        y_pred = self.model.predict_proba(df)[:, 1]

        # Start from "no action" (0) for every row.
        sign = np.zeros_like(y_pred)

        # NOTE(review): the probability bands below are hard-coded; presumably
        # they were tuned offline - confirm before reusing with another model.

        # Short selling signal
        sell_mask = ((y_pred >= 0.3477) & (y_pred < 0.5087)) | ((y_pred > 0.5189) & (y_pred < 0.5195))
        # Buying signal
        buy_mask = ((y_pred >= 0.5087) & (y_pred <= 0.5189)) | ((y_pred >= 0.5195) & (y_pred <= 0.5252))

        sign[sell_mask] = -1
        sign[buy_mask] = 1

        df['buy_signal'] = sign

        return df

    def isSell(self, index) -> bool:
        """Return True when the row labelled ``index`` carries the sell signal (-1).

        Raises:
            KeyError: if ``index`` is not a label of the transformed frame
                (for example a row dropped because it contained NaN).
        """
        return bool(self.df.loc[index, 'buy_signal'] == -1)

    def isBuy(self, index) -> bool:
        """Return True when the row labelled ``index`` carries the buy signal (1).

        Raises:
            KeyError: if ``index`` is not a label of the transformed frame
                (for example a row dropped because it contained NaN).
        """
        return bool(self.df.loc[index, 'buy_signal'] == 1)

In [48]:
# Build the strategy directly from an in-memory price frame.
# NOTE(review): `df` is not defined in any cell visible in this notebook, and
# this cell's execution count is out of sequence - on a fresh kernel this line
# presumably raises NameError unless `df` is loaded elsewhere; confirm.
strategy = CatBoostStrategy(df=df, model_path='./btc_classifier.cbm')

In [6]:
# balance = 1000
# position = None
# asset = 'BTC'

In [None]:
class BackTestCatBoostStrategy:
    """Long-only backtest of ``CatBoostStrategy`` over candles read from a CSV file.

    Args:
        data_path: Path of the CSV file. The code reads a ``date`` column
            (parsed as epoch milliseconds) and a ``close`` column; the strategy
            reads further price/volume columns.
        model_path: Path of the saved CatBoost model handed to the strategy.
        balance: Starting balance in quote currency.
        asset: Asset name, used only in the log lines.
        fee: Fee rate charged on the traded size at both entry and exit.
    """

    def __init__(self, data_path: str, model_path: str, balance = 1000,  asset = 'BTC', fee = 0.001):

        self.balance = balance
        self.asset = asset
        self.fee = fee

        self.df = self.__load_data(data_path)
        self.df = self.__transform_data(self.df)

        self.strategy = CatBoostStrategy(df=self.df, model_path=model_path)

    def __transform_data(self, df):
        """Parse ``date`` and add the previous close and the percentage return."""

        df['date'] = pd.to_datetime(df['date'], unit='ms')

        df['close_shift'] = df['close'].shift(1)
        df['return'] = (df['close']/df['close_shift'] - 1) * 100

        return df

    def __load_data(self, data_path):
        """Read the candle data from ``data_path``."""
        return pd.read_csv(data_path)

    def run_backtest(self):
        """Walk the rows in order, trading the whole balance on each signal.

        A position is opened on a buy signal when none is open and closed on a
        sell signal when one is open. The fee is deducted from the traded size
        on both sides. A position still open after the last row is not closed,
        so the final balance then equals the entry size after the entry fee.

        Returns:
            The final balance (also stored in ``self.balance``).
        """

        position = None

        for index, row in self.df.iterrows():

            if position is None and self.strategy.isBuy(index):
                open_price = row['close']
                # Go all in; the entry fee reduces both the position and the balance.
                fees = self.balance * self.fee
                open_usd_size = self.balance - fees
                self.balance = self.balance - fees

                position = {
                    'open_price': open_price,
                    'open_size': open_usd_size
                }

                print(f'{index}  Buy for {open_usd_size} of {self.asset} at {open_price}' )

            elif position is not None and self.strategy.isSell(index):
                close_price = row['close']
                # Relative price change since entry.
                trade_result = (close_price - position['open_price']) / position['open_price']
                # Value of the position at exit: scale the invested SIZE (not
                # the entry price) by the price change, then pay the exit fee.
                close_size = position['open_size'] * (1 + trade_result)
                fees = close_size * self.fee
                close_size = close_size - fees
                # Book the trade's profit or loss against the balance.
                self.balance = self.balance + close_size - position['open_size']
                position = None

                print(f'{index}  Sell for {close_size} of {self.asset} at {close_price}' )

        print(f'Final balance : {self.balance}')

        return self.balance


# Run the backtest over data/BTC-USDT.csv. fee=0.01 overrides the constructor
# default of 0.001.
# NOTE(review): the output recorded for this cell is a NameError on `pd`, which
# suggests the import cell had not been run in that kernel session - re-run the
# notebook top to bottom (Restart & Run All) to confirm.
bt = BackTestCatBoostStrategy(data_path='data/BTC-USDT.csv', model_path='./btc_classifier.cbm', fee=0.01)
bt.run_backtest()


NameError: name 'pd' is not defined