In [1]:
#Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import pandas_ta as ta
import yfinance as yf
from backtesting import Backtest, Strategy

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#Download asset data
df = yf.download(tickers="AAPL", start='2010-01-01', end='2023-12-31')

#calculatre technical indicators
df['MA'] = ta.ema(df['Close'], length=20)
df['rsi'] = ta.rsi(df['Close'], length=14)
df['coppock'] = ta.coppock(df['Close'], length=12)
df['returns'] = df['Close'].pct_change(-1) * 100 * -1
df['forecast_tommorrow'] = np.where(df['returns'] > 0,
                                   1,
                                   0) #1 is for UP and 0 is for down

df = df.dropna()
df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,MA,rsi,coppock,returns,forecast_tommorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2010-02-09 00:00:00-05:00,7.015000,7.053571,6.955357,7.006786,5.931941,632886800,7.207269,42.054113,-12.137974,-0.548386,0
2010-02-10 00:00:00-05:00,6.996071,7.021429,6.937857,6.968571,5.899589,370361600,7.184536,41.000213,-12.181045,1.786883,1
2010-02-11 00:00:00-05:00,6.960000,7.133929,6.930714,7.095357,6.006924,550345600,7.176043,45.848891,-11.032437,0.853385,1
2010-02-12 00:00:00-05:00,7.075357,7.201429,6.982143,7.156429,6.058629,655468800,7.174175,48.063030,-9.429613,1.484760,1
2010-02-16 00:00:00-05:00,7.212143,7.274643,7.197143,7.264286,6.149941,543737600,7.182757,51.810537,-7.010931,-0.419646,0
...,...,...,...,...,...,...,...,...,...,...,...
2023-12-21 00:00:00-05:00,196.100006,197.080002,193.500000,194.679993,194.431885,46482500,193.302025,57.752982,5.560316,-0.557844,0
2023-12-22 00:00:00-05:00,195.179993,195.410004,192.970001,193.600006,193.353287,37122800,193.330404,54.609714,5.026601,-0.284902,0
2023-12-26 00:00:00-05:00,193.610001,193.889999,192.830002,193.050003,192.803986,28919300,193.303699,53.026883,3.980161,0.051768,1
2023-12-27 00:00:00-05:00,192.490005,193.500000,191.089996,193.149994,192.903839,48087700,193.289060,53.291938,3.311362,0.222134,1


In [3]:
# Separate features and target variable
X = df.drop(columns=['forecast_tommorrow'])
y = df['forecast_tommorrow']

In [4]:
#split data into trains and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
#scale variables
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
model = XGBClassifier()

In [7]:
model.fit(X_train_scaled, y_train)

In [8]:
y_pred = model.predict(X_test_scaled)
y_pred

array([1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0,
       0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1,
       1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0,
       0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0,

In [9]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.9985714285714286
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       317
           1       1.00      1.00      1.00       383

    accuracy                           1.00       700
   macro avg       1.00      1.00      1.00       700
weighted avg       1.00      1.00      1.00       700



In [10]:
compare = pd.DataFrame(y_test)
compare['predictions'] = y_pred
compare.head(50)

Unnamed: 0_level_0,forecast_tommorrow,predictions
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-03 00:00:00-04:00,1,1
2022-04-20 00:00:00-04:00,0,0
2018-09-27 00:00:00-04:00,1,1
2020-04-16 00:00:00-04:00,0,0
2011-05-02 00:00:00-04:00,1,1
2016-07-19 00:00:00-04:00,1,1
2016-11-03 00:00:00-04:00,0,0
2017-04-07 00:00:00-04:00,0,0
2016-05-04 00:00:00-04:00,0,0
2011-05-24 00:00:00-04:00,1,1


Aplicaçao do modelo em novos dados

In [11]:
#Download asset data
df_new_stock = yf.download(tickers="MSFT")

#calculatre technical indicators
df_new_stock['MA'] = ta.ema(df_new_stock['Close'], length=20)
df_new_stock['rsi'] = ta.rsi(df_new_stock['Close'], length=14)
df_new_stock['coppock'] = ta.coppock(df_new_stock['Close'], length=12)
df_new_stock['returns'] = df_new_stock['Close'].pct_change(-1) * 100 * -1
df_new_stock['forecast_tommorrow'] = np.where(df_new_stock['returns'] > 0,
                                   1,
                                   0) #1 is for UP and 0 is for down

df_new_stock = df_new_stock.dropna()
df_new_stock

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,MA,rsi,coppock,returns,forecast_tommorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1986-04-18 00:00:00-05:00,0.105035,0.105035,0.100694,0.101563,0.062965,21628800,0.098896,59.005425,12.142096,-0.000000,0
1986-04-21 00:00:00-05:00,0.101563,0.102431,0.098958,0.101563,0.062965,22924800,0.099150,59.005425,12.943036,-1.740026,0
1986-04-22 00:00:00-05:00,0.101563,0.101563,0.099826,0.099826,0.061889,15552000,0.099214,53.351414,13.109644,0.432871,1
1986-04-23 00:00:00-05:00,0.099826,0.100694,0.098958,0.100260,0.062158,15609600,0.099314,54.523930,12.916247,9.055453,1
1986-04-24 00:00:00-05:00,0.100260,0.111979,0.099826,0.110243,0.068347,62352000,0.100355,71.974104,15.382587,5.926374,1
...,...,...,...,...,...,...,...,...,...,...,...
2024-02-02 00:00:00-05:00,403.809998,412.649994,403.559998,411.220001,411.220001,28245000,396.050551,68.834282,12.375135,-1.373107,0
2024-02-05 00:00:00-05:00,409.899994,411.160004,403.989990,405.649994,405.649994,25352300,396.964783,61.987050,11.222850,-0.039459,0
2024-02-06 00:00:00-05:00,405.880005,407.970001,402.910004,405.489990,405.489990,18382600,397.776708,61.796883,10.250556,2.067383,1
2024-02-07 00:00:00-05:00,407.440002,414.299988,407.399994,414.049988,414.049988,22340500,399.326544,67.535102,9.764800,0.014488,1


In [12]:
# Separate features and target variable
X_new_stock = df_new_stock.drop(columns=['forecast_tommorrow'])
y_new_stock = df_new_stock['forecast_tommorrow']

In [13]:
scaler = StandardScaler()
X_new_stock_scaled = scaler.fit_transform(X_new_stock)
X_new_stock_scaled

array([[-0.66603567, -0.6664651 , -0.66570859, ...,  0.44871097,
         0.80695281, -0.03015021],
       [-0.66607856, -0.66649695, -0.66573026, ...,  0.44871097,
         0.87526407, -0.84173049],
       [-0.66607856, -0.66650756, -0.66571942, ..., -0.03352651,
         0.88947391,  0.17174874],
       ...,
       [ 4.3462405 ,  4.32189543,  4.36112135, ...,  0.68679788,
         0.64562544,  0.93411515],
       [ 4.36551015,  4.39931383,  4.41715386, ...,  1.17621756,
         0.60419584, -0.02339259],
       [ 4.44715903,  4.41472429,  4.48117339, ...,  1.17935336,
         0.53885639,  0.68408969]])

In [14]:
y_pred_new_stock = model.predict(X_new_stock_scaled)
y_pred_new_stock

array([1, 0, 1, ..., 1, 1, 1])

In [15]:
print("Accuracy: ", accuracy_score(y_new_stock, y_pred_new_stock))

Accuracy:  0.9748137265190471


Create backtesting strategy

In [16]:
class trading_strategy(Strategy):
    n_train = 600
    coef_retrain = 200
    
    def init(self):
        self.model = XGBClassifier()
        self.model.fit(X=X_train, y=y_train)
        self.already_bought = False
        
    def next(self):
        explanatory_today = self.data.df.iloc[[-1], :-1]
        forecast_tomorrow = self.model.predict(explanatory_today)[0]
        
        if forecast_tomorrow == 1 and self.already_bought == False:
            self.buy()
            self.already_bought = True
        elif forecast_tomorrow == 0 and self.already_bought == True:
            self.sell()
            self.already_bought = False
        else:
            pass

In [17]:
"""class WalkForwardAnchored(Regression):
    def next(self):
        
        # we don't take any action and move on to the following day
        if len(self.data) < self.n_train:
            return
        
        # we retrain the model each 200 days
        if len(self.data) % self.coef_retrain == 0:
            X_train = self.data.df.iloc[:, :-1]
            y_train = self.data.df.iloc[:, -1]

            self.model.fit(X_train, y_train)

            super().next()
            
        else:
            
            super().next()"""

"class WalkForwardAnchored(Regression):\n    def next(self):\n        \n        # we don't take any action and move on to the following day\n        if len(self.data) < self.n_train:\n            return\n        \n        # we retrain the model each 200 days\n        if len(self.data) % self.coef_retrain == 0:\n            X_train = self.data.df.iloc[:, :-1]\n            y_train = self.data.df.iloc[:, -1]\n\n            self.model.fit(X_train, y_train)\n\n            super().next()\n            \n        else:\n            \n            super().next()"

In [18]:
bt = Backtest(df_new_stock, trading_strategy, cash=10000, commission=.002, exclusive_orders=True)

In [19]:
results = bt.run()
results

Start                     1986-04-18 00:00...
End                       2024-02-08 00:00...
Duration                  13810 days 00:00:00
Exposure Time [%]                   99.968517
Equity Final [$]          8481936593277598...
Equity Peak [$]           8481936593277598...
Return [%]                8481936593277597...
Buy & Hold Return [%]           407637.058435
Return (Ann.) [%]                 1184.764919
Volatility (Ann.) [%]              360.479512
Sharpe Ratio                         3.286636
Sortino Ratio                      104.725768
Calmar Ratio                        70.379939
Max. Drawdown [%]                  -16.833844
Avg. Drawdown [%]                   -1.418666
Max. Drawdown Duration       52 days 00:00:00
Avg. Drawdown Duration        6 days 00:00:00
# Trades                                 4851
Win Rate [%]                        79.426922
Best Trade [%]                      47.368692
Worst Trade [%]                    -14.638132
Avg. Trade [%]                    