# Backtesting ML Regression-Based

## Load the model

In [1]:
import pickle

In [2]:
with open('models/model_dt_regression.pkl', 'rb') as f:
    model_dt = pickle.load(f)

In [3]:
model_dt

## Load the data

In [4]:
import pandas as pd

df = pd.read_excel('data/Microsoft_LinkedIn_Processed.xlsx', index_col=0, parse_dates=['Date'])
df

Unnamed: 0_level_0,Close,High,Low,Open,Volume,change_tomorrow,change_tomorrow_direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-12-08,55.181129,55.696675,55.027373,55.443424,21220800,1.549137,UP
2016-12-09,56.049412,56.067501,55.289665,55.334887,27349400,0.321693,UP
2016-12-12,56.230301,56.347882,55.823296,55.913741,20198100,1.286142,UP
2016-12-13,56.962925,57.360886,56.293626,56.528784,35718900,-0.478631,DOWN
2016-12-14,56.691582,57.388017,56.555911,56.981009,30352700,-0.159803,DOWN
...,...,...,...,...,...,...,...
2025-02-27,392.529999,405.739990,392.170013,401.269989,21127400,1.123452,UP
2025-02-28,396.989990,397.630005,386.570007,392.660004,32845700,-2.187959,DOWN
2025-03-03,388.489990,398.820007,386.160004,398.820007,23007700,0.030878,UP
2025-03-04,388.609985,392.579987,381.000000,383.399994,29342900,3.094610,UP


## Backtesting.py Library

### Create your Strategy Class

In [5]:
from backtesting import Backtest, Strategy



In [6]:
class Regression(Strategy):
    def init(self):
        self.model = model_dt
        self.already_bought = False

    def next(self):
        explanatory_today = self.data.df.iloc[[-1], :]
        forecast_tomorrow = self.model.predict(explanatory_today)[0]
        
        if forecast_tomorrow > 1 and self.already_bought == False:
            self.buy()
            self.already_bought = True
        elif forecast_tomorrow < -5 and self.already_bought == True:
            self.sell()
            self.already_bought = False
        else:
            pass

### Define initial conditions

In [7]:
df_explanatory = df[['Open', 'High', 'Low', 'Close', 'Volume']].copy()

In [8]:
df_explanatory.iloc[-1:, :]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-03-05,389.339996,401.670013,388.809998,401.019989,23412000


In [9]:
bt = Backtest(df_explanatory, Regression,
              cash=10000, commission=.002, exclusive_orders=True)

### Run backtesting

In [None]:
results = bt.run()

### Interpret backtesting results

In [None]:
results.to_frame(name='Values').loc[:'Return [%]']

Unnamed: 0,Values
Start,2016-12-08 00:00:00
End,2025-03-05 00:00:00
Duration,3009 days 00:00:00
Exposure Time [%],92.753623
Equity Final [$],75469.03052
Equity Peak [$],89289.821532
Commissions [$],2679.450454
Return [%],654.690305


### Visualize the strategy performance

In [None]:
bt.plot(filename='reports_backtesting/backtesting_regression.html')

FileNotFoundError: [Errno 2] No such file or directory: 'reports_backtesting/backtesting_regression.html'