In [748]:
# Load the libraries

import pandas as pd
import numpy as np 

import matplotlib.pyplot as plt
import yfinance as yf
from plotly.subplots import make_subplots
import plotly.graph_objects as go

from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Programmatic form of the `%matplotlib inline` line magic.
get_ipython().run_line_magic("matplotlib", "inline")
import warnings
# Suppress every warning from this point on (applies to all categories and modules).
warnings.filterwarnings("ignore")

In [749]:
# Download the price history and derive the return-based columns

ticker = "PETR4.SA"
start_date = "2015-01-01"
end_date = "2023-12-31"

# auto_adjust = False keeps the separate "Adj Close" column that every
# calculation below reads; recent yfinance releases adjust prices by default
# and no longer return that column unless asked to.
df = yf.download(ticker, start = start_date, end = end_date, auto_adjust = False)

# Some yfinance versions return (field, ticker) MultiIndex columns even for a
# single ticker; keep only the field level so df["Adj Close"] is a Series.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Horizon (in rows) of the forward return used as the prediction target.
p = 1

# Return realised on each row versus the previous row.
df["Returns"] = df["Adj Close"].pct_change(1)
# Return over the NEXT p rows, aligned back onto the current row.
df["Target"] = df["Adj Close"].pct_change(p).shift(-p)
# Mean of the current and previous adjusted close.
df["MA"] = df["Adj Close"].rolling(2).mean()

[*********************100%***********************]  1 of 1 completed


In [750]:
# Build the two "2-1" pattern flags (1 when the pattern completes on that row, else 0).
ret_today = df["Returns"]
ret_prev1 = ret_today.shift(1)   # return one row back
ret_prev2 = ret_today.shift(2)   # return two rows back
adj_close = df["Adj Close"]
moving_avg = df["MA"]

# Two negative returns in a row, then a positive one that closes above the moving average.
green_mask = (ret_prev2 < 0) & (ret_prev1 < 0) & (ret_today > 0) & (adj_close > moving_avg)

# Mirror case: two positive returns in a row, then a negative one that closes below the moving average.
red_mask = (ret_prev2 > 0) & (ret_prev1 > 0) & (ret_today < 0) & (adj_close < moving_avg)

# Comparisons against NaN evaluate to False, so rows with missing history get 0.
df["two_one_green"] = np.where(green_mask, 1, 0)
df["two_one_red"] = np.where(red_mask, 1, 0)

df.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,Target,MA,two_one_green,two_one_red
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2015-01-02,9.99,9.99,9.35,9.36,3.445318,49559500,,-0.08547,,0,0
2015-01-05,9.14,9.19,8.53,8.56,3.150847,78385100,-0.08547,-0.03271,3.298083,0,0
2015-01-06,8.71,8.83,8.04,8.28,3.047781,84723300,-0.03271,0.047102,3.099314,0,0
2015-01-07,8.56,8.73,8.36,8.67,3.191337,85531000,0.047102,0.06459,3.119559,1,0
2015-01-08,8.79,9.39,8.74,9.23,3.397467,83306300,0.06459,0.030336,3.294402,0,0
2015-01-09,9.2,9.51,8.93,9.51,3.500532,50813100,0.030336,-0.070452,3.448999,0,0
2015-01-12,9.27,9.58,8.79,8.84,3.253912,58728900,-0.070452,0.02828,3.377222,0,1
2015-01-13,8.9,9.39,8.85,9.09,3.345934,65834800,0.02828,-0.019802,3.299923,0,0
2015-01-14,8.88,8.96,8.64,8.91,3.279678,55736500,-0.019802,0.044894,3.312806,0,0
2015-01-15,9.0,9.39,8.87,9.31,3.426914,80907800,0.044894,0.013963,3.353296,0,0


In [751]:
# Cost subtracted from the return on every row where a pattern fires.
op_costs = 0.001

green_fired = df["two_one_green"] == 1
red_fired = df["two_one_red"] == 1

# Green pattern earns the forward return, red pattern earns its negative;
# rows with no pattern contribute 0. The first matching condition wins.
df["Pattern_Return"] = np.select(
    [green_fired, red_fired],
    [df["Target"] - op_costs, -df["Target"] - op_costs],
    default = 0,
)

# Running (arithmetic) sum of the per-row returns, expressed in percent.
df["Pattern_Return_Acc"] = df["Pattern_Return"].cumsum() * 100

In [752]:
# Plot the accumulated return of the plain 2-1 pattern strategy.
fig = make_subplots(rows = 1, cols = 1
                    , shared_xaxes = True
                    , vertical_spacing = 0.05)

fig.add_trace(go.Scatter(x = df.index, y = df["Pattern_Return_Acc"]
                                , name = "Patterns"
                                , line = dict(color = "blue"))
              , row = 1, col = 1)

# NOTE(review): font_color and font["color"] both target the global font
# colour; presumably the later `font` dict wins - confirm and drop one of them.
fig.update_layout(height = 600, width = 800
                  , title_text = "Pattern 2-1: " + ticker + " - Accumulated Returns"
                  , font_color = "blue"
                  , title_font_color = "black"
                  , xaxis_title = "Time"
                  , yaxis_title = "Accumulated returns (%)"
                  , font = dict(size = 15, color = "Black")
                 )

fig.update_layout(hovermode = "x unified")

# Exclude calendar days that have no row in df from the x axis.
dt_all = pd.date_range(start = df.index[0]
                       , end = df.index[-1]
                       , freq = "D")
dt_all_py = [d.to_pydatetime() for d in dt_all]
# A set keeps the membership test below constant-time per calendar day
# instead of scanning every observed date for each one.
dt_obs_py = {d.to_pydatetime() for d in df.index}

dt_breaks = [d for d in dt_all_py if d not in dt_obs_py]

fig.update_xaxes(
    rangebreaks = [dict(values = dt_breaks)]
)


fig.show()

In [753]:
# Prepare the modelling data, fit a decision tree and report its error

# Remove, in place, every row that has a missing value in any column.
df.dropna(axis = 0, how = "any", inplace = True)

# Despite its name, X carries the two pattern flags AND the target;
# they are separated into features and label further down.
X = df.loc[:, ["two_one_green", "two_one_red", "Target"]]


# Chronological split between training and test data

# Train on 4 years
start_train, end_train = "2015-01-01", "2018-12-31"

# Test on 5 years
start_test, end_test = "2019-01-01", "2023-12-31"

df1_train = X.loc[start_train : end_train]
df1_test = X.loc[start_test : end_test]

# Feature columns to keep ("manter" = keep)
manter = ["two_one_green", "two_one_red"]

X_train, y_train = df1_train[manter], df1_train["Target"]
X_test, y_test = df1_test[manter], df1_test["Target"]


# Decision tree regressor with a fixed seed
regressor = DecisionTreeRegressor(random_state = 42)
regressor.fit(X_train, y_train)  # this is the line that trains the model

# Predictions on both the training and the test windows
y_pred_train = regressor.predict(X_train)
y_pred = regressor.predict(X_test)

# Mean squared error, in-sample and out-of-sample
mse_train = mean_squared_error(y_train, y_pred_train)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error Train:", mse_train)
print("Mean Squared Error Test:", mse)



Mean Squared Error Train: 0.0011441605123481884
Mean Squared Error Test: 0.0008455716049939189


In [754]:
# Mean of the test-set predictions
y_pred.mean()


0.0016423109515598642

In [755]:
# Tree prediction for every row of df, from the two pattern flags.
df["tree"] = regressor.predict(df[["two_one_green", "two_one_red"]])

# Cost subtracted from the return on every row where a position is taken.
op_costs = 0.001

# NOTE(review): the cut-off is the mean of the TEST-set predictions (y_pred)
# but it is applied to every row, including the training period - confirm
# this is intended.
tree_cutoff = np.mean(y_pred)
close_over_ma = df["Adj Close"] / df["MA"]

# Above-cut-off prediction with the close more than 1% below the moving average.
long_mask = (df["tree"] > tree_cutoff) & (close_over_ma < 0.99)
# Below-cut-off prediction with the close more than 1% above the moving average.
short_mask = (df["tree"] < tree_cutoff) & (close_over_ma > 1.01)

# Long rows earn the forward return, short rows its negative; all others 0.
# The first matching condition wins.
df["Pattern_Return_Tree"] = np.select(
    [long_mask, short_mask],
    [df["Target"] - op_costs, -df["Target"] - op_costs],
    default = 0,
)

# Running (arithmetic) sum of the per-row returns, expressed in percent.
df["Pattern_Return_Acc_Tree"] = df["Pattern_Return_Tree"].cumsum() * 100

In [756]:
# Plot the accumulated return of the decision-tree-filtered strategy.
fig = make_subplots(rows = 1, cols = 1
                    , shared_xaxes = True
                    , vertical_spacing = 0.05)

fig.add_trace(go.Scatter(x = df.index, y = df["Pattern_Return_Acc_Tree"]
                                , name = "Patterns"
                                , line = dict(color = "blue"))
              , row = 1, col = 1)

# NOTE(review): font_color and font["color"] both target the global font
# colour; presumably the later `font` dict wins - confirm and drop one of them.
fig.update_layout(height = 600, width = 800
                  , title_text = "Pattern 2-1 Decision Tree: " + ticker + " - Accumulated Returns"
                  , font_color = "blue"
                  , title_font_color = "black"
                  , xaxis_title = "Time"
                  , yaxis_title = "Accumulated returns (%)"
                  , font = dict(size = 15, color = "Black")
                 )


# Dashed vertical line at the last date of the training window.
fig.add_vline(x = end_train, line_width = 3, line_dash="dash", line_color = "black")

fig.update_layout(hovermode = "x unified")

# Exclude calendar days that have no row in df from the x axis.
dt_all = pd.date_range(start = df.index[0]
                       , end = df.index[-1]
                       , freq = "D")
dt_all_py = [d.to_pydatetime() for d in dt_all]
# A set keeps the membership test below constant-time per calendar day
# instead of scanning every observed date for each one.
dt_obs_py = {d.to_pydatetime() for d in df.index}

dt_breaks = [d for d in dt_all_py if d not in dt_obs_py]

fig.update_xaxes(
    rangebreaks = [dict(values = dt_breaks)]
)


fig.show()