<a href="https://colab.research.google.com/github/leandrocandida/CleanArch/blob/master/Untitled10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [None]:
# Symbol whose price history is analysed throughout the notebook.
ticker = "PETR4.SA"

# Download the price history, then select this ticker from the "Ticker" column
# level so the frame is left with plain field columns (Close, High, Low, ...).
df = (
    yf.download(ticker, start='2016-12-31', end='2025-03-15')
      .xs(ticker, level="Ticker", axis=1)
)

[*********************100%***********************]  1 of 1 completed


In [None]:
# Row-over-row fractional change of the closing price (first row is NaN).
df = df.assign(Return=df['Close'].pct_change())


In [None]:
# Window length in rows; reused below as the forecast horizon and in the chart.
v = 10
# Rolling standard deviation of the returns over the last v rows, scaled by 100
# so the values read as percentages.
df['HistVol'] = df['Return'].rolling(v).std().mul(100)

In [None]:
# Target: the HistVol value found v rows later. The last v rows get NaN because
# there is no later row to read from.
df['FutureVolatility'] = df['HistVol'].shift(periods=-v)

In [None]:
# Predictor columns lag_1 .. lag_10: HistVol as it was 1 to 10 rows earlier.
df = df.assign(**{f'lag_{k}': df['HistVol'].shift(k) for k in range(1, 11)})

In [None]:
# Keep only fully populated rows. This removes the leading rows where the
# rolling/lagged columns are still NaN and the trailing rows whose
# FutureVolatility is NaN. The copy makes df an independent frame.
df = df.dropna(axis=0, how='any').copy()

In [None]:
# Model inputs: the ten lagged-volatility column names, lag_1 through lag_10.
features = ['lag_' + str(n) for n in range(1, 11)]
# Column the model learns to predict.
target = 'FutureVolatility'

In [None]:
# Discard the first 2*v rows of the cleaned frame.
# NOTE(review): this rebinds df, so re-running the cell without re-running the
# cells above drops another 2*v rows each time.
df = df.iloc[2 * v:]
df.index = pd.to_datetime(df.index)

# Chronological split: 2017-2020 for fitting, 2021 onward for evaluation.
train, test = df.loc["2017-01-01":"2020-12-31"], df.loc["2021-01-01":"2025-12-31"]

# Feature matrices and target vectors for each period.
X_train, y_train = train[features], train[target]
X_test, y_test = test[features], test[target]

In [None]:
# Gradient-boosted regressor fitted on the training period only. The fixed
# random_state keeps the row subsampling and feature subsampling repeatable.
model = GradientBoostingRegressor(
    n_estimators=1500,
    learning_rate=0.05,
    max_depth=3,
    min_samples_leaf=5,
    subsample=0.8,
    max_features='sqrt',
    random_state=42,
).fit(X_train, y_train)

# Predict for every row of df, so the column covers both the training rows
# (in-sample) and the test rows (out-of-sample).
df['Predicted_FutureVolatility'] = model.predict(df.loc[:, features])

In [None]:
# Display the frame with the features, target and model predictions attached.
df

Price,Close,High,Low,Open,Volume,Return,HistVol,FutureVolatility,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,lag_8,lag_9,lag_10,Predicted_FutureVolatility
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2017-03-02,4.496592,4.597772,4.460881,4.594796,38490800,-0.026418,1.833897,2.913476,1.727730,1.539131,1.737553,1.701813,1.520113,1.733347,1.912822,1.991864,2.044870,2.048179,2.775050
2017-03-03,4.559086,4.565038,4.475761,4.529327,24516800,0.013898,1.919024,2.936399,1.833897,1.727730,1.539131,1.737553,1.701813,1.520113,1.733347,1.912822,1.991864,2.044870,2.971614
2017-03-06,4.493617,4.603725,4.475761,4.496592,23325000,-0.014360,1.942414,3.307888,1.919024,1.833897,1.727730,1.539131,1.737553,1.701813,1.520113,1.733347,1.912822,1.991864,2.961314
2017-03-07,4.517425,4.576943,4.499569,4.517425,30927300,0.005298,1.923566,3.422517,1.942414,1.919024,1.833897,1.727730,1.539131,1.737553,1.701813,1.520113,1.733347,1.912822,3.041695
2017-03-08,4.329943,4.517425,4.329943,4.511473,49300700,-0.041502,2.097560,3.842356,1.923566,1.942414,1.919024,1.833897,1.727730,1.539131,1.737553,1.701813,1.520113,1.733347,3.098751
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-20,38.500000,38.570000,38.310001,38.419998,26641200,0.001561,1.256393,1.538302,1.270451,1.321814,1.266345,1.260577,0.718354,0.878610,0.768452,0.768103,0.888047,0.884375,1.889993
2025-02-21,38.389999,38.529999,38.169998,38.529999,30115200,-0.002857,1.233367,1.519496,1.256393,1.270451,1.321814,1.266345,1.260577,0.718354,0.878610,0.768452,0.768103,0.888047,1.683679
2025-02-24,38.119999,38.509998,37.799999,38.500000,27830300,-0.007033,1.286144,1.559125,1.233367,1.256393,1.270451,1.321814,1.266345,1.260577,0.718354,0.878610,0.768452,0.768103,2.265156
2025-02-25,37.950001,38.439999,37.830002,38.119999,31960900,-0.004460,1.307272,1.687575,1.286144,1.233367,1.256393,1.270451,1.321814,1.266345,1.260577,0.718354,0.878610,0.768452,1.918994


In [None]:
# First date of the evaluation period; drawn as a dashed red marker below.
first_test_date = test.index.min()

# The target is HistVol shifted by v ROWS (trading sessions), so the forecast
# made on row i refers to the date of row i + v. Offsetting by v calendar days
# (pd.Timedelta(days=v)) falls short of that horizon because weekends and
# holidays are not rows. For the last v rows the target dates are not in df,
# so they are approximated with plain business days (exchange holidays are not
# taken into account).
future_dates = pd.bdate_range(start=df.index[-1], periods=v + 1)[1:]
forecast_dates = df.index[v:].append(future_dates)

fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.5)

# Top panel: observed vs. predicted volatility, both smoothed with a v-row mean.
fig.add_trace(go.Scatter(x=df.index, y=df['HistVol'].rolling(v).mean(),
                         mode="lines",
                         name='Volatilidade Observada'), row=1, col=1)
fig.add_trace(go.Scatter(x=forecast_dates,
                         y=df['Predicted_FutureVolatility'].rolling(v).mean(),
                         mode='lines',
                         name='Volatilidade Futura Prevista'), row=1, col=1)

# Bottom panel: closing price on its own observation dates (it is an observed
# value, so it must not be moved forward like the forecast).
fig.add_trace(go.Scatter(x=df.index, y=df['Close'],
                         mode='lines',
                         name='Close'), row=2, col=1)

# Vertical dashed line at the start of the test period, spanning the HistVol range.
fig.add_shape(
    dict(
      type='line',
      x0=first_test_date, x1=first_test_date,
      y0=df['HistVol'].min(), y1=df['HistVol'].max(),
      line=dict(color='red', width=2, dash='dash')
    ), row=1, col=1
)

# A separator keeps the ticker from running into the title text.
fig.update_layout(title=f'{ticker} - Comparação entre Volatilidade Observada e Prevista',
                  xaxis2_title='Data',
                  yaxis_title='Volatilidade',
                  yaxis2_title='Preço de Fechamento')
fig.show()