<a href="https://colab.research.google.com/github/leandrocandida/CleanArch/blob/master/Untitled10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [86]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [141]:
ticker = "PETR4.SA"
df = yf.download(ticker,start='2016-12-31',end='2025-12-15')
df = df.xs((ticker),level="Ticker",axis=1)

[*********************100%***********************]  1 of 1 completed


In [142]:
df['Return'] = df['Close'].pct_change()


In [163]:
v = 10
df['HistVol'] = df['Return'].rolling(window=v).std()*100

In [164]:
df['FutureVolatility'] = df['HistVol'].shift(-v)

In [165]:
for lag in range(1,11):
  df[f'lag_{lag}'] = df['HistVol'].shift(lag)

In [166]:
def calcular_rsl_ema(dados, periodo):
    ema = dados['Close'].ewm(span=periodo, adjust=False).mean()
    rsl = dados['Close'] / ema
    return rsl

# Exemplo de uso
# (Assumindo que você tem um DataFrame 'df' com a coluna 'Close')
periodo_rsl = 22
rsl_ema = calcular_rsl_ema(df, periodo_rsl)
df['RSL_EMA'] = rsl_ema

In [167]:
#df = df.dropna().copy()
df.head()

Price,Close,High,Low,Open,Volume,Return,HistVol,FutureVolatility,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,lag_8,lag_9,lag_10,RSL_EMA,Predicted_FutureVolatility
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2017-03-30,4.290869,4.347289,4.261174,4.320563,32928600,0.0,,1.776393,,,,,,,,,,,1.0,2.099092
2017-03-31,4.302746,4.350258,4.234449,4.264143,33553000,0.002768,,1.849021,,,,,,,,,,,1.002527,2.218452
2017-04-03,4.356197,4.379953,4.314624,4.317594,39427200,0.012422,,1.811594,,,,,,,,,,,1.01366,2.22729
2017-04-04,4.409647,4.421525,4.326502,4.356197,24132000,0.01227,,1.963408,,,,,,,,,,,1.023774,2.295149
2017-04-05,4.326502,4.5017,4.305716,4.469037,49920900,-0.018855,,2.123678,,,,,,,,,,,1.004081,2.335507


In [168]:
features = [f'lag_{lag}' for lag in range(1,11)]
target = 'FutureVolatility'
features.append('RSL_EMA')

In [169]:
df = df.iloc[v*2:]
df.index = pd.to_datetime(df.index)
train = df.loc["2017-01-01":"2020-12-31"]
test = df.loc["2021-01-01":"2025-12-31"]

X_train = train[features]
y_train = train[target]
X_test = test[features]
y_test = test[target]

In [177]:
model = GradientBoostingRegressor(
    n_estimators=500,
    learning_rate=0.01,
    max_depth=4,
    min_samples_leaf=3,
    subsample=0.8,
    max_features='sqrt',
    random_state=42
#'subsample': 0.8, 'n_estimators': 500, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 4, 'learning_rate': 0.01
)

model.fit(X_train,y_train)

df['Predicted_FutureVolatility'] = model.predict(df[features])

In [None]:
from sklearn.model_selection import RandomizedSearchCV

model = GradientBoostingRegressor(random_state=42)

param_grid = {
    'n_estimators': [500, 1000, 1500, 2000],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 4, 5, 6],
    'min_samples_leaf': [3, 5, 7, 9],
    'subsample': [0.6, 0.7, 0.8, 0.9],
    'max_features': ['sqrt', 'log2', None]
}

random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=100,  # Número de combinações a serem testadas
    cv=5,  # Validação cruzada com 5 folds
    n_jobs=-1,  # Usa todos os núcleos da CPU
    random_state=42,
    verbose=1,
    scoring='neg_mean_squared_error' # ou 'r2' ou o que for melhor para seu problema.
)

random_search.fit(X_train, y_train)



In [174]:
# Melhores parâmetros encontrados
print("Melhores parâmetros:", random_search.best_params_)

# Melhor modelo
best_model = random_search.best_estimator_

# Avalie o modelo nos dados de teste
from sklearn.metrics import mean_squared_error, r2_score
y_pred = best_model.predict(X_test)



Melhores parâmetros: {'subsample': 0.8, 'n_estimators': 500, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 4, 'learning_rate': 0.01}


In [178]:
first_test_date = test.index.min()

fig = make_subplots(rows=2,cols=1,shared_xaxes=True)
fig.add_trace(go.Scatter(x=df.index,y=df['HistVol'].rolling(v).mean(),
                         mode="lines",
                         name='Volatilidade Observada'),row=1,col=1)
fig.add_trace(go.Scatter(x=df.index + pd.Timedelta(days=v),y=df['Predicted_FutureVolatility'].rolling(v).mean(),
                         mode='lines',
                         name='Volatilidade Futura Prevista'),row=1,col=1)
fig.add_trace(go.Scatter(x=df.index + pd.Timedelta(days=v),y=df['Close'],
                         mode='lines',
                         name='Close'),row=2,col=1)
fig.add_shape(
    dict(
      type='line',
      x0=first_test_date,x1=first_test_date,
      y0=df['HistVol'].min(), y1=df['HistVol'].max(),
      line=dict(color='red',width=2,dash='dash')
    ),row=1,col=1
)

fig.update_layout(title = ticker + ' Comparação entre Volatilidade Observada e Prevista',
                  xaxis2_title='Data',
                  yaxis_title='Volatilidade',
                  yaxis2_title='Preço de Fechamento')
fig.show()