<a href="https://colab.research.google.com/github/leandrocandida/CleanArch/blob/master/Untitled11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [72]:
ticker = "PETR4.SA"
df = yf.download(ticker,period='10y',interval='1D')
df = df.xs((ticker),level="Ticker",axis=1)

[*********************100%***********************]  1 of 1 completed


In [73]:
df['Return'] = df['Close'].pct_change()

In [74]:
v = 20
df['HistVol'] = df['Return'].rolling(window=v).std()*100

In [75]:
df['FutureVolatility'] = df['HistVol'].shift(-v)

In [76]:
for lag in range(1,11):
  df[f'lag_{lag}'] = df['HistVol'].shift(lag)

In [77]:
# Calcular a Média Móvel Exponencial (MME)
df['MME'] = df['Close'].ewm(span=22, adjust=False).mean()

 # Calcular o RSL (Relative Strength Levy)
df['RSL'] = df['Close'] / df['MME']

In [78]:
df['volume_medio'] = df['Volume'].rolling(window=20).mean()

In [79]:
df = df.dropna().copy()

In [80]:
features = [f'lag_{lag}' for lag in range(1,11)]
target = 'FutureVolatility'

In [81]:
features.append('RSL')
features.append('volume_medio')

features

['lag_1',
 'lag_2',
 'lag_3',
 'lag_4',
 'lag_5',
 'lag_6',
 'lag_7',
 'lag_8',
 'lag_9',
 'lag_10',
 'RSL',
 'volume_medio']

In [82]:
df = df.iloc[v*2:]
df.index = pd.to_datetime(df.index)
train = df.loc["2017-01-01":"2020-12-31"]
test = df.loc["2021-01-01":"2025-12-31"]

X_train = train[features]
y_train = train[target]
X_test = test[features]
y_test = test[target]

In [83]:
'''from sklearn.model_selection import RandomizedSearchCV

# Definir o espaço de busca dos hiperparâmetros
param_grid = {
    'n_estimators': [500, 1000, 1500, 2000],
    'learning_rate': [0.01, 0.03, 0.05, 0.1, 0.2],
    'max_depth': [3, 4, 5, 6],
    'min_samples_leaf': [1, 3, 5, 7],
    'subsample': [0.7, 0.8, 0.9, 1.0],
    'max_features': ['sqrt', 'log2', None]
}

# Criar o modelo base
model = GradientBoostingRegressor(random_state=42)

# Configurar o RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=50,  # Número de combinações a testar
    scoring='neg_mean_squared_error',
    cv=5,  # Validação cruzada com 5 folds
    verbose=1,
    n_jobs=-1,  # Paralelização total
    random_state=42
)

# Executar o RandomizedSearchCV (supondo que X_train e y_train já estejam definidos)
random_search.fit(X_train, y_train)

# Melhor conjunto de hiperparâmetros encontrados
print("Melhores hiperparâmetros:", random_search.best_params_)

# Melhor modelo treinado
best_model = random_search.best_estimator_
'''

'from sklearn.model_selection import RandomizedSearchCV\n\n# Definir o espaço de busca dos hiperparâmetros\nparam_grid = {\n    \'n_estimators\': [500, 1000, 1500, 2000],\n    \'learning_rate\': [0.01, 0.03, 0.05, 0.1, 0.2],\n    \'max_depth\': [3, 4, 5, 6],\n    \'min_samples_leaf\': [1, 3, 5, 7],\n    \'subsample\': [0.7, 0.8, 0.9, 1.0],\n    \'max_features\': [\'sqrt\', \'log2\', None]\n}\n\n# Criar o modelo base\nmodel = GradientBoostingRegressor(random_state=42)\n\n# Configurar o RandomizedSearchCV\nrandom_search = RandomizedSearchCV(\n    estimator=model,\n    param_distributions=param_grid,\n    n_iter=50,  # Número de combinações a testar\n    scoring=\'neg_mean_squared_error\',\n    cv=5,  # Validação cruzada com 5 folds\n    verbose=1,\n    n_jobs=-1,  # Paralelização total\n    random_state=42\n)\n\n# Executar o RandomizedSearchCV (supondo que X_train e y_train já estejam definidos)\nrandom_search.fit(X_train, y_train)\n\n# Melhor conjunto de hiperparâmetros encontrados\npri

In [84]:
model = GradientBoostingRegressor(
    n_estimators=1500,
    learning_rate=0.05,
    max_depth=3,
    min_samples_leaf=5,
    subsample=0.8,
    max_features='sqrt',
    random_state=42

)

model.fit(X_train,y_train)

df['Predicted_FutureVolatility'] = model.predict(df[features])
previsoes = model.predict(X_test)

In [85]:
from sklearn.metrics import mean_squared_error

# Avaliando o modelo
erro_quadratico_medio = mean_squared_error(y_test, previsoes)
print(f'Erro Quadrático Médio: {erro_quadratico_medio}')

Erro Quadrático Médio: 1.9196016531645395


In [86]:
first_test_date = test.index.min()

fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.25)

# Adicionando gráfico de RSL
fig.add_trace(go.Scatter(x=df.index,y=df['HistVol'].rolling(v).mean(),
                         mode='lines',
                         name='Volatilidade Observada'),row=1,col=1)

fig.add_trace(go.Scatter(x=df.index + pd.Timedelta(days=v),y=df['Predicted_FutureVolatility'].rolling(v).mean(),
                         mode='lines',
                         name='Volatilidade Futura Prevista'),row=1,col=1)

# Adicionando gráfico de preços
fig.add_trace(go.Scatter(x=df.index ,y=df['Close'].values,
                         mode='lines', name=f'Close'), row=2, col=1)

fig.add_shape(
    dict(
      type='line',
      x0=first_test_date,x1=first_test_date,
      y0=df['HistVol'].min(), y1=df['HistVol'].max(),
      line=dict(color='red',width=2,dash='dash')
    ),row=1,col=1
)

fig.update_layout(title = ticker + 'Comparação entre Volatilidade Observada e Prevista',
                  xaxis2_title='Data',
                  yaxis_title='Volatilidade',
                  yaxis2_title='Preço de Fechamento',
                  template='plotly_dark',
                  font=dict(size=15),
                  height=900)

fig.show()



In [17]:
df1 = df.copy()
df1['Vol'] = np.round(df['Return'].rolling(window=v).std()*np.sqrt(252),4)
df1.tail()

Price,Close,High,Low,Open,Volume,Return,HistVol,FutureVolatility,lag_1,lag_2,...,lag_5,lag_6,lag_7,lag_8,lag_9,lag_10,MME,RSL,Predicted_FutureVolatility,Vol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-02-27,36.610001,36.740002,35.48,36.360001,129693500,-0.03531,1.714844,1.973687,1.15512,1.307272,...,1.256393,1.270451,1.321814,1.266345,1.260577,0.718354,37.458914,0.977337,1.311345,0.2722
2025-02-28,35.93,36.549999,35.810001,36.490002,55476800,-0.018574,1.448487,1.8753,1.714844,1.15512,...,1.233367,1.256393,1.270451,1.321814,1.266345,1.260577,37.325965,0.962601,1.122402,0.2299
2025-03-05,34.619999,35.110001,34.549999,35.02,48552000,-0.03646,1.718034,1.352677,1.448487,1.714844,...,1.286144,1.233367,1.256393,1.270451,1.321814,1.266345,37.090663,0.933389,1.523002,0.2727
2025-03-06,34.259998,35.119999,34.029999,34.810001,47749600,-0.010399,1.44203,1.254127,1.718034,1.448487,...,1.307272,1.286144,1.233367,1.256393,1.270451,1.321814,36.844518,0.929853,1.827847,0.2289
2025-03-07,34.630001,34.93,34.23,34.290001,28074700,0.0108,1.552931,1.283418,1.44203,1.718034,...,1.15512,1.307272,1.286144,1.233367,1.256393,1.270451,36.651952,0.944834,1.77359,0.2465


In [18]:
fig = make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.08)

fig.add_trace(go.Scatter(x=df1.index,y=df1['Vol']*100,
                         mode='lines',
                         name='Volatilidade', line = dict(color = "blue")), row=1,col=1)

fig.update_layout(title = ticker + ' Volatilidade Anualizada',
                  font_color='blue',
                  title_font_color='white',
                  xaxis_title='Anos',
                  yaxis_title='Volatilidade (%)',
                  template='plotly_dark',
                  font=dict(size=15,color = "white"),
                  height=600, width=800)
fig.update_layout(hovermode= 'x')

dt_all = pd.date_range(start = df1.index[0],
                       end = df1.index[-1],
                       freq = 'D')

dt_all_py = [d.to_pydatetime()  for d in dt_all]
dt_obs_py = [d.to_pydatetime()  for d in df1.index]

dt_breaks = [d for d in dt_all_py if d not in dt_obs_py]

fig.update_xaxes(
    rangebreaks=[
        dict(values=dt_breaks)
    ]
)
fig.show()