In [102]:
!pip install -r requirements.txt



In [103]:
import yfinance as yf
import json
import pandas as pd
import ta
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [104]:
def download_stock_data(ticker, start_date, end_date):
    """
    Downloads historical stock data for a given ticker symbol between specified dates.

    Parameters:
    ticker (str): The stock ticker symbol.
    start_date (str): The start date in 'YYYY-MM-DD' format.
    end_date (str): The end date in 'YYYY-MM-DD' format.

    Returns:
    pd.DataFrame: A DataFrame containing the stock data.
    """
    stock = yf.download(ticker, start=start_date, end=end_date)
    data = stock.xs(ticker, level="Ticker", axis=1)
    if data.empty:
        raise ValueError(f"No data found for ticker {ticker} between {start_date} and {end_date}.")
    return data

In [105]:
df = download_stock_data('PETR4.SA', '2020-01-01', '2025-01-01')
df.head()

  stock = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-02,9.303457,9.303457,9.18527,9.245879,37774500
2020-01-03,9.227698,9.467103,9.227698,9.358007,71595600
2020-01-06,9.336793,9.376189,9.076175,9.221636,81844000
2020-01-07,9.300428,9.358006,9.233758,9.339824,32822000
2020-01-08,9.242848,9.32467,9.164057,9.300427,48215600


## Preenchendo valores nulos

In [106]:
df_filled = df.copy()
df_filled['Close'] = df_filled['Close'].fillna(method='ffill')
df_filled['Volume'] = df_filled['Volume'].fillna(method='ffill')
df_filled['High'] = df_filled['High'].fillna(method='ffill')
df_filled['Low'] = df_filled['Low'].fillna(method='ffill')

  df_filled['Close'] = df_filled['Close'].fillna(method='ffill')
  df_filled['Volume'] = df_filled['Volume'].fillna(method='ffill')
  df_filled['High'] = df_filled['High'].fillna(method='ffill')
  df_filled['Low'] = df_filled['Low'].fillna(method='ffill')


## Feature engenniring 

### Momentum 

In [107]:
df['RSI'] = ta.momentum.rsi(df_filled['Close'], window=14) # Relative Strength Index
df['MACD'] = ta.trend.macd(df_filled['Close']) # Moving Average Convergence Divergence (também mede tendência)

### Tendência

In [108]:
df['SMA_10'] = ta.trend.sma_indicator(df_filled['Close'], window=10) # Simple Moving Average
df['SMA_50'] = ta.trend.sma_indicator(df_filled['Close'], window=50) # Simple Moving Average
df['SMA_200'] = ta.trend.sma_indicator(df_filled['Close'], window=200) # Simple Moving Average
df['EMA_10'] = ta.trend.ema_indicator(df_filled['Close'], window=10) # Exponential Moving Average
df['EMA_50'] = ta.trend.ema_indicator(df_filled['Close'], window=50) # Exponential Moving Average
df['EMA_200'] = ta.trend.ema_indicator(df_filled['Close'], window=200) # Exponential Moving Average

### Volatilidade

In [109]:
df['ART'] = ta.volatility.average_true_range(df_filled['High'], df_filled['Low'], df_filled['Close'], window=14) # Average True Range
df['BB_High'] = ta.volatility.bollinger_hband(df_filled['Close'], window=20, window_dev=2) # [23, 24, 25, 26, 2, 27, 28, 29, 30, 31, 32, 33]
df['BB_MM'] = ta.volatility.bollinger_mavg(df_filled['Close'], window=20) # [23, 24, 25, 26, 2, 27, 28, 29, 30, 31, 32, 33]
df['BB_Lower'] = ta.volatility.bollinger_lband(df_filled['Close'], window=20, window_dev=2) # [23, 24, 25, 26, 2, 27, 28, 29, 30, 31, 32, 33]
df['BB_Width'] = ta.volatility.bollinger_wband(df_filled['Close'], window=20, window_dev=2) # Largura das Bandas de Bollinger [34, 35, 36, 29]

### Volume

In [110]:

df['Volume_OBV'] = ta.volume.on_balance_volume(df_filled['Close'], df_filled['Volume']) # On-Balance Volume

In [111]:
df.tail()

Price,Close,High,Low,Open,Volume,RSI,MACD,SMA_10,SMA_50,SMA_200,EMA_10,EMA_50,EMA_200,ART,BB_High,BB_MM,BB_Lower,BB_Width,Volume_OBV
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2024-12-20,33.633247,34.089601,33.505469,33.916188,59277400,45.608793,0.309008,34.701216,33.111401,32.031378,34.362004,33.548936,31.714008,0.668415,35.522135,34.527679,33.533224,5.760338,4646570200
2024-12-23,33.642376,33.815789,33.514598,33.770154,43785600,45.716678,0.227853,34.550684,33.123254,32.061752,34.231163,33.5526,31.733195,0.642185,35.556191,34.49016,33.424128,6.181656,4690355800
2024-12-26,33.895279,34.113224,33.734187,33.762617,22920700,48.74964,0.181848,34.412276,33.140692,32.088867,34.170093,33.566039,31.754709,0.629947,35.56345,34.46748,33.37151,6.359443,4713276500
2024-12-27,33.791042,34.113223,33.743664,34.113223,24167200,47.569907,0.135417,34.228332,33.154465,32.117162,34.101175,33.574862,31.774971,0.611348,35.57671,34.445733,33.314757,6.566714,4689109300
2024-12-30,34.293266,34.463833,33.89528,33.904755,22355600,53.418934,0.13756,34.15834,33.183726,32.148587,34.1361,33.603035,31.800028,0.615736,35.569752,34.466653,33.363555,6.400962,4711464900


In [112]:
df_cleaned = df.dropna()
df_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1045 entries, 2020-10-19 to 2024-12-30
Data columns (total 19 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Close       1045 non-null   float64
 1   High        1045 non-null   float64
 2   Low         1045 non-null   float64
 3   Open        1045 non-null   float64
 4   Volume      1045 non-null   int64  
 5   RSI         1045 non-null   float64
 6   MACD        1045 non-null   float64
 7   SMA_10      1045 non-null   float64
 8   SMA_50      1045 non-null   float64
 9   SMA_200     1045 non-null   float64
 10  EMA_10      1045 non-null   float64
 11  EMA_50      1045 non-null   float64
 12  EMA_200     1045 non-null   float64
 13  ART         1045 non-null   float64
 14  BB_High     1045 non-null   float64
 15  BB_MM       1045 non-null   float64
 16  BB_Lower    1045 non-null   float64
 17  BB_Width    1045 non-null   float64
 18  Volume_OBV  1045 non-null   int64  
dtypes: float6

### Normalizando dados

In [113]:
scaler_minmax = MinMaxScaler()
features_to_scale = ['RSI', 'MACD', 
                  'SMA_10', 'SMA_50', 'SMA_200', 'EMA_10', 'EMA_50', 'EMA_200',
                  'ART', 'BB_High', 'BB_MM', 'BB_Lower', 'BB_Width', 
                  'Volume_OBV']
df_scaled_minmax = scaler_minmax.fit_transform(df_cleaned[features_to_scale])
df_scaled_minmax = pd.DataFrame(df_scaled_minmax, columns=features_to_scale, index=df_cleaned.index)

In [114]:
df_scaled_minmax

Unnamed: 0_level_0,RSI,MACD,SMA_10,SMA_50,SMA_200,EMA_10,EMA_50,EMA_200,ART,BB_High,BB_MM,BB_Lower,BB_Width,Volume_OBV
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-10-19,0.254100,0.340983,0.002179,0.009180,0.013706,0.001903,0.005141,0.004155,0.003366,0.002755,0.001103,0.018215,0.110983,0.023914
2020-10-20,0.375924,0.349226,0.002336,0.008593,0.013090,0.002642,0.004813,0.003865,0.006150,0.001337,0.000721,0.018925,0.088387,0.035481
2020-10-21,0.372717,0.356139,0.002577,0.007852,0.012488,0.003209,0.004490,0.003576,0.002241,0.000000,0.000382,0.019635,0.066556,0.025235
2020-10-22,0.484575,0.369103,0.003676,0.007347,0.011904,0.004971,0.004475,0.003372,0.006144,0.001360,0.000705,0.018868,0.089255,0.041963
2020-10-23,0.437373,0.376929,0.003812,0.006692,0.011312,0.005898,0.004343,0.003137,0.004833,0.001689,0.000796,0.018707,0.094468,0.031528
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-20,0.336095,0.538132,0.993401,0.997313,0.995473,0.985038,0.998026,0.996567,0.571556,0.988315,0.998302,0.995180,0.030354,0.882880
2024-12-23,0.337933,0.503746,0.988198,0.997753,0.996646,0.980503,0.998159,0.997333,0.540088,0.989464,0.996989,0.991343,0.039737,0.890284
2024-12-26,0.389594,0.484253,0.983415,0.998401,0.997693,0.978386,0.998650,0.998192,0.525405,0.989709,0.996195,0.989492,0.043696,0.894160
2024-12-27,0.369499,0.464580,0.977057,0.998913,0.998786,0.975997,0.998972,0.999000,0.503091,0.990157,0.995434,0.987496,0.048312,0.890073
