In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [30]:
# Carregar Dados Diretamente da API
@st.cache_data
def load_data():
    # Código da série do Petróleo Brent
    codigo_serie = "EIA366_PBRENT366"

    # Obter os dados da API
    df = timeseries(codigo_serie)

    # Converter 'RAW DATE' para datetime
    df['RAW DATE'] = pd.to_datetime(df['RAW DATE'], errors='coerce', utc=True)

    # Remover linhas inválidas (sem data ou valor de preço)
    df = df.dropna(subset=['RAW DATE', 'VALUE (US$)'])

    # Renomear as colunas
    df.rename(columns={'VALUE (US$)': 'price', 'RAW DATE': 'date'}, inplace=True)

    return df

# Carregar os dados
df = load_data()

2024-11-17 01:31:26.311 No runtime found, using MemoryCacheStorageManager


In [None]:
# Detectar outliers (usando o método IQR)
Q1 = df['price'].quantile(0.25)
Q3 = df['price'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
data = df[(df['price'] >= lower_bound) & (df['price'] <= upper_bound)]

                        CODE                      date  DAY  MONTH  YEAR  \
DATE                                                                       
1987-05-20  EIA366_PBRENT366 1987-05-20 03:00:00+00:00   20      5  1987   
1987-05-21  EIA366_PBRENT366 1987-05-21 03:00:00+00:00   21      5  1987   
1987-05-22  EIA366_PBRENT366 1987-05-22 03:00:00+00:00   22      5  1987   
1987-05-25  EIA366_PBRENT366 1987-05-25 03:00:00+00:00   25      5  1987   
1987-05-26  EIA366_PBRENT366 1987-05-26 03:00:00+00:00   26      5  1987   

            price  
DATE               
1987-05-20  18.63  
1987-05-21  18.45  
1987-05-22  18.55  
1987-05-25  18.60  
1987-05-26  18.63  


In [33]:
# Dividir os dados em treinamento e teste aleatoriamente
train_data = data.sample(frac=0.8, random_state=42)
test_data = data.drop(train_data.index)

# Ordenar os dados por data após a amostragem
train_data = train_data.sort_values('DATE')
test_data = test_data.sort_values('DATE')

In [34]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_train_data = scaler.fit_transform(train_data['price'].values.reshape(-1, 1))
scaled_test_data = scaler.transform(test_data['price'].values.reshape(-1, 1))

In [35]:
# Função para criar uma estrutura de dados com janelas de tempo
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - time_step - 1):
        a = dataset[i:(i + time_step), 0]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
    return np.array(dataX), np.array(dataY)

# Criar o conjunto de dados para o modelo LSTM
time_step = 10
X_train, y_train = create_dataset(scaled_train_data, time_step)
X_test, y_test = create_dataset(scaled_test_data, time_step)

# Redimensionar a entrada para [amostras, time steps, features] que é necessário para LSTM
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [36]:
# Criar o modelo LSTM
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(time_step, 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Treinar o modelo
model.fit(X_train, y_train, epochs=50, batch_size=1, verbose=1)

  super().__init__(**kwargs)


Epoch 1/50
[1m9027/9027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 6ms/step - loss: 0.0013
Epoch 2/50
[1m9027/9027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 5ms/step - loss: 2.9582e-04
Epoch 3/50
[1m9027/9027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 5ms/step - loss: 2.2686e-04
Epoch 4/50
[1m9027/9027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 6ms/step - loss: 2.2800e-04
Epoch 5/50
[1m9027/9027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 5ms/step - loss: 2.0932e-04
Epoch 6/50
[1m9027/9027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 5ms/step - loss: 1.9550e-04
Epoch 7/50
[1m9027/9027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 5ms/step - loss: 1.9363e-04
Epoch 8/50
[1m9027/9027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 5ms/step - loss: 1.8438e-04
Epoch 9/50
[1m9027/9027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 6ms/step - loss: 1.7928e-04
Epoch 10/50
[1m9027/9027[0m [

<keras.src.callbacks.history.History at 0x1bbfe8a7cd0>

In [1]:
import joblib

# Salvar o modelo
model.save("modelo_lstm_brent.h5")

# Salvar os dados de treino e teste
joblib.dump((X_train, X_test, y_train, y_test, scaler), "dados_treinamento.pkl")


NameError: name 'model' is not defined