# Predicción de Stocks con LSTM Bayesiano (con Keras)

### Librerías

In [2]:
import pandas as pd
import numpy as np
import mysql.connector
import plotly.express as px

### llamado de datos

In [3]:
tabla='2828HK'
conn = mysql.connector.connect(user='root', password='', host='localhost', database='stock_exchange')
stock = pd.DataFrame(pd.read_sql("SELECT * FROM "+tabla, conn))
conn.close()
stock.head(5)



Unnamed: 0,index,Date,Open,High,Low,Close,Volume
0,0,2019-01-02,102.800003,102.800003,99.25,99.5,12118897
1,1,2019-01-03,99.949997,100.400002,98.849998,99.599998,3562140
2,2,2019-01-04,99.0,101.699997,99.0,101.300003,2854212
3,3,2019-01-07,102.400002,103.300003,102.0,102.300003,3527707
4,4,2019-01-08,102.699997,103.199997,102.0,102.400002,4159683


### Selección y Preparación de Datos

In [4]:
Selecccionadas=['Date', 'Open']
stock_2=stock[Selecccionadas]
stock_2=stock_2.loc[(stock_2["Date"] >= '2022-06-01')]
stock_2.head(5)

Unnamed: 0,Date,Open
840,2022-06-01,75.300003
841,2022-06-02,73.800003
842,2022-06-06,74.139999
843,2022-06-07,76.139999
844,2022-06-08,76.400002


In [5]:
stock_2['Open']=np.log(stock_2['Open'])
stock_2.head(5)

Unnamed: 0,Date,Open
840,2022-06-01,4.32148
841,2022-06-02,4.301359
842,2022-06-06,4.305955
843,2022-06-07,4.332574
844,2022-06-08,4.335983


In [6]:
plot_length = 150
plot_df = stock_2.copy(deep=True).iloc[:plot_length]
plot_df['Date'] = plot_df['Date']

fig = px.line(plot_df,
              x="Date",
              y="Open",
              title="Log de Open Stock vs Time")
fig.update_layout({
    'plot_bgcolor': 'rgba(0,0,0,0)',
    'paper_bgcolor': 'rgba(0,0,0,0)',
    'yaxis.color' : 'white',
    'xaxis.color' : 'white',
    'title_font_color' : 'white'
})
fig.update_xaxes(showgrid=False, zeroline=True)
fig.update_yaxes(showgrid=False, zeroline=True)
fig.show()

### División entre Entrenamiento y Prueba

In [7]:
from sklearn.preprocessing import MinMaxScaler

def create_sliding_window(data, sequence_length, stride=1):
    X_list, y_list = [], []
    for i in range(len(data)):
      if (i + sequence_length) < len(data):
        X_list.append(data.iloc[i:i+sequence_length:stride, :].values)
        y_list.append(data.iloc[i+sequence_length, -1])
    return np.array(X_list), np.array(y_list)

train_split = 0.7
n_train = int(train_split * len(stock_2))
n_test = len(stock_2) - n_train

features = ['Open']
feature_array = stock_2[features].values

# Fit Scaler only on Training features
feature_scaler = MinMaxScaler()
feature_scaler.fit(feature_array[:n_train])
# Fit Scaler only on Training target values
target_scaler = MinMaxScaler()
target_scaler.fit(feature_array[:n_train, -1].reshape(-1, 1))

# Transfom on both Training and Test data
scaled_array = pd.DataFrame(feature_scaler.transform(feature_array),
                            columns=features)

sequence_length = 10
X, y = create_sliding_window(scaled_array, 
                             sequence_length)

X_train = X[:n_train]
y_train = y[:n_train]

X_test = X[n_train:]
y_test = y[n_train:]

### Arquitectura del Modelo Bayesiano LSTM

In [None]:
class BayesianLSTM(nn.Module):
    
    def __init__(self, n_features, output_length, batch_size):

        super(BayesianLSTM, self).__init__()

        self.batch_size = batch_size # user-defined

        self.hidden_size_1 = 128 # number of encoder cells (from paper)
        self.hidden_size_2 = 32 # number of decoder cells (from paper)
        self.stacked_layers = 2 # number of (stacked) LSTM layers for each stage
        self.dropout_probability = 0.5 # (Girar) arbitrary value (the paper suggests that performance is generally stable across all ranges)

        self.lstm1 = nn.LSTM(n_features, 
                             self.hidden_size_1, 
                             num_layers=self.stacked_layers,
                             batch_first=True)
        self.lstm2 = nn.LSTM(self.hidden_size_1,
                             self.hidden_size_2,
                             num_layers=self.stacked_layers,
                             batch_first=True)
        
        self.fc = nn.Linear(self.hidden_size_2, output_length) #red densa
        self.loss_fn = nn.MSELoss()
        
    def forward(self, x):
        batch_size, seq_len, _ = x.size()

        hidden = self.init_hidden1(batch_size)
        output, _ = self.lstm1(x, hidden)
        output = F.dropout(output, p=self.dropout_probability, training=True)
        state = self.init_hidden2(batch_size)
        output, state = self.lstm2(output, state)
        output = F.dropout(output, p=self.dropout_probability, training=True)
        output = output[:, -1, :] # take the last decoder cell's outputs
        y_pred = self.fc(output)
        #y_predic(gaus)=.....
        return y_pred # si se asigna una distribución gaussina
        
    def init_hidden1(self, batch_size):
        hidden_state = Variable(torch.zeros(self.stacked_layers, batch_size, self.hidden_size_1))
        cell_state = Variable(torch.zeros(self.stacked_layers, batch_size, self.hidden_size_1))
        return hidden_state, cell_state
    
    def init_hidden2(self, batch_size):
        hidden_state = Variable(torch.zeros(self.stacked_layers, batch_size, self.hidden_size_2))
        cell_state = Variable(torch.zeros(self.stacked_layers, batch_size, self.hidden_size_2))
        return hidden_state, cell_state
    
    def loss(self, pred, truth):
        return self.loss_fn(pred, truth)

    def predict(self, X):
        return self(torch.tensor(X, dtype=torch.float32)).view(-1).detach().numpy()

### Con TensorFlow

In [12]:
import tensorflow as tf

# Define the model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(128)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Fit the model
model.fit(X_train, y_train, epochs=5)

TypeError: 'int' object is not iterable

In [14]:
from tensorflow.keras import layers
model=tf.keras.models.Sequential(
    [
        layers.Dense(20, activation='relu'),
        layers.Dense(1, activation='relu'),
        layers.Dense(1)
    ]
)

In [15]:
model.compile(loss=tf.keras.losses.Huber(), optimizer='adam', metrics='mse')

In [16]:
history=model.fit(X_train,  epochs=20, verbose=1)

Epoch 1/20


ValueError: in user code:

    File "C:\Users\rodrigo.gomez\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\rodrigo.gomez\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\rodrigo.gomez\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\rodrigo.gomez\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 1025, in train_step
        self._validate_target_and_loss(y, loss)
    File "C:\Users\rodrigo.gomez\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 979, in _validate_target_and_loss
        raise ValueError(

    ValueError: Target data is missing. Your model was compiled with loss=<keras.losses.Huber object at 0x000002C9C8433D30>, and therefore expects target data to be provided in `fit()`.
