# GRU

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import GRU, Dense, Dropout

# Figure size tuple and plotting theme shared by the notebook
figsize = (15, 9)
plt.style.use("ggplot")

# Show floats with two decimal places whenever pandas renders them
pd.options.display.float_format = lambda x: '%.2f' % x


2024-07-10 14:59:40.342915: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Load the engineered feature table, move "close" to column position 13,
# and discard the "date" column.
df = pd.read_parquet("../data/3-processed/stock-history-features.parquet")
close_column = df.pop("close")
df.insert(13, "close", close_column)
df = df.drop(columns="date")


In [4]:
class StockData:
    """Min-max scaled feature matrix for one ticker, plus windowing helpers.

    Attributes:
        name: The ticker symbol the rows were selected by.
        data: Rows of the input frame whose ``stock`` column equals ``name``,
            with the ``stock`` column removed.
        scaler: ``MinMaxScaler`` fitted on ``data``.
        data_scaled: ``data`` transformed by ``scaler`` (NumPy array).
    """

    def __init__(self, stock, data) -> None:
        """Select the rows for ``stock`` from ``data`` and scale them.

        Args:
            stock: Ticker symbol to match against the ``stock`` column.
            data: DataFrame containing a ``stock`` column and feature columns.

        Raises:
            ValueError: If no row of ``data`` belongs to ``stock``.
        """
        self.name = stock

        # A boolean mask is used instead of building a query string so that
        # symbols containing quotes or other special characters still match.
        selected = data.loc[data["stock"] == stock]
        if selected.empty:
            raise ValueError(f"No rows found for stock {stock!r}.")
        self.data = selected.drop(columns="stock")

        # NOTE(review): the scaler is fitted on every row, including the rows
        # split() later returns as test data, so test-set min/max values leak
        # into the scaling. Confirm whether that is acceptable.
        self.scaler = MinMaxScaler()
        self.data_scaled = self.scaler.fit_transform(self.data)

    def __str__(self) -> str:
        """Return the ticker symbol so the object reads well in labels."""
        return str(self.name)

    def create_sequences(self, data, look_back=30):
        """Build sliding windows and the row that follows each window.

        Args:
            data: Array-like of rows ordered in time.
            look_back: Number of consecutive rows in each window.

        Returns:
            Tuple ``(X, y)`` where ``X[i]`` is ``data[i : i + look_back]`` and
            ``y[i]`` is ``data[i + look_back]``. When ``data`` has
            ``look_back`` rows or fewer, both arrays are empty but keep their
            trailing dimensions.

        Raises:
            ValueError: If ``look_back`` is smaller than 1.
        """
        if look_back < 1:
            raise ValueError("look_back must be at least 1.")

        data = np.asarray(data)
        n_windows = max(len(data) - look_back, 0)
        row_shape = data.shape[1:]

        if n_windows == 0:
            # Keep the window/feature dimensions so callers can still rely
            # on the array rank even when there is nothing to return.
            empty_X = np.empty((0, look_back) + row_shape, dtype=data.dtype)
            empty_y = np.empty((0,) + row_shape, dtype=data.dtype)
            return empty_X, empty_y

        X = np.stack([data[i : i + look_back] for i in range(n_windows)])
        # The target of window i is row i + look_back, i.e. everything after
        # the first look_back rows.
        y = data[look_back:].copy()

        return X, y

    def split(self, train_factor=0.8, look_back=30):
        """Split the scaled rows chronologically and window each part.

        Args:
            train_factor: Fraction of rows (from the start) used for training.
            look_back: Window length forwarded to ``create_sequences``.

        Returns:
            Tuple ``(X_train, y_train, X_test, y_test)``.
        """
        boundary = int(len(self.data_scaled) * train_factor)
        X_train, y_train = self.create_sequences(
            self.data_scaled[:boundary], look_back
        )
        X_test, y_test = self.create_sequences(
            self.data_scaled[boundary:], look_back
        )

        return X_train, y_train, X_test, y_test


In [5]:
# Scaled feature set for the TSLA ticker
TSLA = StockData(stock="TSLA", data=df)

In [7]:
class GRU_model:
    """Three-layer GRU network that predicts one scaled column of a stock.

    The network ends in ``Dense(units=1)``, so the training and test targets
    are reduced to the single ``target`` column of the windows returned by
    ``stock.split()``.

    Attributes:
        stock: Data holder providing ``data``, ``scaler`` and ``split()``.
        units: Width of every GRU layer.
        dropout: Dropout rate applied after every GRU layer.
        target: Name of the column being predicted.
        target_idx: Position of ``target`` among ``stock.data`` columns.
        X_train, y_train, X_test, y_test: Windowed inputs and single-column
            targets.
        model: The compiled Keras model, or ``None`` before ``build_model``.
    """

    def __init__(self, stock, units=64, dropout=0.2, target="close"):
        """Store hyper-parameters and prepare the train/test arrays.

        Args:
            stock: Object exposing ``data`` (DataFrame), ``scaler`` and
                ``split()``.
            units: Number of units in each GRU layer.
            dropout: Dropout rate used after each GRU layer.
            target: Column of ``stock.data`` the model should predict.

        Raises:
            ValueError: If ``target`` is not a column of ``stock.data``.
        """
        self.stock = stock
        self.units = units
        self.dropout = dropout
        self.target = target

        columns = list(stock.data.columns)
        if target not in columns:
            raise ValueError(f"Target column {target!r} not found in stock data.")
        self.target_idx = columns.index(target)

        self.X_train, y_train, self.X_test, y_test = stock.split()
        # The windows carry every feature, but the network has one output:
        # keep only the target column so the loss is not broadcast across
        # unrelated features.
        self.y_train = self._select_target(y_train)
        self.y_test = self._select_target(y_test)
        self.model = None

    def _select_target(self, y):
        """Return ``y`` as an ``(n, 1)`` array holding only the target column."""
        y = np.asarray(y)
        if y.ndim < 2 or y.shape[1] == 1:
            # Already a single series (or empty): just normalise the shape.
            return y.reshape(-1, 1)
        return y[:, [self.target_idx]]

    def _require_model(self, action):
        """Raise ``ValueError`` when ``build_model`` has not been called yet."""
        if self.model is None:
            raise ValueError(
                f"Model is not defined. Call build_model() before {action}."
            )

    def build_model(
        self, input_shape=None, optimizer="rmsprop", loss="mean_squared_error"
    ):
        """Create and compile the network, then print its summary.

        Args:
            input_shape: Shape of one input sample. Defaults to the shape of
                a single training window (``X_train.shape[1:]``).
            optimizer: Optimizer passed to ``compile``.
            loss: Loss passed to ``compile``.
        """
        if input_shape is None:
            input_shape = self.X_train.shape[1:]

        self.model = Sequential()
        self.model.add(GRU(self.units, return_sequences=True, input_shape=input_shape))
        self.model.add(Dropout(self.dropout))
        self.model.add(GRU(self.units, return_sequences=True))
        self.model.add(Dropout(self.dropout))
        self.model.add(GRU(self.units))
        self.model.add(Dropout(self.dropout))
        self.model.add(Dense(units=1))

        self.model.compile(optimizer=optimizer, loss=loss)
        # summary() prints by itself and returns None, so it is not wrapped
        # in print().
        self.model.summary()

    def print_summary(self):
        """Print the model summary.

        Raises:
            ValueError: If the model has not been built.
        """
        self._require_model("printing the summary")
        self.model.summary()

    def train(self, epochs=30, batch_size=32, validation_split=0.2):
        """Fit the model on the training windows with early stopping.

        Training stops once ``val_loss`` has not improved for 7 epochs.

        Args:
            epochs: Maximum number of epochs.
            batch_size: Batch size passed to ``fit``.
            validation_split: Fraction of the training data held out by
                ``fit`` for validation.

        Returns:
            The history object returned by ``fit``.

        Raises:
            ValueError: If the model has not been built.
        """
        self._require_model("training")

        early_stopping = EarlyStopping(monitor="val_loss", patience=7, mode="min")
        history = self.model.fit(
            self.X_train,
            self.y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            callbacks=[early_stopping],
            verbose=1,
        )

        return history

    def evaluate(self):
        """Compute, print and return the loss on the test windows.

        Raises:
            ValueError: If the model has not been built.
        """
        self._require_model("evaluating")

        loss = self.model.evaluate(self.X_test, self.y_test)
        print(f"Test Loss: {loss}")
        return loss

    def predict(self):
        """Predict the target for the test windows in original units.

        Returns:
            Array of shape ``(n_samples, 1)`` with the predictions mapped
            back through the stock's scaler.

        Raises:
            ValueError: If the model has not been built.
        """
        self._require_model("predicting")

        scaled = np.asarray(self.model.predict(self.X_test)).reshape(-1, 1)

        # The scaler was fitted on every feature column, so it rejects a
        # single-column array. Place the predictions in the target column of
        # a full-width array, invert, and read that column back.
        n_features = self.stock.data.shape[1]
        padded = np.zeros((len(scaled), n_features))
        padded[:, self.target_idx] = scaled[:, 0]
        restored = self.stock.scaler.inverse_transform(padded)

        return restored[:, [self.target_idx]]

    def plot_history(self, history):
        """Plot training and validation loss per epoch.

        Args:
            history: Object returned by ``train`` whose ``history`` dict
                contains ``"loss"`` and ``"val_loss"``.
        """
        plt.plot(history.history["loss"], label="Train Loss")
        plt.plot(history.history["val_loss"], label="Validation Loss")
        plt.title(f"{self.stock} Model Loss")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.legend()
        plt.show()
