MODEL GRU - GENERACIÓ DE PREUS

In [None]:
import os
import json
import shutil
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Bidirectional, GRU, Dense, Dropout, Input, Layer
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import itertools
import random
import keras_tuner as kt

# Helper that turns a time-ordered table into sliding-window sequences
def crear_sequencies(X, y, look_back):
    """Build sliding windows of ``look_back`` consecutive rows and their targets.

    Window ``i`` contains rows ``X[i : i + look_back]`` and is paired with
    ``y[i + look_back]``, i.e. the target of the row that immediately follows
    the window.

    Args:
        X: Array-like of rows ordered in time (first axis is the row index).
        y: Array-like of targets aligned row-by-row with ``X``.
        look_back: Number of consecutive rows in each window.

    Returns:
        A tuple ``(X_seq, y_seq)`` of NumPy arrays. ``X_seq`` has shape
        ``(n_windows, look_back, *X.shape[1:])`` and ``y_seq`` has
        ``n_windows`` entries, where ``n_windows = max(len(X) - look_back, 0)``.
        When there are not enough rows for a single window the arrays are
        empty but keep that shape, so callers can still read
        ``X_seq.shape[2]``.

    Raises:
        IndexError: If ``y`` has fewer rows than the windows require.
    """
    rows = np.asarray(X)
    targets = np.asarray(y)
    n_windows = max(len(rows) - look_back, 0)

    # Row i of window_idx holds the row numbers of window i; gathering with a
    # 2-D integer index keeps the (n_windows, look_back, ...) shape even when
    # n_windows is 0.
    window_idx = np.arange(n_windows)[:, None] + np.arange(look_back)
    # Integer-array indexing (rather than a slice) raises IndexError when y is
    # too short instead of silently returning fewer targets than windows.
    target_idx = np.arange(look_back, look_back + n_windows)

    return rows[window_idx], targets[target_idx]

# Simple attention layer
class Attention(tf.keras.layers.Layer):
    """Collapse axis 1 of the input into a softmax-weighted sum.

    A single trainable column vector ``attn_W`` of shape ``(last_dim, 1)``
    scores every position along axis 1; the scores are normalised with a
    softmax over that axis and used to weight-sum the inputs.

    NOTE(review): the layer is applied to the output of a GRU with
    ``return_sequences=True``, so inputs are presumably
    ``(batch, timesteps, features)`` and the result ``(batch, features)``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the scoring vector sized to the last input dimension."""
        n_channels = input_shape[-1]
        self.W = self.add_weight(
            name="attn_W",
            shape=(n_channels, 1),
            initializer="glorot_uniform",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return the attention-weighted sum of ``inputs`` over axis 1."""
        # One scalar score per position: tanh-squashed inputs projected on W.
        squashed = tf.tanh(inputs)
        alpha = tf.nn.softmax(tf.matmul(squashed, self.W), axis=1)
        # alpha has a trailing dimension of 1 and broadcasts across the last
        # axis of the inputs before the positions are summed out.
        return tf.reduce_sum(alpha * inputs, axis=1)

# Model factory used by KerasTuner
def build_model(hp, look_back, n_features):
    """Build and compile one candidate Conv1D -> BiGRU -> Attention regressor.

    Args:
        hp: KerasTuner hyperparameter container; every tunable value is drawn
            from it through ``hp.Choice`` / ``hp.Float``.
        look_back: Number of timesteps in each input window.
        n_features: Number of features per timestep.

    Returns:
        A compiled ``Sequential`` model with a single linear output, trained
        with MSE loss and reporting MAE.
    """
    # Search space, registered in a fixed order.
    n_filters   = hp.Choice('conv_filters', [16, 32, 64])
    kernel_len  = hp.Choice('kernel_size', [3, 5])
    n_gru       = hp.Choice('gru_units', [32, 64, 128])
    drop        = hp.Float('dropout_rate', 0.1, 0.4, step=0.1)
    l2_strength = hp.Choice('l2_reg', [1e-4, 1e-3])
    n_dense     = hp.Choice('dense_units', [16, 32, 64])
    lr          = hp.Float('learning_rate', 1e-4, 1e-2, sampling='log')

    # The same dropout rate is used for the GRU's recurrent dropout and for
    # both Dropout layers; the same L2 factor regularises the GRU and the
    # hidden Dense kernels.
    recurrent_cell = GRU(
        units=n_gru,
        return_sequences=True,  # keep every timestep for the attention layer
        recurrent_dropout=drop,
        kernel_regularizer=regularizers.l2(l2_strength),
    )

    stack = [
        Input(shape=(look_back, n_features)),
        # Convolutional front-end followed by pooling with pool_size=2
        Conv1D(filters=n_filters,
               kernel_size=kernel_len,
               activation='relu',
               padding='same'),
        MaxPooling1D(pool_size=2),
        # Bidirectional GRU over the pooled sequence
        Bidirectional(recurrent_cell),
        # Attention pooling over axis 1, then dropout
        Attention(),
        Dropout(drop),
        # Hidden dense layer
        Dense(n_dense,
              activation='relu',
              kernel_regularizer=regularizers.l2(l2_strength)),
        Dropout(drop),
        # Output layer: one linear unit
        Dense(1, activation='linear'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)

    # Adam with the tuned learning rate
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss='mse',
        metrics=['mae'],
    )
    return model

# Files to process: every dataset is named "<asset>_Stock_Price_output.csv"
_DATASET_SUFFIX = "_Stock_Price_output.csv"
dataset_files = [
    asset + _DATASET_SUFFIX
    for asset in (
        "Amazon",
        "Euro_Stoxx_50",
        "Google",
        "Hang_Seng",
        "IBEX_35",
        "Indra",
        "P&G",
        "S&P500",
    )
]

# Folder that holds the preprocessed CSV files (absolute, machine-specific path)
BASE_PATH = r"C:\Users\jesus\Desktop\TFG\GitHUb\TFG_PredictStock\Conjunt de dades Preprocessades\Datasets"

# Length of each input window, in rows (20 days according to the original note)
look_back = 20

# Per-dataset pipeline: load the CSV, split it chronologically, scale, build
# sliding windows, tune hyperparameters with KerasTuner, keep training the
# best model, score it on validation and test, and save the artefacts.

# Input columns and the column to predict; identical for every dataset, so
# they are defined once instead of on every iteration.
features = [
    'Open', 'High', 'Low', 'Volume',
    'EMA_7', 'EMA_40', 'MACD', 'Signal_Line',
    'MACD_Hist', 'RSI', 'ATR'
]
target = 'Close'

# Scratch directory for KerasTuner trials and root folder for saved results.
tuner_dir    = "hyperparam_tuning"
results_base = "resultats_GRU_Attention"

for file_name in dataset_files:
    print(f"\n\n====== Processant {file_name} ======\n")

    # Each dataset builds a fresh batch of trial models; reset Keras' global
    # state first so it does not keep growing across datasets.
    tf.keras.backend.clear_session()

    # Load the data and order it by date
    file_path = os.path.join(BASE_PATH, file_name)
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'], dayfirst=False)
    df.sort_values('Date', inplace=True)
    df.reset_index(drop=True, inplace=True)

    # Drop rows with NaNs in any feature or in the target
    df.dropna(subset=features + [target], inplace=True)
    df.reset_index(drop=True, inplace=True)

    # Chronological split: first 70% train, next 15% validation, rest test
    n_total    = len(df)
    train_size = int(n_total * 0.70)
    val_size   = int(n_total * 0.15)

    X_raw = df[features].values
    y_raw = df[target].values

    X_train_raw = X_raw[:train_size]
    y_train_raw = y_raw[:train_size]

    X_val_raw   = X_raw[train_size : train_size + val_size]
    y_val_raw   = y_raw[train_size : train_size + val_size]

    X_test_raw  = X_raw[train_size + val_size :]
    y_test_raw  = y_raw[train_size + val_size :]

    # Scale X and y; both scalers are fitted on the training split only and
    # then applied to validation and test
    scaler_X = StandardScaler()
    X_train  = scaler_X.fit_transform(X_train_raw)
    X_val    = scaler_X.transform(X_val_raw)
    X_test   = scaler_X.transform(X_test_raw)

    scaler_y = StandardScaler()
    y_train  = scaler_y.fit_transform(y_train_raw.reshape(-1,1)).ravel()
    y_val    = scaler_y.transform(y_val_raw.reshape(-1,1)).ravel()
    y_test   = scaler_y.transform(y_test_raw.reshape(-1,1)).ravel()

    # Build the sliding-window sequences, separately for each split
    X_train_seq, y_train_seq = crear_sequencies(X_train, y_train, look_back)
    X_val_seq,   y_val_seq   = crear_sequencies(X_val,   y_val,   look_back)
    X_test_seq,  y_test_seq  = crear_sequencies(X_test,  y_test,  look_back)

    # A split with no more than `look_back` rows produces zero windows, which
    # makes training or scoring impossible. Skip the dataset with a clear
    # message instead of failing below on an opaque shape error.
    if min(len(X_train_seq), len(X_val_seq), len(X_test_seq)) == 0:
        print(f"  ✗ Dades insuficients per a look_back={look_back}; s'omet {file_name}")
        continue

    n_features = X_train_seq.shape[2]

    # Hyperparameter search with KerasTuner
    dataset_name  = file_name.replace(".csv", "")
    tuner_project = dataset_name

    tuner = kt.RandomSearch(
        lambda hp: build_model(hp, look_back, n_features),
        objective='val_loss',
        max_trials=20,
        executions_per_trial=1,
        directory=tuner_dir,
        project_name=tuner_project,
        # The project folder is deleted after a successful run, so one that
        # already exists comes from an interrupted run; start clean rather
        # than silently resuming its trials.
        overwrite=True
    )

    tuner.search_space_summary()

    early_stop_tuner = EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True
    )

    tuner.search(
        X_train_seq, y_train_seq,
        epochs=50,
        batch_size=32,
        validation_data=(X_val_seq, y_val_seq),
        callbacks=[early_stop_tuner],
        verbose=1
    )

    tuner.results_summary()

    # Retrieve the best hyperparameters and the best model
    best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
    best_hp_values = best_hp.values

    best_model = tuner.get_best_models(num_models=1)[0]

    # Continue training the best model on the same data, with a longer
    # epoch budget and more patience than during the search
    fine_tune_stop = EarlyStopping(
        monitor='val_loss',
        patience=8,
        restore_best_weights=True
    )

    best_model.fit(
        X_train_seq, y_train_seq,
        epochs=100,
        batch_size=32,
        validation_data=(X_val_seq, y_val_seq),
        callbacks=[fine_tune_stop],
        verbose=1
    )

    # Evaluate on validation and test. Predictions and targets are mapped
    # back through scaler_y so the metrics are in the target's original units.
    # Validation
    y_val_pred_scaled = best_model.predict(X_val_seq)
    y_val_pred        = scaler_y.inverse_transform(y_val_pred_scaled).ravel()
    y_val_true        = scaler_y.inverse_transform(y_val_seq.reshape(-1,1)).ravel()

    rmse_val = np.sqrt(mean_squared_error(y_val_true, y_val_pred))
    mae_val = mean_absolute_error(y_val_true, y_val_pred)
    r2_val  = r2_score(y_val_true, y_val_pred)

    # Test
    y_test_pred_scaled = best_model.predict(X_test_seq)
    y_test_pred        = scaler_y.inverse_transform(y_test_pred_scaled).ravel()
    y_test_true        = scaler_y.inverse_transform(y_test_seq.reshape(-1,1)).ravel()

    rmse_test = np.sqrt(mean_squared_error(y_test_true, y_test_pred))
    mae_test = mean_absolute_error(y_test_true, y_test_pred)
    r2_test  = r2_score(y_test_true, y_test_pred)

    print(f"\nVALIDATION → {file_name} RMSE: {rmse_val:.4f}, MAE: {mae_val:.4f}, R²: {r2_val:.4f}")
    print(f"TEST       → {file_name} RMSE: {rmse_test:.4f}, MAE: {mae_test:.4f}, R²: {r2_test:.4f}")

    # Create the results folder for this dataset
    model_folder = os.path.join(results_base, dataset_name.lower())
    os.makedirs(model_folder, exist_ok=True)
    print(f"\nCarpeta de resultats creada a: {model_folder}")

    # Save the best hyperparameters as JSON
    hp_json_path = os.path.join(model_folder, "best_hyperparameters.json")
    with open(hp_json_path, "w") as f:
        json.dump(best_hp_values, f, indent=2)
    print(f"  ✓ Hiperparàmetres guardats a: {hp_json_path}")

    # Save only the final weights (HDF5, .weights.h5)
    weights_path = os.path.join(model_folder, "best_model_weights.weights.h5")
    best_model.save_weights(weights_path)
    print(f"  ✓ Pesos guardats a: {weights_path}")

    # Save the final metrics (validation + test) as a one-row CSV
    metrics = pd.DataFrame({
        "Dataset":       [dataset_name],
        "Val_MAE":       [mae_val],
        "Val_RMSE":      [rmse_val],
        "Val_R2":        [r2_val],
        "Test_MAE":      [mae_test],
        "Test_RMSE":     [rmse_test],
        "Test_R2":       [r2_test]
    })
    metrics_csv = os.path.join(model_folder, "metrics_summary.csv")
    metrics.to_csv(metrics_csv, index=False)
    print(f"  ✓ Mètriques guardades a: {metrics_csv}")

    # Delete the KerasTuner project folder (all trials)
    tuner_path = os.path.join(tuner_dir, tuner_project)
    if os.path.isdir(tuner_path):
        shutil.rmtree(tuner_path)
        print(f"  ✗ Carpeta de tuning esborrada: {tuner_path}")

print("\n\n===== Procés complet finalitzat per a tots els datasets =====")


Trial 20 Complete [00h 00m 13s]
val_loss: 0.06181293725967407

Best val_loss So Far: 0.018125729635357857
Total elapsed time: 00h 46m 47s
Results summary
Results in hyperparam_tuning\S&P500_Stock_Price_output
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 02 summary
Hyperparameters:
conv_filters: 32
kernel_size: 3
gru_units: 32
dropout_rate: 0.1
l2_reg: 0.001
dense_units: 64
learning_rate: 0.007487658008462713
Score: 0.018125729635357857

Trial 15 summary
Hyperparameters:
conv_filters: 16
kernel_size: 3
gru_units: 32
dropout_rate: 0.2
l2_reg: 0.001
dense_units: 64
learning_rate: 0.007783541188537225
Score: 0.023036755621433258

Trial 07 summary
Hyperparameters:
conv_filters: 16
kernel_size: 3
gru_units: 128
dropout_rate: 0.1
l2_reg: 0.0001
dense_units: 16
learning_rate: 0.0069405535544335905
Score: 0.02695753052830696

Trial 13 summary
Hyperparameters:
conv_filters: 32
kernel_size: 5
gru_units: 64
dropout_rate: 0.30000000000000004
l2_reg: 0.001
dense_units: 6

  saveable.load_own_variables(weights_store.get(inner_path))


[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - loss: 0.0689 - mae: 0.1880 - val_loss: 0.0774 - val_mae: 0.2102
Epoch 2/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0292 - mae: 0.1147 - val_loss: 0.0240 - val_mae: 0.1168
Epoch 3/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0325 - mae: 0.1264 - val_loss: 0.0593 - val_mae: 0.1792
Epoch 4/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0307 - mae: 0.1167 - val_loss: 0.0368 - val_mae: 0.1364
Epoch 5/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0286 - mae: 0.1150 - val_loss: 0.0792 - val_mae: 0.2109
Epoch 6/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0328 - mae: 0.1175 - val_loss: 0.0523 - val_mae: 0.1663
Epoch 7/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0352 - 