## **This notebook aims to find the best hyperparameters for AAPL, 10y**

In [1]:
import pandas as pd
import numpy as np
import os
import time
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Use GPU 0 in first notebook

In [3]:
project_dir = "/home/jupyter-tfg2425paula/prediction_project_v3"
os.chdir(project_dir)

clean_data_dir = os.path.join(project_dir, "00_data/clean")
horizontal_data_dir = os.path.join(project_dir, "00_data/horizontal_structure")
results_dir = os.path.join(project_dir, "02_results")
plots_dir = os.path.join(project_dir, "03_plots")
pca_data_dir = os.path.join(project_dir, "00_data/pca")

### **GRU Model**

In [4]:
class GRU3DClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout):
        super(GRU3DClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):

        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :]) 
        # return self.sigmoid(out)
        return out

### **LSTM Model**

In [5]:
class StockPriceLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1, dropout=0.0):
        super(StockPriceLSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers, 
                            batch_first=True, dropout=dropout)
    
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        batch_size = x.size(0)  # Get the batch size dynamically

        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(x.device)  # (num_layers, batch_size, hidden_dim)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(x.device)  # (num_layers, batch_size, hidden_dim)
        
        out, _ = self.lstm(x, (h0, c0))

        out = self.fc(out[:, -1, :]) 
        # out = self.sigmoid(out)
        return out
    

### **Choose data types**
Okay, we know what suits better AAPL 10y data

Parameter tuning

In [8]:
thresholds = [0.3, 0.35, 0.4, 0.45, 0.5]
thresholds = [0.5]
learning_rates = [0.005, 0.008, 0.009, 0.01]
learning_rates = [0.01]
num_epochs_list = [100, 200]
num_epochs_list = [100]
batch_sizes = [16, 32]
batch_sizes = [16]
prediction_thresholds = [0.35, 0.4, 0.45, 0.5]
prediction_thresholds = [0.5]

#### **Model and Hyperparameters**

In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

hidden_size = 64  
output_size = 2  
num_layers = 2
dropout = 0.2

criterion = nn.CrossEntropyLoss()

In [10]:
model_types = ["lstm", "gru"]   

#### **Last data modifications**

In [6]:
def reshape_remove_characters(df):

    X = np.array([np.stack(row) for row in df.drop(columns=['Target']).values])
    y = df['Target'].values

    smote = SMOTE(random_state=42)
    n_samples, timesteps, n_features = X.shape
    X_flat = X.reshape((n_samples, timesteps * n_features))
    X_flat = np.where(X_flat == 'ç', 0, X_flat)

    X_resampled = X_flat.reshape((-1, timesteps, n_features))
    
    return X_resampled, y

In [7]:
def combine_stocks_pkl_df(processing, security_type, period, window_size, project_dir, stocks):

    X_list = []
    y_list = []

    for stock in stocks:
        initial_data_dir = os.path.join(project_dir, f"00_data/{processing}") 

        # 1) Load original data (info only)
        filename = f"{security_type}/{stock}/{period}_data.csv"
        original_input_filepath = os.path.join(initial_data_dir, filename)
        original_data = pd.read_csv(original_input_filepath)

        print(f"----- Appending stock: {stock}, period: {period}, data_type: {security_type} -----")

        # 2) Load the preprocessed data
        pkl_filename = f"{processing}/{security_type}/{stock}/{period}_{window_size}_data.pkl"
        input_filepath = os.path.join(horizontal_data_dir, pkl_filename)
        input_df = pd.read_pickle(input_filepath)

        # 3) Reshape
        X_resampled, y_resampled = reshape_remove_characters(input_df)

        # Store in lists
        X_list.append(X_resampled)
        y_list.append(y_resampled)

    # Concatenate all stocks along the sample dimension
    X_combined = np.concatenate(X_list, axis=0)  # Shape: (total_samples, num_features, window_size)
    y_combined = np.concatenate(y_list, axis=0)  # Shape: (total_samples,)

    # Print shapes to verify
    print("Final X shape:", X_combined.shape)
    print("Final y shape:", y_combined.shape)
    
    return X_combined, y_combined

### **Evaluation function**

In [8]:
def evaluate_rolling_unchanged_model_threshold(
    model, 
    X, 
    y, 
    criterion, 
    optimizer, 
    device, 
    train_size, 
    batch_size, 
    num_epochs, 
    lower_threshold
):
    """
    Evaluate a PyTorch model using a rolling prediction approach for time series,
    training the model only once on the initial training set. For each time step
    after train_size, the model makes a prediction without further parameter updates.
    Only predicts +1 or -1 if the probability of class 1 is above/below given thresholds;
    otherwise, predicts 0. Accuracy is computed only on nonzero predictions.

    Args:
        model:          PyTorch model to evaluate.
        X:              Feature data (numpy array).
        y:              Target data (numpy array).
        criterion:      Loss function (e.g., CrossEntropyLoss).
        optimizer:      Optimizer (e.g., Adam).
        device:         Device for computation (CPU or GPU).
        train_size:     Initial size of the training data (int or float).
                        If < 1, treated as fraction of total length.
        batch_size:     Batch size for training.
        num_epochs:     Number of epochs for initial training only.
        lower_threshold: Probability threshold below which model predicts -1.
        upper_threshold: Probability threshold above which model predicts +1.

    Returns:
        dict: Dictionary with the following keys:
            - "rolling_predictions": All predictions (-1, 0, +1) across the test period.
            - "rolling_targets": Corresponding true targets in [-1, +1].
            - "filtered_predictions": Nonzero predictions only.
            - "filtered_targets": Targets corresponding to nonzero predictions.
            - "accuracy_nonzero": Accuracy computed only on nonzero predictions.
    """

    # Convert X, y to tensors
    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.long)

    # Determine initial training set size
    if train_size < 1.0:
        lower_bound = int(train_size * len(X))
    else:
        lower_bound = train_size

    # -------------------------
    # 1) SINGLE TRAINING PHASE
    # -------------------------
    model.to(device)
    model.train()
    
    X_train = X[:lower_bound].to(device)
    y_train = y[:lower_bound].to(device)

    train_dataset = TensorDataset(X_train, y_train)
    trainloader = DataLoader(
        train_dataset, 
        batch_size=batch_size, 
        shuffle=False,         # Keep False if order matters; True for better generalization
        # num_workers=4,         # Adjust based on your CPU cores
        # pin_memory=True,       # Speeds up transfer if using GPUs
        drop_last=False        # Ensure the last batch is included
    )

    epoch_train_losses = []
    for epoch in range(num_epochs):
        # torch.cuda.empty_cache()
        epoch_loss = 0.0
        for X_batch, y_batch in trainloader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            pred_y = model(X_batch)   # [batch_size, num_classes]
            loss = criterion(pred_y, y_batch)
            loss.backward()

            # Gradient clipping (optional)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            epoch_loss += loss.item()
               
        if (epoch + 1) % 5 == 0 or epoch == num_epochs - 1:
            print(f"[Train] Epoch {epoch+1}/{num_epochs}, Loss={epoch_loss/len(trainloader):.4f}")

        epoch_train_losses.append(epoch_loss/len(trainloader))
        
    loss_decrease_percentage = ((epoch_train_losses[-1] - epoch_train_losses[0]) / epoch_train_losses[0]) * 100
    # ---------------------------------
    # 2) ROLLING PREDICTIONS, NO UPDATE
    # ---------------------------------
    model.eval()

    rolling_predictions = []
    rolling_targets     = []

    for i in range(lower_bound, len(X)):
        # Single-step "test" sample
        X_test = X[i:i+1].to(device)  # shape: (1, num_features)
        y_test = y[i:i+1].to(device)  # shape: (1, )

        with torch.no_grad():
            # Forward pass
            pred_y = model(X_test)  # [1, num_classes]
            probabilities = torch.softmax(pred_y, dim=1).cpu().numpy()  # shape: (1, 2)
            prob_class_1  = probabilities[:, 1]  # shape: (1,)

            # Threshold-based logic
            # Initialize all predictions to 0
            pred_classes = np.zeros_like(prob_class_1)
            # Predict -1 if prob < lower_threshold
            pred_classes[prob_class_1 < lower_threshold] = -1
            # Predict +1 if prob > upper_threshold
            pred_classes[prob_class_1 > 1-lower_threshold] = 1

        rolling_predictions.append(pred_classes[0])  # scalar
        rolling_targets.append(y_test.item())

    rolling_predictions = np.array(rolling_predictions)
    rolling_targets = np.array(rolling_targets).astype(int)

    # Convert any 0-labeled targets to -1 if your original data is in [-1, +1]
    # (Sometimes y might be {0,1} or {-1, +1}; adapt as needed.)
    rolling_targets[rolling_targets == 0] = -1

    # Filter out zero predictions
    nonzero_mask = rolling_predictions != 0
    filtered_preds = rolling_predictions[nonzero_mask]
    filtered_targets = rolling_targets[nonzero_mask]

    if len(filtered_preds) == 0:
        accuracy_nonzero = None
        print("No nonzero predictions, cannot compute thresholded accuracy.")
    else:
        accuracy_nonzero = accuracy_score(filtered_targets, filtered_preds)
        print(f"Accuracy on Nonzero Predictions: {accuracy_nonzero:.4f}")

    return {
        "rolling_predictions": rolling_predictions,
        "rolling_targets": rolling_targets,
        "filtered_predictions": filtered_preds,
        "filtered_targets": filtered_targets,
        "accuracy_nonzero": accuracy_nonzero,
        "loss_decrease_percentage": loss_decrease_percentage,
        "final_train_loss": epoch_train_losses[-1] 
    }

### **4th Type of comparison:**

Hyperparameters finetuning, for AAPL 10y

In [9]:
processing = "clean"
stocks = ['AAPL', 'MSFT', 'AMZN', 'NVDA', 'SPX']
security_type = "technical"
period = "10y"

possible_train_size = 95
batch_size = 32
num_epochs = 100
window_size = 100

# 3) Reshape
X_combined, y_combined = combine_stocks_pkl_df(processing, security_type, period, window_size, project_dir, stocks)

input_size = X_combined.shape[2]
train_size = int(X_combined.shape[0] * possible_train_size / 100)
test_size = X_combined.shape[0] - train_size

----- Appending stock: AAPL, period: 10y, data_type: technical -----
----- Appending stock: MSFT, period: 10y, data_type: technical -----
----- Appending stock: AMZN, period: 10y, data_type: technical -----
----- Appending stock: NVDA, period: 10y, data_type: technical -----
----- Appending stock: SPX, period: 10y, data_type: technical -----
Final X shape: (12090, 92, 100)
Final y shape: (12090,)


### **Iterate over hyperparameters using OPTUNA**

In [12]:
def evaluate_rolling_unchanged_model_threshold(
    model, 
    X, 
    y, 
    criterion, 
    optimizer, 
    device, 
    train_size, 
    batch_size, 
    num_epochs, 
    lower_threshold
):
    """
    Evaluate a PyTorch model using a rolling prediction approach for time series,
    training the model only once on the initial training set. For each time step
    after train_size, the model makes a prediction without further parameter updates.
    Only predicts +1 or -1 if the probability of class 1 is above/below given thresholds;
    otherwise, predicts 0. Accuracy is computed only on nonzero predictions.

    Args:
        model:          PyTorch model to evaluate.
        X:              Feature data (numpy array).
        y:              Target data (numpy array).
        criterion:      Loss function (e.g., CrossEntropyLoss).
        optimizer:      Optimizer (e.g., Adam).
        device:         Device for computation (CPU or GPU).
        train_size:     Initial size of the training data (int or float).
                        If < 1, treated as fraction of total length.
        batch_size:     Batch size for training.
        num_epochs:     Number of epochs for initial training only.
        lower_threshold: Probability threshold below which model predicts -1.
        upper_threshold: Probability threshold above which model predicts +1.

    Returns:
        dict: Dictionary with the following keys:
            - "rolling_predictions": All predictions (-1, 0, +1) across the test period.
            - "rolling_targets": Corresponding true targets in [-1, +1].
            - "filtered_predictions": Nonzero predictions only.
            - "filtered_targets": Targets corresponding to nonzero predictions.
            - "accuracy_nonzero": Accuracy computed only on nonzero predictions.
    """

    # Convert X, y to tensors
    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.long)

    # Determine initial training set size
    if train_size < 1.0:
        lower_bound = int(train_size * len(X))
    else:
        lower_bound = train_size

    # -------------------------
    # 1) SINGLE TRAINING PHASE
    # -------------------------
    model.to(device)
    model.train()
    
    X_train = X[:lower_bound].to(device)
    y_train = y[:lower_bound].to(device)

    train_dataset = TensorDataset(X_train, y_train)
    trainloader = DataLoader(
        train_dataset, 
        batch_size=batch_size, 
        shuffle=False,         # Keep False if order matters; True for better generalization
        # num_workers=4,         # Adjust based on your CPU cores
        # pin_memory=True,       # Speeds up transfer if using GPUs
        drop_last=False        # Ensure the last batch is included
    )

    epoch_train_losses = []
    for epoch in range(num_epochs):
        # torch.cuda.empty_cache()
        epoch_loss = 0.0
        for X_batch, y_batch in trainloader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            pred_y = model(X_batch)   # [batch_size, num_classes]
            loss = criterion(pred_y, y_batch)
            loss.backward()

            # Gradient clipping (optional)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            epoch_loss += loss.item()
               
        if (epoch + 1) % 5 == 0 or epoch == num_epochs - 1:
            print(f"[Train] Epoch {epoch+1}/{num_epochs}, Loss={epoch_loss/len(trainloader):.4f}")

        epoch_train_losses.append(epoch_loss/len(trainloader))
        
    loss_decrease_percentage = ((epoch_train_losses[-1] - epoch_train_losses[0]) / epoch_train_losses[0]) * 100
    # ---------------------------------
    # 2) ROLLING PREDICTIONS, NO UPDATE
    # ---------------------------------
    model.eval()

    rolling_predictions = []
    rolling_targets     = []

    for i in range(lower_bound, len(X)):
        # Single-step "test" sample
        X_test = X[i:i+1].to(device)  # shape: (1, num_features)
        y_test = y[i:i+1].to(device)  # shape: (1, )

        with torch.no_grad():
            # Forward pass
            pred_y = model(X_test)  # [1, num_classes]
            probabilities = torch.softmax(pred_y, dim=1).cpu().numpy()  # shape: (1, 2)
            prob_class_1  = probabilities[:, 1]  # shape: (1,)

            # Threshold-based logic
            # Initialize all predictions to 0
            pred_classes = np.zeros_like(prob_class_1)
            # Predict -1 if prob < lower_threshold
            pred_classes[prob_class_1 < lower_threshold] = -1
            # Predict +1 if prob > upper_threshold
            pred_classes[prob_class_1 > 1-lower_threshold] = 1

        rolling_predictions.append(pred_classes[0])  # scalar
        rolling_targets.append(y_test.item())

    rolling_predictions = np.array(rolling_predictions)
    rolling_targets = np.array(rolling_targets).astype(int)

    # Convert any 0-labeled targets to -1 if your original data is in [-1, +1]
    # (Sometimes y might be {0,1} or {-1, +1}; adapt as needed.)
    rolling_targets[rolling_targets == 0] = -1

    # Filter out zero predictions
    nonzero_mask = rolling_predictions != 0
    filtered_preds = rolling_predictions[nonzero_mask]
    filtered_targets = rolling_targets[nonzero_mask]

    if len(filtered_preds) == 0:
        accuracy_nonzero = None
        print("No nonzero predictions, cannot compute thresholded accuracy.")
    else:
        accuracy_nonzero = accuracy_score(filtered_targets, filtered_preds)
        print(f"Accuracy on Nonzero Predictions: {accuracy_nonzero:.4f}")

    return {
        "rolling_predictions": rolling_predictions,
        "rolling_targets": rolling_targets,
        "filtered_predictions": filtered_preds,
        "filtered_targets": filtered_targets,
        "accuracy_nonzero": accuracy_nonzero,
        "loss_decrease_percentage": loss_decrease_percentage
    }

In [13]:
import optuna
from optuna.trial import Trial

### **Define Optuna objective function**

In [None]:
import optuna
import torch
from torch import nn, optim

def objective(trial, X, y, device="cpu"):
    """
    Returns accuracy (to be maximized), so we will call:
        study = optuna.create_study(direction="maximize")
    """
    # -----------------------------------------------------------
    # 1) Choose Model Type
    #    Make sure we match the capitalization "LSTM" vs. "GRU."
    # -----------------------------------------------------------
    model_type = trial.suggest_categorical("model_type", ["lstm", "gru"])
    
    # -----------------------------------------------------------
    # 2) Sample Hyperparameters around known best values
    # -----------------------------------------------------------
    # Best found was ~0.01053, so we widen around that a bit:
    learning_rate = trial.suggest_float("learning_rate", 
                                        0.009, 0.011, log=True)
    
    # Best was 64; we keep 16,32,64 in the search:
    batch_size    = trial.suggest_categorical("batch_size", [64])
    
    # Best was 80 epochs; we allow from 20 to 100:
    num_epochs    = trial.suggest_int("num_epochs", 80, 100, step=10)

    # You can also tune hidden_size, num_layers, dropout, etc.
    hidden_size   = 64
    output_size   = 2
    
    # Some placeholders for whatever your model classes need:
    input_size    = X.shape[2]  # e.g. the "features" dimension
    num_layers    = 1
    dropout       = 0.0

    # -----------------------------------------------------------
    # 3) Build Model
    # -----------------------------------------------------------
    if model_type == "lstm":
        model = StockPriceLSTM(input_size, hidden_size, output_size)
    else:
        model = GRU3DClassifier(input_size, hidden_size, output_size,
                                num_layers, dropout)

    model = model.to(device)
    
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # -----------------------------------------------------------
    # 4) Train / Evaluate
    # -----------------------------------------------------------
    # Example of how you might slice training set:
    train_size_percent = possible_train_size / 100
    if isinstance(train_size_percent, float):
        actual_train_size = int(train_size_percent * X.shape[0])
    else:
        actual_train_size = train_size_percent

    result = evaluate_rolling_unchanged_model_threshold(
        model=model,
        X=X,
        y=y,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
        train_size=actual_train_size,
        batch_size=batch_size,
        num_epochs=num_epochs,
        lower_threshold=0.5
    )

    # Extract the accuracy on nonzero predictions
    accuracy = result["accuracy_nonzero"]
    
    # If model never predicts nonzero => heavy penalty
    if accuracy is None:
        return 0.0  # or return float("-inf")

    # Now we just return accuracy so that Optuna will maximize it
    return accuracy

# Then create your study as follows:
study = optuna.create_study(direction="maximize")
study.optimize(lambda trial: objective(trial, X_combined, y_combined), n_trials=50)

[I 2025-02-14 12:19:51,897] A new study created in memory with name: no-name-269751d8-55ce-4496-b3b3-7b3ed6a42498


[Train] Epoch 5/100, Loss=0.6168
[Train] Epoch 10/100, Loss=0.5311
[Train] Epoch 15/100, Loss=0.5539
[Train] Epoch 20/100, Loss=0.6062
[Train] Epoch 25/100, Loss=0.6015
[Train] Epoch 30/100, Loss=0.6092
[Train] Epoch 35/100, Loss=0.6482
[Train] Epoch 40/100, Loss=0.6644
[Train] Epoch 45/100, Loss=0.6957
[Train] Epoch 50/100, Loss=0.6964
[Train] Epoch 55/100, Loss=0.6907
[Train] Epoch 60/100, Loss=0.7111
[Train] Epoch 65/100, Loss=0.7286
[Train] Epoch 70/100, Loss=0.7361
[Train] Epoch 75/100, Loss=0.7219
[Train] Epoch 80/100, Loss=0.7242
[Train] Epoch 85/100, Loss=0.7320
[Train] Epoch 90/100, Loss=0.7208
[Train] Epoch 95/100, Loss=0.7354
[Train] Epoch 100/100, Loss=0.7385


[I 2025-02-14 12:32:15,952] Trial 0 finished with value: 0.5256198347107438 and parameters: {'model_type': 'gru', 'learning_rate': 0.010246491531485753, 'batch_size': 64, 'num_epochs': 100}. Best is trial 0 with value: 0.5256198347107438.


Accuracy on Nonzero Predictions: 0.5256
[Train] Epoch 5/80, Loss=0.3109
[Train] Epoch 10/80, Loss=0.1016
[Train] Epoch 15/80, Loss=0.0553
[Train] Epoch 20/80, Loss=0.0752
[Train] Epoch 25/80, Loss=0.0181
[Train] Epoch 30/80, Loss=0.0675
[Train] Epoch 35/80, Loss=0.0231
[Train] Epoch 40/80, Loss=0.0012
[Train] Epoch 45/80, Loss=0.0005
[Train] Epoch 50/80, Loss=0.0003
[Train] Epoch 55/80, Loss=0.0002
[Train] Epoch 60/80, Loss=0.0001
[Train] Epoch 65/80, Loss=0.0001
[Train] Epoch 70/80, Loss=0.0000
[Train] Epoch 75/80, Loss=0.0000
[Train] Epoch 80/80, Loss=0.0000


[I 2025-02-14 12:44:12,253] Trial 1 finished with value: 0.6479338842975206 and parameters: {'model_type': 'lstm', 'learning_rate': 0.010781551739547245, 'batch_size': 64, 'num_epochs': 80}. Best is trial 1 with value: 0.6479338842975206.


Accuracy on Nonzero Predictions: 0.6479
[Train] Epoch 5/90, Loss=0.2887
[Train] Epoch 10/90, Loss=0.1039
[Train] Epoch 15/90, Loss=0.0246
[Train] Epoch 20/90, Loss=0.0426
[Train] Epoch 25/90, Loss=0.0013
[Train] Epoch 30/90, Loss=0.0006
[Train] Epoch 35/90, Loss=0.0003
[Train] Epoch 40/90, Loss=0.0002
[Train] Epoch 45/90, Loss=0.0001
[Train] Epoch 50/90, Loss=0.0001
[Train] Epoch 55/90, Loss=0.0000
[Train] Epoch 60/90, Loss=0.0000
[Train] Epoch 65/90, Loss=0.0000
[Train] Epoch 70/90, Loss=0.0000
[Train] Epoch 75/90, Loss=0.0000


In [37]:
print("Best (1 - accuracy):", study.best_value)
print("Best hyperparameters:", study.best_params)

Best (1 - accuracy): 0.6106870229007634
Best hyperparameters: {'model_type': 'gru', 'learning_rate': 0.009883203468677031, 'batch_size': 32, 'num_epochs': 80}


### **Save Optuna combinations of hyperparameters**

In [39]:
df_results = pd.DataFrame([
    {
        "trial": trial.number,
        "model_type": trial.params["model_type"],
        "learning_rate": trial.params["learning_rate"],
        "batch_size": trial.params["batch_size"],
        "num_epochs": trial.params["num_epochs"],
        "accuracy": trial.value
    }
    for trial in study.trials
])

# -----------------------------------------------------------
# Display DataFrame
# -----------------------------------------------------------

# -----------------------------------------------------------
# (Optional) Save results to a CSV file
# -----------------------------------------------------------
df_results.to_csv(os.path.join(results_dir, "optuna_hyperparameter_results.csv"), index=False)

print("Results saved to optuna_hyperparameter_results.csv")

Results saved to optuna_hyperparameter_results.csv


### **Choose Optuna hyperparameter** 

In [43]:
batch_size= 32
num_epochs= 80
gru_model = GRU3DClassifier(input_size, hidden_size, output_size, num_layers, dropout)
optimizer = optim.Adam(gru_model.parameters(), lr=0.009883203468677031)
train_size_percent = possible_train_size/100
if isinstance(train_size_percent, float):
    actual_train_size = int(train_size_percent * X_resampled.shape[0])
else:
    actual_train_size = train_size_percent
    
result = evaluate_rolling_unchanged_model_threshold(
    model=gru_model,
    X=X_resampled,
    y=y_resampled,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    train_size=actual_train_size,
    batch_size=batch_size,
    num_epochs=num_epochs,
    lower_threshold=0.5,   # as requested
)

[Train] Epoch 5/80, Loss=0.6940
[Train] Epoch 10/80, Loss=0.6937
[Train] Epoch 15/80, Loss=0.6936
[Train] Epoch 20/80, Loss=0.6933
[Train] Epoch 25/80, Loss=0.6931
[Train] Epoch 30/80, Loss=0.6930
[Train] Epoch 35/80, Loss=0.6921
[Train] Epoch 40/80, Loss=0.6888
[Train] Epoch 45/80, Loss=0.6881
[Train] Epoch 50/80, Loss=0.6874
[Train] Epoch 55/80, Loss=0.6859
[Train] Epoch 60/80, Loss=0.6853
[Train] Epoch 65/80, Loss=0.6843
[Train] Epoch 70/80, Loss=0.6811
[Train] Epoch 75/80, Loss=0.6786
[Train] Epoch 80/80, Loss=0.6820
Accuracy on Nonzero Predictions: 0.6336


### **Iterate with Optuna hyperparameter**

In [42]:
results_list = []
stock = "AAPL"
period = "10y"
possible_train_size = 95
batch_size = 32
num_epochs = 80
window_size = 3

prediction_threshold = 0.5

output_folder = os.path.join(results_dir, f"individual_trials") 
os.makedirs(output_folder, exist_ok=True)

processing = "clean"
security_types = ["single_name"]
learning_rate = 0.009883203468677031
model_type = "gru"

results_list = []
for security_type in security_types:
    initial_data_dir = os.path.join(project_dir, f"00_data/{processing}") 
    
    # 1) Load original data (info only)
    filename = f"{security_type}/{stock}/{period}_data.csv"
    original_input_filepath = os.path.join(initial_data_dir, filename)
    original_data = pd.read_csv(original_input_filepath)

    print(f"\n----- LEARNING_RATE: {learning_rate}, SECURITY_TYPE: {security_type}, MODEL_TYPE: {model_type} -----")

    # 2) Load the preprocessed data
    pkl_filename = f"{processing}/{security_type}/{stock}/{period}_{window_size}_data.pkl"
    input_filepath = os.path.join(horizontal_data_dir, pkl_filename)
    input_df = pd.read_pickle(input_filepath)
    
    # 3) Reshape
    X_resampled, y_resampled = reshape_remove_characters(input_df)

    input_size = X_resampled.shape[2]
    train_size = int(X_resampled.shape[0] * possible_train_size / 100)
    test_size = X_resampled.shape[0] - train_size

    for i in range(50):
        print(i)
        
        # for model_type in model_types:
            # 4) Initialize the model
            # if model_type == "gru":
            #     
            # elif model_type == "lstm":
        # model = StockPriceLSTM(input_size, hidden_size, output_size)
        model = GRU3DClassifier(input_size, hidden_size, output_size, num_layers, dropout)
        model = model.to(device)

        # 5) Set up optimizer
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        print(f"Training {stock} | LR: {learning_rate} | Epochs: {num_epochs} "
              f"| Batch: {batch_size} | Security type: {security_type}")

        result = evaluate_rolling_unchanged_model_threshold(
            model=model,
            X=X_resampled,
            y=y_resampled,
            criterion=criterion,
            optimizer=optimizer,
            device=device,
            train_size=train_size,
            batch_size=batch_size,
            num_epochs=num_epochs,
            lower_threshold=0.5
        )

        # 7) Extract results
        rolling_predictions = result["rolling_predictions"]
        rolling_targets = result["rolling_targets"]
        test_accuracy = result["accuracy_nonzero"]
        loss_decrease_percentage = result["loss_decrease_percentage"]
        nonzero_preds = np.count_nonzero(rolling_predictions)
        final_train_loss = result["final_train_loss"]

        # 9) Create a record (dictionary) for this run
        run_record = {
            "STOCK": stock,
            "DATA_TYPE": security_type,
            "MODEL": model_type.upper(),
            "PROCESSING": processing,
            "ACCURACY": test_accuracy,
            "TRAIN_PCT_DECREASE": loss_decrease_percentage,
            "FINAL_TRAIN_LOSS": final_train_loss
        }

        # 10) Append to the results_list
        results_list.append(run_record)


----- LEARNING_RATE: 0.009883203468677031, SECURITY_TYPE: single_name, MODEL_TYPE: gru -----
0
Training AAPL | LR: 0.009883203468677031 | Epochs: 80 | Batch: 32 | Security type: single_name
[Train] Epoch 5/80, Loss=0.6938
[Train] Epoch 10/80, Loss=0.6937
[Train] Epoch 15/80, Loss=0.6932
[Train] Epoch 20/80, Loss=0.6924
[Train] Epoch 25/80, Loss=0.6915
[Train] Epoch 30/80, Loss=0.6909
[Train] Epoch 35/80, Loss=0.6887
[Train] Epoch 40/80, Loss=0.6897
[Train] Epoch 45/80, Loss=0.6882
[Train] Epoch 50/80, Loss=0.6886
[Train] Epoch 55/80, Loss=0.6886
[Train] Epoch 60/80, Loss=0.6849
[Train] Epoch 65/80, Loss=0.6852
[Train] Epoch 70/80, Loss=0.6831
[Train] Epoch 75/80, Loss=0.6831
[Train] Epoch 80/80, Loss=0.6798
Accuracy on Nonzero Predictions: 0.5420


KeyError: 'final_train_loss'

In [16]:
# A modo de ejemplo, generamos datos aleatorios
num_samples = 1000
seq_length = 10
num_features = 5

X = np.random.rand(num_samples, seq_length, num_features).astype(np.float32)
y = np.random.choice([0, 1], size=num_samples).astype(int)  
# O y = np.random.choice([-1, +1], size=num_samples)


In [27]:
# ---------------------------------------------------------
# 3) Definir la función objetivo para Optuna
# ---------------------------------------------------------
def objective(trial):
    """
    Función objetivo donde:
      - Se definen los hiperparámetros que queremos optimizar
      - Se construye y entrena el modelo con esos hiperparámetros
      - Se obtienen las métricas de la función evaluate_rolling_unchanged_model_threshold
    """
    
    # Hiperparámetros a "samplear":
    learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])

    # Definir el modelo
    input_size = X_resampled.shape[2]
    train_size = int(X_resampled.shape[0] * possible_train_size / 100)
    test_size = X_resampled.shape[0] - train_size
    
    
    model = StockPriceLSTM(input_size, hidden_size, output_size)

    # Definir criterio y optimizador
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Parámetros fijos de tu evaluación
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_size = 0.7  # Ejemplo (70% entrenar, 30% test)
    num_epochs = 10
    lower_threshold = 0.5

    # Llamar a tu función de evaluación
    results = evaluate_rolling_unchanged_model_threshold(
        model=model,
        X=X_resampled,
        y=y_resampled,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
        train_size=train_size,
        batch_size=batch_size,
        num_epochs=num_epochs,
        lower_threshold=lower_threshold
        # NOTA: si en tu función original también pasas upper_threshold, agrégalo aquí
    )

    # Queremos optimizar accuracy_nonzero (o el metric que elijas)
    accuracy_nonzero = results["accuracy_nonzero"]

    # A veces puede pasar que no haya predicciones != 0; en tal caso, retornamos 0 para "penalizar"
    if accuracy_nonzero is None:
        return 0.0
    else:
        return accuracy_nonzero


In [28]:
# ---------------------------------------------------------
# 4) Crear y ejecutar el estudio de Optuna
# ---------------------------------------------------------
if __name__ == "__main__":
    study = optuna.create_study(direction="maximize")  # Queremos maximizar la accuracy
    study.optimize(objective, n_trials=20)  # número de iteraciones de búsqueda

    # Ver los mejores resultados
    print("Number of finished trials: ", len(study.trials))
    
    best_trial = study.best_trial
    print("Best trial:")
    print(f"  Value (Accuracy Nonzero): {best_trial.value:.4f}")
    
    print("  Best hyperparameters:")
    for key, value in best_trial.params.items():
        print(f"    {key}: {value}")


[I 2025-02-11 12:21:55,381] A new study created in memory with name: no-name-a89a00e1-c46c-4e97-9dc6-ce3c37404898
  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6924
[Train] Epoch 10/10, Loss=0.6915


[I 2025-02-11 12:21:56,189] Trial 0 finished with value: 0.5044699872286079 and parameters: {'learning_rate': 0.00928854584534339, 'batch_size': 128}. Best is trial 0 with value: 0.5044699872286079.


Accuracy on Nonzero Predictions: 0.5045


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6931
[Train] Epoch 10/10, Loss=0.6921


[I 2025-02-11 12:21:57,353] Trial 1 finished with value: 0.5031928480204342 and parameters: {'learning_rate': 0.009201191445315782, 'batch_size': 64}. Best is trial 0 with value: 0.5044699872286079.


Accuracy on Nonzero Predictions: 0.5032


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6940
[Train] Epoch 10/10, Loss=0.6933


[I 2025-02-11 12:22:00,368] Trial 2 finished with value: 0.4929757343550447 and parameters: {'learning_rate': 0.009246580847534566, 'batch_size': 16}. Best is trial 0 with value: 0.5044699872286079.


Accuracy on Nonzero Predictions: 0.4930


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6941
[Train] Epoch 10/10, Loss=0.6934


[I 2025-02-11 12:22:03,227] Trial 3 finished with value: 0.49808429118773945 and parameters: {'learning_rate': 0.009079412830845061, 'batch_size': 16}. Best is trial 0 with value: 0.5044699872286079.


Accuracy on Nonzero Predictions: 0.4981


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6932
[Train] Epoch 10/10, Loss=0.6930


[I 2025-02-11 12:22:04,928] Trial 4 finished with value: 0.5006385696040868 and parameters: {'learning_rate': 0.009309370037926864, 'batch_size': 32}. Best is trial 0 with value: 0.5044699872286079.


Accuracy on Nonzero Predictions: 0.5006


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6932
[Train] Epoch 10/10, Loss=0.6930


[I 2025-02-11 12:22:06,551] Trial 5 finished with value: 0.4955300127713921 and parameters: {'learning_rate': 0.00960888270777087, 'batch_size': 32}. Best is trial 0 with value: 0.5044699872286079.


Accuracy on Nonzero Predictions: 0.4955


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6932
[Train] Epoch 10/10, Loss=0.6929


[I 2025-02-11 12:22:08,207] Trial 6 finished with value: 0.5019157088122606 and parameters: {'learning_rate': 0.009598229802676621, 'batch_size': 32}. Best is trial 0 with value: 0.5044699872286079.


Accuracy on Nonzero Predictions: 0.5019


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6932
[Train] Epoch 10/10, Loss=0.6929


[I 2025-02-11 12:22:09,870] Trial 7 finished with value: 0.5006385696040868 and parameters: {'learning_rate': 0.009508223317951441, 'batch_size': 32}. Best is trial 0 with value: 0.5044699872286079.


Accuracy on Nonzero Predictions: 0.5006


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6931
[Train] Epoch 10/10, Loss=0.6922


[I 2025-02-11 12:22:10,913] Trial 8 finished with value: 0.4942528735632184 and parameters: {'learning_rate': 0.009385165960245917, 'batch_size': 64}. Best is trial 0 with value: 0.5044699872286079.


Accuracy on Nonzero Predictions: 0.4943


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6941
[Train] Epoch 10/10, Loss=0.6936


[I 2025-02-11 12:22:13,674] Trial 9 finished with value: 0.5031928480204342 and parameters: {'learning_rate': 0.009310136340673276, 'batch_size': 16}. Best is trial 0 with value: 0.5044699872286079.


Accuracy on Nonzero Predictions: 0.5032


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6922
[Train] Epoch 10/10, Loss=0.6912


[I 2025-02-11 12:22:14,483] Trial 10 finished with value: 0.508301404853129 and parameters: {'learning_rate': 0.009898054853584021, 'batch_size': 128}. Best is trial 10 with value: 0.508301404853129.


Accuracy on Nonzero Predictions: 0.5083


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6922
[Train] Epoch 10/10, Loss=0.6912


[I 2025-02-11 12:22:15,270] Trial 11 finished with value: 0.5006385696040868 and parameters: {'learning_rate': 0.009981379110880428, 'batch_size': 128}. Best is trial 10 with value: 0.508301404853129.


Accuracy on Nonzero Predictions: 0.5006


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6922
[Train] Epoch 10/10, Loss=0.6911


[I 2025-02-11 12:22:16,062] Trial 12 finished with value: 0.49808429118773945 and parameters: {'learning_rate': 0.009991564957482792, 'batch_size': 128}. Best is trial 10 with value: 0.508301404853129.


Accuracy on Nonzero Predictions: 0.4981


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6924
[Train] Epoch 10/10, Loss=0.6915


[I 2025-02-11 12:22:16,856] Trial 13 finished with value: 0.508301404853129 and parameters: {'learning_rate': 0.00976753773305084, 'batch_size': 128}. Best is trial 10 with value: 0.508301404853129.


Accuracy on Nonzero Predictions: 0.5083


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6923
[Train] Epoch 10/10, Loss=0.6915


[I 2025-02-11 12:22:17,663] Trial 14 finished with value: 0.5044699872286079 and parameters: {'learning_rate': 0.00980970610962458, 'batch_size': 128}. Best is trial 10 with value: 0.508301404853129.


Accuracy on Nonzero Predictions: 0.5045


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6924
[Train] Epoch 10/10, Loss=0.6912


[I 2025-02-11 12:22:18,473] Trial 15 finished with value: 0.5044699872286079 and parameters: {'learning_rate': 0.009788779602694095, 'batch_size': 128}. Best is trial 10 with value: 0.508301404853129.


Accuracy on Nonzero Predictions: 0.5045


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6925
[Train] Epoch 10/10, Loss=0.6915


[I 2025-02-11 12:22:19,288] Trial 16 finished with value: 0.5006385696040868 and parameters: {'learning_rate': 0.009784285726829593, 'batch_size': 128}. Best is trial 10 with value: 0.508301404853129.


Accuracy on Nonzero Predictions: 0.5006


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6923
[Train] Epoch 10/10, Loss=0.6911


[I 2025-02-11 12:22:20,104] Trial 17 finished with value: 0.5095785440613027 and parameters: {'learning_rate': 0.009848611354388012, 'batch_size': 128}. Best is trial 17 with value: 0.5095785440613027.


Accuracy on Nonzero Predictions: 0.5096


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6924
[Train] Epoch 10/10, Loss=0.6916


[I 2025-02-11 12:22:20,913] Trial 18 finished with value: 0.5031928480204342 and parameters: {'learning_rate': 0.009911915712169237, 'batch_size': 128}. Best is trial 17 with value: 0.5095785440613027.


Accuracy on Nonzero Predictions: 0.5032


  learning_rate = trial.suggest_loguniform('learning_rate',0.009, 0.01)


[Train] Epoch 5/10, Loss=0.6930
[Train] Epoch 10/10, Loss=0.6920


[I 2025-02-11 12:22:22,014] Trial 19 finished with value: 0.4929757343550447 and parameters: {'learning_rate': 0.0096574347457179, 'batch_size': 64}. Best is trial 17 with value: 0.5095785440613027.


Accuracy on Nonzero Predictions: 0.4930
Number of finished trials:  20
Best trial:
  Value (Accuracy Nonzero): 0.5096
  Best hyperparameters:
    learning_rate: 0.009848611354388012
    batch_size: 128
