## **This notebook aims to compare some models and store them in a reasonable fashion**

In [1]:
import pandas as pd
import numpy as np
import os
import time
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Use GPU 0 in first notebook

In [3]:
project_dir = "/home/jupyter-tfg2425paula/prediction_project_v3"
os.chdir(project_dir)

clean_data_dir = os.path.join(project_dir, "00_data/clean")
horizontal_data_dir = os.path.join(project_dir, "00_data/horizontal_structure")
results_dir = os.path.join(project_dir, "02_results")
plots_dir = os.path.join(project_dir, "03_plots")
pca_data_dir = os.path.join(project_dir, "00_data/pca")

### **GRU Model**

In [4]:
class GRU3DClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout):
        super(GRU3DClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):

        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :]) 
        # return self.sigmoid(out)
        return out

### **LSTM Model**

In [5]:
class StockPriceLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1, dropout=0.0):
        super(StockPriceLSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers, 
                            batch_first=True, dropout=dropout)
    
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        batch_size = x.size(0)  # Get the batch size dynamically

        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(x.device)  # (num_layers, batch_size, hidden_dim)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(x.device)  # (num_layers, batch_size, hidden_dim)
        
        out, _ = self.lstm(x, (h0, c0))

        out = self.fc(out[:, -1, :]) 
        # out = self.sigmoid(out)
        return out
    

### **Set folders**

Processing

In [6]:
processing_types = ["clean", "pca"]
processing_types= ["clean"]

Folders

In [7]:
# stocks = ['AAPL', 'MSFT', 'AMZN', 'NVDA', 'SPX']
stocks = ['AAPL']
# types_securities = ["single_name", "options", "technical"]
types_securities = ["options"]

Different files

In [8]:
# years = ["15y", "10y", "5y", "2y"]
years = ["10y"]
# window_sizes = [5, 10, 50, 100]
window_sizes = [5]
# train_sizes = [80, 90, 95]
train_sizes = [95]

Same file

In [9]:
thresholds = [0.3, 0.35, 0.4, 0.45, 0.5]
thresholds = [0.5]
learning_rates = [0.005, 0.008, 0.009, 0.01]
learning_rates = [0.01]
num_epochs_list = [100, 200]
num_epochs_list = [100]
batch_sizes = [16, 32]
batch_sizes = [16]
prediction_thresholds = [0.35, 0.4, 0.45, 0.5]
prediction_thresholds = [0.5]

#### **Model and Hyperparameters**

In [10]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

hidden_size = 64  
output_size = 2  
num_layers = 2
dropout = 0.2

criterion = nn.CrossEntropyLoss()

In [11]:
model_types = ["lstm", "gru"]   

#### **Last data modifications**

In [12]:
def reshape_remove_characters(df):

    X = np.array([np.stack(row) for row in df.drop(columns=['Target']).values])
    y = df['Target'].values

    smote = SMOTE(random_state=42)
    n_samples, timesteps, n_features = X.shape
    X_flat = X.reshape((n_samples, timesteps * n_features))
    X_flat = np.where(X_flat == 'ç', 0, X_flat)

    X_resampled = X_flat.reshape((-1, timesteps, n_features))
    
    return X_resampled, y

### **Evaluation function**

In [13]:
def evaluate_rolling_unchanged_model_threshold(
    model, 
    X, 
    y, 
    criterion, 
    optimizer, 
    device, 
    train_size, 
    batch_size, 
    num_epochs, 
    lower_threshold
):
    """
    Evaluate a PyTorch model using a rolling prediction approach for time series,
    training the model only once on the initial training set. For each time step
    after train_size, the model makes a prediction without further parameter updates.
    Only predicts +1 or -1 if the probability of class 1 is above/below given thresholds;
    otherwise, predicts 0. Accuracy is computed only on nonzero predictions.

    Args:
        model:          PyTorch model to evaluate.
        X:              Feature data (numpy array).
        y:              Target data (numpy array).
        criterion:      Loss function (e.g., CrossEntropyLoss).
        optimizer:      Optimizer (e.g., Adam).
        device:         Device for computation (CPU or GPU).
        train_size:     Initial size of the training data (int or float).
                        If < 1, treated as fraction of total length.
        batch_size:     Batch size for training.
        num_epochs:     Number of epochs for initial training only.
        lower_threshold: Probability threshold below which model predicts -1.
        upper_threshold: Probability threshold above which model predicts +1.

    Returns:
        dict: Dictionary with the following keys:
            - "rolling_predictions": All predictions (-1, 0, +1) across the test period.
            - "rolling_targets": Corresponding true targets in [-1, +1].
            - "filtered_predictions": Nonzero predictions only.
            - "filtered_targets": Targets corresponding to nonzero predictions.
            - "accuracy_nonzero": Accuracy computed only on nonzero predictions.
    """

    # Convert X, y to tensors
    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.long)

    # Determine initial training set size
    if train_size < 1.0:
        lower_bound = int(train_size * len(X))
    else:
        lower_bound = train_size

    # -------------------------
    # 1) SINGLE TRAINING PHASE
    # -------------------------
    model.to(device)
    model.train()
    
    X_train = X[:lower_bound].to(device)
    y_train = y[:lower_bound].to(device)

    train_dataset = TensorDataset(X_train, y_train)
    trainloader = DataLoader(
        train_dataset, 
        batch_size=batch_size, 
        shuffle=False,         # Keep False if order matters; True for better generalization
        # num_workers=4,         # Adjust based on your CPU cores
        # pin_memory=True,       # Speeds up transfer if using GPUs
        drop_last=False        # Ensure the last batch is included
    )

    epoch_train_losses = []
    for epoch in range(num_epochs):
        # torch.cuda.empty_cache()
        epoch_loss = 0.0
        for X_batch, y_batch in trainloader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            pred_y = model(X_batch)   # [batch_size, num_classes]
            loss = criterion(pred_y, y_batch)
            loss.backward()

            # Gradient clipping (optional)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            epoch_loss += loss.item()
               
        if (epoch + 1) % 5 == 0 or epoch == num_epochs - 1:
            print(f"[Train] Epoch {epoch+1}/{num_epochs}, Loss={epoch_loss/len(trainloader):.4f}")

        epoch_train_losses.append(epoch_loss/len(trainloader))
        
    loss_decrease_percentage = ((epoch_train_losses[-1] - epoch_train_losses[0]) / epoch_train_losses[0]) * 100
    # ---------------------------------
    # 2) ROLLING PREDICTIONS, NO UPDATE
    # ---------------------------------
    model.eval()

    rolling_predictions = []
    rolling_targets     = []

    for i in range(lower_bound, len(X)):
        # Single-step "test" sample
        X_test = X[i:i+1].to(device)  # shape: (1, num_features)
        y_test = y[i:i+1].to(device)  # shape: (1, )

        with torch.no_grad():
            # Forward pass
            pred_y = model(X_test)
            probabilities = torch.softmax(pred_y, dim=1).cpu().numpy()
            prob_class_1  = probabilities[:, 1] 

            # Threshold-based logic
            # Initialize all predictions to 0
            pred_classes = np.zeros_like(prob_class_1)
            # Predict -1 if prob < lower_threshold
            pred_classes[prob_class_1 < lower_threshold] = -1
            # Predict +1 if prob > upper_threshold
            pred_classes[prob_class_1 > 1-lower_threshold] = 1

        rolling_predictions.append(pred_classes[0])  # scalar
        rolling_targets.append(y_test.item())

    rolling_predictions = np.array(rolling_predictions)
    rolling_targets = np.array(rolling_targets).astype(int)

    # Convert any 0-labeled targets to -1 if your original data is in [-1, +1]
    # (Sometimes y might be {0,1} or {-1, +1}; adapt as needed.)
    rolling_targets[rolling_targets == 0] = -1

    # Filter out zero predictions
    nonzero_mask = rolling_predictions != 0
    filtered_preds = rolling_predictions[nonzero_mask]
    filtered_targets = rolling_targets[nonzero_mask]

    if len(filtered_preds) == 0:
        accuracy_nonzero = None
        print("No nonzero predictions, cannot compute thresholded accuracy.")
    else:
        accuracy_nonzero = accuracy_score(filtered_targets, filtered_preds)
        print(f"Accuracy on Nonzero Predictions: {accuracy_nonzero:.4f}")

    return {
        "rolling_predictions": rolling_predictions,
        "rolling_targets": rolling_targets,
        "filtered_predictions": filtered_preds,
        "filtered_targets": filtered_targets,
        "accuracy_nonzero": accuracy_nonzero,
        "loss_decrease_percentage": loss_decrease_percentage,
        "final_train_loss": epoch_train_losses[-1] 
    }

### **2nd Type of comparison:**

Window sizes, for AAPL 10y

In [18]:
results_list = []
stock = "AAPL"
period = "10y"
possible_train_size = 95
window_size = 3
batch_size = 16
num_epochs = 100

prediction_threshold = 0.5

output_folder = os.path.join(results_dir, f"inidividual_trials") 
os.makedirs(output_folder, exist_ok=True)

processing = "clean"
security_type = "single_name"
window_sizes = [2, 3, 4]

learning_rate = 0.01

results_list = []
for window_size in window_sizes:
    initial_data_dir = os.path.join(project_dir, f"00_data/{processing}") 
    for model_type in model_types:

        # Load original data (info only)
        filename = f"{security_type}/{stock}/{period}_data.csv"
        original_input_filepath = os.path.join(initial_data_dir, filename)
        original_data = pd.read_csv(original_input_filepath)

        # Iterate over window sizes
        print(f"{window_size}, {model_type}")

        # Load data using the 'processing' variable in path
        pkl_filename = f"{processing}/{security_type}/{stock}/{period}_{window_size}_data.pkl"
        input_filepath = os.path.join(horizontal_data_dir, pkl_filename)
        print(input_filepath)
        input_df = pd.read_pickle(input_filepath)

        X_resampled, y_resampled = reshape_remove_characters(input_df)

        input_size = X_resampled.shape[2]
        train_size = int(X_resampled.shape[0] * possible_train_size / 100)
        test_size = X_resampled.shape[0] - train_size

        # Generate model
        if model_type == "gru":
            model = GRU3DClassifier(input_size, hidden_size, output_size, num_layers, dropout)
        elif model_type == "lstm":
            model = StockPriceLSTM(input_size, hidden_size, output_size)

        model = model.to(device)

        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        print(f"Training {stock} | LR: {learning_rate} | Epochs: {num_epochs} "
              f"| Batch: {batch_size} | Prediction Threshold: {prediction_threshold}")

        result = evaluate_rolling_unchanged_model_threshold(
            model, 
            X_resampled, 
            y_resampled, 
            criterion, 
            optimizer, 
            device, 
            train_size, 
            batch_size, 
            num_epochs, 
            lower_threshold=prediction_threshold
        )     

        rolling_predictions = result["rolling_predictions"]
        rolling_targets = result["rolling_targets"]
        test_accuracy = result["accuracy_nonzero"]
        loss_decrease_percentage = result["loss_decrease_percentage"]
        nonzero_preds = np.count_nonzero(rolling_predictions)
        final_train_loss = result["final_train_loss"]
        
        # 1) Create a record (dictionary) for this run
        run_record = {"STOCK": stock,
            "DATA_TYPE": security_type,
            "MODEL": model_type.upper(),  # Convert to uppercase for consistency
            "PROCESSING": processing,
            "ACCURACY": test_accuracy,
            "TRAIN_PCT_DECREASE": loss_decrease_percentage,
            "FINAL_TRAIN_LOSS": final_train_loss}

        # 2) Append to list
        results_list.append(run_record)

2, lstm
/home/jupyter-tfg2425paula/prediction_project_v3/00_data/horizontal_structure/clean/single_name/AAPL/10y_2_data.pkl
Training AAPL | LR: 0.01 | Epochs: 100 | Batch: 32 | Prediction Threshold: 0.5
[Train] Epoch 5/100, Loss=0.6937
[Train] Epoch 10/100, Loss=0.6937
[Train] Epoch 15/100, Loss=0.6936
[Train] Epoch 20/100, Loss=0.6936
[Train] Epoch 25/100, Loss=0.6937
[Train] Epoch 30/100, Loss=0.6936
[Train] Epoch 35/100, Loss=0.6936
[Train] Epoch 40/100, Loss=0.6935
[Train] Epoch 45/100, Loss=0.6934
[Train] Epoch 50/100, Loss=0.6933
[Train] Epoch 55/100, Loss=0.6931
[Train] Epoch 60/100, Loss=0.6930
[Train] Epoch 65/100, Loss=0.6929
[Train] Epoch 70/100, Loss=0.6927
[Train] Epoch 75/100, Loss=0.6924
[Train] Epoch 80/100, Loss=0.6922
[Train] Epoch 85/100, Loss=0.6920
[Train] Epoch 90/100, Loss=0.6918
[Train] Epoch 95/100, Loss=0.6915
[Train] Epoch 100/100, Loss=0.6913
Accuracy on Nonzero Predictions: 0.4809
2, gru
/home/jupyter-tfg2425paula/prediction_project_v3/00_data/horizontal_st

KeyboardInterrupt: 

### **Plot curves**

In [19]:
import os
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score

def evaluate_rolling_unchanged_model_threshold(
    model, 
    X, 
    y, 
    criterion, 
    optimizer, 
    device, 
    train_size, 
    batch_size, 
    num_epochs, 
    lower_threshold,
    plots_dir=None,
    plot_filename=None
):

    # -------------------------------
    # 0) Prepare Tensors & Splits
    # -------------------------------
    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.long)

    total_size = len(X)
    # Determine actual train_size index
    if train_size < 1.0:
        lower_bound = int(train_size * total_size)
    else:
        lower_bound = train_size

    # Training portion
    X_train = X[:lower_bound].to(device)
    y_train = y[:lower_bound].to(device)

    train_dataset = TensorDataset(X_train, y_train)
    trainloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=False,   # Set True if you prefer shuffling
        drop_last=False
    )

    if lower_bound < total_size:
        X_val = X[lower_bound:].to(device)
        y_val = y[lower_bound:].to(device)
        val_dataset = TensorDataset(X_val, y_val)
        valloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    else:
        # If there's no leftover data for "test", handle gracefully
        X_val = None
        y_val = None
        valloader = None

    # ---------------------------------------------
    # 1) SINGLE TRAINING PHASE + Track Loss Curves
    # ---------------------------------------------
    model.to(device)
    model.train()

    epoch_train_losses = []
    epoch_test_losses = []

    for epoch in range(num_epochs):
        # TRAINING PASS
        epoch_loss = 0.0
        for X_batch, y_batch in trainloader:
            optimizer.zero_grad()

            # Forward
            pred_y = model(X_batch)
            loss = criterion(pred_y, y_batch)

            # Backprop & update
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # optional
            optimizer.step()

            epoch_loss += loss.item()

        avg_train_loss = epoch_loss / len(trainloader)
        epoch_train_losses.append(avg_train_loss)

        # VALIDATION PASS (Optional but needed to get test_loss_curve)
        if valloader is not None:
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for Xb, yb in valloader:
                    pred_yb = model(Xb)
                    loss_b = criterion(pred_yb, yb)
                    val_loss += loss_b.item()
            avg_val_loss = val_loss / len(valloader)
            epoch_test_losses.append(avg_val_loss)

            model.train()  # Switch back to train mode

        else:
            # If no validation set, just store None or 0
            epoch_test_losses.append(None)

        # Print progress every 5 epochs or last epoch
        if (epoch + 1) % 5 == 0 or (epoch == num_epochs - 1):
            if epoch_test_losses[-1] is not None:
                print(f"[Epoch {epoch+1}/{num_epochs}] "
                      f"Train Loss: {avg_train_loss:.4f}, "
                      f"Val Loss: {avg_val_loss:.4f}")
            else:
                print(f"[Epoch {epoch+1}/{num_epochs}] "
                      f"Train Loss: {avg_train_loss:.4f}")

    # % decrease from first to last train loss
    if len(epoch_train_losses) > 1:
        loss_decrease_percentage = ((epoch_train_losses[-1] - epoch_train_losses[0])
                                    / epoch_train_losses[0]) * 100
    else:
        loss_decrease_percentage = 0.0

    final_train_loss = epoch_train_losses[-1]

    # -------------------------------
    # 2) ROLLING PREDICTIONS, NO UPDATE
    # -------------------------------
    model.eval()
    rolling_predictions = []
    rolling_targets = []

    for i in range(lower_bound, total_size):
        X_test = X[i:i+1].to(device)
        y_test = y[i:i+1].to(device)

        with torch.no_grad():
            pred_y = model(X_test)
            probabilities = torch.softmax(pred_y, dim=1).cpu().numpy()
            prob_class_1 = probabilities[:, 1]  # shape: (1,)

            # Threshold-based logic
            pred_classes = np.zeros_like(prob_class_1)
            # Predict -1 if prob < lower_threshold
            pred_classes[prob_class_1 < lower_threshold] = -1
            # Predict +1 if prob > (1 - lower_threshold)
            pred_classes[prob_class_1 > (1 - lower_threshold)] = 1

        rolling_predictions.append(pred_classes[0])
        rolling_targets.append(y_test.item())

    rolling_predictions = np.array(rolling_predictions)
    rolling_targets = np.array(rolling_targets).astype(int)

    # If original labels might be {0,1}, adapt as needed
    rolling_targets[rolling_targets == 0] = -1

    # Filter out zero predictions
    nonzero_mask = (rolling_predictions != 0)
    filtered_preds = rolling_predictions[nonzero_mask]
    filtered_targets = rolling_targets[nonzero_mask]

    if len(filtered_preds) == 0:
        accuracy_nonzero = None
        print("No nonzero predictions, cannot compute thresholded accuracy.")
    else:
        accuracy_nonzero = accuracy_score(filtered_targets, filtered_preds)
        print(f"Accuracy on Nonzero Predictions: {accuracy_nonzero:.4f}")

    # -------------------------------------------------
    # 3) PLOT (if plots_dir is set and there's test data)
    # -------------------------------------------------
    if plots_dir is not None:
        os.makedirs(plots_dir, exist_ok=True)

        # If user didn't provide a filename, create a default
        if plot_filename is None:
            plot_filename = "train_test_loss_curve.png"
        plot_path = os.path.join(plots_dir, plot_filename)

        # Plot the training and validation (test) loss curves
        plt.figure(figsize=(8, 5))
        plt.plot(epoch_train_losses, label="Train Loss")
        # Only plot test loss if it isn't None
        if any(x is not None for x in epoch_test_losses):
            plt.plot(epoch_test_losses, label="Test Loss")
        plt.xlabel("Epoch")
        plt.ylabel("Loss")
        plt.title("Train vs. Test Loss per Epoch")
        plt.legend()
        plt.tight_layout()

        plt.savefig(plot_path)
        plt.close()
        print(f"Loss curves saved to: {plot_path}")

    # ----------------
    # 4) Return results
    # ----------------
    return {
        "rolling_predictions": rolling_predictions,
        "rolling_targets": rolling_targets,
        "filtered_predictions": filtered_preds,
        "filtered_targets": filtered_targets,
        "accuracy_nonzero": accuracy_nonzero,
        "loss_decrease_percentage": loss_decrease_percentage,
        "final_train_loss": final_train_loss,
        "train_loss_curve": epoch_train_losses,
        "test_loss_curve": epoch_test_losses
    }


In [23]:
processing = "clean"
security_type = "single_name"
window_sizes = [2, 3, 4, 5, 10]
possible_train_size = 80

results_list = []

# Main loop
for window_size in window_sizes:
    initial_data_dir = os.path.join(project_dir, f"00_data/{processing}") 
    for model_type in model_types:

        # 1) Load original data (info only)
        filename = f"{security_type}/{stock}/{period}_data.csv"
        original_input_filepath = os.path.join(initial_data_dir, filename)
        original_data = pd.read_csv(original_input_filepath)

        print(f"\n----- WINDOW_SIZE: {window_size}, MODEL_TYPE: {model_type} -----")

        # 2) Load the preprocessed data
        pkl_filename = f"{processing}/{security_type}/{stock}/{period}_{window_size}_data.pkl"
        input_filepath = os.path.join(horizontal_data_dir, pkl_filename)
        input_df = pd.read_pickle(input_filepath)

        # 3) Reshape
        X_resampled, y_resampled = reshape_remove_characters(input_df)

        input_size = X_resampled.shape[2]
        train_size = int(X_resampled.shape[0] * possible_train_size / 100)
        test_size = X_resampled.shape[0] - train_size

        # 4) Initialize the model
        if model_type == "gru":
            model = GRU3DClassifier(input_size, hidden_size, output_size, num_layers, dropout)
        elif model_type == "lstm":
            model = StockPriceLSTM(input_size, hidden_size, output_size)

        model = model.to(device)

        # 5) Set up optimizer
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        print(f"Training {stock} | LR: {learning_rate} | Epochs: {num_epochs} "
              f"| Batch: {batch_size} | Threshold: {prediction_threshold}")

        
        result = evaluate_rolling_unchanged_model_threshold(
            model=model,
            X=X_resampled,
            y=y_resampled,
            criterion=criterion,
            optimizer=optimizer,
            device=device,
            train_size=train_size,
            batch_size=batch_size,
            num_epochs=num_epochs,
            lower_threshold=0.5,
            plots_dir=plots_dir,                # folder where plots are saved
            plot_filename=f"AAPL_{model_type}_win{window_size}_{possible_train_size}_loss.png" # optional custom filename
        )

        # 7) Extract results
        rolling_predictions = result["rolling_predictions"]
        rolling_targets = result["rolling_targets"]
        test_accuracy = result["accuracy_nonzero"]
        loss_decrease_percentage = result["loss_decrease_percentage"]
        nonzero_preds = np.count_nonzero(rolling_predictions)
        final_train_loss = result["final_train_loss"]

        # 9) Create a record (dictionary) for this run
        run_record = {
            "STOCK": stock,
            "DATA_TYPE": security_type,
            "MODEL": model_type.upper(),
            "PROCESSING": processing,
            "ACCURACY": test_accuracy,
            "TRAIN_PCT_DECREASE": loss_decrease_percentage,
            "FINAL_TRAIN_LOSS": final_train_loss
        }

        # 10) Append to the results_list
        results_list.append(run_record)


----- WINDOW_SIZE: 2, MODEL_TYPE: lstm -----
Training AAPL | LR: 0.01 | Epochs: 100 | Batch: 32 | Threshold: 0.5
[Epoch 5/100] Train Loss: 0.6939, Val Loss: 0.6924
[Epoch 10/100] Train Loss: 0.6938, Val Loss: 0.6925
[Epoch 15/100] Train Loss: 0.6937, Val Loss: 0.6927
[Epoch 20/100] Train Loss: 0.6936, Val Loss: 0.6931
[Epoch 25/100] Train Loss: 0.6934, Val Loss: 0.6939
[Epoch 30/100] Train Loss: 0.6930, Val Loss: 0.6947
[Epoch 35/100] Train Loss: 0.6927, Val Loss: 0.6954
[Epoch 40/100] Train Loss: 0.6925, Val Loss: 0.6958
[Epoch 45/100] Train Loss: 0.6923, Val Loss: 0.6962
[Epoch 50/100] Train Loss: 0.6921, Val Loss: 0.6965
[Epoch 55/100] Train Loss: 0.6919, Val Loss: 0.6968
[Epoch 60/100] Train Loss: 0.6918, Val Loss: 0.6971
[Epoch 65/100] Train Loss: 0.6917, Val Loss: 0.6975
[Epoch 70/100] Train Loss: 0.6915, Val Loss: 0.6980
[Epoch 75/100] Train Loss: 0.6914, Val Loss: 0.6985
[Epoch 80/100] Train Loss: 0.6913, Val Loss: 0.6990
[Epoch 85/100] Train Loss: 0.6912, Val Loss: 0.6995
[Ep

KeyboardInterrupt: 

In [None]:
epoch_train_losses

NameError: name 'epoch_train_losses' is not defined

In [27]:
results_csv_path = os.path.join(output_folder, f"02_{stock}_window_sizes.csv")

if len(results_list) > 0:
    df = pd.DataFrame(results_list)

    if os.path.exists(results_csv_path):
        # Append without header
        df.to_csv(results_csv_path, mode='a', header=False, index=False)
    else:
        # Write new file with header
        df.to_csv(results_csv_path, index=False)

    # Clear the list before next iteration
    results_list = []

In [26]:
results_csv_path

'/home/jupyter-tfg2425paula/prediction_project_v3/02_results/inidividual_trials/01_AAPL_proc_model_type.csv'

In [25]:
df

Unnamed: 0,STOCK,DATA_TYPE,MODEL,PROCESSING,ACCURACY,TRAIN_PCT_DECREASE,FINAL_TRAIN_LOSS
0,AAPL,single_name,LSTM,clean,0.51145,-13.434468,0.602222
1,AAPL,single_name,GRU,clean,0.427481,-17.854286,0.571629
2,AAPL,single_name,LSTM,clean,0.427481,-92.449701,0.052604
3,AAPL,single_name,GRU,clean,0.465649,-54.370292,0.317963
4,AAPL,single_name,LSTM,clean,0.496124,-99.997286,1.9e-05
5,AAPL,single_name,GRU,clean,0.55814,-91.448532,0.059744
6,AAPL,single_name,LSTM,clean,0.531746,-99.998481,1.1e-05
7,AAPL,single_name,GRU,clean,0.47619,-96.84312,0.022098
8,AAPL,single_name,LSTM,clean,0.518868,-99.999147,6e-06
9,AAPL,single_name,GRU,clean,0.613208,-96.695056,0.023305
