In [69]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn import metrics
import numpy as npw
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler



def split_data_with_window(x_in, y_in, split_window_size):
    # Initialize lists to store training and temporary sets
    x_out1_list, y_out1_list, x_out2_list, y_out2_list = [], [], [], []

    # Iterate through the data with the specified window size
    for i in range(0, len(x_in) - split_window_size, split_window_size + 1):
        x_out1_out2 = x_in.iloc[i:i+split_window_size+1]
        y_out1_out2 = y_in.iloc[i:i+split_window_size+1]

        # Separate the last row for the temporary set
        # [ :-1]: all elements except the last one
        # [-1:]:  selects only the last element
        # (:) is used to indicate slicing of a sequence
        # sequence[start : end : step]

        x_out1 = x_out1_out2.iloc[:-1]
        y_out1 = y_out1_out2.iloc[:-1]

        x_out2 = x_out1_out2.iloc[-1:]
        y_out2 = y_out1_out2.iloc[-1:]

        x_out1_list.append(x_out1)
        y_out1_list.append(y_out1)
        x_out2_list.append(x_out2)
        y_out2_list.append(y_out2)

    # Concatenate the lists into pandas DataFrames
    x_out1 = pd.concat(x_out1_list)
    y_out1 = pd.concat(y_out1_list)
    x_out2 = pd.concat(x_out2_list)
    y_out2 = pd.concat(y_out2_list)

    return x_out1, y_out1, x_out2, y_out2










# Read the CSV file
data = pd.read_csv("../data/data/aapl_raw_data.csv")

data = data.drop("date", axis=1)

data.isnull().sum()
data=data.fillna(0)  # Filling null values with zero
data.isnull().sum()

data = data.astype('float32')


# Keep data until 31.08.2023
data = data.iloc[:10731]

#print(data['open'].dtype)
#print(data.shape)

# Assuming 'data' is a pandas DataFrame
x_data = data[['open', 'high', 'low', 'volume', 'adjusted_close', 'change_percent', 'avg_vol_20d']]
y_data = data["close"]

# Now x_data and y_data are pandas DataFrames/Series, respectively

x_data.tail(1)



# Split Data to train and temp

# Define your split_window_size
split_window_size = 3

# Call the split_data_with_window function
x_train, y_train, x_temp, y_temp = split_data_with_window(x_data, y_data, split_window_size)


"""

# Print the last 5 rows of x_data
print("Last 5 rows of x_data:")
print(x_data.tail(5))

# Print the last 5 rows of x_train
print("\nLast 25 rows of x_train:")
print(x_train.tail(25))

print("\nLast 3 rows of y_train:")
print(y_temp.tail(3))

"""

# Split temp into val and test

# Define your split_window_size
split_window_size = 1

# Call the split_data_with_window function
x_val, y_val, x_test, y_test = split_data_with_window(x_temp, y_temp, split_window_size)




scaler = MinMaxScaler()

x_train_normalized = scaler.fit_transform(x_train)
x_val_normalized = scaler.transform(x_val)
x_test_normalized = scaler.transform(x_test)

# Convert the data to PyTorch tensors
x_train_tensor = torch.tensor(x_train_normalized, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

x_val_tensor = torch.tensor(x_val_normalized, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)

x_test_tensor = torch.tensor(x_test_normalized, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)


"""
# x_train_tensor inverse

x_test_original = scaler.inverse_transform(x_train_tensor.numpy())
print("\nFirst row of x_test_original:")
print(x_test_original[0])

print("\nFirst row of x_train:")
print(x_train.head(1))



print("\nLast row of x_test_original:")
print(x_test_original[-1])

print("\nLast row of x_train:")
print(x_train.tail(1))
"""



'''

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, learning_rate, window_size, dropout_prob=0.5):  # Added dropout_prob
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.window_size = window_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_prob)  # Included dropout in LSTM
        self.dropout = nn.Dropout(dropout_prob)  # Added dropout layer
        self.fc = nn.Linear(hidden_size, 1)
        self.learning_rate = learning_rate


    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))

        out = self.fc(out[:, -1, :])
        return out

'''

'\n\nclass LSTMModel(nn.Module):\n    def __init__(self, input_size, hidden_size, num_layers, learning_rate, window_size, dropout_prob=0.5):  # Added dropout_prob\n        super(LSTMModel, self).__init__()\n        self.hidden_size = hidden_size\n        self.num_layers = num_layers\n        self.window_size = window_size\n        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_prob)  # Included dropout in LSTM\n        self.dropout = nn.Dropout(dropout_prob)  # Added dropout layer\n        self.fc = nn.Linear(hidden_size, 1)\n        self.learning_rate = learning_rate\n\n\n    def forward(self, x):\n        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)\n        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)\n\n        out, _ = self.lstm(x, (h0, c0))\n\n        out = self.fc(out[:, -1, :])\n        return out\n\n'

In [70]:
import torch
import torch.nn as nn
import itertools
import random
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np  # Added this line


# Define the hyperparameters to search over
input_sizes = [7]

out_channels_list = [128]
kernel_sizes = [1]
paddings = [1]


hidden_sizes = [5]
num_layers_list = [2]

learning_rates = [0.0005]

window_sizes = [20]
dropout_probs = [0]
weight_decays = [0]
factors = [0.5]
patience_lr = [10]

num_epochs = 150000
patience = 10  # Number of epochs to wait for improvement

def set_random_seeds(seed):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False




# Combine hyperparameters into a list of tuples
hyperparameter_combinations = list(itertools.product(input_sizes, hidden_sizes, num_layers_list, learning_rates, window_sizes, dropout_probs, weight_decays, factors, patience_lr, out_channels_list, kernel_sizes, paddings))

# Walk-forward validation training with sliding window for each hyperparameter combination
for hyperparams in hyperparameter_combinations:
    class CNNLSTMModel(nn.Module):

        def __init__(self, input_size, hidden_size, num_layers, learning_rate, window_size, out_channels, kernel_size, padding, dropout_prob=0.5):

            super(CNNLSTMModel, self).__init__()
            self.hidden_size = hidden_size
            self.num_layers = num_layers
            self.window_size = window_size

            # Define the convolutional layer


            self.conv1 = nn.Conv1d(in_channels=input_size, out_channels=out_channels, kernel_size=kernel_size, padding=padding)
            self.conv2 = nn.Conv1d(in_channels=out_channels, out_channels=out_channels * 2, kernel_size=kernel_size, padding=padding)

             # Define the LSTM layer
            lstm_input_size = out_channels * 2  # Adjusted to match the output from conv2
            self.lstm = nn.LSTM(input_size=lstm_input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout_prob)


            # Define the dropout layer
            self.dropout = nn.Dropout(dropout_prob)

            # Define the fully connected layer
            self.fc = nn.Linear(hidden_size, 1)

            self.learning_rate = learning_rate

        def forward(self, x):
            # Apply convolutional layers
            x = x.permute(0, 2, 1)  # Permute dimensions for Conv1d input
            x = F.relu(self.conv1(x))
            x = F.relu(self.conv2(x))
            x = x.permute(0, 2, 1)  # Restore dimensions

            # Apply LSTM
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
            c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
            lstm_out, _ = self.lstm(x, (h0, c0))

            # Apply dropout
            lstm_out = self.dropout(lstm_out)

            # Apply fully connected layer
            out = self.fc(lstm_out[:, -1, :])

            return out








    set_random_seeds(42)


    input_size, hidden_size, num_layers, learning_rate, window_size, dropout_prob, weight_decay, factor, patience_lr, out_channels, kernel_size, padding = hyperparams


    print(f"Hyperparameters: input_size={input_size}, hidden_size={hidden_size}, num_layers={num_layers}, learning_rate={learning_rate}, window_size={window_size}, dropout_prob={dropout_prob}, weight_decay={weight_decay}, factor={factor}, patience={patience}, out_channels={out_channels}, kernel_size={kernel_size}, padding={padding}")

    # Initialize the model

    model = CNNLSTMModel(input_size, hidden_size, num_layers, learning_rate, window_size, out_channels, kernel_size, padding, dropout_prob=0.5)

    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Initialize the scheduler after defining the optimizer
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)


    best_val_loss = float('inf')
    counter = 0


    # segment sequential data into smaller windows using a sliding window approach,
    # ensuring temporal coherence, and returns the windows as tensors for training sequential models.

    # segment sequential data into smaller windows using a sliding window approach,
    # ensuring temporal coherence, and returns the windows as tensors for training sequential models.
    def split_data_with_sliding_window(x_train_tensor, y_train_tensor, window_size):
        # Check if the lengths of x_train_tensor and y_train_tensor match
        if len(x_train_tensor) != len(y_train_tensor):
            raise ValueError("Lengths of x_train_tensor and y_train_tensor must match.")

        # Initialize lists to store sequential windows
        x_seq_list, y_seq_list = [], []

        # Iterate through the data with the specified window size
        for i in range(len(x_train_tensor) - window_size):
            # Extract a window of input features and target output
            x_window = x_train_tensor[i:i+window_size]
            y_window = y_train_tensor[i+window_size]  # Next entry as target output

            x_seq_list.append(x_window)
            y_seq_list.append(y_window)

        # Concatenate the lists into tensors
        x_seq = torch.stack(x_seq_list)
        y_seq = torch.stack(y_seq_list)

        return x_seq, y_seq



    # Training loop
    for epoch in range(num_epochs):
        # Clear gradients
        optimizer.zero_grad()

        # Split the training data into sliding windows
        x_train_seq, y_train_seq = split_data_with_sliding_window(x_train_tensor, y_train_tensor, window_size)

        # Forward pass
        outputs = model(x_train_seq)

        # Calculate loss
        loss = criterion(outputs, y_train_seq)

        # Backward pass and optimization
        loss.backward()

        optimizer.step()

        scheduler.step()



                # Monitor gradients
        if epoch % 100 == 0:
            ("Gradients:")    # print("Gradients:")
            for name, param in model.named_parameters():
                if param.grad is not None:
                    (f"{name}: {param.grad.norm().item()}") # print(f"{name}: {param.grad.norm().item()}")

                # Monitor Learning Rate

        current_lr = optimizer.param_groups[0]['lr']
        # print(f"Epoch {epoch+1}/{num_epochs}, Learning Rate: {current_lr}")



        # Validate the model
        with torch.no_grad():
            # Split validation data into sliding windows
            x_val_seq, y_val_seq = split_data_with_sliding_window(x_val_tensor, y_val_tensor, window_size)

            # Forward pass for validation
            val_outputs = model(x_val_seq)

            # Calculate validation loss
            val_loss = criterion(val_outputs, y_val_seq)

            #Update the scheduler within the training loop after calculating the validation loss:
            scheduler.step(val_loss)

            # Check for early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                counter = 0
            else:
                counter += 1
                if counter >= patience:
                    print(f"Early stopping at epoch {epoch} with validation loss {val_loss}.")
                    break

        # Print progress
        if epoch % 100 == 0:
            print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {loss.item():.0f}, Val Loss: {val_loss.item():.0f},  Lear. Rate: {current_lr:.5f}, Train Grad.: {param.grad.norm().item():.1f}")

    # After the training loop, you can evaluate the model on the test data
    # Split test data into sliding windows
    x_test_seq, y_test_seq = split_data_with_sliding_window(x_test_tensor, y_test_tensor, window_size)

    # Evaluate the model
    with torch.no_grad():
        test_outputs = model(x_test_seq)
        test_loss = criterion(test_outputs, y_test_seq)
        print(f"Test Loss: {test_loss.item()}")




Hyperparameters: input_size=7, hidden_size=5, num_layers=2, learning_rate=0.0005, window_size=20, dropout_prob=0, weight_decay=0, factor=0.5, patience=10, out_channels=128, kernel_size=1, padding=1




Epoch 1/150000, Train Loss: 29947, Val Loss: 30443,  Lear. Rate: 0.00050, Train Grad.: 223.9
Epoch 101/150000, Train Loss: 29860, Val Loss: 30360,  Lear. Rate: 0.00045, Train Grad.: 223.1
Epoch 201/150000, Train Loss: 29700, Val Loss: 30195,  Lear. Rate: 0.00045, Train Grad.: 221.7
Epoch 301/150000, Train Loss: 29575, Val Loss: 30068,  Lear. Rate: 0.00045, Train Grad.: 220.6
Early stopping at epoch 352 with validation loss 30014.931640625.
Test Loss: 29796.857421875


In [71]:
p

NameError: name 'p' is not defined

In [None]:
import torch
import torch.nn as nn
import itertools
import random
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np  # Added this line


# Define the hyperparameters to search over
input_sizes = [7]

out_channels_list = [16]
kernel_sizes = [1]
paddings = [1]


hidden_sizes = [5]
num_layers_list = [2]

learning_rates = [0.0005]

window_sizes = [20]
dropout_probs = [0]
weight_decays = [0]
factors = [0.5]
patience_lr = [10]

num_epochs = 150000
patience = 10  # Number of epochs to wait for improvement

def set_random_seeds(seed):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False




# Combine hyperparameters into a list of tuples
hyperparameter_combinations = list(itertools.product(input_sizes, hidden_sizes, num_layers_list, learning_rates, window_sizes, dropout_probs, weight_decays, factors, patience_lr, out_channels_list, kernel_sizes, paddings))

# Walk-forward validation training with sliding window for each hyperparameter combination
for hyperparams in hyperparameter_combinations:
    class CNNLSTMModel(nn.Module):

        def __init__(self, input_size, hidden_size, num_layers, learning_rate, window_size, out_channels, kernel_size, padding, dropout_prob=0.5):

            super(CNNLSTMModel, self).__init__()
            self.hidden_size = hidden_size
            self.num_layers = num_layers
            self.window_size = window_size

            # Define the convolutional layer


            self.conv1 = nn.Conv1d(in_channels=input_size, out_channels=out_channels, kernel_size=kernel_size, padding=padding)
            self.conv2 = nn.Conv1d(in_channels=out_channels, out_channels=out_channels * 2, kernel_size=kernel_size, padding=padding)

            # Define the LSTM layer
            self.lstm = nn.LSTM(input_size=32, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout_prob)


            # Define the dropout layer
            self.dropout = nn.Dropout(dropout_prob)

            # Define the fully connected layer
            self.fc = nn.Linear(hidden_size, 1)

            self.learning_rate = learning_rate

        def forward(self, x):
            # Apply convolutional layers
            x = x.permute(0, 2, 1)  # Permute dimensions for Conv1d input
            x = F.relu(self.conv1(x))
            x = F.relu(self.conv2(x))
            x = x.permute(0, 2, 1)  # Restore dimensions

            # Apply LSTM
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
            c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
            lstm_out, _ = self.lstm(x, (h0, c0))

            # Apply dropout
            lstm_out = self.dropout(lstm_out)

            # Apply fully connected layer
            out = self.fc(lstm_out[:, -1, :])

            return out








    set_random_seeds(42)


    input_size, hidden_size, num_layers, learning_rate, window_size, dropout_prob, weight_decay, factor, patience_lr, out_channels, kernel_size, padding = hyperparams


    print(f"Hyperparameters: input_size={input_size}, hidden_size={hidden_size}, num_layers={num_layers}, learning_rate={learning_rate}, window_size={window_size}, dropout_prob={dropout_prob}, weight_decay={weight_decay}, factor={factor}, patience={patience}, out_channels={out_channels}, kernel_size={kernel_size}, padding={padding}")

    # Initialize the model

    model = CNNLSTMModel(input_size, hidden_size, num_layers, learning_rate, window_size, out_channels, kernel_size, padding, dropout_prob=0.5)

    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Initialize the scheduler after defining the optimizer
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)


    best_val_loss = float('inf')
    counter = 0


    # segment sequential data into smaller windows using a sliding window approach,
    # ensuring temporal coherence, and returns the windows as tensors for training sequential models.

    # segment sequential data into smaller windows using a sliding window approach,
    # ensuring temporal coherence, and returns the windows as tensors for training sequential models.
    def split_data_with_sliding_window(x_train_tensor, y_train_tensor, window_size):
        # Check if the lengths of x_train_tensor and y_train_tensor match
        if len(x_train_tensor) != len(y_train_tensor):
            raise ValueError("Lengths of x_train_tensor and y_train_tensor must match.")

        # Initialize lists to store sequential windows
        x_seq_list, y_seq_list = [], []

        # Iterate through the data with the specified window size
        for i in range(len(x_train_tensor) - window_size):
            # Extract a window of input features and target output
            x_window = x_train_tensor[i:i+window_size]
            y_window = y_train_tensor[i+window_size]  # Next entry as target output

            x_seq_list.append(x_window)
            y_seq_list.append(y_window)

        # Concatenate the lists into tensors
        x_seq = torch.stack(x_seq_list)
        y_seq = torch.stack(y_seq_list)

        return x_seq, y_seq



    # Training loop
    for epoch in range(num_epochs):
        # Clear gradients
        optimizer.zero_grad()

        # Split the training data into sliding windows
        x_train_seq, y_train_seq = split_data_with_sliding_window(x_train_tensor, y_train_tensor, window_size)

        # Forward pass
        outputs = model(x_train_seq)

        # Calculate loss
        loss = criterion(outputs, y_train_seq)

        # Backward pass and optimization
        loss.backward()

        optimizer.step()

        scheduler.step()



                # Monitor gradients
        if epoch % 100 == 0:
            ("Gradients:")    # print("Gradients:")
            for name, param in model.named_parameters():
                if param.grad is not None:
                    (f"{name}: {param.grad.norm().item()}") # print(f"{name}: {param.grad.norm().item()}")

                # Monitor Learning Rate

        current_lr = optimizer.param_groups[0]['lr']
        # print(f"Epoch {epoch+1}/{num_epochs}, Learning Rate: {current_lr}")



        # Validate the model
        with torch.no_grad():
            # Split validation data into sliding windows
            x_val_seq, y_val_seq = split_data_with_sliding_window(x_val_tensor, y_val_tensor, window_size)

            # Forward pass for validation
            val_outputs = model(x_val_seq)

            # Calculate validation loss
            val_loss = criterion(val_outputs, y_val_seq)

            #Update the scheduler within the training loop after calculating the validation loss:
            scheduler.step(val_loss)

            # Check for early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                counter = 0
            else:
                counter += 1
                if counter >= patience:
                    print(f"Early stopping at epoch {epoch} with validation loss {val_loss}.")
                    break

        # Print progress
        if epoch % 100 == 0:
            print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {loss.item():.0f}, Val Loss: {val_loss.item():.0f},  Lear. Rate: {current_lr:.5f}, Train Grad.: {param.grad.norm().item():.1f}")

    # After the training loop, you can evaluate the model on the test data
    # Split test data into sliding windows
    x_test_seq, y_test_seq = split_data_with_sliding_window(x_test_tensor, y_test_tensor, window_size)

    # Evaluate the model
    with torch.no_grad():
        test_outputs = model(x_test_seq)
        test_loss = criterion(test_outputs, y_test_seq)
        print(f"Test Loss: {test_loss.item()}")




Hyperparameters: input_size=7, hidden_size=5, num_layers=2, learning_rate=0.0005, window_size=20, dropout_prob=0, weight_decay=0, factor=0.5, patience=10, out_channels=32, kernel_size=1, padding=1


RuntimeError: input.size(-1) must be equal to input_size. Expected 32, got 64

In [None]:
p

In [None]:
import torch
import torch.nn as nn
import itertools
import random
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np  # Added this line


# Define the hyperparameters to search over
input_sizes = [7]

out_channels_list = [16]

hidden_sizes = [5]
num_layers_list = [2]

learning_rates = [0.0005]

window_sizes = [20]
dropout_probs = [0]
weight_decays = [0]
factors = [0.5]
patience_lr = [10]

num_epochs = 150000
patience = 10  # Number of epochs to wait for improvement

def set_random_seeds(seed):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False




# Combine hyperparameters into a list of tuples
hyperparameter_combinations = list(itertools.product(input_sizes, hidden_sizes, num_layers_list, learning_rates, window_sizes, dropout_probs, weight_decays, factors, patience_lr, out_channels_list))

# Walk-forward validation training with sliding window for each hyperparameter combination
for hyperparams in hyperparameter_combinations:
    class CNNLSTMModel(nn.Module):

        def __init__(self, input_size, hidden_size, num_layers, learning_rate, window_size, out_channels, dropout_prob=0.5):

            super(CNNLSTMModel, self).__init__()
            self.hidden_size = hidden_size
            self.num_layers = num_layers
            self.window_size = window_size

            # Define the convolutional layer

            self.conv1 = nn.Conv1d(in_channels=input_size, out_channels=out_channels, kernel_size=3, padding=1)
            self.conv2 = nn.Conv1d(in_channels=out_channels, out_channels=out_channels * 2, kernel_size=3, padding=1)

            # Define the LSTM layer
            self.lstm = nn.LSTM(input_size=32, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout_prob)

            # Define the dropout layer
            self.dropout = nn.Dropout(dropout_prob)

            # Define the fully connected layer
            self.fc = nn.Linear(hidden_size, 1)

            self.learning_rate = learning_rate

        def forward(self, x):
            # Apply convolutional layers
            x = x.permute(0, 2, 1)  # Permute dimensions for Conv1d input
            x = F.relu(self.conv1(x))
            x = F.relu(self.conv2(x))
            x = x.permute(0, 2, 1)  # Restore dimensions

            # Apply LSTM
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
            c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
            lstm_out, _ = self.lstm(x, (h0, c0))

            # Apply dropout
            lstm_out = self.dropout(lstm_out)

            # Apply fully connected layer
            out = self.fc(lstm_out[:, -1, :])

            return out









    set_random_seeds(42)


    input_size, hidden_size, num_layers, learning_rate, window_size, dropout_prob, weight_decay, factor, patience_lr, out_channels = hyperparams

    print(f"Hyperparameters: input_size={input_size}, hidden_size={hidden_size}, num_layers={num_layers}, learning_rate={learning_rate}, window_size={window_size}, dropout_prob={dropout_prob}, weight_decay={weight_decay}, factor={factor}, patience={patience}, out_channels={out_channels}")

    # Initialize the model

    model = CNNLSTMModel(input_size, hidden_size, num_layers, learning_rate, window_size, out_channels, dropout_prob=0.5)

    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Initialize the scheduler after defining the optimizer
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)


    best_val_loss = float('inf')
    counter = 0


    # segment sequential data into smaller windows using a sliding window approach,
    # ensuring temporal coherence, and returns the windows as tensors for training sequential models.

    # segment sequential data into smaller windows using a sliding window approach,
    # ensuring temporal coherence, and returns the windows as tensors for training sequential models.
    def split_data_with_sliding_window(x_train_tensor, y_train_tensor, window_size):
        # Check if the lengths of x_train_tensor and y_train_tensor match
        if len(x_train_tensor) != len(y_train_tensor):
            raise ValueError("Lengths of x_train_tensor and y_train_tensor must match.")

        # Initialize lists to store sequential windows
        x_seq_list, y_seq_list = [], []

        # Iterate through the data with the specified window size
        for i in range(len(x_train_tensor) - window_size):
            # Extract a window of input features and target output
            x_window = x_train_tensor[i:i+window_size]
            y_window = y_train_tensor[i+window_size]  # Next entry as target output

            x_seq_list.append(x_window)
            y_seq_list.append(y_window)

        # Concatenate the lists into tensors
        x_seq = torch.stack(x_seq_list)
        y_seq = torch.stack(y_seq_list)

        return x_seq, y_seq



    # Training loop
    for epoch in range(num_epochs):
        # Clear gradients
        optimizer.zero_grad()

        # Split the training data into sliding windows
        x_train_seq, y_train_seq = split_data_with_sliding_window(x_train_tensor, y_train_tensor, window_size)

        # Forward pass
        outputs = model(x_train_seq)

        # Calculate loss
        loss = criterion(outputs, y_train_seq)

        # Backward pass and optimization
        loss.backward()

        optimizer.step()

        scheduler.step()



                # Monitor gradients
        if epoch % 100 == 0:
            ("Gradients:")    # print("Gradients:")
            for name, param in model.named_parameters():
                if param.grad is not None:
                    (f"{name}: {param.grad.norm().item()}") # print(f"{name}: {param.grad.norm().item()}")

                # Monitor Learning Rate

        current_lr = optimizer.param_groups[0]['lr']
        # print(f"Epoch {epoch+1}/{num_epochs}, Learning Rate: {current_lr}")



        # Validate the model
        with torch.no_grad():
            # Split validation data into sliding windows
            x_val_seq, y_val_seq = split_data_with_sliding_window(x_val_tensor, y_val_tensor, window_size)

            # Forward pass for validation
            val_outputs = model(x_val_seq)

            # Calculate validation loss
            val_loss = criterion(val_outputs, y_val_seq)

            #Update the scheduler within the training loop after calculating the validation loss:
            scheduler.step(val_loss)

            # Check for early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                counter = 0
            else:
                counter += 1
                if counter >= patience:
                    print(f"Early stopping at epoch {epoch} with validation loss {val_loss}.")
                    break

        # Print progress
        if epoch % 100 == 0:
            print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {loss.item():.0f}, Val Loss: {val_loss.item():.0f},  Lear. Rate: {current_lr:.5f}, Train Grad.: {param.grad.norm().item():.1f}")

    # After the training loop, you can evaluate the model on the test data
    # Split test data into sliding windows
    x_test_seq, y_test_seq = split_data_with_sliding_window(x_test_tensor, y_test_tensor, window_size)

    # Evaluate the model
    with torch.no_grad():
        test_outputs = model(x_test_seq)
        test_loss = criterion(test_outputs, y_test_seq)
        print(f"Test Loss: {test_loss.item()}")




Hyperparameters: input_size=7, hidden_size=5, num_layers=2, learning_rate=0.0005, window_size=20, dropout_prob=0, weight_decay=0, factor=0.5, patience=10, out_channels=16
Epoch 1/150000, Train Loss: 29999, Val Loss: 30497,  Lear. Rate: 0.00050, Train Grad.: 224.4


KeyboardInterrupt: 

In [None]:
import torch
import torch.nn as nn
import itertools
import random
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np  # Added this line


# Define the hyperparameters to search over
input_sizes = [7]

hidden_sizes = [5]
num_layers_list = [2]

learning_rates = [0.0005]

window_sizes = [20]
dropout_probs = [0]
weight_decays = [0]
factors = [0.5]
patience_lr = [10]

num_epochs = 150000
patience = 10  # Number of epochs to wait for improvement

def set_random_seeds(seed):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False




# Combine hyperparameters into a list of tuples
hyperparameter_combinations = list(itertools.product(input_sizes, hidden_sizes, num_layers_list, learning_rates, window_sizes, dropout_probs, weight_decays, factors, patience_lr))

# Walk-forward validation training with sliding window for each hyperparameter combination
for hyperparams in hyperparameter_combinations:
    class CNNLSTMModel(nn.Module):
        def __init__(self, input_size, hidden_size, num_layers, learning_rate, window_size, dropout_prob=0.5):
            super(CNNLSTMModel, self).__init__()
            self.hidden_size = hidden_size
            self.num_layers = num_layers
            self.window_size = window_size

            # Define the convolutional layer
            self.conv1 = nn.Conv1d(in_channels=input_size, out_channels=16, kernel_size=3, padding=1)
            self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, padding=1)

            # Define the LSTM layer
            self.lstm = nn.LSTM(input_size=32, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout_prob)

            # Define the dropout layer
            self.dropout = nn.Dropout(dropout_prob)

            # Define the fully connected layer
            self.fc = nn.Linear(hidden_size, 1)

            self.learning_rate = learning_rate

        def forward(self, x):
            # Apply convolutional layers
            x = x.permute(0, 2, 1)  # Permute dimensions for Conv1d input
            x = F.relu(self.conv1(x))
            x = F.relu(self.conv2(x))
            x = x.permute(0, 2, 1)  # Restore dimensions

            # Apply LSTM
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
            c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
            lstm_out, _ = self.lstm(x, (h0, c0))

            # Apply dropout
            lstm_out = self.dropout(lstm_out)

            # Apply fully connected layer
            out = self.fc(lstm_out[:, -1, :])

            return out









    set_random_seeds(42)

    input_size, hidden_size, num_layers, learning_rate, window_size, dropout_prob, weight_decay, factor, patience_lr = hyperparams


    print(f"Hyperparameters: input_size={input_size}, hidden_size={hidden_size}, num_layers={num_layers}, learning_rate={learning_rate}, window_size={window_size}, dropout_prob={dropout_prob}, weight_decay={weight_decay}, factor={factor}, patience={patience}")

    # Initialize the model
    model = CNNLSTMModel(input_size, hidden_size, num_layers, learning_rate, window_size, dropout_prob=0.5)

    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Initialize the scheduler after defining the optimizer
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)


    best_val_loss = float('inf')
    counter = 0


    # segment sequential data into smaller windows using a sliding window approach,
    # ensuring temporal coherence, and returns the windows as tensors for training sequential models.

    # segment sequential data into smaller windows using a sliding window approach,
    # ensuring temporal coherence, and returns the windows as tensors for training sequential models.
    def split_data_with_sliding_window(x_train_tensor, y_train_tensor, window_size):
        # Check if the lengths of x_train_tensor and y_train_tensor match
        if len(x_train_tensor) != len(y_train_tensor):
            raise ValueError("Lengths of x_train_tensor and y_train_tensor must match.")

        # Initialize lists to store sequential windows
        x_seq_list, y_seq_list = [], []

        # Iterate through the data with the specified window size
        for i in range(len(x_train_tensor) - window_size):
            # Extract a window of input features and target output
            x_window = x_train_tensor[i:i+window_size]
            y_window = y_train_tensor[i+window_size]  # Next entry as target output

            x_seq_list.append(x_window)
            y_seq_list.append(y_window)

        # Concatenate the lists into tensors
        x_seq = torch.stack(x_seq_list)
        y_seq = torch.stack(y_seq_list)

        return x_seq, y_seq



    # Training loop
    for epoch in range(num_epochs):
        # Clear gradients
        optimizer.zero_grad()

        # Split the training data into sliding windows
        x_train_seq, y_train_seq = split_data_with_sliding_window(x_train_tensor, y_train_tensor, window_size)

        # Forward pass
        outputs = model(x_train_seq)

        # Calculate loss
        loss = criterion(outputs, y_train_seq)

        # Backward pass and optimization
        loss.backward()

        optimizer.step()

        scheduler.step()



                # Monitor gradients
        if epoch % 100 == 0:
            ("Gradients:")    # print("Gradients:")
            for name, param in model.named_parameters():
                if param.grad is not None:
                    (f"{name}: {param.grad.norm().item()}") # print(f"{name}: {param.grad.norm().item()}")

                # Monitor Learning Rate

        current_lr = optimizer.param_groups[0]['lr']
        # print(f"Epoch {epoch+1}/{num_epochs}, Learning Rate: {current_lr}")



        # Validate the model
        with torch.no_grad():
            # Split validation data into sliding windows
            x_val_seq, y_val_seq = split_data_with_sliding_window(x_val_tensor, y_val_tensor, window_size)

            # Forward pass for validation
            val_outputs = model(x_val_seq)

            # Calculate validation loss
            val_loss = criterion(val_outputs, y_val_seq)

            #Update the scheduler within the training loop after calculating the validation loss:
            scheduler.step(val_loss)

            # Check for early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                counter = 0
            else:
                counter += 1
                if counter >= patience:
                    print(f"Early stopping at epoch {epoch} with validation loss {val_loss}.")
                    break

        # Print progress
        if epoch % 100 == 0:
            print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {loss.item():.0f}, Val Loss: {val_loss.item():.0f},  Lear. Rate: {current_lr:.5f}, Train Grad.: {param.grad.norm().item():.1f}")

    # After the training loop, you can evaluate the model on the test data
    # Split test data into sliding windows
    x_test_seq, y_test_seq = split_data_with_sliding_window(x_test_tensor, y_test_tensor, window_size)

    # Evaluate the model
    with torch.no_grad():
        test_outputs = model(x_test_seq)
        test_loss = criterion(test_outputs, y_test_seq)
        print(f"Test Loss: {test_loss.item()}")




Hyperparameters: input_size=7, hidden_size=5, num_layers=2, learning_rate=0.0005, window_size=20, dropout_prob=0, weight_decay=0, factor=0.5, patience=10
Epoch 1/150000, Train Loss: 29999, Val Loss: 30497,  Lear. Rate: 0.00050, Train Grad.: 224.4
Epoch 101/150000, Train Loss: 29838, Val Loss: 30335,  Lear. Rate: 0.00045, Train Grad.: 222.9
Epoch 201/150000, Train Loss: 29616, Val Loss: 30111,  Lear. Rate: 0.00045, Train Grad.: 221.0
Early stopping at epoch 292 with validation loss 29961.986328125.
Test Loss: 29711.775390625
