In [1]:
import os
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, root_mean_squared_error

logical_cores = os.cpu_count()
print(f"Number of logical CPU cores: {logical_cores}")

num_workers = max(1, logical_cores // 2)
print(f"Number of workers set to: {num_workers}")

def is_gpu_available():
    try:
        return torch.cuda.is_available()
    except ImportError:
        return False

gpu_available = is_gpu_available()
print(f"GPU available: {gpu_available}")

Number of logical CPU cores: 16
Number of workers set to: 8
GPU available: True


In [2]:
def load_or_create_ticker_df(csv_file_path):
    """
    Load the existing ticker DataFrame from a CSV file if it exists,
    otherwise create a new DataFrame with predefined column types.
    Ensure the DataFrame has the specified columns, add any missing columns,
    and rearrange the columns in alphabetical order, excluding 'Ticker_Symbol'.

    Args:
    csv_file_path (str): The path to the CSV file.

    Returns:
    pd.DataFrame: The loaded or newly created DataFrame.
    """
    # Define the column types
    column_types = {
        "Ticker_Symbol": str,
        "Best_Cov1D_Classification_Accuracy": float,
        "Best_Cov1D_Classification_Path": str,
        "Best_Cov1D_Regression_RMSE": float,
        "Best_Cov1D_Regression_Path": str,
        "Best_LSTM_Classification_Accuracy": float,
        "Best_LSTM_Classification_Path": str,
        "Best_LSTM_Regression_RMSE": float,
        "Best_LSTM_Regression_Path": str,
        "Best_Transformer_Classification_Accuracy": float,
        "Best_TransformerClassification_Path": str,
        "Best_Transformer_Regression_RMSE": float,
        "Best_Transformer_Regression_Path": str,
        "Best_XGBClassifier_Classification_Accuracy": float,
        "Best_XGBClassifier_Classification_Path": str,
        "Best_XGBRegressor_Regression_RMSE": float,
        "Best_XGBRegressor_Regression_Path": str
    }


    if os.path.isfile(csv_file_path):
        # Load the existing file into a DataFrame
        ticker_df = pd.read_csv(csv_file_path)
        
        # Ensure all specified columns are present
        for column, dtype in column_types.items():
            if column not in ticker_df.columns:
                ticker_df[column] = pd.Series(dtype=dtype)
        
        # Reorder columns alphabetically, excluding 'Ticker_Symbol'
        columns = ["Ticker_Symbol"] + sorted([col for col in ticker_df.columns if col != "Ticker_Symbol"])
        ticker_df = ticker_df[columns]
    else:
        # Create a new DataFrame with the specified column types
        ticker_df = pd.DataFrame(columns=column_types.keys()).astype(column_types)
    
    return ticker_df

csv_file_path = "../ticker-best-model.csv"
ticker_df = load_or_create_ticker_df(csv_file_path)

In [3]:
os.makedirs('../models/pytorch/conv1d-classification/', exist_ok=True)
os.makedirs('../models/pytorch/conv1d-regression/', exist_ok=True)
os.makedirs('../models/pytorch/lstm-classification/', exist_ok=True)
os.makedirs('../models/pytorch/lstm-regression/', exist_ok=True)
os.makedirs('../models/pytorch/transformer-classification/', exist_ok=True)
os.makedirs('../models/pytorch/transformer-regression/', exist_ok=True)


path = '../data/train'
ticker_list = []

if os.path.exists(path):
    ticker_list = [os.path.splitext(f)[0] for f in os.listdir(path) if f.endswith('.csv')]


In [4]:
def preprocess_data(df):
    if df.isna().sum().sum() > 0 or df.isin([float('inf'), float('-inf')]).sum().sum() > 0:
        df = df.replace([float('inf'), float('-inf')], float('nan')).dropna()

    df = df.dropna()

    columns_to_drop = [
        'NEXT_DAY_CLOSEPRICE', 'DAILY_CLOSEPRICE_CHANGE', 'CLOSEPRICE_DIRECTION',
        'DAILY_MIDPRICE', 'NEXT_DAY_MIDPRICE', 'DAILY_MIDPRICE_CHANGE', 'MIDPRICE_DIRECTION', 'Date'
    ]
    X = df.drop(columns=columns_to_drop)
    y_classifier = (df['DAILY_CLOSEPRICE_CHANGE'] > 0).astype(int)
    y_regressor = df['DAILY_CLOSEPRICE_CHANGE']

    return X, y_classifier, y_regressor

In [5]:
class Conv1ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, l2_lambda=0.01, dropout_rate=0.5):
        super(Conv1ResidualBlock, self).__init__()
        
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding='same')
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, stride=1, padding='same')
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(dropout_rate)
        
        nn.init.kaiming_normal_(self.conv1.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.conv2.weight, nonlinearity='relu')
        nn.init.zeros_(self.conv1.bias)
        nn.init.zeros_(self.conv2.bias)
        
        self.l2_lambda = l2_lambda

        if in_channels != out_channels:
            self.residual_conv = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=1)
        else:
            self.residual_conv = nn.Identity()

    def forward(self, x):
        residual = self.residual_conv(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        out = self.relu(out)
        
        return out

class Conv1DModel(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, num_blocks=1, l2_lambda=0.01, dropout_rate=0.5, classification=True):
        super(Conv1DModel, self).__init__()
        self.blocks = nn.Sequential(
            Conv1ResidualBlock(in_channels, out_channels, kernel_size, l2_lambda=l2_lambda, dropout_rate=dropout_rate),
            *[Conv1ResidualBlock(out_channels, out_channels, kernel_size, l2_lambda=l2_lambda, dropout_rate=dropout_rate) for _ in range(num_blocks-1)]
        )
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)  # Global average pooling for 1D
        self.fc = nn.Linear(out_channels, 2 if classification else 1)
        self.classification = classification

    def forward(self, x):
        out = self.blocks(x)
        out = self.global_avg_pool(out)
        out = out.view(out.size(0), -1)  # Flatten the tensor
        out = self.fc(out)
        if self.classification:
            out = F.log_softmax(out, dim=1)
        return out

In [6]:
def conv1d_classification_hyperparameters_search(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy()
    
    # Reshape X for Conv1D
    NUM_CHANNELS = 1
    X = X.reshape((X.shape[0], NUM_CHANNELS, -1))  # Reshape for Conv1D: (batch_size, num_channels, sequence_length)
    
    # Split data into training and validation sets
    TEST_SIZE = 0.2
    RANDOM_STATE = 42
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
    
    def conv1d_objective(trial):
        in_channels = X_train.shape[1]
        out_channels = trial.suggest_int('out_channels', 16, 128)
        kernel_size = trial.suggest_int('kernel_size', 3, 7)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        l2_lambda = trial.suggest_float('l2_lambda', 1e-5, 1e-2)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = Conv1DModel(in_channels, out_channels, kernel_size, num_blocks, l2_lambda, dropout_rate, classification=True).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2), weight_decay=l2_lambda)
        criterion = nn.CrossEntropyLoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.long).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.long).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_accuracy = accuracy_score(target_val.cpu(), val_pred.cpu())
            return val_accuracy  # Return accuracy directly
    
    study = optuna.create_study(direction='maximize')  # Change direction to 'maximize'
    study.optimize(conv1d_objective, n_trials=100)
    
    best_model = Conv1DModel(X.shape[1], study.best_params['out_channels'], study.best_params['kernel_size'], study.best_params['num_blocks'], study.best_params['l2_lambda'], study.best_params['dropout_rate'], classification=True).to(device)
    model_path = f'../models/pytorch/conv1d-classification/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_Cov1D_Classification_Accuracy'
    path_col = 'Best_Cov1D_Classification_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value > current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
        else:
            print(f"Previous model accuracy: {current_score} is better for {ticker} than accuracy: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
    
    return ticker_df

In [7]:
def conv1d_regression_hyperparameters_search(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy().reshape(-1, 1)
    
    # Reshape X for Conv1D
    NUM_CHANNELS = 1
    X = X.reshape((X.shape[0], NUM_CHANNELS, -1))  # Reshape for Conv1D: (batch_size, num_channels, sequence_length)
    
    # Split data into training and validation sets
    TEST_SIZE = 0.2
    RANDOM_STATE = 42
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
    
    def conv1d_objective(trial):
        in_channels = X_train.shape[1]
        out_channels = trial.suggest_int('out_channels', 16, 128)
        kernel_size = trial.suggest_int('kernel_size', 3, 7)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        l2_lambda = trial.suggest_float('l2_lambda', 1e-5, 1e-2)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = Conv1DModel(in_channels, out_channels, kernel_size, num_blocks, l2_lambda, dropout_rate, classification=False).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2), weight_decay=l2_lambda)
        criterion = nn.MSELoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.float32).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.float32).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_rmse = root_mean_squared_error(target_val.cpu(), val_pred.cpu())
            return val_rmse
    
    study = optuna.create_study(direction='minimize')
    study.optimize(conv1d_objective, n_trials=100)
    
    best_model = Conv1DModel(X.shape[1], study.best_params['out_channels'], study.best_params['kernel_size'], study.best_params['num_blocks'], study.best_params['l2_lambda'], study.best_params['dropout_rate'], classification=False).to(device)
    model_path = f'../models/pytorch/conv1d-regression/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_Cov1D_Regression_RMSE'
    path_col = 'Best_Cov1D_Regression_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value < current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
        else:
            print(f"Previous model RMSE: {current_score} is better for {ticker} than RMSE: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
    
    return ticker_df

In [8]:
class LSTMResidualBlock(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1, dropout_rate=0.5):
        super(LSTMResidualBlock, self).__init__()
        
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        self.dropout = nn.Dropout(dropout_rate)
        
        if input_size != hidden_size:
            self.residual_fc = nn.Linear(input_size, hidden_size)
        else:
            self.residual_fc = nn.Identity()

    def forward(self, x):
        residual = self.residual_fc(x)
        out, _ = self.lstm(x)
        out = self.dropout(out)
        out += residual
        return out

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_blocks=1, num_layers=1, dropout_rate=0.5, classification=True):
        super(LSTMModel, self).__init__()
        self.blocks = nn.Sequential(
            LSTMResidualBlock(input_size, hidden_size, num_layers=num_layers, dropout_rate=dropout_rate),
            *[LSTMResidualBlock(hidden_size, hidden_size, num_layers=num_layers, dropout_rate=dropout_rate) for _ in range(num_blocks-1)]
        )
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)  # Global average pooling for 1D
        self.fc = nn.Linear(hidden_size, 2 if classification else 1)
        self.classification = classification

    def forward(self, x):
        out = self.blocks(x)
        out = out.mean(dim=1)  # Global average pooling
        out = self.fc(out)
        if self.classification:
            out = F.log_softmax(out, dim=1)
        return out

In [9]:
def lstm_classification_hyperparameters_search(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy()
    
    def create_sequences(X, y, sequence_length):
        sequences_X, sequences_y = [], []
        for i in range(len(X) - sequence_length + 1):
            sequences_X.append(X[i:i + sequence_length])
            sequences_y.append(y[i + sequence_length - 1])
        return np.array(sequences_X), np.array(sequences_y)
    
    def lstm_objective(trial):
        sequence_length = trial.suggest_categorical('sequence_length', [5, 7, 14, 20, 30])
        
        # Create sequences
        X_seq, y_seq = create_sequences(X, y, sequence_length)
        
        # Split data into training, validation, and test sets
        TEST_SIZE = 0.2
        VAL_SIZE = 0.1
        RANDOM_STATE = 42
        
        X_train, X_temp, y_train, y_temp = train_test_split(X_seq, y_seq, test_size=TEST_SIZE + VAL_SIZE, random_state=RANDOM_STATE)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=TEST_SIZE / (TEST_SIZE + VAL_SIZE), random_state=RANDOM_STATE)
        
        input_size = X_train.shape[2]  # Number of features
        hidden_size = trial.suggest_int('hidden_size', 16, 128)
        num_layers = trial.suggest_int('num_layers', 1, 3)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = LSTMModel(input_size, hidden_size, num_blocks, num_layers, dropout_rate, classification=True).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2))
        criterion = nn.CrossEntropyLoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.long).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.long).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_accuracy = accuracy_score(target_val.cpu(), val_pred.cpu())
            return val_accuracy  # Return accuracy directly
    
    study = optuna.create_study(direction='maximize')
    study.optimize(lstm_objective, n_trials=100)
    
    # Reshape X to match the best sequence length
    best_sequence_length = study.best_params['sequence_length']
    X_seq, y_seq = create_sequences(X, y, best_sequence_length)
    
    best_model = LSTMModel(X_seq.shape[2], study.best_params['hidden_size'], study.best_params['num_blocks'], study.best_params['num_layers'], study.best_params['dropout_rate'], classification=True).to(device)
    model_path = f'../models/pytorch/lstm-classification/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_LSTM_Classification_Accuracy'
    path_col = 'Best_LSTM_Classification_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value > current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
        else:
            print(f"Previous model accuracy: {current_score} is better for {ticker} than accuracy: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
        
    return ticker_df

In [10]:
def lstm_regression_hyperparameters_search(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy().reshape(-1, 1)
    
    def create_sequences(X, y, sequence_length):
        sequences_X, sequences_y = [], []
        for i in range(len(X) - sequence_length + 1):
            sequences_X.append(X[i:i + sequence_length])
            sequences_y.append(y[i + sequence_length - 1])
        return np.array(sequences_X), np.array(sequences_y)
    
    def lstm_objective(trial):
        sequence_length = trial.suggest_categorical('sequence_length', [5, 7, 14, 20, 30])
        
        # Create sequences
        X_seq, y_seq = create_sequences(X, y, sequence_length)
        
        # Split data into training, validation, and test sets
        TEST_SIZE = 0.2
        VAL_SIZE = 0.1
        RANDOM_STATE = 42
        
        X_train, X_temp, y_train, y_temp = train_test_split(X_seq, y_seq, test_size=TEST_SIZE + VAL_SIZE, random_state=RANDOM_STATE)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=TEST_SIZE / (TEST_SIZE + VAL_SIZE), random_state=RANDOM_STATE)
        
        input_size = X_train.shape[2]  # Number of features
        hidden_size = trial.suggest_int('hidden_size', 16, 128)
        num_layers = trial.suggest_int('num_layers', 1, 3)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = LSTMModel(input_size, hidden_size, num_blocks, num_layers, dropout_rate, classification=False).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2))
        criterion = nn.MSELoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.float32).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.float32).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_rmse = root_mean_squared_error(target_val.cpu(), val_pred.cpu())
            return val_rmse
    
    study = optuna.create_study(direction='minimize')
    study.optimize(lstm_objective, n_trials=100)
    
    # Reshape X to match the best sequence length
    best_sequence_length = study.best_params['sequence_length']
    X_seq, y_seq = create_sequences(X, y, best_sequence_length)
    
    best_model = LSTMModel(X_seq.shape[2], study.best_params['hidden_size'], study.best_params['num_blocks'], study.best_params['num_layers'], study.best_params['dropout_rate'], classification=False).to(device)
    model_path = f'../models/pytorch/lstm-regression/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_LSTM_Regression_RMSE'
    path_col = 'Best_LSTM_Regression_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value < current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
        else:
            print(f"Previous model RMSE: {current_score} is better for {ticker} than RMSE: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
        
    return ticker_df

In [11]:
class Transformer(nn.Module):
    def __init__(self, input_dim, num_heads, num_layers, num_classes=1, classification=True):
        super(Transformer, self).__init__()
        encoder_layer = nn.TransformerEncoderLayer(d_model=input_dim, nhead=num_heads, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(input_dim, num_classes)
        self.classification = classification

    def forward(self, x):
        out = self.transformer_encoder(x)
        out = out.mean(dim=1)  # Global average pooling
        out = self.fc(out)
        if self.classification:
            out = torch.softmax(out, dim=1)  # Use softmax for classification
        return out

In [12]:
def process_transformer_classification(X, y, gpu_available):
    device = torch.device("cuda" if gpu_available and torch.cuda.is_available() else "cpu")
    
    # Convert DataFrame to tensors
    X_tensor = torch.tensor(X.values, dtype=torch.float32)
    y_tensor = torch.tensor(y.values, dtype=torch.long)
    
    # Split data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)
    
    # Create DataLoader
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    
    def objective(trial):
        input_dim = X.shape[1]
        num_heads = trial.suggest_int('num_heads', 2, 8)
        num_layers = trial.suggest_int('num_layers', 1, 4)
        lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
        
        # Ensure input_dim is divisible by num_heads
        while input_dim % num_heads != 0:
            num_heads = trial.suggest_int('num_heads', 2, 8)
        
        model = Transformer(input_dim=input_dim, num_heads=num_heads, num_layers=num_layers, num_classes=len(y.unique()), classification=True).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)
        
        # Training loop
        model.train()
        for epoch in range(10):  # Number of epochs can be a hyperparameter too
            for batch_X, batch_y in train_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        
        # Validation loop
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                _, predicted = torch.max(outputs.data, 1)
                total += batch_y.size(0)
                correct += (predicted == batch_y).sum().item()
        
        accuracy = correct / total

        # Print trial information
        print(f"Trial {trial.number}: Value: {accuracy}, Params: {trial.params}")
        return accuracy
    
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=50)
    
    print("Best hyperparameters: ", study.best_params)
    print("Best accuracy: ", study.best_value)

In [13]:
for ticker in ticker_list:
    dataframe = pd.read_csv(f"../data/train/{ticker}.csv")
    X, y_classifier, y_regressor = preprocess_data(dataframe)
    ticker_df = conv1d_classification_hyperparameters_search(X, y_classifier, gpu_available, ticker, ticker_df, csv_file_path)
    ticker_df = conv1d_regression_hyperparameters_search(X, y_regressor, gpu_available, ticker, ticker_df, csv_file_path)
    ticker_df = lstm_classification_hyperparameters_search(X, y_classifier, gpu_available, ticker, ticker_df, csv_file_path)
    ticker_df = lstm_regression_hyperparameters_search(X, y_regressor, gpu_available, ticker, ticker_df, csv_file_path)
    #process_transformer_classification(X, y_classifier, gpu_available)
    #break

[I 2024-09-08 09:05:25,428] A new study created in memory with name: no-name-9fc83e4d-0084-4174-9bee-67c3b7a7d41d
[I 2024-09-08 09:05:27,979] Trial 0 finished with value: 0.4647887323943662 and parameters: {'out_channels': 31, 'kernel_size': 3, 'num_blocks': 3, 'l2_lambda': 0.005878282458810926, 'dropout_rate': 0.477196257133226, 'lr': 0.000448312406917989}. Best is trial 0 with value: 0.4647887323943662.
  return F.conv1d(input, weight, bias, self.stride,
[I 2024-09-08 09:05:28,708] Trial 1 finished with value: 0.49295774647887325 and parameters: {'out_channels': 50, 'kernel_size': 4, 'num_blocks': 5, 'l2_lambda': 0.00022854338185612628, 'dropout_rate': 0.2316638538035515, 'lr': 0.006897837910278271}. Best is trial 1 with value: 0.49295774647887325.
[I 2024-09-08 09:05:28,962] Trial 2 finished with value: 0.5070422535211268 and parameters: {'out_channels': 117, 'kernel_size': 7, 'num_blocks': 1, 'l2_lambda': 0.0037102374780775267, 'dropout_rate': 0.4398802780083345, 'lr': 0.0085743908

Best model for CL=F saved with accuracy: 0.5492957746478874


[I 2024-09-08 09:06:08,065] Trial 1 finished with value: 1.7351794372546436 and parameters: {'out_channels': 58, 'kernel_size': 4, 'num_blocks': 1, 'l2_lambda': 0.0037572819685197747, 'dropout_rate': 0.17329723403440933, 'lr': 0.004821046621384645}. Best is trial 0 with value: 1.7351794372546436.
[I 2024-09-08 09:06:08,344] Trial 2 finished with value: 1.7351794372546436 and parameters: {'out_channels': 22, 'kernel_size': 5, 'num_blocks': 3, 'l2_lambda': 0.007148292524700958, 'dropout_rate': 0.3220500080682412, 'lr': 0.002848532490712399}. Best is trial 0 with value: 1.7351794372546436.
[I 2024-09-08 09:06:08,500] Trial 3 finished with value: 1.7351794372546436 and parameters: {'out_channels': 79, 'kernel_size': 3, 'num_blocks': 1, 'l2_lambda': 0.0010195401815046455, 'dropout_rate': 0.27545090045764187, 'lr': 0.0038039641837647694}. Best is trial 0 with value: 1.7351794372546436.
[I 2024-09-08 09:06:08,730] Trial 4 finished with value: 1.7351794372546436 and parameters: {'out_channels'

Best model for CL=F saved with RMSE: 1.7351794372546436


[I 2024-09-08 09:06:55,010] Trial 0 finished with value: 0.4117647058823529 and parameters: {'sequence_length': 14, 'hidden_size': 120, 'num_layers': 3, 'num_blocks': 3, 'dropout_rate': 0.19341069917363474, 'lr': 0.0019235599357162615}. Best is trial 0 with value: 0.4117647058823529.
[I 2024-09-08 09:06:55,463] Trial 1 finished with value: 0.5428571428571428 and parameters: {'sequence_length': 5, 'hidden_size': 75, 'num_layers': 3, 'num_blocks': 4, 'dropout_rate': 0.3075865523758001, 'lr': 0.00887102641653462}. Best is trial 1 with value: 0.5428571428571428.
[I 2024-09-08 09:06:55,772] Trial 2 finished with value: 0.5 and parameters: {'sequence_length': 30, 'hidden_size': 23, 'num_layers': 3, 'num_blocks': 2, 'dropout_rate': 0.41820098689762664, 'lr': 0.0029100992753057077}. Best is trial 1 with value: 0.5428571428571428.
[I 2024-09-08 09:06:56,116] Trial 3 finished with value: 0.42857142857142855 and parameters: {'sequence_length': 5, 'hidden_size': 56, 'num_layers': 1, 'num_blocks': 

Best model for CL=F saved with accuracy: 0.6857142857142857


[I 2024-09-08 09:07:32,576] Trial 0 finished with value: 1.4950957599656645 and parameters: {'sequence_length': 20, 'hidden_size': 49, 'num_layers': 3, 'num_blocks': 5, 'dropout_rate': 0.4682404081731122, 'lr': 0.006165889735673943}. Best is trial 0 with value: 1.4950957599656645.
[I 2024-09-08 09:07:32,982] Trial 1 finished with value: 1.4897425962016029 and parameters: {'sequence_length': 7, 'hidden_size': 59, 'num_layers': 2, 'num_blocks': 4, 'dropout_rate': 0.2765101094367679, 'lr': 0.002592533062139507}. Best is trial 1 with value: 1.4897425962016029.
[I 2024-09-08 09:07:33,192] Trial 2 finished with value: 1.131356216342103 and parameters: {'sequence_length': 14, 'hidden_size': 45, 'num_layers': 3, 'num_blocks': 1, 'dropout_rate': 0.44819555310937786, 'lr': 0.0024548485378424172}. Best is trial 2 with value: 1.131356216342103.
[I 2024-09-08 09:07:33,693] Trial 3 finished with value: 1.6608859950266313 and parameters: {'sequence_length': 5, 'hidden_size': 61, 'num_layers': 2, 'num

Best model for CL=F saved with RMSE: 1.131356216342103


[I 2024-09-08 09:08:05,918] Trial 0 finished with value: 0.4507042253521127 and parameters: {'out_channels': 98, 'kernel_size': 7, 'num_blocks': 5, 'l2_lambda': 0.0004923745061358378, 'dropout_rate': 0.36635296225359393, 'lr': 0.00041875488495562877}. Best is trial 0 with value: 0.4507042253521127.
[I 2024-09-08 09:08:06,427] Trial 1 finished with value: 0.5492957746478874 and parameters: {'out_channels': 127, 'kernel_size': 4, 'num_blocks': 2, 'l2_lambda': 0.0017785790091365255, 'dropout_rate': 0.2674984862435076, 'lr': 0.006376259381408898}. Best is trial 1 with value: 0.5492957746478874.
[I 2024-09-08 09:08:06,928] Trial 2 finished with value: 0.5492957746478874 and parameters: {'out_channels': 115, 'kernel_size': 7, 'num_blocks': 2, 'l2_lambda': 0.002622676930585504, 'dropout_rate': 0.36481401821601156, 'lr': 0.0056121396367368875}. Best is trial 1 with value: 0.5492957746478874.
[I 2024-09-08 09:08:07,075] Trial 3 finished with value: 0.5492957746478874 and parameters: {'out_chann

Best model for NVDA saved with accuracy: 0.5492957746478874


[I 2024-09-08 09:08:54,611] Trial 0 finished with value: 1.5966128023133297 and parameters: {'out_channels': 58, 'kernel_size': 7, 'num_blocks': 2, 'l2_lambda': 0.006430708899768613, 'dropout_rate': 0.1244581343445081, 'lr': 0.009505228890606648}. Best is trial 0 with value: 1.5966128023133297.
[I 2024-09-08 09:08:55,024] Trial 1 finished with value: 1.5966128023133297 and parameters: {'out_channels': 98, 'kernel_size': 4, 'num_blocks': 2, 'l2_lambda': 0.002107355609998692, 'dropout_rate': 0.1446434260502099, 'lr': 0.009094508925517766}. Best is trial 0 with value: 1.5966128023133297.
[I 2024-09-08 09:08:55,891] Trial 2 finished with value: 1.5966128023133297 and parameters: {'out_channels': 77, 'kernel_size': 3, 'num_blocks': 5, 'l2_lambda': 0.003657960303297945, 'dropout_rate': 0.4938071688826685, 'lr': 0.00010060625880050687}. Best is trial 0 with value: 1.5966128023133297.
[I 2024-09-08 09:08:56,130] Trial 3 finished with value: 1.5966128023133297 and parameters: {'out_channels': 1

Best model for NVDA saved with RMSE: 1.5966128023133297


[I 2024-09-08 09:09:49,443] Trial 0 finished with value: 0.47058823529411764 and parameters: {'sequence_length': 14, 'hidden_size': 127, 'num_layers': 1, 'num_blocks': 4, 'dropout_rate': 0.2942486200841321, 'lr': 0.004278302624611784}. Best is trial 0 with value: 0.47058823529411764.
[I 2024-09-08 09:09:49,842] Trial 1 finished with value: 0.47058823529411764 and parameters: {'sequence_length': 14, 'hidden_size': 72, 'num_layers': 2, 'num_blocks': 4, 'dropout_rate': 0.4929807173537588, 'lr': 0.004066791279887552}. Best is trial 0 with value: 0.47058823529411764.
[I 2024-09-08 09:09:50,145] Trial 2 finished with value: 0.5294117647058824 and parameters: {'sequence_length': 14, 'hidden_size': 66, 'num_layers': 1, 'num_blocks': 4, 'dropout_rate': 0.33510797023307115, 'lr': 0.002294406496804298}. Best is trial 2 with value: 0.5294117647058824.
[I 2024-09-08 09:09:50,996] Trial 3 finished with value: 0.47058823529411764 and parameters: {'sequence_length': 14, 'hidden_size': 103, 'num_layers

Best model for NVDA saved with accuracy: 0.6857142857142857


[I 2024-09-08 09:10:23,774] Trial 1 finished with value: 2.475960080743807 and parameters: {'sequence_length': 14, 'hidden_size': 126, 'num_layers': 2, 'num_blocks': 1, 'dropout_rate': 0.3118849949968918, 'lr': 0.004963178608690177}. Best is trial 0 with value: 1.8155752703415142.
[I 2024-09-08 09:10:24,160] Trial 2 finished with value: 2.475960080743807 and parameters: {'sequence_length': 14, 'hidden_size': 117, 'num_layers': 1, 'num_blocks': 5, 'dropout_rate': 0.13765187011327196, 'lr': 0.008646532862807164}. Best is trial 0 with value: 1.8155752703415142.
[I 2024-09-08 09:10:24,484] Trial 3 finished with value: 1.8155752703415142 and parameters: {'sequence_length': 20, 'hidden_size': 65, 'num_layers': 3, 'num_blocks': 2, 'dropout_rate': 0.28906109607720115, 'lr': 0.007577526320251916}. Best is trial 0 with value: 1.8155752703415142.
[I 2024-09-08 09:10:24,674] Trial 4 finished with value: 1.0408448421175824 and parameters: {'sequence_length': 7, 'hidden_size': 75, 'num_layers': 2, '

Best model for NVDA saved with RMSE: 1.0408448421175824


[I 2024-09-08 09:10:58,246] Trial 0 finished with value: 0.47297297297297297 and parameters: {'out_channels': 94, 'kernel_size': 3, 'num_blocks': 1, 'l2_lambda': 0.007879386866542487, 'dropout_rate': 0.4561441155794924, 'lr': 0.007852674161875}. Best is trial 0 with value: 0.47297297297297297.
[I 2024-09-08 09:10:58,464] Trial 1 finished with value: 0.5135135135135135 and parameters: {'out_channels': 109, 'kernel_size': 7, 'num_blocks': 1, 'l2_lambda': 0.00015313110889491534, 'dropout_rate': 0.10639781597288356, 'lr': 0.0054453466136352565}. Best is trial 1 with value: 0.5135135135135135.
[I 2024-09-08 09:10:59,294] Trial 2 finished with value: 0.5135135135135135 and parameters: {'out_channels': 68, 'kernel_size': 6, 'num_blocks': 5, 'l2_lambda': 0.006799829441640593, 'dropout_rate': 0.1344752094131202, 'lr': 0.004507051709829072}. Best is trial 1 with value: 0.5135135135135135.
[I 2024-09-08 09:10:59,693] Trial 3 finished with value: 0.4864864864864865 and parameters: {'out_channels':

Best model for SGDUSD=X saved with accuracy: 0.6081081081081081


[I 2024-09-08 09:11:30,875] Trial 0 finished with value: 0.0018248550832419164 and parameters: {'out_channels': 21, 'kernel_size': 7, 'num_blocks': 2, 'l2_lambda': 0.003330348221730771, 'dropout_rate': 0.19926951745080446, 'lr': 0.009851111856173262}. Best is trial 0 with value: 0.0018248550832419164.
[I 2024-09-08 09:11:31,450] Trial 1 finished with value: 0.0018248550832419164 and parameters: {'out_channels': 90, 'kernel_size': 3, 'num_blocks': 3, 'l2_lambda': 0.009207144505495922, 'dropout_rate': 0.47988710094045983, 'lr': 0.005962446670046648}. Best is trial 0 with value: 0.0018248550832419164.
[I 2024-09-08 09:11:31,857] Trial 2 finished with value: 0.0018248550832419164 and parameters: {'out_channels': 61, 'kernel_size': 6, 'num_blocks': 3, 'l2_lambda': 0.007276736222964848, 'dropout_rate': 0.1835185077771938, 'lr': 0.008729442240789137}. Best is trial 0 with value: 0.0018248550832419164.
[I 2024-09-08 09:11:31,999] Trial 3 finished with value: 0.0018248550832419164 and parameter

Best model for SGDUSD=X saved with RMSE: 0.0018248550832419164


[I 2024-09-08 09:12:42,262] Trial 0 finished with value: 0.5277777777777778 and parameters: {'sequence_length': 7, 'hidden_size': 45, 'num_layers': 3, 'num_blocks': 3, 'dropout_rate': 0.45942812733833305, 'lr': 0.001648656752920405}. Best is trial 0 with value: 0.5277777777777778.
[I 2024-09-08 09:12:42,508] Trial 1 finished with value: 0.42857142857142855 and parameters: {'sequence_length': 20, 'hidden_size': 21, 'num_layers': 2, 'num_blocks': 2, 'dropout_rate': 0.2664697832076607, 'lr': 0.0053739787462351064}. Best is trial 0 with value: 0.5277777777777778.
[I 2024-09-08 09:12:42,706] Trial 2 finished with value: 0.5277777777777778 and parameters: {'sequence_length': 5, 'hidden_size': 99, 'num_layers': 3, 'num_blocks': 1, 'dropout_rate': 0.298964117909721, 'lr': 0.0005136066092684137}. Best is trial 0 with value: 0.5277777777777778.
[I 2024-09-08 09:12:42,902] Trial 3 finished with value: 0.4444444444444444 and parameters: {'sequence_length': 5, 'hidden_size': 28, 'num_layers': 3, 'n

Best model for SGDUSD=X saved with accuracy: 0.6857142857142857


[I 2024-09-08 09:13:58,533] Trial 0 finished with value: 0.0015725821722058916 and parameters: {'sequence_length': 7, 'hidden_size': 85, 'num_layers': 3, 'num_blocks': 3, 'dropout_rate': 0.38119870122478683, 'lr': 0.00439047525636902}. Best is trial 0 with value: 0.0015725821722058916.
[I 2024-09-08 09:13:59,255] Trial 1 finished with value: 0.0015725821722058916 and parameters: {'sequence_length': 7, 'hidden_size': 109, 'num_layers': 2, 'num_blocks': 5, 'dropout_rate': 0.3136657369083804, 'lr': 0.0020141609884482548}. Best is trial 0 with value: 0.0015725821722058916.
[I 2024-09-08 09:13:59,651] Trial 2 finished with value: 0.0018685091756578448 and parameters: {'sequence_length': 20, 'hidden_size': 60, 'num_layers': 3, 'num_blocks': 3, 'dropout_rate': 0.18928259296833994, 'lr': 0.006332016325420031}. Best is trial 0 with value: 0.0015725821722058916.
[I 2024-09-08 09:14:00,345] Trial 3 finished with value: 0.0018685091756578448 and parameters: {'sequence_length': 20, 'hidden_size': 5

Best model for SGDUSD=X saved with RMSE: 0.0015725821722058916


[I 2024-09-08 09:14:55,175] Trial 0 finished with value: 0.5135135135135135 and parameters: {'out_channels': 58, 'kernel_size': 7, 'num_blocks': 3, 'l2_lambda': 0.003323410712186488, 'dropout_rate': 0.25381695800915455, 'lr': 0.006180513718725547}. Best is trial 0 with value: 0.5135135135135135.
[I 2024-09-08 09:14:56,231] Trial 1 finished with value: 0.5135135135135135 and parameters: {'out_channels': 99, 'kernel_size': 6, 'num_blocks': 2, 'l2_lambda': 0.0045566504757776774, 'dropout_rate': 0.24133469391641618, 'lr': 0.006670622921265912}. Best is trial 0 with value: 0.5135135135135135.
[I 2024-09-08 09:14:58,478] Trial 2 finished with value: 0.5 and parameters: {'out_channels': 82, 'kernel_size': 5, 'num_blocks': 5, 'l2_lambda': 0.007894434062894331, 'dropout_rate': 0.21057365352049306, 'lr': 0.0031672331538254114}. Best is trial 0 with value: 0.5135135135135135.
[I 2024-09-08 09:14:58,830] Trial 3 finished with value: 0.4864864864864865 and parameters: {'out_channels': 17, 'kernel_s

Best model for USDSGD=X saved with accuracy: 0.5405405405405406


[I 2024-09-08 09:17:40,352] Trial 0 finished with value: 0.0032912780826539973 and parameters: {'out_channels': 71, 'kernel_size': 3, 'num_blocks': 1, 'l2_lambda': 0.005372110811114268, 'dropout_rate': 0.3406892999424117, 'lr': 0.00163788118288879}. Best is trial 0 with value: 0.0032912780826539973.
[I 2024-09-08 09:17:41,123] Trial 1 finished with value: 0.0032912780826539973 and parameters: {'out_channels': 88, 'kernel_size': 5, 'num_blocks': 2, 'l2_lambda': 0.006120678697732414, 'dropout_rate': 0.3966847775436936, 'lr': 0.004030226096190104}. Best is trial 0 with value: 0.0032912780826539973.
[I 2024-09-08 09:17:41,330] Trial 2 finished with value: 0.0032912780826539973 and parameters: {'out_channels': 22, 'kernel_size': 5, 'num_blocks': 2, 'l2_lambda': 0.0003584553791439997, 'dropout_rate': 0.11451541090710796, 'lr': 0.0035127615021591126}. Best is trial 0 with value: 0.0032912780826539973.
[I 2024-09-08 09:17:42,587] Trial 3 finished with value: 0.0032912780826539973 and parameter

Best model for USDSGD=X saved with RMSE: 0.0032912780826539973


[I 2024-09-08 09:19:16,291] Trial 1 finished with value: 0.5 and parameters: {'sequence_length': 5, 'hidden_size': 81, 'num_layers': 2, 'num_blocks': 3, 'dropout_rate': 0.11959974971763998, 'lr': 0.008742737836807332}. Best is trial 0 with value: 0.5.
[I 2024-09-08 09:19:16,988] Trial 2 finished with value: 0.5588235294117647 and parameters: {'sequence_length': 30, 'hidden_size': 106, 'num_layers': 2, 'num_blocks': 2, 'dropout_rate': 0.12806835492456525, 'lr': 0.003705276127706086}. Best is trial 2 with value: 0.5588235294117647.
[I 2024-09-08 09:19:17,666] Trial 3 finished with value: 0.5142857142857142 and parameters: {'sequence_length': 14, 'hidden_size': 77, 'num_layers': 3, 'num_blocks': 4, 'dropout_rate': 0.1419340550461016, 'lr': 0.0006623937603179766}. Best is trial 2 with value: 0.5588235294117647.
[I 2024-09-08 09:19:18,238] Trial 4 finished with value: 0.5 and parameters: {'sequence_length': 5, 'hidden_size': 41, 'num_layers': 3, 'num_blocks': 5, 'dropout_rate': 0.4303425687

Best model for USDSGD=X saved with accuracy: 0.6470588235294118


[I 2024-09-08 09:20:12,096] Trial 0 finished with value: 0.004185017372166191 and parameters: {'sequence_length': 14, 'hidden_size': 26, 'num_layers': 3, 'num_blocks': 2, 'dropout_rate': 0.31912891874757654, 'lr': 7.885293864408368e-05}. Best is trial 0 with value: 0.004185017372166191.
[I 2024-09-08 09:20:12,381] Trial 1 finished with value: 0.003387947906871176 and parameters: {'sequence_length': 20, 'hidden_size': 57, 'num_layers': 1, 'num_blocks': 3, 'dropout_rate': 0.3171228964452445, 'lr': 0.002204860074797264}. Best is trial 1 with value: 0.003387947906871176.
[I 2024-09-08 09:20:13,023] Trial 2 finished with value: 0.0036408197193276803 and parameters: {'sequence_length': 5, 'hidden_size': 121, 'num_layers': 2, 'num_blocks': 5, 'dropout_rate': 0.18701922880888208, 'lr': 0.006412754260363796}. Best is trial 1 with value: 0.003387947906871176.
[I 2024-09-08 09:20:13,207] Trial 3 finished with value: 0.0036408197193276803 and parameters: {'sequence_length': 5, 'hidden_size': 117, 

Best model for USDSGD=X saved with RMSE: 0.0028111993613357793
