In [1]:
import os
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, root_mean_squared_error

logical_cores = os.cpu_count()
print(f"Number of logical CPU cores: {logical_cores}")

num_workers = max(1, logical_cores // 2)
print(f"Number of workers set to: {num_workers}")

def is_gpu_available():
    try:
        return torch.cuda.is_available()
    except ImportError:
        return False

gpu_available = is_gpu_available()
print(f"GPU available: {gpu_available}")

Number of logical CPU cores: 16
Number of workers set to: 8
GPU available: True


In [2]:
def load_or_create_ticker_df(csv_file_path):
    """
    Load the existing ticker DataFrame from a CSV file if it exists,
    otherwise create a new DataFrame with predefined column types.
    Ensure the DataFrame has the specified columns, add any missing columns,
    and rearrange the columns in alphabetical order, excluding 'Ticker_Symbol'.

    Args:
    csv_file_path (str): The path to the CSV file.

    Returns:
    pd.DataFrame: The loaded or newly created DataFrame.
    """
    # Define the column types
    column_types = {
        "Ticker_Symbol": str,
        "Best_Cov1D_Classification_Accuracy": float,
        "Best_Cov1D_Classification_Path": str,
        "Best_Cov1D_Regression_RMSE": float,
        "Best_Cov1D_Regression_Path": str,
        "Best_Hybrid_Cov1D_LSTM_Classification_Accuracy": float,
        "Best_Hybrid_Cov1D_LSTM_Classification_Path": str,
        "Best_Hybrid_Cov1D_LSTM_Regression_RMSE": float,
        "Best_Hybrid_Cov1D_LSTM_Regression_Path": str,
        "Best_Hybrid_Cov1D_Transformer_Classification_Accuracy": float,
        "Best_Hybrid_Cov1D_Transformer_Classification_Path": str,
        "Best_Hybrid_Cov1D_Transformer_Regression_RMSE": float,
        "Best_Hybrid_Cov1D_Transformer_Regression_Path": str,
        "Best_LSTM_Classification_Accuracy": float,
        "Best_LSTM_Classification_Path": str,
        "Best_LSTM_Regression_RMSE": float,
        "Best_LSTM_Regression_Path": str,
        "Best_Transformer_Classification_Accuracy": float,
        "Best_TransformerClassification_Path": str,
        "Best_Transformer_Regression_RMSE": float,
        "Best_Transformer_Regression_Path": str,
        "Best_XGBClassifier_Classification_Accuracy": float,
        "Best_XGBClassifier_Classification_Path": str,
        "Best_XGBRegressor_Regression_RMSE": float,
        "Best_XGBRegressor_Regression_Path": str
    }


    if os.path.isfile(csv_file_path):
        # Load the existing file into a DataFrame
        ticker_df = pd.read_csv(csv_file_path)
        
        # Ensure all specified columns are present
        for column, dtype in column_types.items():
            if column not in ticker_df.columns:
                ticker_df[column] = pd.Series(dtype=dtype)
        
        # Reorder columns alphabetically, excluding 'Ticker_Symbol'
        columns = ["Ticker_Symbol"] + sorted([col for col in ticker_df.columns if col != "Ticker_Symbol"])
        ticker_df = ticker_df[columns]
    else:
        # Create a new DataFrame with the specified column types
        ticker_df = pd.DataFrame(columns=column_types.keys()).astype(column_types)
    
    return ticker_df

csv_file_path = "../ticker-best-model.csv"
ticker_df = load_or_create_ticker_df(csv_file_path)

In [3]:
os.makedirs('../models/pytorch/conv1d-classification/', exist_ok=True)
os.makedirs('../models/pytorch/conv1d-regression/', exist_ok=True)
os.makedirs('../models/pytorch/lstm-classification/', exist_ok=True)
os.makedirs('../models/pytorch/lstm-regression/', exist_ok=True)
os.makedirs('../models/pytorch/transformer-classification/', exist_ok=True)
os.makedirs('../models/pytorch/transformer-regression/', exist_ok=True)


path = '../data/train'
ticker_list = []

if os.path.exists(path):
    ticker_list = [os.path.splitext(f)[0] for f in os.listdir(path) if f.endswith('.csv')]


In [4]:
def preprocess_data(df):
    if df.isna().sum().sum() > 0 or df.isin([float('inf'), float('-inf')]).sum().sum() > 0:
        df = df.replace([float('inf'), float('-inf')], float('nan')).dropna()

    df = df.dropna()

    columns_to_drop = [
        'NEXT_DAY_CLOSEPRICE', 'DAILY_CLOSEPRICE_CHANGE', 'CLOSEPRICE_DIRECTION',
        'DAILY_MIDPRICE', 'NEXT_DAY_MIDPRICE', 'DAILY_MIDPRICE_CHANGE', 'MIDPRICE_DIRECTION', 'Date'
    ]
    X = df.drop(columns=columns_to_drop)
    y_classifier = (df['DAILY_CLOSEPRICE_CHANGE'] > 0).astype(int)
    y_regressor = df['DAILY_CLOSEPRICE_CHANGE']

    return X, y_classifier, y_regressor

In [5]:
class Conv1ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, l2_lambda=0.01, dropout_rate=0.5):
        super(Conv1ResidualBlock, self).__init__()
        
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding='same')
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, stride=1, padding='same')
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(dropout_rate)
        
        nn.init.kaiming_normal_(self.conv1.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.conv2.weight, nonlinearity='relu')
        nn.init.zeros_(self.conv1.bias)
        nn.init.zeros_(self.conv2.bias)
        
        self.l2_lambda = l2_lambda

        if in_channels != out_channels:
            self.residual_conv = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=1)
        else:
            self.residual_conv = nn.Identity()

    def forward(self, x):
        residual = self.residual_conv(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        out = self.relu(out)
        
        return out

class Conv1DModel(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, num_blocks=1, l2_lambda=0.01, dropout_rate=0.5, classification=True):
        super(Conv1DModel, self).__init__()
        self.blocks = nn.Sequential(
            Conv1ResidualBlock(in_channels, out_channels, kernel_size, l2_lambda=l2_lambda, dropout_rate=dropout_rate),
            *[Conv1ResidualBlock(out_channels, out_channels, kernel_size, l2_lambda=l2_lambda, dropout_rate=dropout_rate) for _ in range(num_blocks-1)]
        )
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)  # Global average pooling for 1D
        self.fc = nn.Linear(out_channels, 2 if classification else 1)
        self.classification = classification

    def forward(self, x):
        out = self.blocks(x)
        out = self.global_avg_pool(out)
        out = out.view(out.size(0), -1)  # Flatten the tensor
        out = self.fc(out)
        if self.classification:
            out = F.log_softmax(out, dim=1)
        return out

In [6]:
def process_conv1d_classification(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy()
    
    # Reshape X for Conv1D
    NUM_CHANNELS = 1
    X = X.reshape((X.shape[0], NUM_CHANNELS, -1))  # Reshape for Conv1D: (batch_size, num_channels, sequence_length)
    
    # Split data into training and validation sets
    TEST_SIZE = 0.2
    RANDOM_STATE = 42
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
    
    def conv1d_objective(trial):
        in_channels = X_train.shape[1]
        out_channels = trial.suggest_int('out_channels', 16, 128)
        kernel_size = trial.suggest_int('kernel_size', 3, 7)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        l2_lambda = trial.suggest_float('l2_lambda', 1e-5, 1e-2)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = Conv1DModel(in_channels, out_channels, kernel_size, num_blocks, l2_lambda, dropout_rate, classification=True).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2), weight_decay=l2_lambda)
        criterion = nn.CrossEntropyLoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.long).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.long).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_accuracy = accuracy_score(target_val.cpu(), val_pred.cpu())
            return val_accuracy  # Return accuracy directly
    
    study = optuna.create_study(direction='maximize')  # Change direction to 'maximize'
    study.optimize(conv1d_objective, n_trials=100)
    
    best_model = Conv1DModel(X.shape[1], study.best_params['out_channels'], study.best_params['kernel_size'], study.best_params['num_blocks'], study.best_params['l2_lambda'], study.best_params['dropout_rate'], classification=True).to(device)
    model_path = f'../models/pytorch/conv1d-classification/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_Cov1D_Classification_Accuracy'
    path_col = 'Best_Cov1D_Classification_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value > current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
        else:
            print(f"Previous model accuracy: {current_score} is better for {ticker} than accuracy: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
    
    return ticker_df

In [7]:
def process_conv1d_regression(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy().reshape(-1, 1)
    
    # Reshape X for Conv1D
    NUM_CHANNELS = 1
    X = X.reshape((X.shape[0], NUM_CHANNELS, -1))  # Reshape for Conv1D: (batch_size, num_channels, sequence_length)
    
    # Split data into training and validation sets
    TEST_SIZE = 0.2
    RANDOM_STATE = 42
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
    
    def conv1d_objective(trial):
        in_channels = X_train.shape[1]
        out_channels = trial.suggest_int('out_channels', 16, 128)
        kernel_size = trial.suggest_int('kernel_size', 3, 7)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        l2_lambda = trial.suggest_float('l2_lambda', 1e-5, 1e-2)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = Conv1DModel(in_channels, out_channels, kernel_size, num_blocks, l2_lambda, dropout_rate, classification=False).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2), weight_decay=l2_lambda)
        criterion = nn.MSELoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.float32).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.float32).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_rmse = root_mean_squared_error(target_val.cpu(), val_pred.cpu())
            return val_rmse
    
    study = optuna.create_study(direction='minimize')
    study.optimize(conv1d_objective, n_trials=100)
    
    best_model = Conv1DModel(X.shape[1], study.best_params['out_channels'], study.best_params['kernel_size'], study.best_params['num_blocks'], study.best_params['l2_lambda'], study.best_params['dropout_rate'], classification=False).to(device)
    model_path = f'../models/pytorch/conv1d-regression/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_Cov1D_Regression_RMSE'
    path_col = 'Best_Cov1D_Regression_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value < current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
        else:
            print(f"Previous model RMSE: {current_score} is better for {ticker} than RMSE: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
    
    return ticker_df

In [8]:
class LSTMResidualBlock(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1, dropout_rate=0.5):
        super(LSTMResidualBlock, self).__init__()
        
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        self.dropout = nn.Dropout(dropout_rate)
        
        if input_size != hidden_size:
            self.residual_fc = nn.Linear(input_size, hidden_size)
        else:
            self.residual_fc = nn.Identity()

    def forward(self, x):
        residual = self.residual_fc(x)
        out, _ = self.lstm(x)
        out = self.dropout(out)
        out += residual
        return out

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_blocks=1, num_layers=1, dropout_rate=0.5, classification=True):
        super(LSTMModel, self).__init__()
        self.blocks = nn.Sequential(
            LSTMResidualBlock(input_size, hidden_size, num_layers=num_layers, dropout_rate=dropout_rate),
            *[LSTMResidualBlock(hidden_size, hidden_size, num_layers=num_layers, dropout_rate=dropout_rate) for _ in range(num_blocks-1)]
        )
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)  # Global average pooling for 1D
        self.fc = nn.Linear(hidden_size, 2 if classification else 1)
        self.classification = classification

    def forward(self, x):
        out = self.blocks(x)
        out = out.mean(dim=1)  # Global average pooling
        out = self.fc(out)
        if self.classification:
            out = F.log_softmax(out, dim=1)
        return out

In [9]:
def process_lstm_classification(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy()
    
    def create_sequences(X, y, sequence_length):
        sequences_X, sequences_y = [], []
        for i in range(len(X) - sequence_length + 1):
            sequences_X.append(X[i:i + sequence_length])
            sequences_y.append(y[i + sequence_length - 1])
        return np.array(sequences_X), np.array(sequences_y)
    
    def lstm_objective(trial):
        sequence_length = trial.suggest_categorical('sequence_length', [5, 7, 14, 20, 30])
        
        # Create sequences
        X_seq, y_seq = create_sequences(X, y, sequence_length)
        
        # Split data into training, validation, and test sets
        TEST_SIZE = 0.2
        VAL_SIZE = 0.1
        RANDOM_STATE = 42
        
        X_train, X_temp, y_train, y_temp = train_test_split(X_seq, y_seq, test_size=TEST_SIZE + VAL_SIZE, random_state=RANDOM_STATE)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=TEST_SIZE / (TEST_SIZE + VAL_SIZE), random_state=RANDOM_STATE)
        
        input_size = X_train.shape[2]  # Number of features
        hidden_size = trial.suggest_int('hidden_size', 16, 128)
        num_layers = trial.suggest_int('num_layers', 1, 3)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = LSTMModel(input_size, hidden_size, num_blocks, num_layers, dropout_rate, classification=True).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2))
        criterion = nn.CrossEntropyLoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.long).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.long).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_accuracy = accuracy_score(target_val.cpu(), val_pred.cpu())
            return val_accuracy  # Return accuracy directly
    
    study = optuna.create_study(direction='maximize')
    study.optimize(lstm_objective, n_trials=100)
    
    # Reshape X to match the best sequence length
    best_sequence_length = study.best_params['sequence_length']
    X_seq, y_seq = create_sequences(X, y, best_sequence_length)
    
    best_model = LSTMModel(X_seq.shape[2], study.best_params['hidden_size'], study.best_params['num_blocks'], study.best_params['num_layers'], study.best_params['dropout_rate'], classification=True).to(device)
    model_path = f'../models/pytorch/lstm-classification/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_LSTM_Classification_Accuracy'
    path_col = 'Best_LSTM_Classification_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value > current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
        else:
            print(f"Previous model accuracy: {current_score} is better for {ticker} than accuracy: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
        
    return ticker_df

In [10]:
def process_lstm_regression(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy().reshape(-1, 1)
    
    def create_sequences(X, y, sequence_length):
        sequences_X, sequences_y = [], []
        for i in range(len(X) - sequence_length + 1):
            sequences_X.append(X[i:i + sequence_length])
            sequences_y.append(y[i + sequence_length - 1])
        return np.array(sequences_X), np.array(sequences_y)
    
    def lstm_objective(trial):
        sequence_length = trial.suggest_categorical('sequence_length', [5, 7, 14, 20, 30])
        
        # Create sequences
        X_seq, y_seq = create_sequences(X, y, sequence_length)
        
        # Split data into training, validation, and test sets
        TEST_SIZE = 0.2
        VAL_SIZE = 0.1
        RANDOM_STATE = 42
        
        X_train, X_temp, y_train, y_temp = train_test_split(X_seq, y_seq, test_size=TEST_SIZE + VAL_SIZE, random_state=RANDOM_STATE)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=TEST_SIZE / (TEST_SIZE + VAL_SIZE), random_state=RANDOM_STATE)
        
        input_size = X_train.shape[2]  # Number of features
        hidden_size = trial.suggest_int('hidden_size', 16, 128)
        num_layers = trial.suggest_int('num_layers', 1, 3)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = LSTMModel(input_size, hidden_size, num_blocks, num_layers, dropout_rate, classification=False).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2))
        criterion = nn.MSELoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.float32).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.float32).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_rmse = root_mean_squared_error(target_val.cpu(), val_pred.cpu())
            return val_rmse
    
    study = optuna.create_study(direction='minimize')
    study.optimize(lstm_objective, n_trials=100)
    
    # Reshape X to match the best sequence length
    best_sequence_length = study.best_params['sequence_length']
    X_seq, y_seq = create_sequences(X, y, best_sequence_length)
    
    best_model = LSTMModel(X_seq.shape[2], study.best_params['hidden_size'], study.best_params['num_blocks'], study.best_params['num_layers'], study.best_params['dropout_rate'], classification=False).to(device)
    model_path = f'../models/pytorch/lstm-regression/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_LSTM_Regression_RMSE'
    path_col = 'Best_LSTM_Regression_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value < current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
        else:
            print(f"Previous model RMSE: {current_score} is better for {ticker} than RMSE: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
        
    return ticker_df

In [11]:
class Transformer(nn.Module):
    def __init__(self, input_dim, num_heads, num_layers, num_classes=1, classification=True):
        super(Transformer, self).__init__()
        encoder_layer = nn.TransformerEncoderLayer(d_model=input_dim, nhead=num_heads, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(input_dim, num_classes)
        self.classification = classification

    def forward(self, x):
        out = self.transformer_encoder(x)
        out = out.mean(dim=1)  # Global average pooling
        out = self.fc(out)
        if self.classification:
            out = torch.softmax(out, dim=1)  # Use softmax for classification
        return out

In [12]:
def process_transformer_classification(X, y, gpu_available):
    device = torch.device("cuda" if gpu_available and torch.cuda.is_available() else "cpu")
    
    # Convert DataFrame to tensors
    X_tensor = torch.tensor(X.values, dtype=torch.float32)
    y_tensor = torch.tensor(y.values, dtype=torch.long)
    
    # Split data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)
    
    # Create DataLoader
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    
    def objective(trial):
        input_dim = X.shape[1]
        num_heads = trial.suggest_int('num_heads', 2, 8)
        num_layers = trial.suggest_int('num_layers', 1, 4)
        lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
        
        # Ensure input_dim is divisible by num_heads
        while input_dim % num_heads != 0:
            num_heads = trial.suggest_int('num_heads', 2, 8)
        
        model = Transformer(input_dim=input_dim, num_heads=num_heads, num_layers=num_layers, num_classes=len(y.unique()), classification=True).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)
        
        # Training loop
        model.train()
        for epoch in range(10):  # Number of epochs can be a hyperparameter too
            for batch_X, batch_y in train_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        
        # Validation loop
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                _, predicted = torch.max(outputs.data, 1)
                total += batch_y.size(0)
                correct += (predicted == batch_y).sum().item()
        
        accuracy = correct / total

        # Print trial information
        print(f"Trial {trial.number}: Value: {accuracy}, Params: {trial.params}")
        return accuracy
    
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=50)
    
    print("Best hyperparameters: ", study.best_params)
    print("Best accuracy: ", study.best_value)

In [13]:
for ticker in ticker_list:
    dataframe = pd.read_csv(f"../data/{ticker}.csv")
    X, y_classifier, y_regressor = preprocess_data(dataframe)
    ticker_df = process_conv1d_classification(X, y_classifier, gpu_available, ticker, ticker_df, csv_file_path)
    ticker_df = process_conv1d_regression(X, y_regressor, gpu_available, ticker, ticker_df, csv_file_path)
    ticker_df = process_lstm_classification(X, y_classifier, gpu_available, ticker, ticker_df, csv_file_path)
    ticker_df = process_lstm_regression(X, y_regressor, gpu_available, ticker, ticker_df, csv_file_path)
    #process_transformer_classification(X, y_classifier, gpu_available)
    #break

[I 2024-09-06 15:32:36,131] A new study created in memory with name: no-name-daa6a9bd-639f-48e2-b911-1add9ef82f4a
[I 2024-09-06 15:32:37,672] Trial 0 finished with value: 0.2727272727272727 and parameters: {'out_channels': 52, 'kernel_size': 3, 'num_blocks': 5, 'l2_lambda': 0.006270787769864004, 'dropout_rate': 0.2891102783051703, 'lr': 0.0007422369179945341}. Best is trial 0 with value: 0.2727272727272727.
  return F.conv1d(input, weight, bias, self.stride,
[I 2024-09-06 15:32:38,722] Trial 1 finished with value: 0.30303030303030304 and parameters: {'out_channels': 98, 'kernel_size': 6, 'num_blocks': 5, 'l2_lambda': 0.003938089229619387, 'dropout_rate': 0.12995710977466013, 'lr': 0.0013947833948874948}. Best is trial 1 with value: 0.30303030303030304.
[I 2024-09-06 15:32:39,561] Trial 2 finished with value: 0.6060606060606061 and parameters: {'out_channels': 66, 'kernel_size': 5, 'num_blocks': 5, 'l2_lambda': 0.00897849595603438, 'dropout_rate': 0.33626338656770655, 'lr': 0.0060427670

Best model for CL=F saved with accuracy: 0.7575757575757576


[I 2024-09-06 15:33:41,283] Trial 0 finished with value: 1.3886516599165026 and parameters: {'out_channels': 90, 'kernel_size': 6, 'num_blocks': 4, 'l2_lambda': 0.00657164458802797, 'dropout_rate': 0.4371678796444408, 'lr': 0.0034685001295447463}. Best is trial 0 with value: 1.3886516599165026.
[I 2024-09-06 15:33:41,941] Trial 1 finished with value: 1.3886516599165026 and parameters: {'out_channels': 19, 'kernel_size': 7, 'num_blocks': 5, 'l2_lambda': 0.0014856476214847314, 'dropout_rate': 0.33143691751000903, 'lr': 0.0075914922811774165}. Best is trial 0 with value: 1.3886516599165026.
[I 2024-09-06 15:33:42,472] Trial 2 finished with value: 1.3886516599165026 and parameters: {'out_channels': 16, 'kernel_size': 3, 'num_blocks': 4, 'l2_lambda': 0.009376128274588223, 'dropout_rate': 0.2996227703907822, 'lr': 0.002657644215139651}. Best is trial 0 with value: 1.3886516599165026.
[I 2024-09-06 15:33:43,220] Trial 3 finished with value: 1.3886516599165026 and parameters: {'out_channels': 

Best model for CL=F saved with RMSE: 1.3886516599165026


[I 2024-09-06 15:34:48,929] Trial 0 finished with value: 0.3125 and parameters: {'sequence_length': 5, 'hidden_size': 69, 'num_layers': 3, 'num_blocks': 5, 'dropout_rate': 0.1265665690632424, 'lr': 0.008956453809011486}. Best is trial 0 with value: 0.3125.
[I 2024-09-06 15:34:49,474] Trial 1 finished with value: 0.4 and parameters: {'sequence_length': 14, 'hidden_size': 56, 'num_layers': 2, 'num_blocks': 3, 'dropout_rate': 0.1414337851982428, 'lr': 0.004612788395810417}. Best is trial 1 with value: 0.4.
[I 2024-09-06 15:34:50,377] Trial 2 finished with value: 0.5384615384615384 and parameters: {'sequence_length': 30, 'hidden_size': 102, 'num_layers': 3, 'num_blocks': 5, 'dropout_rate': 0.14940485021601316, 'lr': 0.0014797489981980716}. Best is trial 2 with value: 0.5384615384615384.
[I 2024-09-06 15:34:50,655] Trial 3 finished with value: 0.3125 and parameters: {'sequence_length': 5, 'hidden_size': 104, 'num_layers': 1, 'num_blocks': 2, 'dropout_rate': 0.2093363874380607, 'lr': 0.00952

Best model for CL=F saved with accuracy: 0.8125


[I 2024-09-06 15:35:50,265] Trial 0 finished with value: 1.2452720211668697 and parameters: {'sequence_length': 5, 'hidden_size': 101, 'num_layers': 2, 'num_blocks': 3, 'dropout_rate': 0.38151851666919145, 'lr': 0.009557113999096955}. Best is trial 0 with value: 1.2452720211668697.
[I 2024-09-06 15:35:50,738] Trial 1 finished with value: 1.4983875848460895 and parameters: {'sequence_length': 14, 'hidden_size': 52, 'num_layers': 3, 'num_blocks': 2, 'dropout_rate': 0.39561667283547874, 'lr': 0.003326194461550983}. Best is trial 0 with value: 1.2452720211668697.
[I 2024-09-06 15:35:51,211] Trial 2 finished with value: 1.2135031624553303 and parameters: {'sequence_length': 20, 'hidden_size': 50, 'num_layers': 3, 'num_blocks': 2, 'dropout_rate': 0.38092044722171525, 'lr': 0.003613470169773538}. Best is trial 2 with value: 1.2135031624553303.
[I 2024-09-06 15:35:51,398] Trial 3 finished with value: 1.4983875848460895 and parameters: {'sequence_length': 14, 'hidden_size': 79, 'num_layers': 1,

Best model for CL=F saved with RMSE: 1.2135031624553303


[I 2024-09-06 15:36:37,841] Trial 0 finished with value: 0.45454545454545453 and parameters: {'out_channels': 100, 'kernel_size': 4, 'num_blocks': 3, 'l2_lambda': 0.0008839515464066132, 'dropout_rate': 0.4721962336366453, 'lr': 0.008177737508248125}. Best is trial 0 with value: 0.45454545454545453.
[I 2024-09-06 15:36:38,605] Trial 1 finished with value: 0.45454545454545453 and parameters: {'out_channels': 112, 'kernel_size': 4, 'num_blocks': 4, 'l2_lambda': 0.003466346128554033, 'dropout_rate': 0.3778358466909105, 'lr': 0.003008000702564062}. Best is trial 0 with value: 0.45454545454545453.
[I 2024-09-06 15:36:38,855] Trial 2 finished with value: 0.45454545454545453 and parameters: {'out_channels': 118, 'kernel_size': 5, 'num_blocks': 1, 'l2_lambda': 0.006319783451140293, 'dropout_rate': 0.10770646286585439, 'lr': 0.00299770064990644}. Best is trial 0 with value: 0.45454545454545453.
[I 2024-09-06 15:36:39,790] Trial 3 finished with value: 0.45454545454545453 and parameters: {'out_cha

Best model for NVDA saved with accuracy: 0.5454545454545454


[I 2024-09-06 15:37:31,595] Trial 0 finished with value: 4.167355508960071 and parameters: {'out_channels': 27, 'kernel_size': 5, 'num_blocks': 4, 'l2_lambda': 0.006403486622686283, 'dropout_rate': 0.47729887069969046, 'lr': 0.005073446453017918}. Best is trial 0 with value: 4.167355508960071.
[I 2024-09-06 15:37:31,785] Trial 1 finished with value: 4.167355508960071 and parameters: {'out_channels': 120, 'kernel_size': 4, 'num_blocks': 1, 'l2_lambda': 0.0004809625126716811, 'dropout_rate': 0.45528912505852703, 'lr': 0.005577440231411345}. Best is trial 0 with value: 4.167355508960071.
[I 2024-09-06 15:37:32,682] Trial 2 finished with value: 4.167355508960071 and parameters: {'out_channels': 105, 'kernel_size': 3, 'num_blocks': 5, 'l2_lambda': 0.00715507473069219, 'dropout_rate': 0.11677128981040036, 'lr': 0.0067310225902946445}. Best is trial 0 with value: 4.167355508960071.
[I 2024-09-06 15:37:32,884] Trial 3 finished with value: 4.167355508960071 and parameters: {'out_channels': 102,

Best model for NVDA saved with RMSE: 4.167355508960071


[I 2024-09-06 15:38:27,704] Trial 0 finished with value: 0.4 and parameters: {'sequence_length': 7, 'hidden_size': 50, 'num_layers': 1, 'num_blocks': 5, 'dropout_rate': 0.2608189123330176, 'lr': 0.006561084683445658}. Best is trial 0 with value: 0.4.
[I 2024-09-06 15:38:27,934] Trial 1 finished with value: 0.7333333333333333 and parameters: {'sequence_length': 14, 'hidden_size': 60, 'num_layers': 2, 'num_blocks': 1, 'dropout_rate': 0.40518560231302125, 'lr': 0.002593649508292302}. Best is trial 1 with value: 0.7333333333333333.
[I 2024-09-06 15:38:28,368] Trial 2 finished with value: 0.26666666666666666 and parameters: {'sequence_length': 14, 'hidden_size': 54, 'num_layers': 3, 'num_blocks': 2, 'dropout_rate': 0.29466453108219737, 'lr': 0.009552190276082116}. Best is trial 1 with value: 0.7333333333333333.
[I 2024-09-06 15:38:28,712] Trial 3 finished with value: 0.6428571428571429 and parameters: {'sequence_length': 20, 'hidden_size': 92, 'num_layers': 1, 'num_blocks': 3, 'dropout_rate

Best model for NVDA saved with accuracy: 0.7692307692307693


[I 2024-09-06 15:39:28,331] Trial 1 finished with value: 4.325488823322819 and parameters: {'sequence_length': 30, 'hidden_size': 110, 'num_layers': 2, 'num_blocks': 5, 'dropout_rate': 0.39727552375009656, 'lr': 0.0030082363743354876}. Best is trial 1 with value: 4.325488823322819.
[I 2024-09-06 15:39:29,145] Trial 2 finished with value: 5.404297805011556 and parameters: {'sequence_length': 20, 'hidden_size': 128, 'num_layers': 3, 'num_blocks': 4, 'dropout_rate': 0.31721721010246384, 'lr': 0.009638123492326213}. Best is trial 1 with value: 4.325488823322819.
[I 2024-09-06 15:39:29,681] Trial 3 finished with value: 4.325488823322819 and parameters: {'sequence_length': 30, 'hidden_size': 74, 'num_layers': 1, 'num_blocks': 5, 'dropout_rate': 0.15088343111501126, 'lr': 0.006664676128109541}. Best is trial 1 with value: 4.325488823322819.
[I 2024-09-06 15:39:30,276] Trial 4 finished with value: 5.404297805011556 and parameters: {'sequence_length': 20, 'hidden_size': 23, 'num_layers': 3, 'nu

Best model for NVDA saved with RMSE: 3.758128904585536


[I 2024-09-06 15:40:16,729] Trial 0 finished with value: 0.5142857142857142 and parameters: {'out_channels': 104, 'kernel_size': 6, 'num_blocks': 3, 'l2_lambda': 0.006274003304011915, 'dropout_rate': 0.12326760673378145, 'lr': 0.009933941458609465}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-06 15:40:17,709] Trial 1 finished with value: 0.4857142857142857 and parameters: {'out_channels': 121, 'kernel_size': 7, 'num_blocks': 4, 'l2_lambda': 0.0015274411120524112, 'dropout_rate': 0.28791922266561, 'lr': 0.004460493879273565}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-06 15:40:18,086] Trial 2 finished with value: 0.5142857142857142 and parameters: {'out_channels': 28, 'kernel_size': 6, 'num_blocks': 2, 'l2_lambda': 0.009195800028745775, 'dropout_rate': 0.11707538236854634, 'lr': 0.002434302841428731}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-06 15:40:18,610] Trial 3 finished with value: 0.5142857142857142 and parameters: {'out_channels': 

Best model for SGDUSD=X saved with accuracy: 0.6


[I 2024-09-06 15:41:20,567] Trial 0 finished with value: 0.0016006725511762895 and parameters: {'out_channels': 123, 'kernel_size': 7, 'num_blocks': 2, 'l2_lambda': 0.001383819283843266, 'dropout_rate': 0.3621246507776943, 'lr': 0.006822923177566809}. Best is trial 0 with value: 0.0016006725511762895.
[I 2024-09-06 15:41:20,819] Trial 1 finished with value: 0.0016006725511762895 and parameters: {'out_channels': 52, 'kernel_size': 7, 'num_blocks': 1, 'l2_lambda': 0.005131726022284185, 'dropout_rate': 0.3675143446188599, 'lr': 0.002649089237336314}. Best is trial 0 with value: 0.0016006725511762895.
[I 2024-09-06 15:41:21,130] Trial 2 finished with value: 0.0016006725511762895 and parameters: {'out_channels': 84, 'kernel_size': 5, 'num_blocks': 2, 'l2_lambda': 0.00499328364095774, 'dropout_rate': 0.4850636328173632, 'lr': 0.001743650168207923}. Best is trial 0 with value: 0.0016006725511762895.
[I 2024-09-06 15:41:21,529] Trial 3 finished with value: 0.0016006725511762895 and parameters:

Best model for SGDUSD=X saved with RMSE: 0.0016006725511762895


[I 2024-09-06 15:42:08,780] Trial 0 finished with value: 0.4375 and parameters: {'sequence_length': 7, 'hidden_size': 106, 'num_layers': 1, 'num_blocks': 1, 'dropout_rate': 0.30052520875881045, 'lr': 0.009491256615273318}. Best is trial 0 with value: 0.4375.
[I 2024-09-06 15:42:09,246] Trial 1 finished with value: 0.4375 and parameters: {'sequence_length': 7, 'hidden_size': 53, 'num_layers': 2, 'num_blocks': 3, 'dropout_rate': 0.45601960224761595, 'lr': 0.0060107486723908485}. Best is trial 0 with value: 0.4375.
[I 2024-09-06 15:42:09,665] Trial 2 finished with value: 0.5 and parameters: {'sequence_length': 7, 'hidden_size': 30, 'num_layers': 1, 'num_blocks': 4, 'dropout_rate': 0.3601556647924957, 'lr': 0.007730684137523124}. Best is trial 2 with value: 0.5.
[I 2024-09-06 15:42:10,260] Trial 3 finished with value: 0.5714285714285714 and parameters: {'sequence_length': 30, 'hidden_size': 108, 'num_layers': 2, 'num_blocks': 4, 'dropout_rate': 0.4863893930688028, 'lr': 0.00257192628753197

Best model for SGDUSD=X saved with accuracy: 0.8571428571428571


[I 2024-09-06 15:42:57,723] Trial 0 finished with value: 0.001676912428126705 and parameters: {'sequence_length': 30, 'hidden_size': 73, 'num_layers': 2, 'num_blocks': 4, 'dropout_rate': 0.43310314370862524, 'lr': 0.0003813278015067773}. Best is trial 0 with value: 0.001676912428126705.
[I 2024-09-06 15:42:58,073] Trial 1 finished with value: 0.001756282420979557 and parameters: {'sequence_length': 5, 'hidden_size': 35, 'num_layers': 2, 'num_blocks': 2, 'dropout_rate': 0.11261889814246105, 'lr': 0.008448721971706695}. Best is trial 0 with value: 0.001676912428126705.
[I 2024-09-06 15:42:58,328] Trial 2 finished with value: 0.001953165305963973 and parameters: {'sequence_length': 20, 'hidden_size': 128, 'num_layers': 3, 'num_blocks': 1, 'dropout_rate': 0.42872986034822735, 'lr': 0.0075444563763265565}. Best is trial 0 with value: 0.001676912428126705.
[I 2024-09-06 15:42:58,818] Trial 3 finished with value: 0.001953165305963973 and parameters: {'sequence_length': 20, 'hidden_size': 41, 

Best model for SGDUSD=X saved with RMSE: 0.001676912428126705


[I 2024-09-06 15:43:47,435] Trial 0 finished with value: 0.5142857142857142 and parameters: {'out_channels': 91, 'kernel_size': 3, 'num_blocks': 1, 'l2_lambda': 0.0029395950180582785, 'dropout_rate': 0.4381287188073063, 'lr': 0.007047780973505605}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-06 15:43:48,096] Trial 1 finished with value: 0.5142857142857142 and parameters: {'out_channels': 118, 'kernel_size': 6, 'num_blocks': 3, 'l2_lambda': 0.009765441222491077, 'dropout_rate': 0.1910418410414253, 'lr': 0.008832344574873531}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-06 15:43:48,943] Trial 2 finished with value: 0.5142857142857142 and parameters: {'out_channels': 105, 'kernel_size': 6, 'num_blocks': 4, 'l2_lambda': 0.00526048394804378, 'dropout_rate': 0.21066431588389567, 'lr': 0.002942315688609814}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-06 15:43:49,344] Trial 3 finished with value: 0.5142857142857142 and parameters: {'out_channels': 

Best model for USDSGD=X saved with accuracy: 0.6571428571428571


[I 2024-09-06 15:44:39,763] Trial 0 finished with value: 0.002848010049792017 and parameters: {'out_channels': 119, 'kernel_size': 3, 'num_blocks': 5, 'l2_lambda': 0.00826662243499472, 'dropout_rate': 0.2779379301384287, 'lr': 0.0012902169760517087}. Best is trial 0 with value: 0.002848010049792017.
[I 2024-09-06 15:44:40,735] Trial 1 finished with value: 0.002848010049792017 and parameters: {'out_channels': 82, 'kernel_size': 6, 'num_blocks': 5, 'l2_lambda': 0.005776264279372309, 'dropout_rate': 0.30180716725563916, 'lr': 0.0006612835447072382}. Best is trial 0 with value: 0.002848010049792017.
[I 2024-09-06 15:44:41,363] Trial 2 finished with value: 0.002848010049792017 and parameters: {'out_channels': 67, 'kernel_size': 4, 'num_blocks': 4, 'l2_lambda': 0.0002083631933768931, 'dropout_rate': 0.4401359971685427, 'lr': 0.0017582174862832646}. Best is trial 0 with value: 0.002848010049792017.
[I 2024-09-06 15:44:42,102] Trial 3 finished with value: 0.002848010049792017 and parameters: {

Best model for USDSGD=X saved with RMSE: 0.002848010049792017


[I 2024-09-06 15:45:43,374] Trial 0 finished with value: 0.35294117647058826 and parameters: {'sequence_length': 5, 'hidden_size': 110, 'num_layers': 2, 'num_blocks': 5, 'dropout_rate': 0.15806303014517062, 'lr': 0.0030994587353792586}. Best is trial 0 with value: 0.35294117647058826.
[I 2024-09-06 15:45:43,583] Trial 1 finished with value: 0.47058823529411764 and parameters: {'sequence_length': 5, 'hidden_size': 79, 'num_layers': 1, 'num_blocks': 1, 'dropout_rate': 0.17765152957430652, 'lr': 0.00024757585544576646}. Best is trial 1 with value: 0.47058823529411764.
[I 2024-09-06 15:45:44,195] Trial 2 finished with value: 0.35714285714285715 and parameters: {'sequence_length': 30, 'hidden_size': 76, 'num_layers': 2, 'num_blocks': 4, 'dropout_rate': 0.2245673432964772, 'lr': 0.003244783648879615}. Best is trial 1 with value: 0.47058823529411764.
[I 2024-09-06 15:45:44,539] Trial 3 finished with value: 0.4666666666666667 and parameters: {'sequence_length': 20, 'hidden_size': 21, 'num_laye

Best model for USDSGD=X saved with accuracy: 0.8571428571428571


[I 2024-09-06 15:46:40,626] Trial 0 finished with value: 0.003266740616755657 and parameters: {'sequence_length': 7, 'hidden_size': 72, 'num_layers': 1, 'num_blocks': 1, 'dropout_rate': 0.3387019250824339, 'lr': 0.0009884208806466219}. Best is trial 0 with value: 0.003266740616755657.
[I 2024-09-06 15:46:41,583] Trial 1 finished with value: 0.0030279561688525214 and parameters: {'sequence_length': 30, 'hidden_size': 97, 'num_layers': 3, 'num_blocks': 5, 'dropout_rate': 0.48240357941303313, 'lr': 0.008670509240950251}. Best is trial 1 with value: 0.0030279561688525214.
[I 2024-09-06 15:46:41,880] Trial 2 finished with value: 0.0035020182395090887 and parameters: {'sequence_length': 20, 'hidden_size': 33, 'num_layers': 1, 'num_blocks': 2, 'dropout_rate': 0.18274319227171965, 'lr': 0.004986262856830615}. Best is trial 1 with value: 0.0030279561688525214.
[I 2024-09-06 15:46:42,455] Trial 3 finished with value: 0.003266740616755657 and parameters: {'sequence_length': 7, 'hidden_size': 115,

Best model for USDSGD=X saved with RMSE: 0.0030279561688525214
