In [1]:
import os
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, root_mean_squared_error

logical_cores = os.cpu_count()
print(f"Number of logical CPU cores: {logical_cores}")

num_workers = max(1, logical_cores // 2)
print(f"Number of workers set to: {num_workers}")

def is_gpu_available():
    try:
        return torch.cuda.is_available()
    except ImportError:
        return False

gpu_available = is_gpu_available()
print(f"GPU available: {gpu_available}")

Number of logical CPU cores: 16
Number of workers set to: 8
GPU available: True


In [3]:
os.makedirs('../models/hyperparameters-search-models/pytorch/conv1d-classification/', exist_ok=True)
os.makedirs('../models/hyperparameters-search-models/pytorch/conv1d-regression/', exist_ok=True)
os.makedirs('../models/hyperparameters-search-models/pytorch/lstm-classification/', exist_ok=True)
os.makedirs('../models/hyperparameters-search-models/pytorch/lstm-regression/', exist_ok=True)
os.makedirs('../models/hyperparameters-search-models/pytorch/transformer-classification/', exist_ok=True)
os.makedirs('../models/hyperparameters-search-models/pytorch/transformer-regression/', exist_ok=True)

os.makedirs('../models/best-hyperparameters/pytorch/conv1d-classification/', exist_ok=True)
os.makedirs('../models/best-hyperparameters/pytorch/conv1d-regression/', exist_ok=True)
os.makedirs('../models/best-hyperparameters/pytorch/lstm-classification/', exist_ok=True)
os.makedirs('../models/best-hyperparameters/pytorch/lstm-regression/', exist_ok=True)
os.makedirs('../models/best-hyperparameters/pytorch/transformer-classification/', exist_ok=True)
os.makedirs('../models/best-hyperparameters/pytorch/transformer-regression/', exist_ok=True)

os.makedirs('../models/trained-models/pytorch/conv1d-classification/', exist_ok=True)
os.makedirs('../models/trained-models/pytorch/conv1d-regression/', exist_ok=True)
os.makedirs('../models/trained-models/pytorch/lstm-classification/', exist_ok=True)
os.makedirs('../models/trained-models/pytorch/lstm-regression/', exist_ok=True)
os.makedirs('../models/trained-models/pytorch/transformer-classification/', exist_ok=True)
os.makedirs('../models/trained-models/pytorch/transformer-regression/', exist_ok=True)

path = '../data/train'
ticker_list = []

if os.path.exists(path):
    ticker_list = [os.path.splitext(f)[0] for f in os.listdir(path) if f.endswith('.csv')]


In [2]:
def load_or_create_ticker_df(csv_file_path):
    """
    Load the existing ticker DataFrame from a CSV file if it exists,
    otherwise create a new DataFrame with predefined column types.
    Ensure the DataFrame has the specified columns, add any missing columns,
    and rearrange the columns in alphabetical order, excluding 'Ticker_Symbol'.

    Args:
    csv_file_path (str): The path to the CSV file.

    Returns:
    pd.DataFrame: The loaded or newly created DataFrame.
    """
    # Define the column types
    column_types = {
        "Ticker_Symbol": str,
        "Best_Cov1D_Classification_Accuracy": float,
        "Best_Cov1D_Classification_Path": str,
        "Best_Cov1D_Regression_RMSE": float,
        "Best_Cov1D_Regression_Path": str,
        "Best_LSTM_Classification_Accuracy": float,
        "Best_LSTM_Classification_Path": str,
        "Best_LSTM_Regression_RMSE": float,
        "Best_LSTM_Regression_Path": str,
        "Best_Transformer_Classification_Accuracy": float,
        "Best_Transformer_Classification_Path": str,
        "Best_Transformer_Regression_RMSE": float,
        "Best_Transformer_Regression_Path": str,
        "Best_XGBClassifier_Classification_Accuracy": float,
        "Best_XGBClassifier_Classification_Path": str,
        "Best_XGBRegressor_Regression_RMSE": float,
        "Best_XGBRegressor_Regression_Path": str
    }


    if os.path.isfile(csv_file_path):
        # Load the existing file into a DataFrame
        ticker_df = pd.read_csv(csv_file_path)
        
        # Ensure all specified columns are present
        for column, dtype in column_types.items():
            if column not in ticker_df.columns:
                ticker_df[column] = pd.Series(dtype=dtype)
        
        # Reorder columns alphabetically, excluding 'Ticker_Symbol'
        columns = ["Ticker_Symbol"] + sorted([col for col in ticker_df.columns if col != "Ticker_Symbol"])
        ticker_df = ticker_df[columns]
    else:
        # Create a new DataFrame with the specified column types
        ticker_df = pd.DataFrame(columns=column_types.keys()).astype(column_types)
    
    return ticker_df

In [4]:
def training_preprocess_data(df):
    if df.isna().sum().sum() > 0 or df.isin([float('inf'), float('-inf')]).sum().sum() > 0:
        df = df.replace([float('inf'), float('-inf')], float('nan')).dropna()

    df = df.dropna()

    columns_to_drop = [
        'NEXT_DAY_CLOSEPRICE', 'DAILY_CLOSEPRICE_CHANGE', 'CLOSEPRICE_DIRECTION',
        'DAILY_MIDPRICE', 'NEXT_DAY_MIDPRICE', 'DAILY_MIDPRICE_CHANGE', 'MIDPRICE_DIRECTION', 'Date'
    ]
    X = df.drop(columns=columns_to_drop)
    y_classifier = (np.sign(df['DAILY_CLOSEPRICE_CHANGE']) >= 0).astype(int)
    y_regressor = df['DAILY_CLOSEPRICE_CHANGE']

    return X, y_classifier, y_regressor

In [5]:
class Conv1ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, l2_lambda=0.01, dropout_rate=0.5):
        super(Conv1ResidualBlock, self).__init__()
        
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding='same')
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, stride=1, padding='same')
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(dropout_rate)
        
        nn.init.kaiming_normal_(self.conv1.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.conv2.weight, nonlinearity='relu')
        nn.init.zeros_(self.conv1.bias)
        nn.init.zeros_(self.conv2.bias)
        
        self.l2_lambda = l2_lambda

        if in_channels != out_channels:
            self.residual_conv = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=1)
        else:
            self.residual_conv = nn.Identity()

    def forward(self, x):
        residual = self.residual_conv(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        out = self.relu(out)
        
        return out

class Conv1DModel(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, num_blocks=1, l2_lambda=0.01, dropout_rate=0.5, classification=True):
        super(Conv1DModel, self).__init__()
        self.blocks = nn.Sequential(
            Conv1ResidualBlock(in_channels, out_channels, kernel_size, l2_lambda=l2_lambda, dropout_rate=dropout_rate),
            *[Conv1ResidualBlock(out_channels, out_channels, kernel_size, l2_lambda=l2_lambda, dropout_rate=dropout_rate) for _ in range(num_blocks-1)]
        )
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)  # Global average pooling for 1D
        self.fc = nn.Linear(out_channels, 2 if classification else 1)
        self.classification = classification

    def forward(self, x):
        out = self.blocks(x)
        out = self.global_avg_pool(out)
        out = out.view(out.size(0), -1)  # Flatten the tensor
        out = self.fc(out)
        if self.classification:
            out = F.log_softmax(out, dim=1)
        return out

In [6]:
def conv1d_classification_hyperparameters_search(X, y, gpu_available, ticker_symbol):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy()
    
    # Reshape X for Conv1D
    NUM_CHANNELS = 1
    X = X.reshape((X.shape[0], NUM_CHANNELS, -1))  # Reshape for Conv1D: (batch_size, num_channels, sequence_length)
    
    # Split data into training and validation sets
    TEST_SIZE = 0.2
    RANDOM_STATE = 42
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
    
    def conv1d_objective(trial):
        in_channels = X_train.shape[1]
        out_channels = trial.suggest_int('out_channels', 16, 128)
        kernel_size = trial.suggest_int('kernel_size', 3, 7)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        l2_lambda = trial.suggest_float('l2_lambda', 1e-5, 1e-2)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = Conv1DModel(in_channels, out_channels, kernel_size, num_blocks, l2_lambda, dropout_rate, classification=True).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2), weight_decay=l2_lambda)
        criterion = nn.CrossEntropyLoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.long).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.long).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_accuracy = accuracy_score(target_val.cpu(), val_pred.cpu())
            return val_accuracy  # Return accuracy directly
    
    study = optuna.create_study(direction='maximize')  # Change direction to 'maximize'
    study.optimize(conv1d_objective, n_trials=100)
    
    best_model = Conv1DModel(X.shape[1], study.best_params['out_channels'], study.best_params['kernel_size'], study.best_params['num_blocks'], study.best_params['l2_lambda'], study.best_params['dropout_rate'], classification=True).to(device)
    model_path = f'../models/hyperparameters-search-models/pytorch/conv1d-classification/{ticker_symbol}.pth'
    csv_path = f'../models/hyperparameters-search-models/ticker-all-models-best-hyperparameters-list.csv'

    ticker_df = load_or_create_ticker_df(csv_path)
    
    # Update ticker_df and save the best model
    metric_col = 'Best_Cov1D_Classification_Accuracy'
    path_col = 'Best_Cov1D_Classification_Path'
    
    if ticker_symbol in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value > current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_path, index=False)
            print(f"Best model for {ticker_symbol} saved with accuracy: {study.best_value}")
        else:
            print(f"Previous model accuracy: {current_score} is better for {ticker_symbol} than accuracy: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker_symbol], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_path, index=False)
        print(f"Best model for {ticker_symbol} saved with accuracy: {study.best_value}")

In [7]:
def conv1d_regression_hyperparameters_search(X, y, gpu_available, ticker_symbol):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy().reshape(-1, 1)
    
    # Reshape X for Conv1D
    NUM_CHANNELS = 1
    X = X.reshape((X.shape[0], NUM_CHANNELS, -1))  # Reshape for Conv1D: (batch_size, num_channels, sequence_length)
    
    # Split data into training and validation sets
    TEST_SIZE = 0.2
    RANDOM_STATE = 42
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
    
    def conv1d_objective(trial):
        in_channels = X_train.shape[1]
        out_channels = trial.suggest_int('out_channels', 16, 128)
        kernel_size = trial.suggest_int('kernel_size', 3, 7)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        l2_lambda = trial.suggest_float('l2_lambda', 1e-5, 1e-2)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = Conv1DModel(in_channels, out_channels, kernel_size, num_blocks, l2_lambda, dropout_rate, classification=False).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2), weight_decay=l2_lambda)
        criterion = nn.MSELoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.float32).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.float32).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_rmse = root_mean_squared_error(target_val.cpu(), val_pred.cpu())
            return val_rmse
    
    study = optuna.create_study(direction='minimize')
    study.optimize(conv1d_objective, n_trials=100)
    
    best_model = Conv1DModel(X.shape[1], study.best_params['out_channels'], study.best_params['kernel_size'], study.best_params['num_blocks'], study.best_params['l2_lambda'], study.best_params['dropout_rate'], classification=False).to(device)
    model_path = f'../models/hyperparameters-search-models/pytorch/conv1d-regression/{ticker_symbol}.pth'
    csv_path = f'../models/hyperparameters-search-models/ticker-all-models-best-hyperparameters-list.csv'

    ticker_df = load_or_create_ticker_df(csv_path)
    
    # Update ticker_df and save the best model
    metric_col = 'Best_Cov1D_Regression_RMSE'
    path_col = 'Best_Cov1D_Regression_Path'
    
    if ticker_symbol in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value < current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_path, index=False)
            print(f"Best model for {ticker_symbol} saved with RMSE: {study.best_value}")
        else:
            print(f"Previous model RMSE: {current_score} is better for {ticker_symbol} than RMSE: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker_symbol], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_path, index=False)
        print(f"Best model for {ticker_symbol} saved with RMSE: {study.best_value}")

In [8]:
class LSTMResidualBlock(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1, dropout_rate=0.5):
        super(LSTMResidualBlock, self).__init__()
        
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        self.dropout = nn.Dropout(dropout_rate)
        
        if input_size != hidden_size:
            self.residual_fc = nn.Linear(input_size, hidden_size)
        else:
            self.residual_fc = nn.Identity()

    def forward(self, x):
        residual = self.residual_fc(x)
        out, _ = self.lstm(x)
        out = self.dropout(out)
        out += residual
        return out

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_blocks=1, num_layers=1, dropout_rate=0.5, classification=True):
        super(LSTMModel, self).__init__()
        self.blocks = nn.Sequential(
            LSTMResidualBlock(input_size, hidden_size, num_layers=num_layers, dropout_rate=dropout_rate),
            *[LSTMResidualBlock(hidden_size, hidden_size, num_layers=num_layers, dropout_rate=dropout_rate) for _ in range(num_blocks-1)]
        )
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)  # Global average pooling for 1D
        self.fc = nn.Linear(hidden_size, 2 if classification else 1)
        self.classification = classification

    def forward(self, x):
        out = self.blocks(x)
        out = out.mean(dim=1)  # Global average pooling
        out = self.fc(out)
        if self.classification:
            out = F.log_softmax(out, dim=1)
        return out

In [9]:
def lstm_classification_hyperparameters_search(X, y, gpu_available, ticker_symbol):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy()
    
    def create_sequences(X, y, sequence_length):
        sequences_X, sequences_y = [], []
        for i in range(len(X) - sequence_length + 1):
            sequences_X.append(X[i:i + sequence_length])
            sequences_y.append(y[i + sequence_length - 1])
        return np.array(sequences_X), np.array(sequences_y)
    
    def lstm_objective(trial):
        sequence_length = trial.suggest_categorical('sequence_length', [5, 7, 14, 20, 30])
        
        # Create sequences
        X_seq, y_seq = create_sequences(X, y, sequence_length)
        
        # Split data into training, validation, and test sets
        TEST_SIZE = 0.2
        VAL_SIZE = 0.1
        RANDOM_STATE = 42
        
        X_train, X_temp, y_train, y_temp = train_test_split(X_seq, y_seq, test_size=TEST_SIZE + VAL_SIZE, random_state=RANDOM_STATE)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=TEST_SIZE / (TEST_SIZE + VAL_SIZE), random_state=RANDOM_STATE)
        
        input_size = X_train.shape[2]  # Number of features
        hidden_size = trial.suggest_int('hidden_size', 16, 128)
        num_layers = trial.suggest_int('num_layers', 1, 3)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = LSTMModel(input_size, hidden_size, num_blocks, num_layers, dropout_rate, classification=True).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2))
        criterion = nn.CrossEntropyLoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.long).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.long).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_accuracy = accuracy_score(target_val.cpu(), val_pred.cpu())
            return val_accuracy  # Return accuracy directly
    
    study = optuna.create_study(direction='maximize')
    study.optimize(lstm_objective, n_trials=100)
    
    # Reshape X to match the best sequence length
    best_sequence_length = study.best_params['sequence_length']
    X_seq, y_seq = create_sequences(X, y, best_sequence_length)
    
    best_model = LSTMModel(X_seq.shape[2], study.best_params['hidden_size'], study.best_params['num_blocks'], study.best_params['num_layers'], study.best_params['dropout_rate'], classification=True).to(device)
    model_path = f'../models/hyperparameters-search-models/pytorch/lstm-classification/{ticker_symbol}.pth'
    csv_path = f'../models/hyperparameters-search-models/ticker-all-models-best-hyperparameters-list.csv'

    ticker_df = load_or_create_ticker_df(csv_path)
    
    # Update ticker_df and save the best model
    metric_col = 'Best_LSTM_Classification_Accuracy'
    path_col = 'Best_LSTM_Classification_Path'
    
    if ticker_symbol in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value > current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_path, index=False)
            print(f"Best model for {ticker_symbol} saved with accuracy: {study.best_value}")
        else:
            print(f"Previous model accuracy: {current_score} is better for {ticker_symbol} than accuracy: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker_symbol], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_path, index=False)
        print(f"Best model for {ticker_symbol} saved with accuracy: {study.best_value}")

In [10]:
def lstm_regression_hyperparameters_search(X, y, gpu_available, ticker_symbol):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy().reshape(-1, 1)
    
    def create_sequences(X, y, sequence_length):
        sequences_X, sequences_y = [], []
        for i in range(len(X) - sequence_length + 1):
            sequences_X.append(X[i:i + sequence_length])
            sequences_y.append(y[i + sequence_length - 1])
        return np.array(sequences_X), np.array(sequences_y)
    
    def lstm_objective(trial):
        sequence_length = trial.suggest_categorical('sequence_length', [5, 7, 14, 20, 30])
        
        # Create sequences
        X_seq, y_seq = create_sequences(X, y, sequence_length)
        
        # Split data into training, validation, and test sets
        TEST_SIZE = 0.2
        VAL_SIZE = 0.1
        RANDOM_STATE = 42
        
        X_train, X_temp, y_train, y_temp = train_test_split(X_seq, y_seq, test_size=TEST_SIZE + VAL_SIZE, random_state=RANDOM_STATE)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=TEST_SIZE / (TEST_SIZE + VAL_SIZE), random_state=RANDOM_STATE)
        
        input_size = X_train.shape[2]  # Number of features
        hidden_size = trial.suggest_int('hidden_size', 16, 128)
        num_layers = trial.suggest_int('num_layers', 1, 3)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = LSTMModel(input_size, hidden_size, num_blocks, num_layers, dropout_rate, classification=False).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2))
        criterion = nn.MSELoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.float32).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.float32).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_rmse = root_mean_squared_error(target_val.cpu(), val_pred.cpu())
            return val_rmse
    
    study = optuna.create_study(direction='minimize')
    study.optimize(lstm_objective, n_trials=100)
    
    # Reshape X to match the best sequence length
    best_sequence_length = study.best_params['sequence_length']
    X_seq, y_seq = create_sequences(X, y, best_sequence_length)
    
    best_model = LSTMModel(X_seq.shape[2], study.best_params['hidden_size'], study.best_params['num_blocks'], study.best_params['num_layers'], study.best_params['dropout_rate'], classification=False).to(device)
    model_path = f'../models/hyperparameters-search-models/pytorch/lstm-regression/{ticker_symbol}.pth'
    csv_path = f'../models/hyperparameters-search-models/ticker-all-models-best-hyperparameters-list.csv'

    ticker_df = load_or_create_ticker_df(csv_path)
    
    # Update ticker_df and save the best model
    metric_col = 'Best_LSTM_Regression_RMSE'
    path_col = 'Best_LSTM_Regression_Path'
    
    if ticker_symbol in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value < current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_path, index=False)
            print(f"Best model for {ticker_symbol} saved with RMSE: {study.best_value}")
        else:
            print(f"Previous model RMSE: {current_score} is better for {ticker_symbol} than RMSE: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker_symbol], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_path, index=False)
        print(f"Best model for {ticker_symbol} saved with RMSE: {study.best_value}")

In [11]:
class Transformer(nn.Module):
    def __init__(self, input_dim, num_heads, num_layers, num_classes=1, classification=True):
        super(Transformer, self).__init__()
        encoder_layer = nn.TransformerEncoderLayer(d_model=input_dim, nhead=num_heads, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(input_dim, num_classes)
        self.classification = classification

    def forward(self, x):
        out = self.transformer_encoder(x)
        out = out.mean(dim=1)  # Global average pooling
        out = self.fc(out)
        if self.classification:
            out = torch.softmax(out, dim=1)  # Use softmax for classification
        return out

In [12]:
def process_transformer_classification(X, y, gpu_available):
    device = torch.device("cuda" if gpu_available and torch.cuda.is_available() else "cpu")
    
    # Convert DataFrame to tensors
    X_tensor = torch.tensor(X.values, dtype=torch.float32)
    y_tensor = torch.tensor(y.values, dtype=torch.long)
    
    # Split data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)
    
    # Create DataLoader
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    
    def objective(trial):
        input_dim = X.shape[1]
        num_heads = trial.suggest_int('num_heads', 2, 8)
        num_layers = trial.suggest_int('num_layers', 1, 4)
        lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
        
        # Ensure input_dim is divisible by num_heads
        while input_dim % num_heads != 0:
            num_heads = trial.suggest_int('num_heads', 2, 8)
        
        model = Transformer(input_dim=input_dim, num_heads=num_heads, num_layers=num_layers, num_classes=len(y.unique()), classification=True).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)
        
        # Training loop
        model.train()
        for epoch in range(10):  # Number of epochs can be a hyperparameter too
            for batch_X, batch_y in train_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        
        # Validation loop
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                _, predicted = torch.max(outputs.data, 1)
                total += batch_y.size(0)
                correct += (predicted == batch_y).sum().item()
        
        accuracy = correct / total

        # Print trial information
        print(f"Trial {trial.number}: Value: {accuracy}, Params: {trial.params}")
        return accuracy
    
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=50)
    
    print("Best hyperparameters: ", study.best_params)
    print("Best accuracy: ", study.best_value)

In [13]:
for ticker_symbol in ticker_list:
    dataframe = pd.read_csv(f"../data/train/{ticker_symbol}.csv")
    X, y_classifier, y_regressor = training_preprocess_data(dataframe)
    ticker_df = conv1d_classification_hyperparameters_search(X, y_classifier, gpu_available, ticker_symbol, ticker_df, csv_file_path)
    ticker_df = conv1d_regression_hyperparameters_search(X, y_regressor, gpu_available, ticker_symbol, ticker_df, csv_file_path)
    ticker_df = lstm_classification_hyperparameters_search(X, y_classifier, gpu_available, ticker_symbol, ticker_df, csv_file_path)
    ticker_df = lstm_regression_hyperparameters_search(X, y_regressor, gpu_available, ticker_symbol, ticker_df, csv_file_path)
    #process_transformer_classification(X, y_classifier, gpu_available)
    #break

[I 2024-09-08 13:36:13,674] A new study created in memory with name: no-name-106078b0-1cc9-4812-8f1a-a2cd1f2ce6f5
[I 2024-09-08 13:36:15,946] Trial 0 finished with value: 0.5492957746478874 and parameters: {'out_channels': 107, 'kernel_size': 7, 'num_blocks': 2, 'l2_lambda': 0.00938256880912232, 'dropout_rate': 0.28830045032304863, 'lr': 0.0001604442339903336}. Best is trial 0 with value: 0.5492957746478874.
[I 2024-09-08 13:36:16,617] Trial 1 finished with value: 0.4507042253521127 and parameters: {'out_channels': 23, 'kernel_size': 3, 'num_blocks': 5, 'l2_lambda': 0.005740783657424676, 'dropout_rate': 0.31695633193652145, 'lr': 0.002518200061875061}. Best is trial 0 with value: 0.5492957746478874.
[I 2024-09-08 13:36:17,393] Trial 2 finished with value: 0.5211267605633803 and parameters: {'out_channels': 59, 'kernel_size': 7, 'num_blocks': 5, 'l2_lambda': 0.004379518197771923, 'dropout_rate': 0.2874138848448198, 'lr': 0.004226488907014316}. Best is trial 0 with value: 0.5492957746478

Best model for CL=F saved with accuracy: 0.5915492957746479


[I 2024-09-08 13:37:22,127] Trial 0 finished with value: 1.6320944297190914 and parameters: {'out_channels': 111, 'kernel_size': 5, 'num_blocks': 5, 'l2_lambda': 0.00696380367465711, 'dropout_rate': 0.3155468047803976, 'lr': 0.005763847719158206}. Best is trial 0 with value: 1.6320944297190914.
[I 2024-09-08 13:37:23,448] Trial 1 finished with value: 1.6320944297190914 and parameters: {'out_channels': 126, 'kernel_size': 5, 'num_blocks': 5, 'l2_lambda': 0.008948326988770737, 'dropout_rate': 0.3916653313904449, 'lr': 0.0026996159555835}. Best is trial 0 with value: 1.6320944297190914.
[I 2024-09-08 13:37:24,143] Trial 2 finished with value: 1.6320944297190914 and parameters: {'out_channels': 31, 'kernel_size': 7, 'num_blocks': 5, 'l2_lambda': 0.00950205144600134, 'dropout_rate': 0.3744513475886846, 'lr': 0.008990292517498088}. Best is trial 0 with value: 1.6320944297190914.
[I 2024-09-08 13:37:24,354] Trial 3 finished with value: 1.6320944297190914 and parameters: {'out_channels': 38, '

Best model for CL=F saved with RMSE: 1.6320944297190914


[I 2024-09-08 13:38:27,376] Trial 0 finished with value: 0.5882352941176471 and parameters: {'sequence_length': 14, 'hidden_size': 100, 'num_layers': 1, 'num_blocks': 4, 'dropout_rate': 0.25756366457839663, 'lr': 0.00919607768143768}. Best is trial 0 with value: 0.5882352941176471.
[I 2024-09-08 13:38:27,648] Trial 1 finished with value: 0.5588235294117647 and parameters: {'sequence_length': 14, 'hidden_size': 123, 'num_layers': 2, 'num_blocks': 1, 'dropout_rate': 0.15728000175014742, 'lr': 0.0024728286879631954}. Best is trial 0 with value: 0.5882352941176471.
[I 2024-09-08 13:38:28,129] Trial 2 finished with value: 0.4375 and parameters: {'sequence_length': 30, 'hidden_size': 42, 'num_layers': 2, 'num_blocks': 3, 'dropout_rate': 0.17358871898819045, 'lr': 0.008303382662240968}. Best is trial 0 with value: 0.5882352941176471.
[I 2024-09-08 13:38:29,041] Trial 3 finished with value: 0.5 and parameters: {'sequence_length': 14, 'hidden_size': 18, 'num_layers': 3, 'num_blocks': 5, 'dropou

Best model for CL=F saved with accuracy: 0.6764705882352942


[I 2024-09-08 13:39:37,930] Trial 0 finished with value: 1.6454061178840402 and parameters: {'sequence_length': 14, 'hidden_size': 23, 'num_layers': 3, 'num_blocks': 1, 'dropout_rate': 0.1858372076652937, 'lr': 0.004640111433110708}. Best is trial 0 with value: 1.6454061178840402.
[I 2024-09-08 13:39:38,523] Trial 1 finished with value: 1.6454061178840402 and parameters: {'sequence_length': 14, 'hidden_size': 69, 'num_layers': 3, 'num_blocks': 3, 'dropout_rate': 0.22666655063808536, 'lr': 0.0067030937301490465}. Best is trial 0 with value: 1.6454061178840402.
[I 2024-09-08 13:39:39,460] Trial 2 finished with value: 1.6454061178840402 and parameters: {'sequence_length': 14, 'hidden_size': 80, 'num_layers': 3, 'num_blocks': 5, 'dropout_rate': 0.137440672726081, 'lr': 0.0017003436199784204}. Best is trial 0 with value: 1.6454061178840402.
[I 2024-09-08 13:39:40,024] Trial 3 finished with value: 1.5608105176943416 and parameters: {'sequence_length': 5, 'hidden_size': 119, 'num_layers': 1, 

Best model for CL=F saved with RMSE: 1.2330613792665257


[I 2024-09-08 13:40:21,428] Trial 0 finished with value: 0.4084507042253521 and parameters: {'out_channels': 103, 'kernel_size': 5, 'num_blocks': 3, 'l2_lambda': 0.004214073322359933, 'dropout_rate': 0.415777685388523, 'lr': 0.00893054328674254}. Best is trial 0 with value: 0.4084507042253521.
[I 2024-09-08 13:40:21,834] Trial 1 finished with value: 0.4084507042253521 and parameters: {'out_channels': 49, 'kernel_size': 3, 'num_blocks': 3, 'l2_lambda': 0.0031319469364601807, 'dropout_rate': 0.40770188881388425, 'lr': 0.003595993254073045}. Best is trial 0 with value: 0.4084507042253521.
[I 2024-09-08 13:40:22,190] Trial 2 finished with value: 0.5915492957746479 and parameters: {'out_channels': 66, 'kernel_size': 7, 'num_blocks': 2, 'l2_lambda': 0.0053957248609396245, 'dropout_rate': 0.23832273023341033, 'lr': 0.004298906221840181}. Best is trial 2 with value: 0.5915492957746479.
[I 2024-09-08 13:40:23,694] Trial 3 finished with value: 0.5915492957746479 and parameters: {'out_channels': 

Best model for NVDA saved with accuracy: 0.5915492957746479


[I 2024-09-08 13:41:25,798] Trial 0 finished with value: 1.683446181698328 and parameters: {'out_channels': 24, 'kernel_size': 5, 'num_blocks': 1, 'l2_lambda': 0.005357372386950887, 'dropout_rate': 0.1885323756968115, 'lr': 0.00043304945144966803}. Best is trial 0 with value: 1.683446181698328.
[I 2024-09-08 13:41:26,672] Trial 1 finished with value: 1.683446181698328 and parameters: {'out_channels': 84, 'kernel_size': 6, 'num_blocks': 4, 'l2_lambda': 0.0009387126533554742, 'dropout_rate': 0.3265449363066303, 'lr': 0.005342478098518179}. Best is trial 0 with value: 1.683446181698328.
[I 2024-09-08 13:41:27,677] Trial 2 finished with value: 1.683446181698328 and parameters: {'out_channels': 77, 'kernel_size': 7, 'num_blocks': 5, 'l2_lambda': 0.00864868077596812, 'dropout_rate': 0.3586706557777628, 'lr': 0.003887182243552183}. Best is trial 0 with value: 1.683446181698328.
[I 2024-09-08 13:41:28,029] Trial 3 finished with value: 1.683446181698328 and parameters: {'out_channels': 79, 'ker

Best model for NVDA saved with RMSE: 1.683446181698328


[I 2024-09-08 13:42:21,237] Trial 0 finished with value: 0.6470588235294118 and parameters: {'sequence_length': 7, 'hidden_size': 68, 'num_layers': 2, 'num_blocks': 1, 'dropout_rate': 0.29930745981460005, 'lr': 0.003745549135057754}. Best is trial 0 with value: 0.6470588235294118.
[I 2024-09-08 13:42:21,978] Trial 1 finished with value: 0.59375 and parameters: {'sequence_length': 30, 'hidden_size': 76, 'num_layers': 3, 'num_blocks': 4, 'dropout_rate': 0.34579530669652564, 'lr': 0.008022183885678292}. Best is trial 0 with value: 0.6470588235294118.
[I 2024-09-08 13:42:22,498] Trial 2 finished with value: 0.40625 and parameters: {'sequence_length': 30, 'hidden_size': 33, 'num_layers': 1, 'num_blocks': 5, 'dropout_rate': 0.24074443358159858, 'lr': 0.008526924238797405}. Best is trial 0 with value: 0.6470588235294118.
[I 2024-09-08 13:42:22,894] Trial 3 finished with value: 0.38235294117647056 and parameters: {'sequence_length': 14, 'hidden_size': 101, 'num_layers': 2, 'num_blocks': 2, 'dr

Best model for NVDA saved with accuracy: 0.7058823529411765


[I 2024-09-08 13:43:06,740] Trial 0 finished with value: 2.250046973668736 and parameters: {'sequence_length': 5, 'hidden_size': 104, 'num_layers': 3, 'num_blocks': 1, 'dropout_rate': 0.25943571163488266, 'lr': 0.0009123790848058709}. Best is trial 0 with value: 2.250046973668736.
[I 2024-09-08 13:43:07,411] Trial 1 finished with value: 1.7255608732912031 and parameters: {'sequence_length': 14, 'hidden_size': 30, 'num_layers': 3, 'num_blocks': 3, 'dropout_rate': 0.1116439917591122, 'lr': 0.007896397602254112}. Best is trial 1 with value: 1.7255608732912031.
[I 2024-09-08 13:43:07,745] Trial 2 finished with value: 2.250046973668736 and parameters: {'sequence_length': 5, 'hidden_size': 80, 'num_layers': 1, 'num_blocks': 2, 'dropout_rate': 0.39894261025575295, 'lr': 0.003491060526886918}. Best is trial 1 with value: 1.7255608732912031.
[I 2024-09-08 13:43:08,913] Trial 3 finished with value: 1.7308573407410872 and parameters: {'sequence_length': 30, 'hidden_size': 110, 'num_layers': 3, 'n

Best model for NVDA saved with RMSE: 1.4169319811777301


[I 2024-09-08 13:43:55,335] Trial 0 finished with value: 0.4864864864864865 and parameters: {'out_channels': 111, 'kernel_size': 6, 'num_blocks': 2, 'l2_lambda': 0.008108878023216717, 'dropout_rate': 0.45756806817848195, 'lr': 0.0005566731428837671}. Best is trial 0 with value: 0.4864864864864865.
[I 2024-09-08 13:43:56,159] Trial 1 finished with value: 0.5405405405405406 and parameters: {'out_channels': 112, 'kernel_size': 3, 'num_blocks': 4, 'l2_lambda': 0.003434107978847839, 'dropout_rate': 0.2344807456324352, 'lr': 0.005227016271846748}. Best is trial 1 with value: 0.5405405405405406.
[I 2024-09-08 13:43:56,515] Trial 2 finished with value: 0.4864864864864865 and parameters: {'out_channels': 74, 'kernel_size': 7, 'num_blocks': 2, 'l2_lambda': 0.009910418804181403, 'dropout_rate': 0.4723937292346748, 'lr': 0.0009128459916624604}. Best is trial 1 with value: 0.5405405405405406.
[I 2024-09-08 13:43:57,749] Trial 3 finished with value: 0.4864864864864865 and parameters: {'out_channels'

Best model for SGDUSD=X saved with accuracy: 0.6621621621621622


[I 2024-09-08 13:45:10,723] Trial 0 finished with value: 0.0021569563708677087 and parameters: {'out_channels': 54, 'kernel_size': 3, 'num_blocks': 5, 'l2_lambda': 0.002854259134453624, 'dropout_rate': 0.3163911134484655, 'lr': 0.0053132251617609245}. Best is trial 0 with value: 0.0021569563708677087.
[I 2024-09-08 13:45:10,978] Trial 1 finished with value: 0.0021569563708677087 and parameters: {'out_channels': 68, 'kernel_size': 3, 'num_blocks': 1, 'l2_lambda': 0.00191814886738372, 'dropout_rate': 0.20228058083671352, 'lr': 0.0005171193926305585}. Best is trial 0 with value: 0.0021569563708677087.
[I 2024-09-08 13:45:11,483] Trial 2 finished with value: 0.0021569563708677087 and parameters: {'out_channels': 45, 'kernel_size': 5, 'num_blocks': 2, 'l2_lambda': 0.007217158642302066, 'dropout_rate': 0.19452108810312954, 'lr': 0.00849321584133759}. Best is trial 0 with value: 0.0021569563708677087.
[I 2024-09-08 13:45:12,270] Trial 3 finished with value: 0.0021569563708677087 and parameter

Best model for SGDUSD=X saved with RMSE: 0.0021569563708677087


[I 2024-09-08 13:46:39,296] Trial 0 finished with value: 0.4411764705882353 and parameters: {'sequence_length': 30, 'hidden_size': 27, 'num_layers': 1, 'num_blocks': 2, 'dropout_rate': 0.3094764683335308, 'lr': 0.0022294023304689063}. Best is trial 0 with value: 0.4411764705882353.
[I 2024-09-08 13:46:39,958] Trial 1 finished with value: 0.5 and parameters: {'sequence_length': 5, 'hidden_size': 124, 'num_layers': 3, 'num_blocks': 3, 'dropout_rate': 0.48002957860052275, 'lr': 0.005141209752742179}. Best is trial 1 with value: 0.5.
[I 2024-09-08 13:46:40,455] Trial 2 finished with value: 0.5294117647058824 and parameters: {'sequence_length': 30, 'hidden_size': 78, 'num_layers': 2, 'num_blocks': 2, 'dropout_rate': 0.35416919332654495, 'lr': 0.004901286676130344}. Best is trial 2 with value: 0.5294117647058824.
[I 2024-09-08 13:46:41,297] Trial 3 finished with value: 0.4722222222222222 and parameters: {'sequence_length': 7, 'hidden_size': 116, 'num_layers': 3, 'num_blocks': 4, 'dropout_rat

Best model for SGDUSD=X saved with accuracy: 0.6571428571428571


[I 2024-09-08 13:47:28,687] Trial 0 finished with value: 0.0018109719720688245 and parameters: {'sequence_length': 20, 'hidden_size': 75, 'num_layers': 3, 'num_blocks': 3, 'dropout_rate': 0.29088246559637454, 'lr': 0.0025010510848901524}. Best is trial 0 with value: 0.0018109719720688245.
[I 2024-09-08 13:47:29,399] Trial 1 finished with value: 0.0018109719720688245 and parameters: {'sequence_length': 20, 'hidden_size': 82, 'num_layers': 2, 'num_blocks': 4, 'dropout_rate': 0.19294440860727957, 'lr': 0.001231731326014502}. Best is trial 0 with value: 0.0018109719720688245.
[I 2024-09-08 13:47:29,887] Trial 2 finished with value: 0.0022484587237977228 and parameters: {'sequence_length': 30, 'hidden_size': 35, 'num_layers': 1, 'num_blocks': 4, 'dropout_rate': 0.48663496543561335, 'lr': 0.007959178218855112}. Best is trial 0 with value: 0.0018109719720688245.
[I 2024-09-08 13:47:30,672] Trial 3 finished with value: 0.0018109719720688245 and parameters: {'sequence_length': 20, 'hidden_size'

Best model for SGDUSD=X saved with RMSE: 0.0017995774309984452


[I 2024-09-08 13:48:17,033] Trial 0 finished with value: 0.5135135135135135 and parameters: {'out_channels': 57, 'kernel_size': 7, 'num_blocks': 5, 'l2_lambda': 0.006069589723776523, 'dropout_rate': 0.2994114994268726, 'lr': 0.008635501271364514}. Best is trial 0 with value: 0.5135135135135135.
[I 2024-09-08 13:48:17,632] Trial 1 finished with value: 0.5135135135135135 and parameters: {'out_channels': 61, 'kernel_size': 3, 'num_blocks': 2, 'l2_lambda': 0.0008156843767012156, 'dropout_rate': 0.19720588063808797, 'lr': 0.0012238116621428846}. Best is trial 0 with value: 0.5135135135135135.
[I 2024-09-08 13:48:18,398] Trial 2 finished with value: 0.4864864864864865 and parameters: {'out_channels': 72, 'kernel_size': 6, 'num_blocks': 2, 'l2_lambda': 0.002026845297973054, 'dropout_rate': 0.4349510057886853, 'lr': 0.0005083403946938414}. Best is trial 0 with value: 0.5135135135135135.
[I 2024-09-08 13:48:18,744] Trial 3 finished with value: 0.4864864864864865 and parameters: {'out_channels':

Best model for USDSGD=X saved with accuracy: 0.6756756756756757


[I 2024-09-08 13:49:53,313] Trial 0 finished with value: 0.003894525079952908 and parameters: {'out_channels': 112, 'kernel_size': 3, 'num_blocks': 4, 'l2_lambda': 0.006778005919676742, 'dropout_rate': 0.2283408050037958, 'lr': 0.0032033961092248196}. Best is trial 0 with value: 0.003894525079952908.
[I 2024-09-08 13:49:54,240] Trial 1 finished with value: 0.003894525079952908 and parameters: {'out_channels': 101, 'kernel_size': 5, 'num_blocks': 2, 'l2_lambda': 0.006954172024819561, 'dropout_rate': 0.22719591373477424, 'lr': 0.009656738968687193}. Best is trial 0 with value: 0.003894525079952908.
[I 2024-09-08 13:49:54,872] Trial 2 finished with value: 0.003894525079952908 and parameters: {'out_channels': 42, 'kernel_size': 4, 'num_blocks': 3, 'l2_lambda': 0.004756608396601409, 'dropout_rate': 0.30144804571732753, 'lr': 0.007984435666487176}. Best is trial 0 with value: 0.003894525079952908.
[I 2024-09-08 13:49:55,599] Trial 3 finished with value: 0.003894525079952908 and parameters: {

Best model for USDSGD=X saved with RMSE: 0.003894525079952908


[I 2024-09-08 13:51:44,706] Trial 0 finished with value: 0.37142857142857144 and parameters: {'sequence_length': 20, 'hidden_size': 58, 'num_layers': 3, 'num_blocks': 4, 'dropout_rate': 0.4573185801022118, 'lr': 0.007919871769395155}. Best is trial 0 with value: 0.37142857142857144.
[I 2024-09-08 13:51:45,205] Trial 1 finished with value: 0.42857142857142855 and parameters: {'sequence_length': 14, 'hidden_size': 35, 'num_layers': 1, 'num_blocks': 4, 'dropout_rate': 0.1971677284349465, 'lr': 0.007769520661754972}. Best is trial 1 with value: 0.42857142857142855.
[I 2024-09-08 13:51:45,478] Trial 2 finished with value: 0.6111111111111112 and parameters: {'sequence_length': 5, 'hidden_size': 111, 'num_layers': 1, 'num_blocks': 2, 'dropout_rate': 0.21891021955433687, 'lr': 0.00873288377452763}. Best is trial 2 with value: 0.6111111111111112.
[I 2024-09-08 13:51:47,273] Trial 3 finished with value: 0.5142857142857142 and parameters: {'sequence_length': 20, 'hidden_size': 122, 'num_layers': 

Best model for USDSGD=X saved with accuracy: 0.6857142857142857


[I 2024-09-08 13:52:27,373] Trial 0 finished with value: 0.0032857278777233432 and parameters: {'sequence_length': 5, 'hidden_size': 55, 'num_layers': 2, 'num_blocks': 2, 'dropout_rate': 0.16434136880436706, 'lr': 0.003996702160833635}. Best is trial 0 with value: 0.0032857278777233432.
[I 2024-09-08 13:52:27,654] Trial 1 finished with value: 0.004058508508193819 and parameters: {'sequence_length': 30, 'hidden_size': 32, 'num_layers': 3, 'num_blocks': 1, 'dropout_rate': 0.3474562680187375, 'lr': 0.009875652393360794}. Best is trial 0 with value: 0.0032857278777233432.
[I 2024-09-08 13:52:28,084] Trial 2 finished with value: 0.003225686499294803 and parameters: {'sequence_length': 7, 'hidden_size': 19, 'num_layers': 3, 'num_blocks': 2, 'dropout_rate': 0.38993400578836035, 'lr': 0.0004640867911610209}. Best is trial 2 with value: 0.003225686499294803.
[I 2024-09-08 13:52:28,712] Trial 3 finished with value: 0.004058508508193819 and parameters: {'sequence_length': 30, 'hidden_size': 124, 

Best model for USDSGD=X saved with RMSE: 0.003225686499294803
