In [1]:
import os
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, root_mean_squared_error

logical_cores = os.cpu_count()
print(f"Number of logical CPU cores: {logical_cores}")

num_workers = max(1, logical_cores // 2)
print(f"Number of workers set to: {num_workers}")

def is_gpu_available():
    try:
        return torch.cuda.is_available()
    except ImportError:
        return False

gpu_available = is_gpu_available()
print(f"GPU available: {gpu_available}")

Number of logical CPU cores: 16
Number of workers set to: 8
GPU available: True


In [2]:
def load_or_create_ticker_df(csv_file_path):
    """
    Load the existing ticker DataFrame from a CSV file if it exists,
    otherwise create a new DataFrame with predefined column types.
    Ensure the DataFrame has the specified columns, add any missing columns,
    and rearrange the columns in alphabetical order, excluding 'Ticker_Symbol'.

    Args:
    csv_file_path (str): The path to the CSV file.

    Returns:
    pd.DataFrame: The loaded or newly created DataFrame.
    """
    # Define the column types
    column_types = {
        "Ticker_Symbol": str,
        "Best_Cov1D_Classification_Accuracy": float,
        "Best_Cov1D_Classification_Path": str,
        "Best_Cov1D_Regression_RMSE": float,
        "Best_Cov1D_Regression_Path": str,
        "Best_LSTM_Classification_Accuracy": float,
        "Best_LSTM_Classification_Path": str,
        "Best_LSTM_Regression_RMSE": float,
        "Best_LSTM_Regression_Path": str,
        "Best_Transformer_Classification_Accuracy": float,
        "Best_TransformerClassification_Path": str,
        "Best_Transformer_Regression_RMSE": float,
        "Best_Transformer_Regression_Path": str,
        "Best_XGBClassifier_Classification_Accuracy": float,
        "Best_XGBClassifier_Classification_Path": str,
        "Best_XGBRegressor_Regression_RMSE": float,
        "Best_XGBRegressor_Regression_Path": str
    }


    if os.path.isfile(csv_file_path):
        # Load the existing file into a DataFrame
        ticker_df = pd.read_csv(csv_file_path)
        
        # Ensure all specified columns are present
        for column, dtype in column_types.items():
            if column not in ticker_df.columns:
                ticker_df[column] = pd.Series(dtype=dtype)
        
        # Reorder columns alphabetically, excluding 'Ticker_Symbol'
        columns = ["Ticker_Symbol"] + sorted([col for col in ticker_df.columns if col != "Ticker_Symbol"])
        ticker_df = ticker_df[columns]
    else:
        # Create a new DataFrame with the specified column types
        ticker_df = pd.DataFrame(columns=column_types.keys()).astype(column_types)
    
    return ticker_df

csv_file_path = "../ticker-best-model.csv"
ticker_df = load_or_create_ticker_df(csv_file_path)

In [3]:
os.makedirs('../models/pytorch/conv1d-classification/', exist_ok=True)
os.makedirs('../models/pytorch/conv1d-regression/', exist_ok=True)
os.makedirs('../models/pytorch/lstm-classification/', exist_ok=True)
os.makedirs('../models/pytorch/lstm-regression/', exist_ok=True)
os.makedirs('../models/pytorch/transformer-classification/', exist_ok=True)
os.makedirs('../models/pytorch/transformer-regression/', exist_ok=True)


path = '../data/train'
ticker_list = []

if os.path.exists(path):
    ticker_list = [os.path.splitext(f)[0] for f in os.listdir(path) if f.endswith('.csv')]


In [4]:
def preprocess_data(df):
    if df.isna().sum().sum() > 0 or df.isin([float('inf'), float('-inf')]).sum().sum() > 0:
        df = df.replace([float('inf'), float('-inf')], float('nan')).dropna()

    df = df.dropna()

    columns_to_drop = [
        'NEXT_DAY_CLOSEPRICE', 'DAILY_CLOSEPRICE_CHANGE', 'CLOSEPRICE_DIRECTION',
        'DAILY_MIDPRICE', 'NEXT_DAY_MIDPRICE', 'DAILY_MIDPRICE_CHANGE', 'MIDPRICE_DIRECTION', 'Date'
    ]
    X = df.drop(columns=columns_to_drop)
    y_classifier = (df['DAILY_CLOSEPRICE_CHANGE'] > 0).astype(int)
    y_regressor = df['DAILY_CLOSEPRICE_CHANGE']

    return X, y_classifier, y_regressor

In [5]:
class Conv1ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, l2_lambda=0.01, dropout_rate=0.5):
        super(Conv1ResidualBlock, self).__init__()
        
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding='same')
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, stride=1, padding='same')
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(dropout_rate)
        
        nn.init.kaiming_normal_(self.conv1.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.conv2.weight, nonlinearity='relu')
        nn.init.zeros_(self.conv1.bias)
        nn.init.zeros_(self.conv2.bias)
        
        self.l2_lambda = l2_lambda

        if in_channels != out_channels:
            self.residual_conv = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=1)
        else:
            self.residual_conv = nn.Identity()

    def forward(self, x):
        residual = self.residual_conv(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        out = self.relu(out)
        
        return out

class Conv1DModel(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, num_blocks=1, l2_lambda=0.01, dropout_rate=0.5, classification=True):
        super(Conv1DModel, self).__init__()
        self.blocks = nn.Sequential(
            Conv1ResidualBlock(in_channels, out_channels, kernel_size, l2_lambda=l2_lambda, dropout_rate=dropout_rate),
            *[Conv1ResidualBlock(out_channels, out_channels, kernel_size, l2_lambda=l2_lambda, dropout_rate=dropout_rate) for _ in range(num_blocks-1)]
        )
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)  # Global average pooling for 1D
        self.fc = nn.Linear(out_channels, 2 if classification else 1)
        self.classification = classification

    def forward(self, x):
        out = self.blocks(x)
        out = self.global_avg_pool(out)
        out = out.view(out.size(0), -1)  # Flatten the tensor
        out = self.fc(out)
        if self.classification:
            out = F.log_softmax(out, dim=1)
        return out

In [6]:
def conv1d_classification_hyperparameters_search(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy()
    
    # Reshape X for Conv1D
    NUM_CHANNELS = 1
    X = X.reshape((X.shape[0], NUM_CHANNELS, -1))  # Reshape for Conv1D: (batch_size, num_channels, sequence_length)
    
    # Split data into training and validation sets
    TEST_SIZE = 0.2
    RANDOM_STATE = 42
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
    
    def conv1d_objective(trial):
        in_channels = X_train.shape[1]
        out_channels = trial.suggest_int('out_channels', 16, 128)
        kernel_size = trial.suggest_int('kernel_size', 3, 7)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        l2_lambda = trial.suggest_float('l2_lambda', 1e-5, 1e-2)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = Conv1DModel(in_channels, out_channels, kernel_size, num_blocks, l2_lambda, dropout_rate, classification=True).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2), weight_decay=l2_lambda)
        criterion = nn.CrossEntropyLoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.long).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.long).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_accuracy = accuracy_score(target_val.cpu(), val_pred.cpu())
            return val_accuracy  # Return accuracy directly
    
    study = optuna.create_study(direction='maximize')  # Change direction to 'maximize'
    study.optimize(conv1d_objective, n_trials=100)
    
    best_model = Conv1DModel(X.shape[1], study.best_params['out_channels'], study.best_params['kernel_size'], study.best_params['num_blocks'], study.best_params['l2_lambda'], study.best_params['dropout_rate'], classification=True).to(device)
    model_path = f'../models/pytorch/conv1d-classification/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_Cov1D_Classification_Accuracy'
    path_col = 'Best_Cov1D_Classification_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value > current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
        else:
            print(f"Previous model accuracy: {current_score} is better for {ticker} than accuracy: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
    
    return ticker_df

In [7]:
def conv1d_regression_hyperparameters_search(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy().reshape(-1, 1)
    
    # Reshape X for Conv1D
    NUM_CHANNELS = 1
    X = X.reshape((X.shape[0], NUM_CHANNELS, -1))  # Reshape for Conv1D: (batch_size, num_channels, sequence_length)
    
    # Split data into training and validation sets
    TEST_SIZE = 0.2
    RANDOM_STATE = 42
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
    
    def conv1d_objective(trial):
        in_channels = X_train.shape[1]
        out_channels = trial.suggest_int('out_channels', 16, 128)
        kernel_size = trial.suggest_int('kernel_size', 3, 7)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        l2_lambda = trial.suggest_float('l2_lambda', 1e-5, 1e-2)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = Conv1DModel(in_channels, out_channels, kernel_size, num_blocks, l2_lambda, dropout_rate, classification=False).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2), weight_decay=l2_lambda)
        criterion = nn.MSELoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.float32).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.float32).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_rmse = root_mean_squared_error(target_val.cpu(), val_pred.cpu())
            return val_rmse
    
    study = optuna.create_study(direction='minimize')
    study.optimize(conv1d_objective, n_trials=100)
    
    best_model = Conv1DModel(X.shape[1], study.best_params['out_channels'], study.best_params['kernel_size'], study.best_params['num_blocks'], study.best_params['l2_lambda'], study.best_params['dropout_rate'], classification=False).to(device)
    model_path = f'../models/pytorch/conv1d-regression/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_Cov1D_Regression_RMSE'
    path_col = 'Best_Cov1D_Regression_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value < current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
        else:
            print(f"Previous model RMSE: {current_score} is better for {ticker} than RMSE: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
    
    return ticker_df

In [8]:
class LSTMResidualBlock(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1, dropout_rate=0.5):
        super(LSTMResidualBlock, self).__init__()
        
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        self.dropout = nn.Dropout(dropout_rate)
        
        if input_size != hidden_size:
            self.residual_fc = nn.Linear(input_size, hidden_size)
        else:
            self.residual_fc = nn.Identity()

    def forward(self, x):
        residual = self.residual_fc(x)
        out, _ = self.lstm(x)
        out = self.dropout(out)
        out += residual
        return out

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_blocks=1, num_layers=1, dropout_rate=0.5, classification=True):
        super(LSTMModel, self).__init__()
        self.blocks = nn.Sequential(
            LSTMResidualBlock(input_size, hidden_size, num_layers=num_layers, dropout_rate=dropout_rate),
            *[LSTMResidualBlock(hidden_size, hidden_size, num_layers=num_layers, dropout_rate=dropout_rate) for _ in range(num_blocks-1)]
        )
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)  # Global average pooling for 1D
        self.fc = nn.Linear(hidden_size, 2 if classification else 1)
        self.classification = classification

    def forward(self, x):
        out = self.blocks(x)
        out = out.mean(dim=1)  # Global average pooling
        out = self.fc(out)
        if self.classification:
            out = F.log_softmax(out, dim=1)
        return out

In [9]:
def lstm_classification_hyperparameters_search(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy()
    
    def create_sequences(X, y, sequence_length):
        sequences_X, sequences_y = [], []
        for i in range(len(X) - sequence_length + 1):
            sequences_X.append(X[i:i + sequence_length])
            sequences_y.append(y[i + sequence_length - 1])
        return np.array(sequences_X), np.array(sequences_y)
    
    def lstm_objective(trial):
        sequence_length = trial.suggest_categorical('sequence_length', [5, 7, 14, 20, 30])
        
        # Create sequences
        X_seq, y_seq = create_sequences(X, y, sequence_length)
        
        # Split data into training, validation, and test sets
        TEST_SIZE = 0.2
        VAL_SIZE = 0.1
        RANDOM_STATE = 42
        
        X_train, X_temp, y_train, y_temp = train_test_split(X_seq, y_seq, test_size=TEST_SIZE + VAL_SIZE, random_state=RANDOM_STATE)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=TEST_SIZE / (TEST_SIZE + VAL_SIZE), random_state=RANDOM_STATE)
        
        input_size = X_train.shape[2]  # Number of features
        hidden_size = trial.suggest_int('hidden_size', 16, 128)
        num_layers = trial.suggest_int('num_layers', 1, 3)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = LSTMModel(input_size, hidden_size, num_blocks, num_layers, dropout_rate, classification=True).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2))
        criterion = nn.CrossEntropyLoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.long).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.long).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_accuracy = accuracy_score(target_val.cpu(), val_pred.cpu())
            return val_accuracy  # Return accuracy directly
    
    study = optuna.create_study(direction='maximize')
    study.optimize(lstm_objective, n_trials=100)
    
    # Reshape X to match the best sequence length
    best_sequence_length = study.best_params['sequence_length']
    X_seq, y_seq = create_sequences(X, y, best_sequence_length)
    
    best_model = LSTMModel(X_seq.shape[2], study.best_params['hidden_size'], study.best_params['num_blocks'], study.best_params['num_layers'], study.best_params['dropout_rate'], classification=True).to(device)
    model_path = f'../models/pytorch/lstm-classification/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_LSTM_Classification_Accuracy'
    path_col = 'Best_LSTM_Classification_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value > current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
        else:
            print(f"Previous model accuracy: {current_score} is better for {ticker} than accuracy: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
        
    return ticker_df

In [10]:
def lstm_regression_hyperparameters_search(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy().reshape(-1, 1)
    
    def create_sequences(X, y, sequence_length):
        sequences_X, sequences_y = [], []
        for i in range(len(X) - sequence_length + 1):
            sequences_X.append(X[i:i + sequence_length])
            sequences_y.append(y[i + sequence_length - 1])
        return np.array(sequences_X), np.array(sequences_y)
    
    def lstm_objective(trial):
        sequence_length = trial.suggest_categorical('sequence_length', [5, 7, 14, 20, 30])
        
        # Create sequences
        X_seq, y_seq = create_sequences(X, y, sequence_length)
        
        # Split data into training, validation, and test sets
        TEST_SIZE = 0.2
        VAL_SIZE = 0.1
        RANDOM_STATE = 42
        
        X_train, X_temp, y_train, y_temp = train_test_split(X_seq, y_seq, test_size=TEST_SIZE + VAL_SIZE, random_state=RANDOM_STATE)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=TEST_SIZE / (TEST_SIZE + VAL_SIZE), random_state=RANDOM_STATE)
        
        input_size = X_train.shape[2]  # Number of features
        hidden_size = trial.suggest_int('hidden_size', 16, 128)
        num_layers = trial.suggest_int('num_layers', 1, 3)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = LSTMModel(input_size, hidden_size, num_blocks, num_layers, dropout_rate, classification=False).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2))
        criterion = nn.MSELoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.float32).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.float32).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_rmse = root_mean_squared_error(target_val.cpu(), val_pred.cpu())
            return val_rmse
    
    study = optuna.create_study(direction='minimize')
    study.optimize(lstm_objective, n_trials=100)
    
    # Reshape X to match the best sequence length
    best_sequence_length = study.best_params['sequence_length']
    X_seq, y_seq = create_sequences(X, y, best_sequence_length)
    
    best_model = LSTMModel(X_seq.shape[2], study.best_params['hidden_size'], study.best_params['num_blocks'], study.best_params['num_layers'], study.best_params['dropout_rate'], classification=False).to(device)
    model_path = f'../models/pytorch/lstm-regression/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_LSTM_Regression_RMSE'
    path_col = 'Best_LSTM_Regression_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value < current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
        else:
            print(f"Previous model RMSE: {current_score} is better for {ticker} than RMSE: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
        
    return ticker_df

In [11]:
class Transformer(nn.Module):
    def __init__(self, input_dim, num_heads, num_layers, num_classes=1, classification=True):
        super(Transformer, self).__init__()
        encoder_layer = nn.TransformerEncoderLayer(d_model=input_dim, nhead=num_heads, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(input_dim, num_classes)
        self.classification = classification

    def forward(self, x):
        out = self.transformer_encoder(x)
        out = out.mean(dim=1)  # Global average pooling
        out = self.fc(out)
        if self.classification:
            out = torch.softmax(out, dim=1)  # Use softmax for classification
        return out

In [12]:
def process_transformer_classification(X, y, gpu_available):
    device = torch.device("cuda" if gpu_available and torch.cuda.is_available() else "cpu")
    
    # Convert DataFrame to tensors
    X_tensor = torch.tensor(X.values, dtype=torch.float32)
    y_tensor = torch.tensor(y.values, dtype=torch.long)
    
    # Split data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)
    
    # Create DataLoader
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    
    def objective(trial):
        input_dim = X.shape[1]
        num_heads = trial.suggest_int('num_heads', 2, 8)
        num_layers = trial.suggest_int('num_layers', 1, 4)
        lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
        
        # Ensure input_dim is divisible by num_heads
        while input_dim % num_heads != 0:
            num_heads = trial.suggest_int('num_heads', 2, 8)
        
        model = Transformer(input_dim=input_dim, num_heads=num_heads, num_layers=num_layers, num_classes=len(y.unique()), classification=True).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)
        
        # Training loop
        model.train()
        for epoch in range(10):  # Number of epochs can be a hyperparameter too
            for batch_X, batch_y in train_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        
        # Validation loop
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                _, predicted = torch.max(outputs.data, 1)
                total += batch_y.size(0)
                correct += (predicted == batch_y).sum().item()
        
        accuracy = correct / total

        # Print trial information
        print(f"Trial {trial.number}: Value: {accuracy}, Params: {trial.params}")
        return accuracy
    
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=50)
    
    print("Best hyperparameters: ", study.best_params)
    print("Best accuracy: ", study.best_value)

In [14]:
for ticker in ticker_list:
    dataframe = pd.read_csv(f"../data/train/{ticker}.csv")
    X, y_classifier, y_regressor = preprocess_data(dataframe)
    ticker_df = conv1d_classification_hyperparameters_search(X, y_classifier, gpu_available, ticker, ticker_df, csv_file_path)
    ticker_df = conv1d_regression_hyperparameters_search(X, y_regressor, gpu_available, ticker, ticker_df, csv_file_path)
    ticker_df = lstm_classification_hyperparameters_search(X, y_classifier, gpu_available, ticker, ticker_df, csv_file_path)
    ticker_df = lstm_regression_hyperparameters_search(X, y_regressor, gpu_available, ticker, ticker_df, csv_file_path)
    #process_transformer_classification(X, y_classifier, gpu_available)
    #break

[I 2024-09-07 14:59:59,026] A new study created in memory with name: no-name-4fdc36d7-101c-4604-8768-45bc19a42828
  return F.conv1d(input, weight, bias, self.stride,
[I 2024-09-07 15:00:02,860] Trial 0 finished with value: 0.5070422535211268 and parameters: {'out_channels': 118, 'kernel_size': 6, 'num_blocks': 1, 'l2_lambda': 0.0077623328410624665, 'dropout_rate': 0.17303991198604662, 'lr': 0.002039600511548567}. Best is trial 0 with value: 0.5070422535211268.
[I 2024-09-07 15:00:03,347] Trial 1 finished with value: 0.49295774647887325 and parameters: {'out_channels': 102, 'kernel_size': 6, 'num_blocks': 2, 'l2_lambda': 0.004136309074126378, 'dropout_rate': 0.126096155206149, 'lr': 0.004535035387229719}. Best is trial 0 with value: 0.5070422535211268.
[I 2024-09-07 15:00:03,756] Trial 2 finished with value: 0.49295774647887325 and parameters: {'out_channels': 77, 'kernel_size': 6, 'num_blocks': 2, 'l2_lambda': 0.009395058659398844, 'dropout_rate': 0.30147091928242775, 'lr': 0.005373729

Best model for CL=F saved with accuracy: 0.5492957746478874


[I 2024-09-07 15:01:00,492] Trial 0 finished with value: 1.7351794372546436 and parameters: {'out_channels': 83, 'kernel_size': 4, 'num_blocks': 5, 'l2_lambda': 0.004847364693991008, 'dropout_rate': 0.27847087699605716, 'lr': 0.008777364533803293}. Best is trial 0 with value: 1.7351794372546436.
[I 2024-09-07 15:01:01,133] Trial 1 finished with value: 1.7351794372546436 and parameters: {'out_channels': 96, 'kernel_size': 6, 'num_blocks': 3, 'l2_lambda': 0.004149642989084174, 'dropout_rate': 0.25333944493034133, 'lr': 0.007196576853869302}. Best is trial 0 with value: 1.7351794372546436.
[I 2024-09-07 15:01:01,776] Trial 2 finished with value: 1.7351794372546436 and parameters: {'out_channels': 60, 'kernel_size': 6, 'num_blocks': 5, 'l2_lambda': 0.0013860872819551225, 'dropout_rate': 0.4752513880977832, 'lr': 0.009410259646068144}. Best is trial 0 with value: 1.7351794372546436.
[I 2024-09-07 15:01:02,239] Trial 3 finished with value: 1.7351794372546436 and parameters: {'out_channels': 

Best model for CL=F saved with RMSE: 1.7351794372546436


[I 2024-09-07 15:02:04,168] Trial 0 finished with value: 0.5882352941176471 and parameters: {'sequence_length': 14, 'hidden_size': 74, 'num_layers': 2, 'num_blocks': 4, 'dropout_rate': 0.38171314267555656, 'lr': 0.004338267891814119}. Best is trial 0 with value: 0.5882352941176471.
[I 2024-09-07 15:02:04,760] Trial 1 finished with value: 0.6060606060606061 and parameters: {'sequence_length': 20, 'hidden_size': 86, 'num_layers': 2, 'num_blocks': 4, 'dropout_rate': 0.3271372438965061, 'lr': 0.0026967470454218297}. Best is trial 1 with value: 0.6060606060606061.
[I 2024-09-07 15:02:05,496] Trial 2 finished with value: 0.5142857142857142 and parameters: {'sequence_length': 7, 'hidden_size': 27, 'num_layers': 3, 'num_blocks': 4, 'dropout_rate': 0.2177776076274225, 'lr': 0.003947449935531616}. Best is trial 1 with value: 0.6060606060606061.
[I 2024-09-07 15:02:06,095] Trial 3 finished with value: 0.45714285714285713 and parameters: {'sequence_length': 7, 'hidden_size': 84, 'num_layers': 2, '

Best model for CL=F saved with accuracy: 0.6571428571428571


[I 2024-09-07 15:02:53,542] Trial 0 finished with value: 1.239505847179434 and parameters: {'sequence_length': 30, 'hidden_size': 71, 'num_layers': 3, 'num_blocks': 3, 'dropout_rate': 0.3736421629741522, 'lr': 0.0028033497339538145}. Best is trial 0 with value: 1.239505847179434.
[I 2024-09-07 15:02:54,138] Trial 1 finished with value: 1.131356216342103 and parameters: {'sequence_length': 14, 'hidden_size': 36, 'num_layers': 3, 'num_blocks': 3, 'dropout_rate': 0.45961067493719343, 'lr': 0.005855191901556003}. Best is trial 1 with value: 1.131356216342103.
[I 2024-09-07 15:02:54,352] Trial 2 finished with value: 1.4897425962016029 and parameters: {'sequence_length': 7, 'hidden_size': 50, 'num_layers': 1, 'num_blocks': 1, 'dropout_rate': 0.45330886808029947, 'lr': 0.004197620471330745}. Best is trial 1 with value: 1.131356216342103.
[I 2024-09-07 15:02:54,905] Trial 3 finished with value: 1.131356216342103 and parameters: {'sequence_length': 14, 'hidden_size': 23, 'num_layers': 2, 'num_b

Best model for CL=F saved with RMSE: 1.131356216342103


[I 2024-09-07 15:03:51,449] Trial 0 finished with value: 0.5492957746478874 and parameters: {'out_channels': 78, 'kernel_size': 6, 'num_blocks': 4, 'l2_lambda': 0.0001878385021155933, 'dropout_rate': 0.3815438999206232, 'lr': 0.003760132288130997}. Best is trial 0 with value: 0.5492957746478874.
[I 2024-09-07 15:03:51,685] Trial 1 finished with value: 0.4507042253521127 and parameters: {'out_channels': 64, 'kernel_size': 5, 'num_blocks': 1, 'l2_lambda': 0.009237532919413646, 'dropout_rate': 0.24110586125132089, 'lr': 0.00702420619786901}. Best is trial 0 with value: 0.5492957746478874.
[I 2024-09-07 15:03:52,076] Trial 2 finished with value: 0.4507042253521127 and parameters: {'out_channels': 41, 'kernel_size': 6, 'num_blocks': 2, 'l2_lambda': 0.006295079862610599, 'dropout_rate': 0.3499997357626594, 'lr': 0.0017973244803308603}. Best is trial 0 with value: 0.5492957746478874.
[I 2024-09-07 15:03:52,316] Trial 3 finished with value: 0.5492957746478874 and parameters: {'out_channels': 1

Best model for NVDA saved with accuracy: 0.5492957746478874


[I 2024-09-07 15:04:59,106] Trial 0 finished with value: 1.5966128023133297 and parameters: {'out_channels': 94, 'kernel_size': 5, 'num_blocks': 4, 'l2_lambda': 0.007620370310355886, 'dropout_rate': 0.434999487347827, 'lr': 0.001300652680714282}. Best is trial 0 with value: 1.5966128023133297.
[I 2024-09-07 15:04:59,644] Trial 1 finished with value: 1.5966128023133297 and parameters: {'out_channels': 34, 'kernel_size': 7, 'num_blocks': 4, 'l2_lambda': 0.005327106855989134, 'dropout_rate': 0.26467374146664546, 'lr': 0.0066614129582946}. Best is trial 0 with value: 1.5966128023133297.
[I 2024-09-07 15:05:00,458] Trial 2 finished with value: 1.5966128023133297 and parameters: {'out_channels': 98, 'kernel_size': 3, 'num_blocks': 4, 'l2_lambda': 0.008070580621889412, 'dropout_rate': 0.3037250019588618, 'lr': 0.0005018148463020436}. Best is trial 0 with value: 1.5966128023133297.
[I 2024-09-07 15:05:01,154] Trial 3 finished with value: 1.5966128023133297 and parameters: {'out_channels': 107,

Best model for NVDA saved with RMSE: 1.5966128023133297


[I 2024-09-07 15:06:03,071] Trial 0 finished with value: 0.5714285714285714 and parameters: {'sequence_length': 5, 'hidden_size': 41, 'num_layers': 1, 'num_blocks': 1, 'dropout_rate': 0.14212464236937905, 'lr': 0.006800589588484554}. Best is trial 0 with value: 0.5714285714285714.
[I 2024-09-07 15:06:03,357] Trial 1 finished with value: 0.47058823529411764 and parameters: {'sequence_length': 14, 'hidden_size': 75, 'num_layers': 3, 'num_blocks': 1, 'dropout_rate': 0.30675940129162826, 'lr': 0.005088138167295136}. Best is trial 0 with value: 0.5714285714285714.
[I 2024-09-07 15:06:03,820] Trial 2 finished with value: 0.6285714285714286 and parameters: {'sequence_length': 5, 'hidden_size': 30, 'num_layers': 2, 'num_blocks': 3, 'dropout_rate': 0.43793348858601966, 'lr': 0.0008066973759683203}. Best is trial 2 with value: 0.6285714285714286.
[I 2024-09-07 15:06:04,191] Trial 3 finished with value: 0.5294117647058824 and parameters: {'sequence_length': 7, 'hidden_size': 97, 'num_layers': 1, 

Best model for NVDA saved with accuracy: 0.6857142857142857


[I 2024-09-07 15:06:58,017] Trial 0 finished with value: 2.0317481427547914 and parameters: {'sequence_length': 5, 'hidden_size': 70, 'num_layers': 2, 'num_blocks': 1, 'dropout_rate': 0.10386821158186513, 'lr': 0.0031809966474727093}. Best is trial 0 with value: 2.0317481427547914.
[I 2024-09-07 15:06:58,382] Trial 1 finished with value: 1.0408448421175824 and parameters: {'sequence_length': 7, 'hidden_size': 71, 'num_layers': 1, 'num_blocks': 3, 'dropout_rate': 0.3938632919921603, 'lr': 0.00816901499377091}. Best is trial 1 with value: 1.0408448421175824.
[I 2024-09-07 15:06:58,644] Trial 2 finished with value: 2.0317481427547914 and parameters: {'sequence_length': 5, 'hidden_size': 50, 'num_layers': 1, 'num_blocks': 2, 'dropout_rate': 0.49262085441171355, 'lr': 0.0020450687868537737}. Best is trial 1 with value: 1.0408448421175824.
[I 2024-09-07 15:06:58,995] Trial 3 finished with value: 1.0545086464154376 and parameters: {'sequence_length': 30, 'hidden_size': 44, 'num_layers': 1, 'n

Best model for NVDA saved with RMSE: 1.0408448421175824


[I 2024-09-07 15:07:48,515] Trial 0 finished with value: 0.5 and parameters: {'out_channels': 76, 'kernel_size': 3, 'num_blocks': 4, 'l2_lambda': 0.0022361575870950444, 'dropout_rate': 0.29996285310771575, 'lr': 0.0004566472708576548}. Best is trial 0 with value: 0.5.
[I 2024-09-07 15:07:48,752] Trial 1 finished with value: 0.5135135135135135 and parameters: {'out_channels': 74, 'kernel_size': 6, 'num_blocks': 1, 'l2_lambda': 0.0032358752865164074, 'dropout_rate': 0.42984607008287634, 'lr': 0.0007135599079368936}. Best is trial 1 with value: 0.5135135135135135.
[I 2024-09-07 15:07:49,523] Trial 2 finished with value: 0.4864864864864865 and parameters: {'out_channels': 123, 'kernel_size': 3, 'num_blocks': 3, 'l2_lambda': 0.002418935012909276, 'dropout_rate': 0.1943258931293139, 'lr': 0.003794749925087245}. Best is trial 1 with value: 0.5135135135135135.
[I 2024-09-07 15:07:50,365] Trial 3 finished with value: 0.4864864864864865 and parameters: {'out_channels': 109, 'kernel_size': 7, 'nu

Best model for SGDUSD=X saved with accuracy: 0.6081081081081081


[I 2024-09-07 15:08:29,147] Trial 0 finished with value: 0.0018248550832419164 and parameters: {'out_channels': 48, 'kernel_size': 4, 'num_blocks': 3, 'l2_lambda': 0.009547516277917055, 'dropout_rate': 0.42162055596309467, 'lr': 0.006418050676558291}. Best is trial 0 with value: 0.0018248550832419164.
[I 2024-09-07 15:08:29,919] Trial 1 finished with value: 0.0018248550832419164 and parameters: {'out_channels': 69, 'kernel_size': 6, 'num_blocks': 4, 'l2_lambda': 0.004247376150603338, 'dropout_rate': 0.13642081434078587, 'lr': 0.007880688897084999}. Best is trial 0 with value: 0.0018248550832419164.
[I 2024-09-07 15:08:30,486] Trial 2 finished with value: 0.0018248550832419164 and parameters: {'out_channels': 59, 'kernel_size': 5, 'num_blocks': 4, 'l2_lambda': 0.009351334413310831, 'dropout_rate': 0.4755512764418629, 'lr': 0.0018316619873867113}. Best is trial 0 with value: 0.0018248550832419164.
[I 2024-09-07 15:08:30,925] Trial 3 finished with value: 0.0018248550832419164 and paramete

Best model for SGDUSD=X saved with RMSE: 0.0018248550832419164


[I 2024-09-07 15:09:39,446] Trial 0 finished with value: 0.4411764705882353 and parameters: {'sequence_length': 30, 'hidden_size': 39, 'num_layers': 2, 'num_blocks': 1, 'dropout_rate': 0.39767568232233297, 'lr': 0.0043737108760287255}. Best is trial 0 with value: 0.4411764705882353.
[I 2024-09-07 15:09:40,168] Trial 1 finished with value: 0.4857142857142857 and parameters: {'sequence_length': 14, 'hidden_size': 119, 'num_layers': 3, 'num_blocks': 4, 'dropout_rate': 0.1002828546215076, 'lr': 0.005566712611505978}. Best is trial 1 with value: 0.4857142857142857.
[I 2024-09-07 15:09:40,629] Trial 2 finished with value: 0.5 and parameters: {'sequence_length': 30, 'hidden_size': 20, 'num_layers': 3, 'num_blocks': 2, 'dropout_rate': 0.3546655826526828, 'lr': 0.0024047411426532327}. Best is trial 2 with value: 0.5.
[I 2024-09-07 15:09:41,009] Trial 3 finished with value: 0.4166666666666667 and parameters: {'sequence_length': 7, 'hidden_size': 126, 'num_layers': 1, 'num_blocks': 3, 'dropout_ra

Best model for SGDUSD=X saved with accuracy: 0.7058823529411765


[I 2024-09-07 15:10:48,110] Trial 0 finished with value: 0.0018685091756578448 and parameters: {'sequence_length': 20, 'hidden_size': 122, 'num_layers': 2, 'num_blocks': 4, 'dropout_rate': 0.47679864882424716, 'lr': 0.0011091499197808342}. Best is trial 0 with value: 0.0018685091756578448.
[I 2024-09-07 15:10:48,560] Trial 1 finished with value: 0.0018685091756578448 and parameters: {'sequence_length': 20, 'hidden_size': 87, 'num_layers': 3, 'num_blocks': 2, 'dropout_rate': 0.13273707107876948, 'lr': 0.009858391469918302}. Best is trial 0 with value: 0.0018685091756578448.
[I 2024-09-07 15:10:49,407] Trial 2 finished with value: 0.0023277662390990186 and parameters: {'sequence_length': 14, 'hidden_size': 64, 'num_layers': 3, 'num_blocks': 5, 'dropout_rate': 0.3572890417690223, 'lr': 0.005540692097233475}. Best is trial 0 with value: 0.0018685091756578448.
[I 2024-09-07 15:10:49,667] Trial 3 finished with value: 0.0023277662390990186 and parameters: {'sequence_length': 14, 'hidden_size'

Best model for SGDUSD=X saved with RMSE: 0.0015725821722058916


[I 2024-09-07 15:11:52,393] Trial 0 finished with value: 0.4594594594594595 and parameters: {'out_channels': 28, 'kernel_size': 4, 'num_blocks': 1, 'l2_lambda': 0.005051338088907942, 'dropout_rate': 0.12833519237117097, 'lr': 0.0011988816877774356}. Best is trial 0 with value: 0.4594594594594595.
[I 2024-09-07 15:11:53,102] Trial 1 finished with value: 0.5135135135135135 and parameters: {'out_channels': 108, 'kernel_size': 5, 'num_blocks': 3, 'l2_lambda': 0.009855821470545418, 'dropout_rate': 0.21794923386618278, 'lr': 0.007583517626963757}. Best is trial 1 with value: 0.5135135135135135.
[I 2024-09-07 15:11:53,842] Trial 2 finished with value: 0.5135135135135135 and parameters: {'out_channels': 61, 'kernel_size': 5, 'num_blocks': 5, 'l2_lambda': 0.0014227918175082504, 'dropout_rate': 0.39031162891208215, 'lr': 0.009297399601603399}. Best is trial 1 with value: 0.5135135135135135.
[I 2024-09-07 15:11:54,344] Trial 3 finished with value: 0.4864864864864865 and parameters: {'out_channels

Best model for USDSGD=X saved with accuracy: 0.5540540540540541


[I 2024-09-07 15:12:49,938] Trial 0 finished with value: 0.0032912780826539973 and parameters: {'out_channels': 39, 'kernel_size': 4, 'num_blocks': 2, 'l2_lambda': 0.007320088288435464, 'dropout_rate': 0.4050340243661409, 'lr': 0.002791787915107887}. Best is trial 0 with value: 0.0032912780826539973.
[I 2024-09-07 15:12:50,467] Trial 1 finished with value: 0.0032912780826539973 and parameters: {'out_channels': 44, 'kernel_size': 5, 'num_blocks': 3, 'l2_lambda': 0.007311020554779732, 'dropout_rate': 0.48316477093358234, 'lr': 0.006163520591388551}. Best is trial 0 with value: 0.0032912780826539973.
[I 2024-09-07 15:12:51,961] Trial 2 finished with value: 0.0032912780826539973 and parameters: {'out_channels': 125, 'kernel_size': 4, 'num_blocks': 5, 'l2_lambda': 0.0009257635807558144, 'dropout_rate': 0.20065381622900766, 'lr': 0.007863161180030775}. Best is trial 0 with value: 0.0032912780826539973.
[I 2024-09-07 15:12:52,158] Trial 3 finished with value: 0.0032912780826539973 and paramet

Best model for USDSGD=X saved with RMSE: 0.0032912780826539973


[I 2024-09-07 15:13:59,329] Trial 0 finished with value: 0.5142857142857142 and parameters: {'sequence_length': 14, 'hidden_size': 16, 'num_layers': 1, 'num_blocks': 3, 'dropout_rate': 0.23741639035512146, 'lr': 0.009062646734629379}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-07 15:13:59,528] Trial 1 finished with value: 0.5 and parameters: {'sequence_length': 30, 'hidden_size': 80, 'num_layers': 1, 'num_blocks': 1, 'dropout_rate': 0.16416581543695086, 'lr': 0.009745610945727678}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-07 15:13:59,981] Trial 2 finished with value: 0.37142857142857144 and parameters: {'sequence_length': 20, 'hidden_size': 66, 'num_layers': 1, 'num_blocks': 4, 'dropout_rate': 0.4820348117917327, 'lr': 0.0066041452724889165}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-07 15:14:00,749] Trial 3 finished with value: 0.42857142857142855 and parameters: {'sequence_length': 14, 'hidden_size': 17, 'num_layers': 3, 'num_blocks'

Best model for USDSGD=X saved with accuracy: 0.7714285714285715


[I 2024-09-07 15:15:10,593] Trial 0 finished with value: 0.0034548008560613395 and parameters: {'sequence_length': 30, 'hidden_size': 55, 'num_layers': 3, 'num_blocks': 3, 'dropout_rate': 0.45249199429272247, 'lr': 0.0014584558410594198}. Best is trial 0 with value: 0.0034548008560613395.
[I 2024-09-07 15:15:11,116] Trial 1 finished with value: 0.0028111993613357793 and parameters: {'sequence_length': 7, 'hidden_size': 116, 'num_layers': 3, 'num_blocks': 2, 'dropout_rate': 0.1979049917824953, 'lr': 0.007652837818724286}. Best is trial 1 with value: 0.0028111993613357793.
[I 2024-09-07 15:15:11,550] Trial 2 finished with value: 0.003387947906871176 and parameters: {'sequence_length': 20, 'hidden_size': 124, 'num_layers': 2, 'num_blocks': 2, 'dropout_rate': 0.3347070222267552, 'lr': 0.0025073198537943616}. Best is trial 1 with value: 0.0028111993613357793.
[I 2024-09-07 15:15:11,882] Trial 3 finished with value: 0.003387947906871176 and parameters: {'sequence_length': 20, 'hidden_size': 

Best model for USDSGD=X saved with RMSE: 0.0028111993613357793
