In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from typing import Optional, Union, Tuple, List
import json
import pickle

pd.set_option('display.max_columns', None)
from sklearn.model_selection import GroupKFold

import torch 
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,TensorDataset
from torch.optim.lr_scheduler import OneCycleLR

import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint


print(f"PyTorch version: {torch.__version__}")
print(f"PyTorch Lightning version: {pl.__version__}")

import optuna
from optuna.visualization import (
    plot_edf
    , plot_optimization_history
    , plot_parallel_coordinate
    , plot_param_importances
    , plot_slice
)

# local modules
import sys
sys.path.append("../src")
from preproc import process_train_data, process_test_data
from transformer import LGBMLeavesEncoder

PyTorch version: 2.4.1
PyTorch Lightning version: 2.4.0


In [2]:
# Fix the global random seed for reproducibility. Per Lightning's documentation
# `seed_everything` seeds Python, NumPy and PyTorch, and `workers=True` also
# derives seeds for DataLoader worker processes.
# The trailing `;` suppresses the returned seed value in the cell output.
from lightning.pytorch import seed_everything
seed_everything(2112, workers=True);

Seed set to 2112


In [3]:
# useful callbacks
class LearningRateMonitor(pl.Callback):
    """Callback that periodically surfaces the current learning rate.

    On every training batch whose index is a multiple of 100, the ``lr`` of
    the first parameter group of the module's optimizer is logged under the
    key ``learning_rate`` and shown in the progress bar.
    """

    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
        # Only report on batches 0, 100, 200, ...
        if batch_idx % 100 != 0:
            return
        first_group = pl_module.optimizers().param_groups[0]
        pl_module.log('learning_rate', first_group['lr'], prog_bar=True)

class BestValRMSELogger(pl.Callback):
    """Track the lowest ``val_rmse`` seen so far and log it as ``best_val_rmse``.

    Attributes:
        best_val_rmse: Running minimum of the ``val_rmse`` metric, always kept
            as a plain Python float (``inf`` until a value has been observed).
    """

    def __init__(self):
        super().__init__()
        self.best_val_rmse = float('inf')

    def on_validation_epoch_end(self, trainer, pl_module):
        """Update the running minimum from ``trainer.callback_metrics``.

        NOTE(review): the value read here is whatever ``val_rmse`` is present
        in ``callback_metrics`` when callbacks are invoked; confirm against the
        Lightning hook order that this is the current epoch's value.
        """
        # The pre-training sanity check only sees a few batches; a metric
        # computed on it must never be recorded as the best score.
        if trainer.sanity_checking:
            return

        current_val_rmse = trainer.callback_metrics.get('val_rmse')
        if current_val_rmse is None:
            return

        # Convert to float so the attribute keeps a single type and does not
        # hold on to a (possibly on-device) tensor between epochs.
        self.best_val_rmse = min(self.best_val_rmse, float(current_val_rmse))
        pl_module.log('best_val_rmse', self.best_val_rmse, prog_bar=True)

***
### load and preprocess data

In [4]:
# project data directories, relative to the notebook location
path_raw = Path("../data") / "raw"
path_processed = Path("../data") / "processed"
path_results = Path("../data") / "results"

# read the raw train / test tables from the raw-data directory
df_train, df_test = (
    pd.read_csv(path_raw.joinpath(f"{split_name}.csv"))
    for split_name in ("train", "test")
)

df_train

Unnamed: 0,Id,GameRulesetName,agent1,agent2,Properties,Format,Time,Discrete,Realtime,Turns,...,DoLudeme,Trigger,PlayoutsPerSecond,MovesPerSecond,EnglishRules,LudRules,num_wins_agent1,num_draws_agent1,num_losses_agent1,utility_agent1
0,0,00Y,MCTS-ProgressiveHistory-0.1-MAST-false,MCTS-ProgressiveHistory-0.6-Random200-false,1,1,1,1,0,1,...,0,1,298.07,18877.17,Goal: Connect all three edge colors with a sin...,"(game ""00'Y'"" (players 2) (equipment { (board ...",4,0,11,-0.466667
1,1,00Y,MCTS-ProgressiveHistory-0.1-MAST-false,MCTS-UCB1GRAVE-0.6-NST-true,1,1,1,1,0,1,...,0,1,298.07,18877.17,Goal: Connect all three edge colors with a sin...,"(game ""00'Y'"" (players 2) (equipment { (board ...",5,0,10,-0.333333
2,2,00Y,MCTS-ProgressiveHistory-0.1-MAST-true,MCTS-UCB1-0.1-NST-false,1,1,1,1,0,1,...,0,1,298.07,18877.17,Goal: Connect all three edge colors with a sin...,"(game ""00'Y'"" (players 2) (equipment { (board ...",7,0,8,-0.066667
3,3,00Y,MCTS-ProgressiveHistory-0.1-MAST-true,MCTS-UCB1-0.6-NST-false,1,1,1,1,0,1,...,0,1,298.07,18877.17,Goal: Connect all three edge colors with a sin...,"(game ""00'Y'"" (players 2) (equipment { (board ...",5,0,10,-0.333333
4,4,00Y,MCTS-ProgressiveHistory-0.1-MAST-true,MCTS-UCB1GRAVE-1.41421356237-NST-false,1,1,1,1,0,1,...,0,1,298.07,18877.17,Goal: Connect all three edge colors with a sin...,"(game ""00'Y'"" (players 2) (equipment { (board ...",5,0,10,-0.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
233229,233229,Zuz_Mel_7x7,MCTS-UCB1Tuned-1.41421356237-NST-false,MCTS-ProgressiveHistory-1.41421356237-Random20...,1,1,1,1,0,1,...,0,0,157.52,157174.58,7x7 board. 24 pieces per player. Pieces begin ...,"(game ""Zuz Mel (7x7)"" (players 2) (equipment {...",2,0,13,-0.733333
233230,233230,Zuz_Mel_7x7,MCTS-UCB1Tuned-1.41421356237-Random200-false,MCTS-UCB1-0.6-MAST-false,1,1,1,1,0,1,...,0,0,157.52,157174.58,7x7 board. 24 pieces per player. Pieces begin ...,"(game ""Zuz Mel (7x7)"" (players 2) (equipment {...",9,1,5,0.266667
233231,233231,Zuz_Mel_7x7,MCTS-UCB1Tuned-1.41421356237-Random200-false,MCTS-UCB1GRAVE-1.41421356237-NST-false,1,1,1,1,0,1,...,0,0,157.52,157174.58,7x7 board. 24 pieces per player. Pieces begin ...,"(game ""Zuz Mel (7x7)"" (players 2) (equipment {...",11,3,1,0.666667
233232,233232,Zuz_Mel_7x7,MCTS-UCB1Tuned-1.41421356237-Random200-false,MCTS-UCB1GRAVE-1.41421356237-NST-true,1,1,1,1,0,1,...,0,0,157.52,157174.58,7x7 board. 24 pieces per player. Pieces begin ...,"(game ""Zuz Mel (7x7)"" (players 2) (equipment {...",24,2,4,0.666667


In [5]:
# Key selecting which saved feature-selection result to load; it is looked up
# in `fs_mapping` below ('full' maps to None, i.e. no selection file is read).
fs_type = 'uni95'

In [6]:
# Registry of available feature-selection results: short key -> JSON file.
# A value of None means "no selection file".
fs_mapping = {
    "full": None,
    "fsv2": '../feat_selection/select_optuna_lgbm_v2.json',
    "fsv3": '../feat_selection/select_optuna_catb.json',
    "fsv4": '../feat_selection/select_optuna_lgbm_v3.json',
    "fsv23": '../feat_selection/select_optuna_combined_v23.json',
    "fsv24": '../feat_selection/select_optuna_combined_v24.json',
    "fsv34": '../feat_selection/select_optuna_combined_v34.json',
    "int95": '../feat_selection/feat_selection_intersection_at_95.json',
    "int96": '../feat_selection/feat_selection_intersection_at_96.json',
    "int97": '../feat_selection/feat_selection_intersection_at_97.json',
    "int98": '../feat_selection/feat_selection_intersection_at_98.json',
    "int99": '../feat_selection/feat_selection_intersection_at_99.json',
    "uni80": '../feat_selection/feat_selection_union_at_80.json',
    "uni85": '../feat_selection/feat_selection_union_at_85.json',
    "uni90": '../feat_selection/feat_selection_union_at_90.json',
    "uni95": '../feat_selection/feat_selection_union_at_95.json',
}

fs_path = fs_mapping[fs_type]

# Start from an empty selection and overwrite it only when a file is configured.
feature_selection = dict()
if fs_path is not None:
    with open(fs_path, 'r') as f:
        feature_selection = json.load(f)

# Selected feature names per type (None when the key is absent from the file)
numerical_cols = feature_selection.get('numerical')
categorical_cols = feature_selection.get('categorical')

# text_cols = ["LudRules",]
text_cols = []

# Report how many features of each type were selected (None counts as 0)
print("Numerical features:", len(numerical_cols or []))
print("Categorical features:", len(categorical_cols or []))
print("Text features:", len(text_cols or []))

Numerical features: 396
Categorical features: 10
Text features: 0


In [7]:
# Preprocess the training frame with the project helper (min-max scaling on,
# position and text features off).
# NOTE: this rebinds `df_train`, `numerical_cols` and `categorical_cols`, so
# re-running the cell feeds the already-processed frame back into
# `process_train_data`; reload the raw data first when re-executing.
# `encoder` and `scaler` are returned by the helper; presumably they are the
# fitted transformers needed to process the test set - TODO confirm in preproc.
df_train, numerical_cols, categorical_cols, encoder, scaler = process_train_data(
    df_train,
    scale=True,
    scale_type="minmax",
    numerical_cols=numerical_cols,
    categorical_cols=categorical_cols,
    include_position_features=False,
    include_text_features=False,
)

# Print the number of columns of each type after preprocessing
print("Numerical Columns:", len(numerical_cols))
print("Categorical Columns:", len(categorical_cols))

number of all nan cols:  0
number of constant cols:  0
Numerical Columns: 396
Categorical Columns: 10


In [8]:
# number of distinct values of each categorical column, in `categorical_cols` order
cat_input_dims = [int(df_train[col].nunique()) for col in categorical_cols]
print(cat_input_dims)

[2, 3, 3, 3, 3, 72, 4, 4, 2, 72]


In [9]:
# split1 = pickle.load(open('../data/splits/cv1_Game.pkl', 'rb'))
# split2 = pickle.load(open('../data/splits/cv2_Game.pkl', 'rb'))
# split3 = pickle.load(open('../data/splits/cv3_Game.pkl', 'rb'))

# Load the pre-computed cross-validation split. The file is opened in a
# context manager so the handle is closed as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code - only load split files that
# were produced by this project.
with open('../data/splits/cv4_Game.pkl', 'rb') as f:
    split4 = pickle.load(f)


***
### train model


In [10]:
class SoftOrdering1DCNN(pl.LightningModule):
    """1D-CNN regressor for tabular data with embedded categorical features.

    Each categorical input is embedded and concatenated with the numerical
    inputs. A weight-normalised dense layer projects that vector to
    ``sign_size * cha_input`` units, which are reshaped into a
    ``(cha_input, sign_size)`` signal. The signal passes through four 1D
    convolutions (the first and last are depthwise, with a residual connection
    from the output of the second to the output of the fourth), is pooled,
    flattened and mapped by a linear head whose output is clipped with
    ``hardtanh``. Training minimises MSE using Adam with a one-cycle
    learning-rate schedule stepped every batch.

    Args:
        num_input_dim: Number of numerical input features.
        cat_input_dims: Embedding table size (number of categories) of each
            categorical feature, in column order.
        output_dim: Width of the output layer. The trailing dimension is
            squeezed, so ``1`` yields predictions of shape ``(batch,)``.
        sign_size: Length of the 1D signal fed to the CNN; must be >= 4.
        cha_input: Number of channels of the reshaped signal.
        cha_hidden: Number of channels of convolutions 2-4.
        K: Channel multiplier of the first (depthwise) convolution.
        dropout_input: Dropout applied before the input projection.
        dropout_hidden: Dropout applied before convolutions 2 and 3.
        dropout_output: Dropout applied before the output layer.
        embedding_dropout: Dropout applied to the concatenated embeddings.
        learning_rate: Adam learning rate, also used as ``max_lr`` of the
            one-cycle schedule.
        weight_decay: Adam weight decay.
        embedding_dim: Embedding size per categorical feature. Defaults to
            ``min(50, 1 + ceil(sqrt(dim)))`` for each entry of
            ``cat_input_dims``.
        pct_start: Forwarded to ``OneCycleLR``.
        div_factor: Forwarded to ``OneCycleLR``.
        final_div_factor: Forwarded to ``OneCycleLR``.

    Raises:
        ValueError: If ``embedding_dim`` and ``cat_input_dims`` differ in
            length, or if ``sign_size`` is smaller than 4.
    """

    def __init__(self,
            num_input_dim: int,
            cat_input_dims: List[int],
            output_dim: int,
            sign_size: int = 32,
            cha_input: int = 16,
            cha_hidden: int = 32,
            K: int = 2,
            dropout_input: float = 0.2,
            dropout_hidden: float = 0.2,
            dropout_output: float = 0.2,
            embedding_dropout: float = 0.2,
            learning_rate: float = 1e-3,
            weight_decay: float = 1e-5,
            embedding_dim: Optional[List[int]] = None,
            pct_start: float = 0.2,
            div_factor: float = 10.0,
            final_div_factor: float = 1e4):
        super().__init__()
        # Store the constructor arguments so `load_from_checkpoint` can rebuild the model.
        self.save_hyperparameters()

        # The flattened CNN output has (sign_size // 4) * cha_hidden features;
        # anything below 4 would leave the output head without inputs.
        if sign_size < 4:
            raise ValueError("sign_size must be at least 4.")

        # Initialize embedding dimensions if not provided
        if embedding_dim is None:
            embedding_dim = [min(50, int(1 + np.ceil(np.sqrt(dim)))) for dim in cat_input_dims]
        elif len(embedding_dim) != len(cat_input_dims):
            raise ValueError("Length of embedding_dim must match number of categorical features.")

        self.embedding_dim = embedding_dim
        self.embedding_dropout = embedding_dropout

        # Create embedding layers (one table per categorical feature)
        self.embeddings = nn.ModuleList(
            [nn.Embedding(dim, emb_dim) for dim, emb_dim in zip(cat_input_dims, embedding_dim)]
        )
        self.embedding_dropout_layer = nn.Dropout(self.embedding_dropout)

        # Calculate total input dimension after embeddings
        total_embedding_dim = sum(self.embedding_dim)
        total_input_dim = num_input_dim + total_embedding_dim

        # CNN architecture parameters
        hidden_size = sign_size * cha_input
        self.sign_size1 = sign_size
        self.sign_size2 = sign_size//2
        self.output_size = (sign_size//4) * cha_hidden
        self.cha_input = cha_input
        self.cha_hidden = cha_hidden
        self.K = K

        # Input projection: flat features -> cha_input * sign_size units
        self.batch_norm1 = nn.BatchNorm1d(total_input_dim)
        self.dropout1 = nn.Dropout(dropout_input)
        dense1 = nn.Linear(total_input_dim, hidden_size, bias=False)
        self.dense1 = nn.utils.weight_norm(dense1)

        # 1st conv layer: depthwise (one group per input channel), K outputs per channel
        self.batch_norm_c1 = nn.BatchNorm1d(cha_input)
        conv1 = nn.Conv1d(
            cha_input,
            cha_input*K,
            kernel_size=5,
            stride=1,
            padding=2,
            groups=cha_input,
            bias=False)
        self.conv1 = nn.utils.weight_norm(conv1, dim=None)
        # Halves the signal length: sign_size -> sign_size // 2
        self.ave_po_c1 = nn.AdaptiveAvgPool1d(output_size=self.sign_size2)

        # 2nd conv layer
        self.batch_norm_c2 = nn.BatchNorm1d(cha_input*K)
        self.dropout_c2 = nn.Dropout(dropout_hidden)
        conv2 = nn.Conv1d(
            cha_input*K,
            cha_hidden,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False)
        self.conv2 = nn.utils.weight_norm(conv2, dim=None)

        # 3rd conv layer
        self.batch_norm_c3 = nn.BatchNorm1d(cha_hidden)
        self.dropout_c3 = nn.Dropout(dropout_hidden)
        conv3 = nn.Conv1d(
            cha_hidden,
            cha_hidden,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False)
        self.conv3 = nn.utils.weight_norm(conv3, dim=None)

        # 4th conv layer: depthwise, keeps cha_hidden channels so the residual add is valid
        self.batch_norm_c4 = nn.BatchNorm1d(cha_hidden)
        conv4 = nn.Conv1d(
            cha_hidden,
            cha_hidden,
            kernel_size=5,
            stride=1,
            padding=2,
            groups=cha_hidden,
            bias=False)
        self.conv4 = nn.utils.weight_norm(conv4, dim=None)

        # Halves the signal length again: sign_size // 2 -> sign_size // 4
        self.avg_po_c4 = nn.AvgPool1d(kernel_size=4, stride=2, padding=1)
        self.flt = nn.Flatten()

        # Output head
        self.batch_norm2 = nn.BatchNorm1d(self.output_size)
        self.dropout2 = nn.Dropout(dropout_output)
        dense2 = nn.Linear(self.output_size, output_dim, bias=False)
        self.dense2 = nn.utils.weight_norm(dense2)

        # Training parameters
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.pct_start = pct_start
        self.div_factor = div_factor
        self.final_div_factor = final_div_factor

        # Initialize lists to store validation outputs
        self.validation_targets = []
        self.validation_predictions = []

    def forward(self, x_num, x_cat):
        """Compute predictions for a batch.

        Args:
            x_num: Numerical features, one row per sample.
            x_cat: Integer category indices; column ``i`` is looked up in the
                ``i``-th embedding table.

        Returns:
            The output of the final layer with its last dimension squeezed.
        """
        # Process categorical variables
        embedded = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        embedded = torch.cat(embedded, dim=1)
        embedded = self.embedding_dropout_layer(embedded)

        # Concatenate numerical and embedded categorical features
        x = torch.cat([x_num, embedded], dim=1)

        # Input projection
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = nn.functional.celu(self.dense1(x))

        # Reshape for CNN: (batch, cha_input * sign_size) -> (batch, cha_input, sign_size)
        x = x.reshape(x.shape[0], self.cha_input, self.sign_size1)

        # CNN backbone
        x = self.batch_norm_c1(x)
        x = nn.functional.leaky_relu(self.conv1(x))
        x = self.ave_po_c1(x)

        x = self.batch_norm_c2(x)
        x = self.dropout_c2(x)
        x = nn.functional.leaky_relu(self.conv2(x))
        # Keep the output of the 2nd convolution for the residual connection.
        x_s = x

        x = self.batch_norm_c3(x)
        x = self.dropout_c3(x)
        x = nn.functional.leaky_relu(self.conv3(x))

        x = self.batch_norm_c4(x)
        x = self.conv4(x)
        x = x + x_s
        x = nn.functional.leaky_relu(x)

        x = self.avg_po_c4(x)
        x = self.flt(x)

        # Output head
        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.dense2(x)
        # hardtanh with its default bounds clips the prediction to [-1, 1].
        x = nn.functional.hardtanh(x)

        return x.squeeze(-1)

    def training_step(self, batch, batch_idx):
        """Return the MSE loss of one ``(x_num, x_cat, y)`` batch and log it as ``train_loss``."""
        x_num, x_cat, y = batch
        y_hat = self(x_num, x_cat)
        loss = F.mse_loss(y_hat, y)
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the batch MSE as ``valid_loss`` and buffer targets/predictions for the epoch RMSE."""
        x_num, x_cat, y = batch
        y_hat = self(x_num, x_cat)
        loss = F.mse_loss(y_hat, y)
        self.log('valid_loss', loss, prog_bar=True)
        # Store targets and predictions for later use
        self.validation_targets.append(y)
        self.validation_predictions.append(y_hat)
        return loss

    def predict_step(self, batch, batch_idx):
        """Return predictions for a batch of ``(x_num, x_cat)`` or ``(x_num, x_cat, y)``.

        Raises:
            ValueError: If the batch has neither 2 nor 3 elements.
        """
        if len(batch) == 2:
            x_num, x_cat = batch
        elif len(batch) == 3:
            # A target may be present (e.g. when reusing a validation loader); it is ignored.
            x_num, x_cat, _ = batch
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(
                f"Expected a batch of 2 or 3 tensors, got {len(batch)}."
            )
        y_hat = self(x_num, x_cat)
        return y_hat

    def on_validation_epoch_end(self):
        """Log ``val_rmse`` computed over all buffered validation batches, then reset the buffers."""
        # Concatenate all targets and predictions
        y = torch.cat(self.validation_targets)
        y_hat = torch.cat(self.validation_predictions)
        rmse = torch.sqrt(F.mse_loss(y_hat, y))
        self.log('val_rmse', rmse, prog_bar=True)
        # Clear the lists for next epoch
        self.validation_targets.clear()
        self.validation_predictions.clear()

    def configure_optimizers(self):
        """Build the Adam optimizer and a per-step ``OneCycleLR`` schedule.

        The schedule length is taken from
        ``self.trainer.estimated_stepping_batches``, so a trainer must be
        attached when this is called.
        """
        optimizer = torch.optim.Adam(
            self.parameters(),
            lr=self.learning_rate,
            weight_decay=self.weight_decay,
        )
        scheduler = OneCycleLR(
            optimizer,
            max_lr=self.learning_rate,
            total_steps=self.trainer.estimated_stepping_batches,
            pct_start=self.pct_start,
            div_factor=self.div_factor,
            final_div_factor=self.final_div_factor,
            anneal_strategy='cos',
            cycle_momentum=True,
            base_momentum=0.85,
            max_momentum=0.95,
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                # Step the schedule after every batch rather than every epoch.
                "interval": "step",
            },
        }

In [11]:
def train_and_score(
        # training params
        batch_size=512,
        embedding_dropout=0.2,
        learning_rate=5e-3,
        weight_decay=1e-5,
        pct_start=0.05,
        div_factor=100,
        final_div_factor=100,
        max_epochs=100,
        # model params
        sign_size=16,
        cha_input=64,
        cha_hidden=32,
        K=2,
        dropout_input=0.2,
        dropout_hidden=0.1,
        dropout_output=0.1,
        # runtime params
        accelerator="mps",
    ):
    """Train one 1D-CNN per fold and return the mean validation RMSE.

    Only the first three folds of the global ``split4`` are used. For every
    fold an ``LGBMLeavesEncoder`` is fitted on the training part and its
    output columns are appended as extra categorical features; a
    ``SoftOrdering1DCNN`` is then trained with early stopping on ``val_rmse``
    and the best checkpoint is scored on the validation part.

    Reads the notebook globals ``df_train``, ``numerical_cols``,
    ``categorical_cols`` and ``split4``.

    Args:
        batch_size: Batch size of the training and validation loaders.
        embedding_dropout: Dropout rate applied to the embeddings.
        learning_rate: Peak learning rate of the one-cycle schedule.
        weight_decay: Adam weight decay.
        pct_start: Fraction of the schedule spent increasing the learning rate.
        div_factor: Initial learning rate division factor.
        final_div_factor: Final learning rate division factor.
        max_epochs: Maximum number of epochs per fold.
        sign_size: Length of the 1D signal fed to the CNN.
        cha_input: Number of channels of the reshaped signal.
        cha_hidden: Number of channels of the hidden convolutions.
        K: Channel multiplier of the first convolution.
        dropout_input: Input dropout rate.
        dropout_hidden: Hidden layer dropout rate.
        dropout_output: Output dropout rate.
        accelerator: Lightning accelerator name passed to ``pl.Trainer``
            (defaults to ``"mps"``, the value that used to be hard-coded).

    Returns:
        float: Mean RMSE score across the evaluated folds.
    """
    # Single source of truth for the tree size. It configures the encoder and
    # sizes the embedding table of every leaf column.
    # Assumes LGBMLeavesEncoder emits leaf indices in [0, num_leaves) - the
    # previously hard-coded 63 relied on the same assumption.
    num_leaves = 63

    feature_cols = numerical_cols + categorical_cols
    oof_scores = []

    # Perform cross-validation on the first three folds
    for train_index, val_index in split4[:3]:
        # Split the data
        X_train = df_train.iloc[train_index][feature_cols]
        y_train = df_train.iloc[train_index]['utility_agent1']
        X_valid = df_train.iloc[val_index][feature_cols]
        y_valid = df_train.iloc[val_index]['utility_agent1']

        # Fit the leaf encoder on the training part only to avoid leaking
        # validation targets into the features.
        lgbm_encoder = LGBMLeavesEncoder(
            num_cols=numerical_cols,
            cat_cols=categorical_cols,
            task='regression',
            n_estimators=100,
            num_leaves=num_leaves,
            random_state=2112,
            verbosity=-1,
        )
        lgbm_encoder.fit(X_train, y_train)
        X_train_leaves_lgbm = lgbm_encoder.transform(X_train, verbose=False)
        X_valid_leaves_lgbm = lgbm_encoder.transform(X_valid)

        X_train = pd.concat([X_train, X_train_leaves_lgbm], axis=1)
        X_valid = pd.concat([X_valid, X_valid_leaves_lgbm], axis=1)

        # The encoder's new columns are treated as additional categorical inputs.
        _categorical_cols = (
            categorical_cols + lgbm_encoder.new_columns
        )
        # NOTE(review): using nunique as the embedding size presumes the
        # categorical codes are contiguous 0..n-1 - confirm in preproc.
        _cat_input_dims = (
            df_train[categorical_cols].nunique(axis=0).values.tolist()
            + [num_leaves] * len(lgbm_encoder.new_columns)
        )

        train_dataset = TensorDataset(
            torch.tensor(X_train[numerical_cols].values, dtype=torch.float32),
            torch.tensor(X_train[_categorical_cols].values, dtype=torch.int32),
            torch.tensor(y_train.values, dtype=torch.float32)
        )
        train_loader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=8,
            persistent_workers=True
        )

        valid_dataset = TensorDataset(
            torch.tensor(X_valid[numerical_cols].values, dtype=torch.float32),
            torch.tensor(X_valid[_categorical_cols].values, dtype=torch.int32),
            torch.tensor(y_valid.values, dtype=torch.float32)
        )
        valid_loader = DataLoader(
            valid_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=8,
            persistent_workers=True
        )

        model = SoftOrdering1DCNN(
            num_input_dim=len(numerical_cols),
            cat_input_dims=_cat_input_dims,
            output_dim=1,
            # model params
            sign_size=sign_size,
            cha_input=cha_input,
            cha_hidden=cha_hidden,
            K=K,
            dropout_input=dropout_input,
            dropout_hidden=dropout_hidden,
            dropout_output=dropout_output,
            embedding_dropout=embedding_dropout,
            # training params
            learning_rate=learning_rate,
            weight_decay=weight_decay,
            pct_start=pct_start,
            div_factor=div_factor,
            final_div_factor=final_div_factor,
        )
        trainer = pl.Trainer(
            max_epochs=max_epochs,
            accelerator=accelerator,
            callbacks=[
                EarlyStopping(
                    monitor='val_rmse',
                    patience=10,
                    mode='min',
                    verbose=False
                ),
                # Keep only the checkpoint with the lowest validation RMSE.
                ModelCheckpoint(monitor='val_rmse', mode='min', save_top_k=1),
            ],
            enable_progress_bar=False,
            logger=False,
        )
        trainer.fit(
            model,
            train_loader,
            valid_loader,
        )

        # Load the best model (not necessarily the one from the last epoch)
        best_model_path = trainer.checkpoint_callback.best_model_path
        model = SoftOrdering1DCNN.load_from_checkpoint(best_model_path)

        # Predict on validation set; the loader is not shuffled, so the
        # predictions line up with `y_valid`.
        predictions = trainer.predict(model, dataloaders=valid_loader)
        y_pred = torch.cat(predictions).squeeze().cpu().numpy()

        # Compute RMSE
        rmse = np.sqrt(np.mean((y_pred - y_valid.values) ** 2))
        oof_scores.append(rmse)

    return np.mean(oof_scores)


In [12]:
def objective(trial):
    """Optuna objective: sample one configuration and return its CV score.

    Every hyper-parameter is drawn from ``trial`` and forwarded, under the
    same name, as a keyword argument to ``train_and_score``.

    Args:
        trial: Optuna trial used to suggest the hyper-parameter values.

    Returns:
        The value returned by ``train_and_score`` for the sampled values.
    """
    # Dict entries are evaluated top to bottom, so the parameters are
    # suggested in exactly the order they are listed here.
    sampled = {
        # training params
        'batch_size': trial.suggest_int('batch_size', 128, 512, step=128),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True),
        'weight_decay': trial.suggest_float('weight_decay', 1e-5, 1e-2, log=True),
        'pct_start': trial.suggest_float('pct_start', 0.01, 0.1, step=0.01),
        'div_factor': trial.suggest_float('div_factor', 1.0, 100.0, step=1.0),
        'final_div_factor': trial.suggest_float('final_div_factor', 1.0, 100.0, step=1.0),
        'max_epochs': trial.suggest_int('max_epochs', 20, 100, step=5),
        # model params
        'sign_size': trial.suggest_int('sign_size', 8, 32, step=8),
        'cha_input': trial.suggest_int('cha_input', 32, 128, step=16),
        'cha_hidden': trial.suggest_int('cha_hidden', 32, 128, step=16),
        'K': trial.suggest_int('K', 1, 4),
        # dropout
        'embedding_dropout': trial.suggest_float('embedding_dropout', 0.0, 0.3, step=0.05),
        'dropout_input': trial.suggest_float('dropout_input', 0.0, 0.3, step=0.05),
        'dropout_hidden': trial.suggest_float('dropout_hidden', 0.0, 0.3, step=0.05),
        'dropout_output': trial.suggest_float('dropout_output', 0.0, 0.3, step=0.05),
    }

    # Train and evaluate the model with the suggested hyperparameters
    return train_and_score(**sampled)


In [13]:
# Switch: True launches (or resumes) the search, False only opens the stored study.
do_optimize = False
# wall-clock budget of the search in seconds (72 hours)
timeout = 72 * 3600

# Trials are persisted in a local SQLite file; an existing study with the
# same name is reopened instead of raising.
study = optuna.create_study(
    storage="sqlite:///1dcnn.db",
    study_name="1dcnn",
    direction="minimize",
    load_if_exists=True,
)

if do_optimize:
    # Stops at whichever limit is hit first: 1000 trials or `timeout` seconds.
    study.optimize(
        objective,
        timeout=timeout,
        n_trials=1000,
        gc_after_trial=True,
        n_jobs=1,
    )

[I 2024-12-01 22:52:12,202] Using an existing study with name '1dcnn' instead of creating a new one.


In [24]:
# number of trials stored in the study (all states)
len(study.trials)

48

In [26]:
# Show up to 20 trials with the lowest objective value (`value`), best first.
study.trials_dataframe().sort_values("value", ascending=True).head(20)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_K,params_batch_size,params_cha_hidden,params_cha_input,params_div_factor,params_dropout_hidden,params_dropout_input,params_dropout_output,params_embedding_dropout,params_final_div_factor,params_learning_rate,params_max_epochs,params_pct_start,params_sign_size,params_weight_decay,state
29,29,0.414955,2024-11-30 23:45:19.424336,2024-12-01 00:09:02.424595,0 days 00:23:43.000259,1,384,64,64,18.0,0.1,0.2,0.15,0.1,38.0,0.000668,50,0.1,16,0.000309,COMPLETE
11,11,0.415261,2024-11-30 12:55:18.295461,2024-11-30 13:55:44.700524,0 days 01:00:26.405063,2,256,80,64,100.0,0.2,0.3,0.1,0.05,75.0,0.000212,100,0.01,16,0.000175,COMPLETE
25,25,0.4157,2024-11-30 21:21:50.155156,2024-11-30 21:59:20.788113,0 days 00:37:30.632957,2,256,128,64,86.0,0.25,0.25,0.05,0.05,88.0,0.000384,85,0.04,24,0.000332,COMPLETE
0,0,0.41623,2024-11-30 00:27:22.111372,2024-11-30 04:41:02.112946,0 days 04:13:40.001574,1,512,32,64,97.0,0.25,0.3,0.1,0.0,77.0,0.000552,95,0.03,16,0.000165,COMPLETE
9,9,0.417056,2024-11-30 11:49:44.997095,2024-11-30 12:15:48.304717,0 days 00:26:03.307622,3,256,112,128,80.0,0.25,0.15,0.1,0.2,73.0,0.000414,90,0.04,24,0.000887,COMPLETE
33,33,0.417196,2024-12-01 01:32:34.129040,2024-12-01 02:22:51.961289,0 days 00:50:17.832249,2,256,64,48,53.0,0.15,0.3,0.1,0.1,27.0,0.000522,70,0.04,16,5.4e-05,COMPLETE
30,30,0.417545,2024-12-01 00:09:02.528247,2024-12-01 00:43:41.019427,0 days 00:34:38.491180,1,384,64,32,20.0,0.1,0.2,0.2,0.1,36.0,0.000342,50,0.1,24,0.000497,COMPLETE
15,15,0.417796,2024-11-30 15:48:36.262531,2024-11-30 16:14:32.270329,0 days 00:25:56.007798,2,256,64,48,89.0,0.25,0.25,0.05,0.1,82.0,0.001297,85,0.03,16,0.000233,COMPLETE
7,7,0.418613,2024-11-30 11:02:31.653521,2024-11-30 11:15:53.308081,0 days 00:13:21.654560,2,512,64,96,35.0,0.05,0.2,0.0,0.2,96.0,0.004468,75,0.04,16,0.000104,COMPLETE
31,31,0.418748,2024-12-01 00:43:41.137059,2024-12-01 01:08:46.992321,0 days 00:25:05.855262,1,512,80,64,26.0,0.05,0.2,0.15,0.05,40.0,0.000684,50,0.1,16,0.000135,COMPLETE


In [15]:
# Objective value of every trial in the study, in trial order.
plot_optimization_history(study)

In [16]:
# Estimated importance of each hyper-parameter for the objective value.
plot_param_importances(study)

In [17]:
# Objective value against each hyper-parameter individually (one panel per parameter).
plot_slice(study)

In [18]:
# Empirical distribution function of the objective values reached by the trials.
plot_edf(study)

In [19]:
# Parallel-coordinate view relating hyper-parameter combinations to the objective value.
plot_parallel_coordinate(study)

In [20]:
# Copy the hyper-parameters of the best trial into a plain dict and display it.
best_params = dict(study.best_params)
best_params

{'batch_size': 384,
 'learning_rate': 0.0006678162130913228,
 'weight_decay': 0.00030931452710892437,
 'pct_start': 0.09999999999999999,
 'div_factor': 18.0,
 'final_div_factor': 38.0,
 'max_epochs': 50,
 'sign_size': 16,
 'cha_input': 64,
 'cha_hidden': 64,
 'K': 1,
 'embedding_dropout': 0.1,
 'dropout_input': 0.2,
 'dropout_hidden': 0.1,
 'dropout_output': 0.15000000000000002}

***