Hyperparameter Tuning

In [1]:
import os
import time
from datetime import datetime

import optuna
import torch
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import (Callback, EarlyStopping,
                                         ModelCheckpoint)
from lightning.pytorch.loggers import TensorBoardLogger

from common.file_paths import BASE_DIR
from modules import (LightningTransformerMinecraftStructureGenerator,
                     MinecraftDataModule)


class MetricsTracker(Callback):
    """Remember the best validation loss and accuracy seen during a fit.

    After every validation epoch the callback reads ``val_loss`` and
    ``val_accuracy`` from ``trainer.callback_metrics`` and keeps the lowest
    loss and the highest accuracy. Both are stored as plain Python floats so
    they can be returned from an objective function without holding on to
    tensors.
    """

    def __init__(self):
        super().__init__()
        # Start from the worst possible values so the first real epoch wins.
        self.best_val_loss = float('inf')
        self.best_val_accuracy = 0.0

    def on_validation_epoch_end(self, trainer, pl_module):
        """Update the running bests from the metrics of the finished epoch."""
        # The sanity check that runs before training evaluates an untrained
        # model on only a few batches; its metrics must not count as a "best"
        # result (the built-in monitoring callbacks skip it as well).
        if trainer.sanity_checking:
            return

        val_loss = trainer.callback_metrics.get('val_loss')
        val_accuracy = trainer.callback_metrics.get('val_accuracy')

        # Either metric may be absent if the module did not log it.
        if val_loss is not None:
            val_loss = float(val_loss)
            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss
        if val_accuracy is not None:
            val_accuracy = float(val_accuracy)
            if val_accuracy > self.best_val_accuracy:
                self.best_val_accuracy = val_accuracy


def get_inference_time(model, datamodule):
    """Measure how long one autoregressive generation takes.

    Args:
        model: Object exposing ``eval()`` and
            ``generate(prompt, autoregressive=True)``.
        datamodule: Object whose ``val_dataloader()`` returns a sequence of
            dataloaders; the first one is used.

    Returns:
        Elapsed wall-clock time in seconds for a single ``generate`` call.

    Note:
        The model is left in evaluation mode.
    """
    # Switch to evaluation mode for inference
    model.eval()

    # Get a single sample from the validation set.
    # NOTE(review): element [1][0] of the first dataset item is used as the
    # prompt - confirm this matches the dataset's item layout.
    val_dataloader = datamodule.val_dataloader()[0]
    prompt = next(iter(val_dataloader.dataset))[1][0]

    # CUDA work is asynchronous, so the device must be drained before reading
    # the clock. Without CUDA there is nothing to wait for, and calling
    # torch.cuda.synchronize() would raise.
    use_cuda = torch.cuda.is_available()

    if use_cuda:
        torch.cuda.synchronize()
    # perf_counter is monotonic and high resolution, unlike time.time().
    start_time = time.perf_counter()
    with torch.no_grad():
        model.generate(prompt, autoregressive=True)
    if use_cuda:
        torch.cuda.synchronize()
    end_time = time.perf_counter()

    return end_time - start_time


def objective(trial: optuna.Trial):
    """Train one sampled configuration and report its objective values.

    The trial suggests the model hyperparameters and the batch size, a model
    is fitted with early stopping on ``val_loss``, and a single-sample
    generation is timed afterwards.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        A 4-tuple ``(best validation loss, best validation accuracy,
        training time in seconds, inference time in seconds)``.
    """
    # Same seed for every trial so runs differ only by their hyperparameters.
    seed_everything(0, workers=True)

    # Sampled model hyperparameters; the dict literal is evaluated top to
    # bottom, which fixes the order of the suggest calls.
    sampled_hparams = {
        "embedding_dim": trial.suggest_categorical(
            "embedding_dim", [16, 32, 64, 128]),
        "embedding_dropout": trial.suggest_float(
            "embedding_dropout", 0.1, 0.5, step=0.1),
        "decoder_dim": trial.suggest_categorical(
            "decoder_dim", [32, 64, 128, 256]),
        "num_heads": trial.suggest_categorical(
            "num_heads", [4, 8, 16, 32]),
        "num_layers": trial.suggest_int("num_layers", 2, 4),
        "decoder_dropout": trial.suggest_float(
            "decoder_dropout", 0.1, 0.5, step=0.1),
        "learning_rate": trial.suggest_float(
            "learning_rate", 1e-5, 1e-3, log=True),
    }
    lightning_model = LightningTransformerMinecraftStructureGenerator(
        num_classes=20,
        max_sequence_length=512,
        freeze_encoder=True,
        **sampled_hparams
    )

    # The batch size is a tuned value too, but it configures the data module.
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])
    data_module = MinecraftDataModule(
        file_path=os.path.join(BASE_DIR, 'data.h5'),
        batch_size=batch_size,
    )

    # Callbacks: checkpoint on the lowest val_loss, stop after 30 epochs
    # without improvement, and track the best metrics for the return value.
    metrics_tracker = MetricsTracker()
    callbacks = [
        ModelCheckpoint(monitor='val_loss', mode='min',
                        save_last=True, save_weights_only=True),
        EarlyStopping(monitor='val_loss', patience=30,
                      verbose=False, mode='min'),
        metrics_tracker,
    ]

    trainer = Trainer(
        max_epochs=5000,
        logger=TensorBoardLogger(
            'lightning_logs',
            name='minecraft_structure_generator',
            log_graph=False),
        gradient_clip_val=1.0,
        log_every_n_steps=5,
        callbacks=callbacks,
    )

    # Wall-clock duration of the whole fit, including validation epochs.
    fit_started = time.time()
    trainer.fit(lightning_model, datamodule=data_module)
    training_time = time.time() - fit_started

    inference_time = get_inference_time(lightning_model, data_module)

    return (
        metrics_tracker.best_val_loss,
        metrics_tracker.best_val_accuracy,
        training_time,
        inference_time,
    )


if __name__ == "__main__":
    # To continue an existing study instead of starting a new one, use its
    # stored name here (load_if_exists=True makes create_study reuse it), e.g.
    # study_name = "study_20231204195755"
    study_name = f"study_{datetime.now().strftime('%Y%m%d%H%M%S')}"

    # One direction per value returned by objective(): validation loss,
    # validation accuracy, training time, inference time.
    study = optuna.create_study(
        directions=("minimize", "maximize", "minimize", "minimize"),
        study_name=study_name,
        storage='sqlite:///studies.db',
        load_if_exists=True,
    )
    study.optimize(objective, n_trials=100)

    # A multi-objective study has no single best trial (Study.best_trial
    # raises for it); report every Pareto-optimal trial instead.
    for pareto_trial in study.best_trials:
        print(f"Trial {pareto_trial.number} values: {pareto_trial.values}")
        print("Best hyperparameters:", pareto_trial.params)

  from .autonotebook import tqdm as notebook_tqdm
[I 2023-12-05 17:08:41,401] A new study created in RDB with name: study_20231205170841
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Missing logger folder: lightning_logs\minecraft_structure_generator





LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 66.8 M
-----------------------------------------------------------------
434 K     Trainable params
66.4 M    Non-trainable params
66.8 M    Total params
267.189   Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00,  6.78it/s]

c:\Users\mmmfr\Documents\Repositories\minecraft-schematic-generator\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


                                                                           

c:\Users\mmmfr\Documents\Repositories\minecraft-schematic-generator\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Epoch 429: 100%|██████████| 38/38 [00:03<00:00, 10.50it/s, v_num=0]Epoch 00430: reducing learning rate of group 0 to 3.6159e-06.
Epoch 440: 100%|██████████| 38/38 [00:03<00:00, 10.56it/s, v_num=0]Epoch 00441: reducing learning rate of group 0 to 3.6159e-07.
Epoch 448: 100%|██████████| 38/38 [00:03<00:00, 10.11it/s, v_num=0]


[I 2023-12-05 17:39:24,068] Trial 0 finished with values: [0.12581248581409454, 0.9535729289054871, 1839.3226449489594, 2.3469557762145996] and parameters: {'embedding_dim': 128, 'embedding_dropout': 0.4, 'decoder_dim': 64, 'num_heads': 16, 'num_layers': 1, 'decoder_dropout': 0.4, 'learning_rate': 3.615900151296967e-05, 'batch_size': 16}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 68.5 M
-----------------------------------------------------------------
2.2 M     Trainable params
66.4 M    Non-trainable params
68.5 M    Total params
274.172   Total estimated model params size (MB)


Epoch 130: 100%|██████████| 38/38 [00:06<00:00,  5.53it/s, v_num=1]        Epoch 00131: reducing learning rate of group 0 to 4.7152e-05.
Epoch 171: 100%|██████████| 38/38 [00:07<00:00,  5.15it/s, v_num=1]Epoch 00172: reducing learning rate of group 0 to 4.7152e-06.
Epoch 182: 100%|██████████| 38/38 [00:07<00:00,  5.36it/s, v_num=1]Epoch 00183: reducing learning rate of group 0 to 4.7152e-07.
Epoch 190: 100%|██████████| 38/38 [00:06<00:00,  5.55it/s, v_num=1]


[I 2023-12-05 18:02:37,133] Trial 1 finished with values: [0.008596844971179962, 0.9979751706123352, 1385.502732515335, 6.823637008666992] and parameters: {'embedding_dim': 128, 'embedding_dropout': 0.4, 'decoder_dim': 128, 'num_heads': 16, 'num_layers': 3, 'decoder_dropout': 0.30000000000000004, 'learning_rate': 0.00047151632933623846, 'batch_size': 16}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 66.8 M
-----------------------------------------------------------------
459 K     Trainable params
66.4 M    Non-trainable params
66.8 M    Total params
267.291   Total estimated model params size (MB)


Epoch 213: 100%|██████████| 77/77 [00:06<00:00, 11.17it/s, v_num=2]        Epoch 00214: reducing learning rate of group 0 to 1.5328e-05.
Epoch 231: 100%|██████████| 77/77 [00:06<00:00, 11.38it/s, v_num=2]Epoch 00232: reducing learning rate of group 0 to 1.5328e-06.
Epoch 242: 100%|██████████| 77/77 [00:07<00:00,  9.84it/s, v_num=2]Epoch 00243: reducing learning rate of group 0 to 1.5328e-07.
Epoch 250: 100%|██████████| 77/77 [00:07<00:00, 10.22it/s, v_num=2]


[I 2023-12-05 18:31:56,917] Trial 2 finished with values: [0.04242570325732231, 0.9817230105400085, 1755.5944530963898, 3.5104763507843018] and parameters: {'embedding_dim': 16, 'embedding_dropout': 0.0, 'decoder_dim': 32, 'num_heads': 8, 'num_layers': 3, 'decoder_dropout': 0.4, 'learning_rate': 0.00015328292384025394, 'batch_size': 8}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 71.3 M
-----------------------------------------------------------------
5.0 M     Trainable params
66.4 M    Non-trainable params
71.3 M    Total params
285.253   Total estimated model params size (MB)


Epoch 109: 100%|██████████| 38/38 [00:09<00:00,  4.13it/s, v_num=3]        Epoch 00110: reducing learning rate of group 0 to 9.0456e-05.
Epoch 120: 100%|██████████| 38/38 [00:09<00:00,  3.93it/s, v_num=3]Epoch 00121: reducing learning rate of group 0 to 9.0456e-06.
Epoch 128: 100%|██████████| 38/38 [00:09<00:00,  4.07it/s, v_num=3]


[I 2023-12-05 18:54:35,508] Trial 3 finished with values: [0.043716911226511, 0.9854321479797363, 1346.1496863365173, 11.52624249458313] and parameters: {'embedding_dim': 16, 'embedding_dropout': 0.4, 'decoder_dim': 256, 'num_heads': 32, 'num_layers': 3, 'decoder_dropout': 0.30000000000000004, 'learning_rate': 0.0009045556454109557, 'batch_size': 16}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 68.2 M
-----------------------------------------------------------------
1.8 M     Trainable params
66.4 M    Non-trainable params
68.2 M    Total params
272.623   Total estimated model params size (MB)


Epoch 145: 100%|██████████| 9/9 [00:03<00:00,  2.44it/s, v_num=4]          Epoch 00146: reducing learning rate of group 0 to 5.7947e-06.
Epoch 158: 100%|██████████| 9/9 [00:03<00:00,  2.43it/s, v_num=4]Epoch 00159: reducing learning rate of group 0 to 5.7947e-07.
Epoch 169: 100%|██████████| 9/9 [00:03<00:00,  2.41it/s, v_num=4]Epoch 00170: reducing learning rate of group 0 to 5.7947e-08.
Epoch 177: 100%|██████████| 9/9 [00:04<00:00,  2.20it/s, v_num=4]


[I 2023-12-05 19:07:05,910] Trial 4 finished with values: [0.20430174469947815, 0.9320885539054871, 745.33034324646, 4.272830247879028] and parameters: {'embedding_dim': 16, 'embedding_dropout': 0.4, 'decoder_dim': 256, 'num_heads': 32, 'num_layers': 1, 'decoder_dropout': 0.1, 'learning_rate': 5.7946686994088614e-05, 'batch_size': 64}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 67.8 M
-----------------------------------------------------------------
1.5 M     Trainable params
66.4 M    Non-trainable params
67.8 M    Total params
271.328   Total estimated model params size (MB)


Epoch 367: 100%|██████████| 38/38 [00:03<00:00,  9.75it/s, v_num=5]        Epoch 00368: reducing learning rate of group 0 to 5.8565e-06.
Epoch 400: 100%|██████████| 38/38 [00:03<00:00, 10.02it/s, v_num=5]Epoch 00401: reducing learning rate of group 0 to 5.8565e-07.
Epoch 411: 100%|██████████| 38/38 [00:03<00:00, 10.10it/s, v_num=5]Epoch 00412: reducing learning rate of group 0 to 5.8565e-08.
Epoch 419: 100%|██████████| 38/38 [00:03<00:00,  9.72it/s, v_num=5]


[I 2023-12-05 19:38:21,070] Trial 5 finished with values: [0.05612330511212349, 0.9785693287849426, 1869.3243000507355, 4.158816814422607] and parameters: {'embedding_dim': 64, 'embedding_dropout': 0.5, 'decoder_dim': 128, 'num_heads': 16, 'num_layers': 2, 'decoder_dropout': 0.1, 'learning_rate': 5.856472461397753e-05, 'batch_size': 16}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 67.9 M
-----------------------------------------------------------------
1.5 M     Trainable params
66.4 M    Non-trainable params
67.9 M    Total params
271.535   Total estimated model params size (MB)


Epoch 87: 100%|██████████| 19/19 [00:02<00:00,  7.81it/s, v_num=6]         Epoch 00088: reducing learning rate of group 0 to 6.3461e-05.
Epoch 98: 100%|██████████| 19/19 [00:02<00:00,  7.67it/s, v_num=6]Epoch 00099: reducing learning rate of group 0 to 6.3461e-06.
Epoch 106: 100%|██████████| 19/19 [00:02<00:00,  6.95it/s, v_num=6]


[I 2023-12-05 19:43:32,480] Trial 6 finished with values: [0.0497308149933815, 0.9792681336402893, 307.15963339805603, 3.4372236728668213] and parameters: {'embedding_dim': 128, 'embedding_dropout': 0.30000000000000004, 'decoder_dim': 128, 'num_heads': 1, 'num_layers': 2, 'decoder_dropout': 0.5, 'learning_rate': 0.0006346077146812594, 'batch_size': 32}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 67.2 M
-----------------------------------------------------------------
861 K     Trainable params
66.4 M    Non-trainable params
67.2 M    Total params
268.897   Total estimated model params size (MB)


Epoch 436: 100%|██████████| 9/9 [00:02<00:00,  3.24it/s, v_num=7]          Epoch 00437: reducing learning rate of group 0 to 5.6650e-06.
Epoch 478: 100%|██████████| 9/9 [00:02<00:00,  3.25it/s, v_num=7]Epoch 00479: reducing learning rate of group 0 to 5.6650e-07.
Epoch 489: 100%|██████████| 9/9 [00:02<00:00,  3.21it/s, v_num=7]Epoch 00490: reducing learning rate of group 0 to 5.6650e-08.
Epoch 497: 100%|██████████| 9/9 [00:02<00:00,  3.10it/s, v_num=7]


[I 2023-12-05 20:08:25,512] Trial 7 finished with values: [0.10081260651350021, 0.9641090631484985, 1489.7675664424896, 2.5120205879211426] and parameters: {'embedding_dim': 128, 'embedding_dropout': 0.2, 'decoder_dim': 128, 'num_heads': 16, 'num_layers': 1, 'decoder_dropout': 0.4, 'learning_rate': 5.664987160702745e-05, 'batch_size': 64}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 66.7 M
-----------------------------------------------------------------
347 K     Trainable params
66.4 M    Non-trainable params
66.7 M    Total params
266.842   Total estimated model params size (MB)


Epoch 138: 100%|██████████| 9/9 [00:02<00:00,  3.51it/s, v_num=8]          Epoch 00139: reducing learning rate of group 0 to 7.3208e-05.
Epoch 186: 100%|██████████| 9/9 [00:02<00:00,  3.64it/s, v_num=8]Epoch 00187: reducing learning rate of group 0 to 7.3208e-06.
Epoch 197: 100%|██████████| 9/9 [00:02<00:00,  3.52it/s, v_num=8]Epoch 00198: reducing learning rate of group 0 to 7.3208e-07.
Epoch 205: 100%|██████████| 9/9 [00:02<00:00,  3.31it/s, v_num=8]


[I 2023-12-05 20:19:04,438] Trial 8 finished with values: [0.06347326189279556, 0.9758994579315186, 635.948739528656, 2.1614162921905518] and parameters: {'embedding_dim': 64, 'embedding_dropout': 0.30000000000000004, 'decoder_dim': 32, 'num_heads': 4, 'num_layers': 2, 'decoder_dropout': 0.4, 'learning_rate': 0.0007320844067757856, 'batch_size': 64}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 71.3 M
-----------------------------------------------------------------
5.0 M     Trainable params
66.4 M    Non-trainable params
71.3 M    Total params
285.253   Total estimated model params size (MB)


Epoch 110: 100%|██████████| 77/77 [00:06<00:00, 11.56it/s, v_num=9]        Epoch 00111: reducing learning rate of group 0 to 3.3841e-06.
Epoch 147: 100%|██████████| 77/77 [00:06<00:00, 11.38it/s, v_num=9]Epoch 00148: reducing learning rate of group 0 to 3.3841e-07.
Epoch 158: 100%|██████████| 77/77 [00:07<00:00, 10.43it/s, v_num=9]Epoch 00159: reducing learning rate of group 0 to 3.3841e-08.
Epoch 166: 100%|██████████| 77/77 [00:07<00:00, 10.34it/s, v_num=9]


[I 2023-12-05 20:42:40,837] Trial 9 finished with values: [0.18732500076293945, 0.933360755443573, 1406.5471096038818, 9.113417625427246] and parameters: {'embedding_dim': 16, 'embedding_dropout': 0.5, 'decoder_dim': 256, 'num_heads': 1, 'num_layers': 3, 'decoder_dropout': 0.30000000000000004, 'learning_rate': 3.38410686955243e-05, 'batch_size': 8}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 67.2 M
-----------------------------------------------------------------
809 K     Trainable params
66.4 M    Non-trainable params
67.2 M    Total params
268.690   Total estimated model params size (MB)


Epoch 77: 100%|██████████| 38/38 [00:02<00:00, 13.95it/s, v_num=10]        Epoch 00078: reducing learning rate of group 0 to 1.5861e-04.
Epoch 113: 100%|██████████| 38/38 [00:02<00:00, 13.94it/s, v_num=10]Epoch 00114: reducing learning rate of group 0 to 1.5861e-05.
Epoch 125: 100%|██████████| 38/38 [00:02<00:00, 14.36it/s, v_num=10]Epoch 00126: reducing learning rate of group 0 to 1.5861e-06.
Epoch 136: 100%|██████████| 38/38 [00:02<00:00, 14.65it/s, v_num=10]Epoch 00137: reducing learning rate of group 0 to 1.5861e-07.
Epoch 144: 100%|██████████| 38/38 [00:02<00:00, 13.70it/s, v_num=10]


[I 2023-12-05 20:50:47,777] Trial 10 finished with values: [0.04497084021568298, 0.9813467264175415, 484.3405501842499, 1.8548102378845215] and parameters: {'embedding_dim': 64, 'embedding_dropout': 0.5, 'decoder_dim': 128, 'num_heads': 8, 'num_layers': 1, 'decoder_dropout': 0.5, 'learning_rate': 0.0015861084831990037, 'batch_size': 16}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 66.7 M
-----------------------------------------------------------------
327 K     Trainable params
66.4 M    Non-trainable params
66.7 M    Total params
266.763   Total estimated model params size (MB)


Epoch 171: 100%|██████████| 19/19 [00:02<00:00,  7.34it/s, v_num=11]       Epoch 00172: reducing learning rate of group 0 to 2.0235e-05.
Epoch 190: 100%|██████████| 19/19 [00:02<00:00,  6.40it/s, v_num=11]Epoch 00191: reducing learning rate of group 0 to 2.0235e-06.
Epoch 201: 100%|██████████| 19/19 [00:02<00:00,  7.67it/s, v_num=11]Epoch 00202: reducing learning rate of group 0 to 2.0235e-07.
Epoch 209: 100%|██████████| 19/19 [00:02<00:00,  7.05it/s, v_num=11]


[I 2023-12-05 21:01:35,814] Trial 11 finished with values: [0.15218426287174225, 0.9510643482208252, 644.8301732540131, 2.4339425563812256] and parameters: {'embedding_dim': 32, 'embedding_dropout': 0.5, 'decoder_dim': 32, 'num_heads': 8, 'num_layers': 2, 'decoder_dropout': 0.0, 'learning_rate': 0.00020235487516685356, 'batch_size': 32}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 67.0 M
-----------------------------------------------------------------
670 K     Trainable params
66.4 M    Non-trainable params
67.0 M    Total params
268.135   Total estimated model params size (MB)


Epoch 25: 100%|██████████| 77/77 [00:15<00:00,  5.08it/s, v_num=12]        Epoch 00026: reducing learning rate of group 0 to 3.0253e-04.
Epoch 36: 100%|██████████| 77/77 [00:14<00:00,  5.23it/s, v_num=12]Epoch 00037: reducing learning rate of group 0 to 3.0253e-05.
Epoch 44: 100%|██████████| 77/77 [00:15<00:00,  4.85it/s, v_num=12]


[I 2023-12-05 21:12:17,797] Trial 12 finished with values: [0.2973358929157257, 0.897864043712616, 630.6392269134521, 10.493111848831177] and parameters: {'embedding_dim': 128, 'embedding_dropout': 0.30000000000000004, 'decoder_dim': 32, 'num_heads': 32, 'num_layers': 4, 'decoder_dropout': 0.1, 'learning_rate': 0.003025333442993568, 'batch_size': 8}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 67.6 M
-----------------------------------------------------------------
1.3 M     Trainable params
66.4 M    Non-trainable params
67.6 M    Total params
270.459   Total estimated model params size (MB)


Epoch 376: 100%|██████████| 38/38 [00:05<00:00,  7.53it/s, v_num=13]       Epoch 00377: reducing learning rate of group 0 to 1.5001e-05.
Epoch 400: 100%|██████████| 38/38 [00:05<00:00,  7.36it/s, v_num=13]Epoch 00401: reducing learning rate of group 0 to 1.5001e-06.
Epoch 420: 100%|██████████| 38/38 [00:05<00:00,  7.07it/s, v_num=13]Epoch 00421: reducing learning rate of group 0 to 1.5001e-07.
Epoch 431: 100%|██████████| 38/38 [00:04<00:00,  9.23it/s, v_num=13]Epoch 00432: reducing learning rate of group 0 to 1.5001e-08.
Epoch 439: 100%|██████████| 38/38 [00:04<00:00,  9.20it/s, v_num=13]


[I 2023-12-05 21:51:40,980] Trial 13 finished with values: [0.01417942252010107, 0.9964520931243896, 2358.475761651993, 3.832237720489502] and parameters: {'embedding_dim': 16, 'embedding_dropout': 0.2, 'decoder_dim': 64, 'num_heads': 2, 'num_layers': 4, 'decoder_dropout': 0.2, 'learning_rate': 0.00015000783384923943, 'batch_size': 16}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 67.1 M
-----------------------------------------------------------------
770 K     Trainable params
66.4 M    Non-trainable params
67.1 M    Total params
268.535   Total estimated model params size (MB)


Epoch 248: 100%|██████████| 38/38 [00:03<00:00, 12.57it/s, v_num=14]       Epoch 00249: reducing learning rate of group 0 to 4.2488e-06.
Epoch 283: 100%|██████████| 38/38 [00:02<00:00, 13.03it/s, v_num=14]Epoch 00284: reducing learning rate of group 0 to 4.2488e-07.
Epoch 294: 100%|██████████| 38/38 [00:03<00:00, 12.66it/s, v_num=14]Epoch 00295: reducing learning rate of group 0 to 4.2488e-08.
Epoch 302: 100%|██████████| 38/38 [00:03<00:00, 11.98it/s, v_num=14]


[I 2023-12-05 22:09:19,310] Trial 14 finished with values: [0.1752757877111435, 0.9369086623191833, 1054.94682264328, 2.0534045696258545] and parameters: {'embedding_dim': 16, 'embedding_dropout': 0.2, 'decoder_dim': 128, 'num_heads': 16, 'num_layers': 1, 'decoder_dropout': 0.30000000000000004, 'learning_rate': 4.2488120386382683e-05, 'batch_size': 16}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 67.1 M
-----------------------------------------------------------------
732 K     Trainable params
66.4 M    Non-trainable params
67.1 M    Total params
268.381   Total estimated model params size (MB)


Epoch 193: 100%|██████████| 19/19 [00:03<00:00,  5.28it/s, v_num=15]       Epoch 00194: reducing learning rate of group 0 to 3.2315e-05.
Epoch 233: 100%|██████████| 19/19 [00:03<00:00,  5.06it/s, v_num=15]Epoch 00234: reducing learning rate of group 0 to 3.2315e-06.
Epoch 259: 100%|██████████| 19/19 [00:03<00:00,  5.35it/s, v_num=15]Epoch 00260: reducing learning rate of group 0 to 3.2315e-07.
Epoch 270: 100%|██████████| 19/19 [00:03<00:00,  5.42it/s, v_num=15]Epoch 00271: reducing learning rate of group 0 to 3.2315e-08.
Epoch 278: 100%|██████████| 19/19 [00:03<00:00,  5.51it/s, v_num=15]


[I 2023-12-05 22:26:48,740] Trial 15 finished with values: [0.010125442408025265, 0.9979034662246704, 1044.3304104804993, 3.500105381011963] and parameters: {'embedding_dim': 128, 'embedding_dropout': 0.0, 'decoder_dim': 64, 'num_heads': 16, 'num_layers': 2, 'decoder_dropout': 0.2, 'learning_rate': 0.00032314535304490776, 'batch_size': 32}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 66.8 M
-----------------------------------------------------------------
459 K     Trainable params
66.4 M    Non-trainable params
66.8 M    Total params
267.291   Total estimated model params size (MB)


Epoch 36: 100%|██████████| 9/9 [00:04<00:00,  2.02it/s, v_num=16]          Epoch 00037: reducing learning rate of group 0 to 8.7196e-04.
Epoch 251: 100%|██████████| 9/9 [00:04<00:00,  1.80it/s, v_num=16]Epoch 00252: reducing learning rate of group 0 to 8.7196e-05.
Epoch 268: 100%|██████████| 9/9 [00:04<00:00,  2.13it/s, v_num=16]Epoch 00269: reducing learning rate of group 0 to 8.7196e-06.
Epoch 279: 100%|██████████| 9/9 [00:06<00:00,  1.44it/s, v_num=16]Epoch 00280: reducing learning rate of group 0 to 8.7196e-07.
Epoch 287: 100%|██████████| 9/9 [00:04<00:00,  1.99it/s, v_num=16]


[I 2023-12-05 22:51:59,829] Trial 16 finished with values: [0.19784489274024963, 0.9325544238090515, 1506.3476374149323, 4.002651691436768] and parameters: {'embedding_dim': 16, 'embedding_dropout': 0.0, 'decoder_dim': 32, 'num_heads': 16, 'num_layers': 3, 'decoder_dropout': 0.1, 'learning_rate': 0.008719601420374878, 'batch_size': 64}. 
Seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 67.7 M
-----------------------------------------------------------------
1.3 M     Trainable params
66.4 M    Non-trainable params
67.7 M    Total params
270.764   Total estimated model params size (MB)


Epoch 104:  40%|████      | 31/77 [00:03<00:04, 10.22it/s, v_num=17]       

Running Best Model

In [2]:
import os

import optuna
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.profilers import SimpleProfiler

from common.file_paths import BASE_DIR
from modules import (GenerateSchematicCallback,
                     LightningTransformerMinecraftStructureGenerator,
                     MinecraftDataModule)

# Name of an existing Optuna study and the storage URL of the SQLite database
# that holds it.
study_name = 'study_20231205030812'
storage_url = 'sqlite:///studies.db'

# Open the stored study so its trials can be inspected below.
study = optuna.load_study(study_name=study_name, storage=storage_url)


def get_nth_best_trial(study, n, objective_id=0):
    """Return the parameters of the n-th best trial for one objective.

    Only trials that have objective values are ranked. The ranking follows
    the study's optimisation direction for the chosen objective: smallest
    value first when it is minimised, largest value first when it is
    maximised.

    Args:
        study: Study object providing ``trials`` and ``directions``.
        n: Zero-based rank of the wanted trial; ``0`` is the best one.
        objective_id: Index of the objective value to rank by.

    Returns:
        The ``params`` dict of the selected trial.

    Raises:
        IndexError: If fewer than ``n + 1`` trials have objective values.
    """
    # Trials without values (e.g. failed or unfinished) cannot be ranked and
    # must never be returned as a "best" trial.
    ranked_candidates = [t for t in study.trials if t.values is not None]

    # For a maximised objective the best trial has the largest value.
    maximize = study.directions[objective_id].name == "MAXIMIZE"
    ranked_candidates.sort(
        key=lambda t: t.values[objective_id], reverse=maximize)

    return ranked_candidates[n].params


# Objective used to rank the trials and rank of the trial to retrain.
# Index 0 is the first objective value of the study (presumably the
# validation loss, as returned first by the tuning objective - confirm for
# the loaded study).
objective_id = 0
x = 0
nth_best_params = get_nth_best_trial(study, x, objective_id=objective_id)
print(nth_best_params)

seed_everything(1, workers=True)

# Studies that tuned the batch size store it alongside the model
# hyperparameters. It configures the data module, not the model, so split it
# off (on a copy) and fall back to 32 when the study did not tune it.
model_params = dict(nth_best_params)
batch_size = model_params.pop('batch_size', 32)

lightning_model = LightningTransformerMinecraftStructureGenerator(
    num_classes=20,
    max_sequence_length=512,
    freeze_encoder=True,
    **model_params
)

hdf5_file = os.path.join(BASE_DIR, 'data.h5')
data_module = MinecraftDataModule(
    file_path=hdf5_file,
    batch_size=batch_size,
    # num_workers=4
)

logger = TensorBoardLogger(
    'lightning_logs', name='minecraft_structure_generator', log_graph=False)
profiler = SimpleProfiler()

# Keep the checkpoint with the lowest validation loss plus the last one.
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',
    mode='min',
    save_last=True
)
# Stop once val_loss has not improved for 50 validation checks.
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    patience=50,
    verbose=False,
    mode='min'
)
# Project callback configured to generate from the validation data every
# 10 epochs and write the results to the schematic viewer's public folder.
generate_schematic_callback = GenerateSchematicCallback(
    save_path='schematic_viewer/public/schematics/',
    data_module=data_module,
    generate_train=False,
    generate_val=True,
    generate_all_datasets=False,
    generate_every_n_epochs=10,
    autoregressive=True
)

trainer = Trainer(
    max_epochs=5000,
    logger=logger,
    profiler=profiler,
    gradient_clip_val=1.0,
    log_every_n_steps=5,
    callbacks=[
        checkpoint_callback,
        early_stop_callback,
        generate_schematic_callback
    ]
)

trainer.fit(lightning_model, datamodule=data_module)

Seed set to 1


{'embedding_dim': 64, 'embedding_dropout': 0.5, 'decoder_dim': 32, 'num_heads': 2, 'num_layers': 3, 'decoder_dropout': 0.2, 'learning_rate': 0.0006395506262517658}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                                   | Params
-----------------------------------------------------------------
0 | model | TransformerMinecraftStructureGenerator | 66.9 M
-----------------------------------------------------------------
489 K     Trainable params
66.4 M    Non-trainable params
66.9 M    Total params
267.410   Total estimated model params size (MB)


Epoch 322: 100%|██████████| 21/21 [00:03<00:00,  5.85it/s, v_num=22]       Epoch 00323: reducing learning rate of group 0 to 6.3955e-05.
Epoch 369: 100%|██████████| 21/21 [00:06<00:00,  3.08it/s, v_num=22]Epoch 00370: reducing learning rate of group 0 to 6.3955e-06.
Epoch 380: 100%|██████████| 21/21 [00:03<00:00,  5.95it/s, v_num=22]Epoch 00381: reducing learning rate of group 0 to 6.3955e-07.
Epoch 391: 100%|██████████| 21/21 [00:03<00:00,  5.81it/s, v_num=22]Epoch 00392: reducing learning rate of group 0 to 6.3955e-08.
Epoch 402: 100%|██████████| 21/21 [00:03<00:00,  5.80it/s, v_num=22]Epoch 00403: reducing learning rate of group 0 to 6.3955e-09.
Epoch 408: 100%|██████████| 21/21 [00:03<00:00,  5.57it/s, v_num=22]


FIT Profiler Report

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Action                                                                                                                                                               	|  Mean duration (s)	|  Num calls      	|  Total time (s) 	|  Percentage %   	|
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Total                                                                                                                                                                	|  -     