In [1]:
# Note: This is a hack to allow importing from the parent directory
# (the project packages `models`, `data` and `utils` are imported in the next cell).
import sys
from pathlib import Path

# Guard the append so that re-running this cell does not keep stacking
# duplicate entries onto sys.path.
_parent_dir = str(Path().resolve().parent)
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

# Note: Ignore warnings, be brave (YoLo)
# This silences *every* warning category, including deprecation notices
# from third-party libraries.
import warnings

warnings.filterwarnings("ignore")

In [2]:
import torch
import optuna
import torch.nn as nn
import torch.optim as optim
from models import ConvAutoencoder
from data import CIFAR10GaussianSplatsDataset
from utils import train, transform_and_collate

# Directory that receives the artefacts written by this experiment
# (created up front so later cells can write into it).
results_path = Path("../logs/conv_autoencoder_test_3/")
results_path.mkdir(exist_ok=True, parents=True)

# Use separate model for each part of the splat:
# maps the splat parameter name to the value handed to the model as `channels_dim`.
channels_dim = {
    "means": 3,
    "quats": 4,
    "scales": 3,
    "opacities": 1,
    "colors": 12,
}
join_mode = "dict"

# Training and validation datasets are read from the same root and use the
# same `init_type`.
train_dataset = CIFAR10GaussianSplatsDataset(
    init_type="grid",
    train=True,
    root="../data/CIFAR10GS",
)
val_dataset = CIFAR10GaussianSplatsDataset(
    init_type="grid",
    val=True,
    root="../data/CIFAR10GS",
)

In [None]:
def objective(trial, param: str, dim: int):
    """Optuna objective: train one ConvAutoencoder and return its validation loss.

    Args:
        trial: Optuna trial used to sample the hyperparameters
            (`lr`, `weight_decay`, `epochs`, `grad_clip`, `weight_init`).
        param: Name of the splat parameter being modelled. It is forwarded to
            `transform_and_collate` and used as the sub-directory of
            `results_path` for the model file.
        dim: Value passed to `ConvAutoencoder` as `channels_dim`.

    Returns:
        The last entry of `results["val_loss"]` as returned by `train`.
    """
    # Define hyperparameter search space.
    # `suggest_float(..., log=True)` / `suggest_float(...)` sample the same
    # distributions as the deprecated `suggest_loguniform` / `suggest_uniform`.
    lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)
    epochs = trial.suggest_categorical("epochs", [10, 25, 50])
    grad_clip = trial.suggest_float("grad_clip", 0.5, 2.0)
    weight_init = trial.suggest_categorical("weight_init", [True, False])

    # Define train parameters
    model = ConvAutoencoder(channels_dim=dim, weight_init=weight_init)

    def collate(batch):
        # Bind the notebook-level join mode and the current splat parameter.
        return transform_and_collate(batch, join_mode, param)

    # Both loaders share everything except the dataset and shuffling.
    loader_kwargs = dict(batch_size=32, num_workers=4, collate_fn=collate)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, shuffle=True, **loader_kwargs
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset, shuffle=False, **loader_kwargs
    )

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", patience=5)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(review): every trial of the same `param` passes the same
    # `model_path`; presumably `train` saves the model there, in which case
    # later trials overwrite earlier ones and the file left on disk belongs to
    # the last trial, not the best one -- confirm against `train`.
    results = train(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        optimizer=optimizer,
        criterion=nn.MSELoss(),
        epochs=epochs,
        device=device,
        scheduler=scheduler,
        grad_clip=grad_clip,
        logger=print,
        compile_model=True,
        model_path=results_path / param / "model.pt",
    )
    return results["val_loss"][-1]


# Run one independent Optuna study per splat parameter and store its summary
# and diagnostic plots under `results_path / <param>`.
for param, dim in channels_dim.items():
    print("Processing parameter:", param)
    param_path = results_path / param
    param_path.mkdir(parents=True, exist_ok=True)

    # Run hyperparameter search
    # NOTE(review): `objective` never calls `trial.report(...)`, so the
    # MedianPruner receives no intermediate values and cannot prune anything.
    study = optuna.create_study(
        direction="minimize",
        study_name=f"conv_autoencoder_test_3/{param}",
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=5),
    )
    # The lambda is consumed inside this iteration, so it sees the current
    # `param` / `dim` values.
    study.optimize(lambda trial: objective(trial, param, dim), n_trials=100, n_jobs=1)

    # Report the best trial on stdout and persist the same text to disk.
    best_trial_summary = f"Best trial:{study.best_trial}"
    print(best_trial_summary)
    with open(param_path / "best_trial.txt", "w") as f:
        f.write(best_trial_summary)

    # Save diagnostic plots next to the summary.
    optuna.visualization.plot_optimization_history(study).write_image(
        str(param_path / "opt_history.png")
    )
    # The public API is `plot_param_importances` (plural); the singular name
    # does not exist in `optuna.visualization` and raised AttributeError.
    optuna.visualization.plot_param_importances(study).write_image(
        str(param_path / "param_importance.png")
    )

[I 2025-01-19 21:12:27,099] A new study created in memory with name: conv_autoencoder_test_3/means


Processing parameter: means


Epoch 1/1: 100%|██████████| 9/9 [00:20<00:00,  2.32s/batch]
[I 2025-01-19 21:12:55,282] Trial 0 finished with value: 0.34991193811098736 and parameters: {'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}. Best is trial 0 with value: 0.34991193811098736.
[I 2025-01-19 21:12:55,288] A new study created in memory with name: conv_autoencoder_test_3/quats


Epoch 1/1 | Train Loss: 0.3619 | Val Loss: 0.3499
Train Loss: 0.3619 | Val Loss: 0.3499 | Training time: 26.14s
Best trial:FrozenTrial(number=0, state=1, values=[0.34991193811098736], datetime_start=datetime.datetime(2025, 1, 19, 21, 12, 27, 103615), datetime_complete=datetime.datetime(2025, 1, 19, 21, 12, 55, 280931), params={'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lr': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'weight_decay': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'loss_fn': CategoricalDistribution(choices=(<class 'torch.nn.modules.loss.L1Loss'>,)), 'epochs': IntDistribution(high=1, log=False, low=1, step=1), 'grad_clip': FloatDistribution(high=1.0, log=False, low=1.0, step=None), 'weight_init': CategoricalDistribution(choices=(True,))}, trial_id=0, value=None)
Processing parameter:

Epoch 1/1: 100%|██████████| 9/9 [00:30<00:00,  3.43s/batch]
[I 2025-01-19 21:13:38,970] Trial 0 finished with value: 0.2928269604841868 and parameters: {'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}. Best is trial 0 with value: 0.2928269604841868.
[I 2025-01-19 21:13:38,982] A new study created in memory with name: conv_autoencoder_test_3/scales


Epoch 1/1 | Train Loss: 0.2960 | Val Loss: 0.2928
Train Loss: 0.2960 | Val Loss: 0.2928 | Training time: 43.65s
Best trial:FrozenTrial(number=0, state=1, values=[0.2928269604841868], datetime_start=datetime.datetime(2025, 1, 19, 21, 12, 55, 294281), datetime_complete=datetime.datetime(2025, 1, 19, 21, 13, 38, 970394), params={'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lr': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'weight_decay': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'loss_fn': CategoricalDistribution(choices=(<class 'torch.nn.modules.loss.L1Loss'>,)), 'epochs': IntDistribution(high=1, log=False, low=1, step=1), 'grad_clip': FloatDistribution(high=1.0, log=False, low=1.0, step=None), 'weight_init': CategoricalDistribution(choices=(True,))}, trial_id=0, value=None)
Processing parameter: 

Epoch 1/1: 100%|██████████| 9/9 [00:01<00:00,  6.48batch/s]
[I 2025-01-19 21:13:40,948] Trial 0 finished with value: 0.38455350200335187 and parameters: {'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}. Best is trial 0 with value: 0.38455350200335187.
[I 2025-01-19 21:13:40,951] A new study created in memory with name: conv_autoencoder_test_3/opacities


Epoch 1/1 | Train Loss: 0.4072 | Val Loss: 0.3846
Train Loss: 0.4072 | Val Loss: 0.3846 | Training time: 1.95s
Best trial:FrozenTrial(number=0, state=1, values=[0.38455350200335187], datetime_start=datetime.datetime(2025, 1, 19, 21, 13, 38, 983911), datetime_complete=datetime.datetime(2025, 1, 19, 21, 13, 40, 947764), params={'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lr': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'weight_decay': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'loss_fn': CategoricalDistribution(choices=(<class 'torch.nn.modules.loss.L1Loss'>,)), 'epochs': IntDistribution(high=1, log=False, low=1, step=1), 'grad_clip': FloatDistribution(high=1.0, log=False, low=1.0, step=None), 'weight_init': CategoricalDistribution(choices=(True,))}, trial_id=0, value=None)
Processing parameter: 

Epoch 1/1: 100%|██████████| 9/9 [00:21<00:00,  2.37s/batch]
W0119 21:14:02.847000 681639 torch/_dynamo/convert_frame.py:844] [0/8] torch._dynamo hit config.cache_size_limit (8)
W0119 21:14:02.847000 681639 torch/_dynamo/convert_frame.py:844] [0/8]    function: 'forward' (/home/mokot/LMU/VisualRepresentationLearning/Project/models/abstract_autoencoder.py:18)
W0119 21:14:02.847000 681639 torch/_dynamo/convert_frame.py:844] [0/8]    last reason: 0/0: GLOBAL_STATE changed: grad_mode 
W0119 21:14:02.847000 681639 torch/_dynamo/convert_frame.py:844] [0/8] To log all recompilation reasons, use TORCH_LOGS="recompiles".
W0119 21:14:02.847000 681639 torch/_dynamo/convert_frame.py:844] [0/8] To diagnose recompilation issues, see https://pytorch.org/docs/main/torch.compiler_troubleshooting.html.
[I 2025-01-19 21:14:03,039] Trial 0 finished with value: 0.1789642870426178 and parameters: {'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0

Epoch 1/1 | Train Loss: 0.2905 | Val Loss: 0.1790
Train Loss: 0.2905 | Val Loss: 0.1790 | Training time: 22.07s
Best trial:FrozenTrial(number=0, state=1, values=[0.1789642870426178], datetime_start=datetime.datetime(2025, 1, 19, 21, 13, 40, 953196), datetime_complete=datetime.datetime(2025, 1, 19, 21, 14, 3, 38867), params={'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lr': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'weight_decay': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'loss_fn': CategoricalDistribution(choices=(<class 'torch.nn.modules.loss.L1Loss'>,)), 'epochs': IntDistribution(high=1, log=False, low=1, step=1), 'grad_clip': FloatDistribution(high=1.0, log=False, low=1.0, step=None), 'weight_init': CategoricalDistribution(choices=(True,))}, trial_id=0, value=None)
Processing parameter: co

Epoch 1/1: 100%|██████████| 9/9 [00:01<00:00,  4.93batch/s]
[I 2025-01-19 21:14:05,642] Trial 0 finished with value: 0.24367456634839377 and parameters: {'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}. Best is trial 0 with value: 0.24367456634839377.


Epoch 1/1 | Train Loss: 0.2490 | Val Loss: 0.2437
Train Loss: 0.2490 | Val Loss: 0.2437 | Training time: 2.58s
Best trial:FrozenTrial(number=0, state=1, values=[0.24367456634839377], datetime_start=datetime.datetime(2025, 1, 19, 21, 14, 3, 46204), datetime_complete=datetime.datetime(2025, 1, 19, 21, 14, 5, 641032), params={'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lr': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'weight_decay': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'loss_fn': CategoricalDistribution(choices=(<class 'torch.nn.modules.loss.L1Loss'>,)), 'epochs': IntDistribution(high=1, log=False, low=1, step=1), 'grad_clip': FloatDistribution(high=1.0, log=False, low=1.0, step=None), 'weight_init': CategoricalDistribution(choices=(True,))}, trial_id=0, value=None)
