In [1]:
# Note: This is a hack to allow importing from the parent directory
import sys
from pathlib import Path

sys.path.append(str(Path().resolve().parent))

# Note: Ignore warnings, be brave (YoLo)
import warnings

warnings.filterwarnings("ignore")

In [2]:
import torch
import optuna
import torch.nn as nn
import torch.optim as optim
from models import ResNetAutoencoder
from data import CIFAR10GaussianSplatsDataset
from utils import train, transform_and_collate

results_path = Path("../logs/resnet_autoencoder_test_3/")
results_path.mkdir(parents=True, exist_ok=True)

# Use separate model for each part of the splat
channels_dim = {"means": 3, "quats": 4, "scales": 3, "opacities": 1, "colors": 12}
join_mode = "dict"

train_dataset = CIFAR10GaussianSplatsDataset(
    root="../data/CIFAR10GS",
    train=True,
    init_type="grid",
)
val_dataset = CIFAR10GaussianSplatsDataset(
    root="../data/CIFAR10GS",
    val=True,
    init_type="grid",
)

In [3]:
def objective(trial, param: str, dim: int):
    # Define hyperparameter search space
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-3)
    loss_fn = trial.suggest_categorical("loss_fn", [nn.L1Loss, nn.MSELoss])
    epochs = trial.suggest_int("epochs", 10, 100, 50)
    grad_clip = trial.suggest_uniform("grad_clip", 0.5, 2.0)
    weight_init = trial.suggest_categorical("weight_init", [True, False])

    # Define train parameters
    model = ResNetAutoencoder(channels_dim=dim, weight_init=weight_init)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=4,
        collate_fn=lambda batch: transform_and_collate(batch, join_mode, param),
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=4,
        collate_fn=lambda batch: transform_and_collate(batch, join_mode, param),
    )
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = loss_fn()
    epochs = epochs
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", patience=5)
    grad_clip = grad_clip
    compile_model = True

    results = train(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        optimizer=optimizer,
        criterion=criterion,
        epochs=epochs,
        device=device,
        scheduler=scheduler,
        grad_clip=grad_clip,
        logger=print,
        compile_model=compile_model,
        model_path=results_path / param / "model.pt",
    )
    return results["val_loss"][-1]


for param, dim in channels_dim.items():
    print("Processing parameter:", param)
    param_path = results_path / param
    param_path.mkdir(parents=True, exist_ok=True)

    # Run hyperparameter search
    study = optuna.create_study(
        direction="minimize", study_name=f"resnet_autoencoder_test_3/{param}"
    )
    study.optimize(lambda trial: objective(trial, param, dim), n_trials=100, n_jobs=4)
    print(f"Best trial:{study.best_trial}")
    with open(param_path / "best_trial.txt", "w") as f:
        f.write(f"Best trial:{study.best_trial}")
    optuna.visualization.plot_optimization_history(study).write_image(
        str(results_path / param / "opt_history.png")
    )
    optuna.visualization.plot_param_importances(study).write_image(
        str(results_path / param / "param_importances.png")
    )

[I 2025-01-20 19:05:04,704] A new study created in memory with name: resnet_autoencoder_test_3/means


Processing parameter: means


Epoch 1/1: 100%|██████████| 9/9 [01:29<00:00,  9.99s/batch]
[I 2025-01-20 19:07:05,147] Trial 0 finished with value: 0.34408602118492126 and parameters: {'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}. Best is trial 0 with value: 0.34408602118492126.
[I 2025-01-20 19:07:05,149] A new study created in memory with name: resnet_autoencoder_test_3/quats


Epoch 1/1 | Train Loss: 0.3909 | Val Loss: 0.3441
Train Loss: 0.3909 | Val Loss: 0.3441 | Training time: 119.07s
Best trial:FrozenTrial(number=0, state=1, values=[0.34408602118492126], datetime_start=datetime.datetime(2025, 1, 20, 19, 5, 4, 705733), datetime_complete=datetime.datetime(2025, 1, 20, 19, 7, 5, 147072), params={'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lr': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'weight_decay': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'loss_fn': CategoricalDistribution(choices=(<class 'torch.nn.modules.loss.L1Loss'>,)), 'epochs': IntDistribution(high=1, log=False, low=1, step=1), 'grad_clip': FloatDistribution(high=1.0, log=False, low=1.0, step=None), 'weight_init': CategoricalDistribution(choices=(True,))}, trial_id=0, value=None)
Processing parameter: qu

Epoch 1/1: 100%|██████████| 9/9 [00:58<00:00,  6.54s/batch]
[I 2025-01-20 19:08:42,296] Trial 0 finished with value: 0.2947850525379181 and parameters: {'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}. Best is trial 0 with value: 0.2947850525379181.
[I 2025-01-20 19:08:42,301] A new study created in memory with name: resnet_autoencoder_test_3/scales


Epoch 1/1 | Train Loss: 0.3687 | Val Loss: 0.2948
Train Loss: 0.3687 | Val Loss: 0.2948 | Training time: 97.08s
Best trial:FrozenTrial(number=0, state=1, values=[0.2947850525379181], datetime_start=datetime.datetime(2025, 1, 20, 19, 7, 5, 150177), datetime_complete=datetime.datetime(2025, 1, 20, 19, 8, 42, 294869), params={'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lr': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'weight_decay': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'loss_fn': CategoricalDistribution(choices=(<class 'torch.nn.modules.loss.L1Loss'>,)), 'epochs': IntDistribution(high=1, log=False, low=1, step=1), 'grad_clip': FloatDistribution(high=1.0, log=False, low=1.0, step=None), 'weight_init': CategoricalDistribution(choices=(True,))}, trial_id=0, value=None)
Processing parameter: sca

Epoch 1/1: 100%|██████████| 9/9 [00:04<00:00,  2.00batch/s]
[I 2025-01-20 19:08:47,962] Trial 0 finished with value: 0.4677492082118988 and parameters: {'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}. Best is trial 0 with value: 0.4677492082118988.
[I 2025-01-20 19:08:47,966] A new study created in memory with name: resnet_autoencoder_test_3/opacities


Epoch 1/1 | Train Loss: 0.5551 | Val Loss: 0.4677
Train Loss: 0.5551 | Val Loss: 0.4677 | Training time: 5.53s
Best trial:FrozenTrial(number=0, state=1, values=[0.4677492082118988], datetime_start=datetime.datetime(2025, 1, 20, 19, 8, 42, 302767), datetime_complete=datetime.datetime(2025, 1, 20, 19, 8, 47, 961786), params={'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lr': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'weight_decay': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'loss_fn': CategoricalDistribution(choices=(<class 'torch.nn.modules.loss.L1Loss'>,)), 'epochs': IntDistribution(high=1, log=False, low=1, step=1), 'grad_clip': FloatDistribution(high=1.0, log=False, low=1.0, step=None), 'weight_init': CategoricalDistribution(choices=(True,))}, trial_id=0, value=None)
Processing parameter: opa

Epoch 1/1: 100%|██████████| 9/9 [01:00<00:00,  6.71s/batch]
W0120 19:09:48.822000 852878 torch/_dynamo/convert_frame.py:844] [0/8] torch._dynamo hit config.cache_size_limit (8)
W0120 19:09:48.822000 852878 torch/_dynamo/convert_frame.py:844] [0/8]    function: 'forward' (/home/mokot/LMU/VisualRepresentationLearning/Project/models/abstract_autoencoder.py:18)
W0120 19:09:48.822000 852878 torch/_dynamo/convert_frame.py:844] [0/8]    last reason: 0/0: GLOBAL_STATE changed: grad_mode 
W0120 19:09:48.822000 852878 torch/_dynamo/convert_frame.py:844] [0/8] To log all recompilation reasons, use TORCH_LOGS="recompiles".
W0120 19:09:48.822000 852878 torch/_dynamo/convert_frame.py:844] [0/8] To diagnose recompilation issues, see https://pytorch.org/docs/main/torch.compiler_troubleshooting.html.
[I 2025-01-20 19:09:49,120] Trial 0 finished with value: 0.7375386357307434 and parameters: {'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0

Epoch 1/1 | Train Loss: 0.8341 | Val Loss: 0.7375
Train Loss: 0.8341 | Val Loss: 0.7375 | Training time: 61.08s
Best trial:FrozenTrial(number=0, state=1, values=[0.7375386357307434], datetime_start=datetime.datetime(2025, 1, 20, 19, 8, 47, 967964), datetime_complete=datetime.datetime(2025, 1, 20, 19, 9, 49, 119060), params={'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lr': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'weight_decay': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'loss_fn': CategoricalDistribution(choices=(<class 'torch.nn.modules.loss.L1Loss'>,)), 'epochs': IntDistribution(high=1, log=False, low=1, step=1), 'grad_clip': FloatDistribution(high=1.0, log=False, low=1.0, step=None), 'weight_init': CategoricalDistribution(choices=(True,))}, trial_id=0, value=None)
Processing parameter: co

Epoch 1/1: 100%|██████████| 9/9 [00:03<00:00,  2.43batch/s]
[I 2025-01-20 19:09:53,591] Trial 0 finished with value: 0.2587871154149373 and parameters: {'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}. Best is trial 0 with value: 0.2587871154149373.


Epoch 1/1 | Train Loss: 0.3504 | Val Loss: 0.2588
Train Loss: 0.3504 | Val Loss: 0.2588 | Training time: 4.40s
Best trial:FrozenTrial(number=0, state=1, values=[0.2587871154149373], datetime_start=datetime.datetime(2025, 1, 20, 19, 9, 49, 125049), datetime_complete=datetime.datetime(2025, 1, 20, 19, 9, 53, 590883), params={'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lr': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'weight_decay': FloatDistribution(high=0.1, log=True, low=0.1, step=None), 'loss_fn': CategoricalDistribution(choices=(<class 'torch.nn.modules.loss.L1Loss'>,)), 'epochs': IntDistribution(high=1, log=False, low=1, step=1), 'grad_clip': FloatDistribution(high=1.0, log=False, low=1.0, step=None), 'weight_init': CategoricalDistribution(choices=(True,))}, trial_id=0, value=None)
