In [1]:
# Note: This is a hack to allow importing from the parent directory
# (presumably where the project-local `models`, `data` and `utils` packages
# live -- confirm against the repository layout).
import sys
from pathlib import Path

# Only register the directory once, so re-running this cell does not keep
# appending duplicate entries to sys.path.
_parent_dir = str(Path().resolve().parent)
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

# Note: Ignore warnings, be brave (YoLo)
# Caveat: this suppresses every warning category for the rest of the session,
# including deprecation notices emitted by third-party libraries.
import warnings

warnings.filterwarnings("ignore")

In [2]:
import torch
import optuna
import torch.nn as nn
import torch.optim as optim
from models import ConvAutoencoder
from data import CIFAR10GaussianSplatsDataset
from utils import train, transform_and_collate

# Directory that receives this experiment's artifacts; created if missing.
results_path = Path("../logs/conv_autoencoder_test_1/")
results_path.mkdir(exist_ok=True, parents=True)

# Use one model for the whole splat
join_mode = "concat"
channels_dim = 23

# Both splits are built from the same root with the same init type; only the
# split flag (`train` / `val`) differs between them.
_dataset_kwargs = {"root": "../data/CIFAR10GS", "init_type": "grid"}
train_dataset = CIFAR10GaussianSplatsDataset(train=True, **_dataset_kwargs)
val_dataset = CIFAR10GaussianSplatsDataset(val=True, **_dataset_kwargs)

In [3]:
# Single training run with fixed hyperparameters (no search).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 1
grad_clip = 1.0
compile_model = True

# Model, loss, optimizer and LR scheduler.
model = ConvAutoencoder(channels_dim=channels_dim, weight_init=True)
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=1e-1, weight_decay=1e-1)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", patience=5)

# Settings shared by the train and validation loaders; only `shuffle` differs.
_loader_kwargs = dict(
    batch_size=32,
    num_workers=4,
    collate_fn=lambda batch: transform_and_collate(batch, join_mode),
)
train_loader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True, **_loader_kwargs
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, shuffle=False, **_loader_kwargs
)

# Progress is logged through `print`; the model file goes to the results directory.
results = train(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    epochs=epochs,
    device=device,
    scheduler=scheduler,
    grad_clip=grad_clip,
    logger=print,
    compile_model=compile_model,
    model_path=results_path / "model.pth",
)

Epoch 1/1:   0%|          | 0/9 [00:06<?, ?batch/s]


RuntimeError: The size of tensor a (46) must match the size of tensor b (32) at non-singleton dimension 3

In [3]:
def objective(trial):
    """Optuna objective: train one ConvAutoencoder and report its validation loss.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The last entry of ``results["val_loss"]`` returned by ``train``.
    """
    # Losses are sampled by name and looked up afterwards: Optuna categorical
    # choices should be None/bool/int/float/str, not arbitrary class objects.
    loss_fns = {"L1Loss": nn.L1Loss}

    # Define hyperparameter search space
    # Full search space (currently disabled in favour of the fixed values below):
    # loss_fns["MSELoss"] = nn.MSELoss
    # lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    # weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)
    # epochs = trial.suggest_int("epochs", 10, 100, step=50)
    # grad_clip = trial.suggest_float("grad_clip", 0.5, 2.0)
    # weight_init = trial.suggest_categorical("weight_init", [True, False])
    #
    # `suggest_float` replaces the deprecated `suggest_loguniform` (log=True)
    # and `suggest_uniform`; `step` is passed by keyword to `suggest_int`.
    lr = trial.suggest_float("lr", 1e-1, 1e-1, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-1, 1e-1, log=True)
    loss_name = trial.suggest_categorical("loss_fn", list(loss_fns))
    epochs = trial.suggest_int("epochs", 1, 1, step=1)
    grad_clip = trial.suggest_float("grad_clip", 1.0, 1.0)
    weight_init = trial.suggest_categorical("weight_init", [True])

    # Define train parameters
    model = ConvAutoencoder(channels_dim=channels_dim, weight_init=weight_init)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=4,
        collate_fn=lambda batch: transform_and_collate(batch, join_mode),
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=4,
        collate_fn=lambda batch: transform_and_collate(batch, join_mode),
    )
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = loss_fns[loss_name]()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", patience=5)
    compile_model = True

    # NOTE(review): every trial is given the same `model_path`; if `train`
    # saves to it, later trials overwrite earlier ones -- confirm this is intended
    # before raising `n_trials`.
    results = train(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        optimizer=optimizer,
        criterion=criterion,
        epochs=epochs,
        device=device,
        scheduler=scheduler,
        grad_clip=grad_clip,
        logger=print,
        compile_model=compile_model,
        model_path=results_path / "model.pth",
    )
    return results["val_loss"][-1]


# Run hyperparameter search
study = optuna.create_study(
    study_name="conv_autoencoder_test_1",
    direction="minimize",
)
# study.optimize(objective, n_trials=100)
study.optimize(objective, n_trials=1)

# Show the best trial and store the same text in the results directory.
best_trial_summary = f"Best trial:{study.best_trial}"
print(best_trial_summary)
(results_path / "best_trial.txt").write_text(best_trial_summary)

[I 2025-01-19 17:35:38,021] A new study created in memory with name: no-name-ce863403-cc72-4856-86be-f7a62b036889
Epoch 1/1:   0%|          | 0/9 [00:30<?, ?batch/s]
[W 2025-01-19 17:36:09,881] Trial 0 failed with parameters: {'lr': 0.1, 'weight_decay': 0.1, 'loss_fn': <class 'torch.nn.modules.loss.L1Loss'>, 'epochs': 1, 'grad_clip': 1.0, 'weight_init': True} because of the following error: RuntimeError('The size of tensor a (46) must match the size of tensor b (32) at non-singleton dimension 3').
Traceback (most recent call last):
  File "/home/mokot/LMU/VisualRepresentationLearning/Project/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_617941/136306747.py", line 40, in objective
    results = train(
              ^^^^^^
  File "/home/mokot/LMU/VisualRepresentationLearning/Project/utils/autoencoder.py", line 72, in train
    loss = criterion(x_hat, x)
    

RuntimeError: The size of tensor a (46) must match the size of tensor b (32) at non-singleton dimension 3