In [None]:
# Hack to always autoreload modules and avoid restarting the kernel each time

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

In [1]:
# Note: This is a hack to allow importing from the parent directory
import sys
from pathlib import Path

sys.path.append(str(Path().resolve().parent))

In [3]:
import optuna
import torch
from models.gaussian_image_trainer import GaussianImageTrainer
from configs import Config
from utils.search_space import is_valid_combination
import torchvision
from constants import CIFAR10_TRANSFORM
from utils.data import create_default_image
from utils import visualize_tensor
import logging
import sys

# Only used one image for the first tuning
dataset = torchvision.datasets.CIFAR10(root="./data", download=True)
# cifar_image = CIFAR10_TRANSFORM(dataset[2][0])
cifar_imgs = [CIFAR10_TRANSFORM(dataset[i][0]) for i in range(4)]
visualize_tensor(cifar_imgs[1])

Files already downloaded and verified


Error: no "view" rule for type "image/png" passed its test case
       (for more information, add "--debug=1" on the command line)


In [28]:
# New experiments with some key learnings:]
# 1. LR 0.01 seems to be the best.
# 2. Max Steps 2000 is better than 1000, larger values are not much better.
# 3. Init Type: KNN is the best, Grid is still interesting for other reasons (AE), don't use random. 
# 4. Model Type: 2dgs is superior
# 5. More points is better, but 32x32 is already very good.
# 6. Init Scale: 2.0 -> 3.0 is also good
# 7. Init Opacity: 0.5 -> when Opacity Regularization is None, other init opacities also worked well
# 8. Opacity Regularization: None -> very clear winner
# 9. Scale Regularization: 0.1
# 10. Extent: 1.0 -> 2.0 and 4.0 are very similar also, could pick any

# Add stream handler of stdout to show the messages
optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))
study_name_grid = "study-w-grid-3"  # Unique identifier of the study.
storage_name_grid = "sqlite:///{}.db".format(study_name_grid)
study_w_grid = optuna.create_study(study_name=study_name_grid, storage=storage_name_grid, load_if_exists=True)

[I 2025-01-09 03:18:46,129] A new study created in RDB with name: study-w-grid-3


A new study created in RDB with name: study-w-grid-3
A new study created in RDB with name: study-w-grid-3


In [29]:
def objective_w_grid_3(trial: optuna.Trial) -> float:
    """
    Objective function for Optuna hyperparameter optimization.

    Args:
        trial (optuna.Trial): The trial object used to suggest hyperparameters.

    Returns:
        float: The evaluation metric to minimize (e.g., validation loss).
    """
    # Conditional sampling
    # group_optimization = trial.suggest_categorical("group_optimization", [True, False])
    # strategy = None
    group_optimization = True # For now, as having default strategy was causing an error
    if not group_optimization:
        strategy = trial.suggest_categorical("strategy", ["default", "mcmc"])

    # Hyperparameter suggestions
    loss_weights = [
        1/3,
        1/3,
        1/3,
    ]

    scale_regularization = 0.1
    opacity_regularization = None

    # init_type = trial.suggest_categorical("init_type", ["grid", "knn"])
    init_type = "grid"  # Focus on the option with a fixed grid for this study
    num_points = 32*32
    extent = trial.suggest_categorical("extent", [1.0, 2.0, 4.0])
    init_scale = 2.0
    init_opacity = 0.5

    max_steps = 1000
    learning_rate = 0.01

    model_type = "2dgs"  # Left out "2dgs-inria" as it needed further dependencies
    bilateral_grid = False

    image_index = trial.suggest_categorical("image_index", [0, 1, 2, 3])

    # Added these hyperparameters:
    sparse_gradient = trial.suggest_categorical("sparse_gradient", [True, False])
    # normal_loss_weight = trial.suggest_float("normal_loss_weight", 0.1, 1.0, log=True)
    # distortion_loss_weight = trial.suggest_float("distortion_loss_weight", 0.1, 1.0, log=True)
    sh_degree = trial.suggest_categorical("sh_degree", [0, 1, 2, 3])  # I guess this is correct as it seems to behave as a boolean

    if not is_valid_combination({
        "learning_rate": learning_rate,
        "loss_weights": loss_weights,
        "group_optimization": group_optimization,
        # "strategy": strategy,
        "learning_rate": learning_rate,
        "model_type": model_type,
        "bilateral_grid": bilateral_grid,
        # Added these:
        "sparse_gradient": sparse_gradient,
        #"normal_loss_weight": normal_loss_weight,
        #"distortion_loss_weight": distortion_loss_weight,
        "sh_degree": sh_degree,
    }, GaussianImageTrainer):
        raise optuna.exceptions.TrialPruned("Invalid hyperparameter combination")

    # Create Config object
    cfg = Config(
        save_results=False,
        save_logs=False,
        seed=42,
        image=cifar_imgs[image_index],  # Replace with actual ground truth image tensor
        max_steps=max_steps,
        learning_rate=learning_rate,
        loss_weights=loss_weights,
        init_type=init_type,
        num_points=num_points,
        init_scale=init_scale,
        init_opacity=init_opacity,
        scale_regularization=scale_regularization,
        opacity_regularization=opacity_regularization,
        extent=extent,
        group_optimization=group_optimization,
        # Added these:
        sparse_gradient=sparse_gradient,
        #normal_loss_weight=normal_loss_weight,
        #distortion_loss_weight=distortion_loss_weight,
        sh_degree=sh_degree,
        # strategy=strategy,
        model_type=model_type,
        # bilateral_grid=bilateral_grid,
        bilateral_grid=False
    )

    # Initialize and train the model
    trainer = GaussianImageTrainer(cfg)
    result = trainer.train()

    # Evaluate generated image quality (e.g., L1 loss with ground truth)
    generated_image = result.cpu()  # Replace with actual rendered output
    ground_truth_image = cfg.image.cpu()
    # evaluation_metric = torch.nn.functional.l1_loss(generated_image, ground_truth_image)  # Old version
    evaluation_metric = trainer.l1_loss * 1/3 + trainer.mse_loss * 1/3 + trainer.ssim_loss * 1/3  # Simply take average as loss

    return evaluation_metric.item()

In [30]:
study_w_grid.optimize(objective_w_grid_3, n_trials=1000, n_jobs=1, timeout=7200)

Model initialized. Number of Gaussians: 1024




Loss: 0.049 (L1: 0.014, MSE: 0.000, SSIM: 0.058): 100%|[36m██████[0m| 1000/1000 [00:06<00:00, 159.47step/s][0m
[I 2025-01-09 03:18:57,575] Trial 0 finished with value: 0.023919718340039253 and parameters: {'extent': 2.0, 'image_index': 3, 'sparse_gradient': False, 'sh_degree': 0}. Best is trial 0 with value: 0.023919718340039253.


Final loss: 0.048982612788677216
Total Time: Rasterization: 0.973s, Backward: 2.486s
Trial 0 finished with value: 0.023919718340039253 and parameters: {'extent': 2.0, 'image_index': 3, 'sparse_gradient': False, 'sh_degree': 0}. Best is trial 0 with value: 0.023919718340039253.
Trial 0 finished with value: 0.023919718340039253 and parameters: {'extent': 2.0, 'image_index': 3, 'sparse_gradient': False, 'sh_degree': 0}. Best is trial 0 with value: 0.023919718340039253.
Model initialized. Number of Gaussians: 1024


Training Progress:   0%|[36m                                                 [0m| 0/1000 [00:00<?, ?step/s][0m
[W 2025-01-09 03:18:58,974] Trial 1 failed with parameters: {'extent': 1.0, 'image_index': 2, 'sparse_gradient': True, 'sh_degree': 0} because of the following error: RuntimeError('Sparse division requires a scalar or zero-dim dense tensor divisor (got shape [1024, 1] for divisor)').
Traceback (most recent call last):
  File "/home/h/harjesruiloba/miniforge3/envs/nerfstudio/lib/python3.8/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/user/22473/ipykernel_2101078/1871683771.py", line 95, in objective_w_grid_3
    result = trainer.train()
  File "/home/h/harjesruiloba/Projects/visual-representation-learning/models/gaussian_image_trainer.py", line 568, in train
    start = time.time()
  File "/home/h/harjesruiloba/miniforge3/envs/nerfstudio/lib/python3.8/site-packages/torch/_tensor.py", line 492, in backward
   

Trial 1 failed with parameters: {'extent': 1.0, 'image_index': 2, 'sparse_gradient': True, 'sh_degree': 0} because of the following error: RuntimeError('Sparse division requires a scalar or zero-dim dense tensor divisor (got shape [1024, 1] for divisor)').
Traceback (most recent call last):
  File "/home/h/harjesruiloba/miniforge3/envs/nerfstudio/lib/python3.8/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/user/22473/ipykernel_2101078/1871683771.py", line 95, in objective_w_grid_3
    result = trainer.train()
  File "/home/h/harjesruiloba/Projects/visual-representation-learning/models/gaussian_image_trainer.py", line 568, in train
    start = time.time()
  File "/home/h/harjesruiloba/miniforge3/envs/nerfstudio/lib/python3.8/site-packages/torch/_tensor.py", line 492, in backward
    torch.autograd.backward(
  File "/home/h/harjesruiloba/miniforge3/envs/nerfstudio/lib/python3.8/site-packages/torch/autograd/__init__.py", li

[W 2025-01-09 03:18:59,011] Trial 1 failed with value None.


Trial 1 failed with value None.
Trial 1 failed with value None.


RuntimeError: Sparse division requires a scalar or zero-dim dense tensor divisor (got shape [1024, 1] for divisor)