# Record3D Point Cloud Visualization

This notebook visualizes combined point clouds from all cameras to check alignment.


In [2]:
%load_ext autoreload
%autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
!pip install --upgrade setuptools wheel
!pip install -q scikit-learn matplotlib
!pip install -q plotly
!pip install -q open3d --ignore-installed blinker
!pip install -q --upgrade Pillow
!pip install -q plotly
!pip install packaging==21.3 --force-reinstall --no-deps
!pip install -q gsplat
!pip install -q pytorch-msssim


    
import sys
import os
from pathlib import Path
import numpy as np
import torch

path = os.getcwd() + "workspace/Home_Reconstruction/scene/"
sys.path.append(path)
path = os.getcwd() + "workspace/Home_Reconstruction/"
sys.path.append(path)

from data_loaders.record3d_loader import Record3DScene
from objectgs_model import ObjectGSModel

# For notebook visualization
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

[0mCollecting packaging==21.3
  Using cached packaging-21.3-py3-none-any.whl.metadata (15 kB)
Using cached packaging-21.3-py3-none-any.whl (40 kB)
Installing collected packages: packaging
  Attempting uninstall: packaging
    Found existing installation: packaging 25.0
    Uninstalling packaging-25.0:
      Successfully uninstalled packaging-25.0
Successfully installed packaging-21.3
[0mJupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


## Load Scene


In [4]:
scene_path = Path("/workspace/Home_Reconstruction/data_scenes/maria bedroom")

#Load Scene
scene = Record3DScene(
    scene_path=scene_path,
)


# Create ObjectGS model from scene point cloud
model = ObjectGSModel(
    point_cloud=scene.points,
    colors=scene.colors,
    object_ids=None,  # All set to 1
    voxel_size=0.08,
    k=10
)

Loaded metadata from /workspace/Home_Reconstruction/data_scenes/maria bedroom/EXR_RGBD/metadata.json
  Image dimensions: 720x960
  Number of frames: 9720

Dataset split:
  Training: 486 frames
  Testing: 97 frames

Creating 486 cameras...
Created 486 cameras

Creating 97 cameras...
Created 97 cameras

Loading point cloud...
Loading point cloud from /workspace/Home_Reconstruction/data_scenes/maria bedroom/processed.ply
  Loaded 533,494 points
Scene initialized with 533,494 points

Initializing ObjectGS model (FIXED VERSION)
  Point cloud: 533,494 points
  Voxel size: 0.08m
  Gaussians per anchor: 10
  Number of objects: 2
  Created 8162 anchors
  Total Gaussians: 81,620
  ✓ Initialized scales to SMALL (~0.018)
  ✓ Initialized opacities to LOW (0.1) - KEY FIX!
  ✓ Initialized colors to preserve point cloud colors
  ✓ Pre-computed anchor data



In [4]:
# In your notebook, run this to reload:
import importlib
import vis_tools.pc_viewer
importlib.reload(vis_tools.pc_viewer)
from vis_tools.pc_viewer import *

# Now it should work:
visualize_pointclouds(
    "/workspace/Home_Reconstruction/data_scenes/maria bedroom",
    ["processed.ply"]
)


Loading 1 point cloud(s) from maria bedroom

Loading processed.ply...
  ✓ 533,494 points




Summary:
processed.ply                     533,494 points (150,000 displayed)



In [16]:
import sys
from pathlib import Path
sys.path.insert(0, '/workspace/Home_Reconstruction')

from data_loaders.record3d_loader import Record3DScene
from scene.objectgs_model import ObjectGSModel
import importlib
from scene import train
import gsplat

# Reload the module
importlib.reload(train)

# Import from reloaded module
from scene.train import GaussianTrainer

PROJECT_ROOT = Path('/workspace/Home_Reconstruction')

In [10]:
# # Load scene
scene = Record3DScene(
    scene_path=Path("/workspace/Home_Reconstruction/data_scenes/maria bedroom")
)




Loaded metadata from /workspace/Home_Reconstruction/data_scenes/maria bedroom/EXR_RGBD/metadata.json
  Image dimensions: 720x960
  Number of frames: 9720

Dataset split:
  Training: 486 frames
  Testing: 97 frames

Creating 486 cameras...
Created 486 cameras

Creating 97 cameras...
Created 97 cameras

Loading point cloud...
Loading point cloud from /workspace/Home_Reconstruction/data_scenes/maria bedroom/processed.ply
  Loaded 533,494 points
Scene initialized with 533,494 points

Initializing ObjectGS model (FIXED VERSION)
  Point cloud: 533,494 points
  Voxel size: 0.08m
  Gaussians per anchor: 10
  Number of objects: 2
  Created 8162 anchors
  Total Gaussians: 81,620
  ✓ Initialized scales to SMALL (~0.018)
  ✓ Initialized opacities to LOW (0.1) - KEY FIX!
  ✓ Initialized colors to preserve point cloud colors
  ✓ Pre-computed anchor data



In [17]:
# Create model
model = ObjectGSModel(
    point_cloud=scene.points,
    colors=scene.colors,
    voxel_size=0.02,
    k=10
)



Initializing ObjectGS model (FIXED VERSION)
  Point cloud: 533,494 points
  Voxel size: 0.02m
  Gaussians per anchor: 10
  Number of objects: 2
  Created 135769 anchors
  Total Gaussians: 1,357,690
  ✓ Initialized scales to SMALL (~0.018)
  ✓ Initialized opacities to LOW (0.1) - KEY FIX!
  ✓ Initialized colors to preserve point cloud colors
  ✓ Pre-computed anchor data



In [None]:
# Config
config = {
    'lr': 0.001,
    'lr_position': 0.00016,
    'lr_feature': 0.0025,
    'lr_opacity': 0.05,
    'lr_scaling': 0.001,  # REDUCE from 0.005 (key change!)
    
    'num_iterations': 5000,
    'save_interval': 1000,
    'test_interval': 100,
    'log_interval': 100,
    
    'prune_interval': 250,
    'prune_opacity_threshold': 0.001,
    'prune_scale_threshold': 0.1,  # LOWER from 0.1 (prune large Gaussians)
    
    # CRITICAL: Increase volume regularization
    'lambda_ssim': 0.2,
    'lambda_vol': 0.001,  # INCREASE from 0.00002 (5x stronger!)
    
    'checkpoint_dir': str(PROJECT_ROOT / 'checkpoints'),
    'output_dir': str(PROJECT_ROOT / 'outputs'),
}

model = model.to('cuda')
trainer = GaussianTrainer(model, scene, config)

print("\n" + "="*60)
print("SCENE STATISTICS")
print("="*60)
params = model.get_parameters_as_tensors()
print(f"Total Gaussians:    {params['num_gaussians']:,}")
print(f"Total Anchors:      {params['num_anchors']:,}")
print(f"Image resolution:   {scene.train_cameras[0].image_width} x {scene.train_cameras[0].image_height}")
print(f"Train cameras:      {len(trainer.train_cameras)}")
print(f"Test cameras:       {len(trainer.test_cameras)}")
print(f"GPU:                {torch.cuda.get_device_name(0)}")
print(f"Estimated time:     ~2 hours")
print("="*60 + "\n")

# Train
trainer.train()

# Render tests
trainer.render_test_sequence(
    output_dir=str(PROJECT_ROOT / 'final_test_renders')  # Explicit path
)
# Or render single view
trainer.render_single(scene.test_cameras[0], save_path="test_output_render.png")

Pre-computing camera matrices...
✓ Camera data cached on GPU
✓ Optimizer created with 4 parameter groups

Trainer initialized (FULLY OPTIMIZED)
Device: cuda
Train cameras: 486
Test cameras: 97
Optimizer: CONSOLIDATED (4 groups)
Lambda vol: 0.001
Pruning: Every 250 iters


SCENE STATISTICS
Total Gaussians:    1,357,690
Total Anchors:      135,769
Image resolution:   720 x 960
Train cameras:      486
Test cameras:       97
GPU:                NVIDIA GeForce RTX 4090
Estimated time:     ~2 hours

Starting training...



Loss: 0.2993 | L1: 0.2238:   2%|▏         | 99/5000 [17:29<14:00:30, 10.29s/it]

Iter 100: Loss=0.2993, L1=0.2238, Vol=0.000349 | Gaussians=1,357,690, Opacity=0.161, Scale(mean=0.0996, max=1.1516)

[Iter 100] Evaluating...
Test loss: 0.1758
Saved: /workspace/Home_Reconstruction/outputs/test_iter_100.png



Loss: 0.2188 | L1: 0.1667:   4%|▍         | 199/5000 [34:45<13:48:26, 10.35s/it]

Iter 200: Loss=0.2188, L1=0.1667, Vol=0.000085 | Gaussians=1,357,690, Opacity=0.141, Scale(mean=0.0719, max=0.9762)

[Iter 200] Evaluating...
Test loss: 0.1784
Saved: /workspace/Home_Reconstruction/outputs/test_iter_200.png



Loss: 0.2295 | L1: 0.1859:   5%|▍         | 249/5000 [43:24<13:48:42, 10.47s/it]

✓ Optimizer created with 4 parameter groups


Loss: 0.2295 | L1: 0.1859:   5%|▌         | 250/5000 [43:47<23:15:08, 17.62s/it]

  Pruned 29808 anchors (459675 Gaussians)


Loss: 0.2829 | L1: 0.2348:   6%|▌         | 299/5000 [51:16<11:27:53,  8.78s/it]

Iter 300: Loss=0.2829, L1=0.2348, Vol=0.000348 | Gaussians=1,059,610, Opacity=0.152, Scale(mean=0.1041, max=1.0526)

[Iter 300] Evaluating...
Test loss: 0.1708
Saved: /workspace/Home_Reconstruction/outputs/test_iter_300.png



Loss: 0.2894 | L1: 0.2079:   8%|▊         | 399/5000 [1:06:09<11:11:09,  8.75s/it]

Iter 400: Loss=0.2894, L1=0.2079, Vol=0.000283 | Gaussians=1,059,610, Opacity=0.153, Scale(mean=0.0990, max=2.1659)

[Iter 400] Evaluating...
Test loss: 0.1696
Saved: /workspace/Home_Reconstruction/outputs/test_iter_400.png



Loss: 0.2229 | L1: 0.1622:  10%|▉         | 499/5000 [1:21:08<11:28:48,  9.18s/it]

✓ Optimizer created with 4 parameter groups
  Pruned 41998 anchors (625488 Gaussians)


Loss: 0.2229 | L1: 0.1622:  10%|▉         | 499/5000 [1:21:34<11:28:48,  9.18s/it]

Iter 500: Loss=0.2229, L1=0.1622, Vol=0.000426 | Gaussians=639,630, Opacity=0.145, Scale(mean=0.0732, max=0.2501)

[Iter 500] Evaluating...


Loss: 0.2229 | L1: 0.1622:  10%|█         | 500/5000 [1:21:35<21:20:39, 17.08s/it]

Test loss: 0.2040
Saved: /workspace/Home_Reconstruction/outputs/test_iter_500.png



Loss: 0.3507 | L1: 0.2612:  11%|█         | 548/5000 [1:26:25<7:35:19,  6.14s/it] 

In [1]:
"""
COMPREHENSIVE SPEED DIAGNOSTIC - Run this in Jupyter

Copy-paste this entire cell and run it to find the real bottleneck
"""

import torch
import time
import numpy as np
import gsplat

print("="*70)
print("SPEED DIAGNOSTIC FOR OBJECTGS")
print("="*70)
print(f"GPU: {torch.cuda.get_device_name(0)}")
print(f"PyTorch: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"gsplat version: {gsplat.__version__ if hasattr(gsplat, '__version__') else 'unknown'}")
print("="*70)

# ============================================================
# TEST 1: Pure gsplat rendering speed
# ============================================================
print("\n[TEST 1] Pure gsplat rendering (81K Gaussians, 720x960)")
print("-"*70)

device = 'cuda'
num_gaussians = 81620
width, height = 720, 960

# Create dummy data
means = torch.randn(num_gaussians, 3, device=device) * 2
quats = torch.randn(num_gaussians, 4, device=device)
quats = quats / quats.norm(dim=-1, keepdim=True)
scales = torch.rand(num_gaussians, 3, device=device) * 0.05
opacities = torch.rand(num_gaussians, device=device) * 0.5
colors = torch.rand(num_gaussians, 3, device=device)

viewmat = torch.eye(4, device=device).unsqueeze(0)
K = torch.tensor([[1000, 0, width/2], [0, 1000, height/2], [0, 0, 1]], 
                 device=device, dtype=torch.float32).unsqueeze(0)

# Warmup
for _ in range(3):
    renders, _, _ = gsplat.rasterization(
        means=means, quats=quats, scales=scales,
        opacities=opacities, colors=colors,
        viewmats=viewmat, Ks=K,
        width=width, height=height, packed=False
    )
torch.cuda.synchronize()

# Benchmark
times = []
for _ in range(10):
    torch.cuda.synchronize()
    start = time.time()
    renders, _, _ = gsplat.rasterization(
        means=means, quats=quats, scales=scales,
        opacities=opacities, colors=colors,
        viewmats=viewmat, Ks=K,
        width=width, height=height, packed=False
    )
    torch.cuda.synchronize()
    times.append((time.time() - start) * 1000)

gsplat_time = np.mean(times)
print(f"Average render time: {gsplat_time:.1f}ms (±{np.std(times):.1f}ms)")
print(f"Expected: <150ms | Status: {'✓ GOOD' if gsplat_time < 150 else '✗ SLOW!'}")

# ============================================================
# TEST 2: Parameter fetching from model
# ============================================================
print("\n[TEST 2] Parameter fetching (get_parameters_as_tensors)")
print("-"*70)

# Warmup
for _ in range(3):
    params = model.get_parameters_as_tensors()

# Benchmark
times = []
for _ in range(10):
    start = time.time()
    params = model.get_parameters_as_tensors()
    times.append((time.time() - start) * 1000)

param_time = np.mean(times)
print(f"Average fetch time: {param_time:.1f}ms (±{np.std(times):.1f}ms)")
print(f"Expected: <100ms | Status: {'✓ GOOD' if param_time < 100 else '✗ SLOW!'}")

# ============================================================
# TEST 3: Actual rendering with your model
# ============================================================
print("\n[TEST 3] Model rendering (your actual render function)")
print("-"*70)

camera = scene.train_cameras[0]

# Warmup
for _ in range(3):
    rendered, info = trainer.render(camera)
torch.cuda.synchronize()

# Benchmark
times = []
for _ in range(10):
    torch.cuda.synchronize()
    start = time.time()
    rendered, info = trainer.render(camera)
    torch.cuda.synchronize()
    times.append((time.time() - start) * 1000)

render_time = np.mean(times)
print(f"Average render time: {render_time:.1f}ms (±{np.std(times):.1f}ms)")
print(f"Expected: <300ms | Status: {'✓ GOOD' if render_time < 300 else '✗ SLOW!'}")

# ============================================================
# TEST 4: SSIM computation
# ============================================================
print("\n[TEST 4] SSIM loss computation")
print("-"*70)

from pytorch_msssim import ssim as ssim_func

rendered = trainer.render(camera)[0]
gt_image = camera._gt_image_gpu

# Warmup
for _ in range(3):
    _ = ssim_func(rendered.unsqueeze(0), gt_image.unsqueeze(0), 
                  data_range=1.0, size_average=True)
torch.cuda.synchronize()

# Benchmark
times = []
for _ in range(10):
    torch.cuda.synchronize()
    start = time.time()
    ssim_val = ssim_func(rendered.unsqueeze(0), gt_image.unsqueeze(0), 
                        data_range=1.0, size_average=True)
    torch.cuda.synchronize()
    times.append((time.time() - start) * 1000)

ssim_time = np.mean(times)
print(f"Average SSIM time: {ssim_time:.1f}ms (±{np.std(times):.1f}ms)")
print(f"Expected: <200ms | Status: {'✓ GOOD' if ssim_time < 200 else '✗ SLOW!'}")

# ============================================================
# TEST 5: Backward pass
# ============================================================
print("\n[TEST 5] Backward pass")
print("-"*70)

import torch.nn.functional as F

# Run forward pass
params = model.get_parameters_as_tensors()
rendered, _ = trainer.render(camera)
loss = F.l1_loss(rendered, camera._gt_image_gpu)

# Clear any existing gradients
for opt in trainer.optimizers.values():
    opt.zero_grad()

# Benchmark backward
torch.cuda.synchronize()
start = time.time()
loss.backward()
torch.cuda.synchronize()
backward_time = (time.time() - start) * 1000

print(f"Backward pass time: {backward_time:.1f}ms")
print(f"Expected: <400ms | Status: {'✓ GOOD' if backward_time < 400 else '✗ SLOW!'}")

# ============================================================
# TEST 6: Optimizer step
# ============================================================
print("\n[TEST 6] Optimizer step")
print("-"*70)

torch.cuda.synchronize()
start = time.time()
for opt in trainer.optimizers.values():
    opt.step()
torch.cuda.synchronize()
opt_time = (time.time() - start) * 1000

print(f"Optimizer step time: {opt_time:.1f}ms")
print(f"Expected: <150ms | Status: {'✓ GOOD' if opt_time < 150 else '✗ SLOW!'}")

# ============================================================
# TEST 7: Full training iteration
# ============================================================
print("\n[TEST 7] Complete training iteration")
print("-"*70)

times = []
for _ in range(5):
    torch.cuda.synchronize()
    start = time.time()
    losses = trainer.train_step()
    torch.cuda.synchronize()
    times.append((time.time() - start) * 1000)

full_iter_time = np.mean(times)
print(f"Average full iteration: {full_iter_time:.1f}ms (±{np.std(times):.1f}ms)")
print(f"Expected: <800ms | Status: {'✓ GOOD' if full_iter_time < 800 else '✗ SLOW!'}")

# ==

SPEED DIAGNOSTIC FOR OBJECTGS
GPU: NVIDIA GeForce RTX 4090
PyTorch: 2.4.1+cu124
CUDA Available: True
gsplat version: 1.5.3

[TEST 1] Pure gsplat rendering (81K Gaussians, 720x960)
----------------------------------------------------------------------


If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].


Average render time: 0.3ms (±0.0ms)
Expected: <150ms | Status: ✓ GOOD

[TEST 2] Parameter fetching (get_parameters_as_tensors)
----------------------------------------------------------------------


NameError: name 'model' is not defined

In [13]:
import importlib
import quick_diagnostic

# Reload the MODULE
importlib.reload(quick_diagnostic)

# Call the function with new name
quick_diagnostic.quick_diagnostic_test(scene, model)

# Check the INPUT point cloud colors
import numpy as np

pc_colors = scene.colors
print(f"Point cloud color mean: {pc_colors.mean(axis=0)}")
print(f"Point cloud color std: {pc_colors.std()}")
print(f"Point cloud color range: [{pc_colors.min():.3f}, {pc_colors.max():.3f}]")

# Show a sample
print(f"\nFirst 5 point colors:")
print(pc_colors[:5])


QUICK PRE-TRAINING DIAGNOSTIC (ENHANCED)

1. COLOR INITIALIZATION
------------------------------------------------------------
   Color std: 0.1800
   Color mean: [0.519, 0.476, 0.424]
   PASS: Colors have variation

2. OPACITY INITIALIZATION
------------------------------------------------------------
   Opacity mean: 0.246
   Visible (>0.5): 27.1%
   FAIL: Most Gaussians are transparent

3. COORDINATE SYSTEM ANALYSIS (DETAILED)

POINT CLOUD:
  Center: [0.95, 0.15, -0.98]
  Min:    [-0.75, -1.16, -3.11]
  Max:    [2.32, 1.78, 1.52]
  Extent: [3.07, 2.94, 4.63]

CAMERA (ORIGINAL):
  Position: [-0.71, 0.15, 0.86]
  Right:    [0.70, 0.50, -0.51]
  Up:       [0.02, 0.70, 0.72]
  Forward:  [0.71, -0.51, 0.48]
  Distance to PC center: 2.47

HAS get_opencv_viewmat(): True

----------------------------------------------------------------------
METHOD 1: Original (OpenGL style)
----------------------------------------------------------------------
w2c matrix:
[[ 6.98995233e-01  2.04177722e-02

In [14]:
# Run this RIGHT NOW on your current model
import importlib
import quick_diagnostic

# Reload the MODULE
importlib.reload(quick_diagnostic)
from quick_diagnostic import diagnose_training_degradation

print("=== CURRENT STATE (Iteration 400) ===")
result_400 = diagnose_training_degradation(model, "diagnostic_400.png")

# Compare to expected initialization values
print("\n" + "="*70)
print("COMPARISON TO EXPECTED INITIALIZATION")
print("="*70)

# Based on your fixed model initialization:
expected_opacity_mean = 0.90  # You initialized to 0.9
expected_opacity_std = 0.05   # Should have some variation
expected_color_std = 0.19     # From your point cloud
expected_scale_mean = 0.05    # exp(-3.0)

print(f"\nOpacity mean:  {result_400['opacity_mean']:.4f}  (expected ~{expected_opacity_mean:.2f})")
print(f"Opacity std:   {result_400['opacity_std']:.4f}  (expected >{expected_opacity_std:.2f})")
print(f"Color std:     {result_400['color_std']:.4f}  (expected ~{expected_color_std:.2f})")
print(f"Scale mean:    {result_400['scale_mean']:.4f}  (expected ~{expected_scale_mean:.2f})")

print("\n" + "="*70)
print("DEGRADATION ANALYSIS")
print("="*70)

if result_400['opacity_std'] < 0.05:
    print("❌ CONFIRMED: Opacity uniformity is the problem!")
    print("   Opacities converged from varied to uniform")
    print("   → This causes the blob at iteration 400")
elif result_400['color_std'] < 0.10:
    print("❌ CONFIRMED: Color convergence is the problem!")
    print("   Colors converged to mean beige")
else:
    print("⚠️  Something else is wrong - check the diagnostic plots")

print("="*70)

=== CURRENT STATE (Iteration 400) ===

TRAINING DEGRADATION DIAGNOSTIC

1. OPACITY DISTRIBUTION
----------------------------------------------------------------------
   Mean: 0.2461
   Std:  0.3017
   Min:  0.0003
   Max:  0.9055

   Distribution:
   [0.0-0.1]:  57.2% ████████████████████████████
   [0.1-0.2]:   7.5% ███
   [0.2-0.3]:   2.9% █
   [0.3-0.4]:   1.6% 
   [0.4-0.5]:   3.6% █
   [0.5-0.6]:   7.3% ███
   [0.6-0.7]:   5.1% ██
   [0.7-0.8]:   6.9% ███
   [0.8-0.9]:   7.8% ███
   [0.9-1.0]:   0.0% 

   DIAGNOSIS:
   ✓ OK: Opacities have variation (std=0.3017)

2. COLOR VARIATION
----------------------------------------------------------------------
   Color std: 0.1800
   Color mean: [0.519, 0.476, 0.424]
   Per-channel std: R=0.1747, G=0.1750, B=0.1774

   DIAGNOSIS:
   ✓ OK: Colors still have variation (std=0.1800)

3. SCALE DISTRIBUTION
----------------------------------------------------------------------
   Scale mean: 0.0821
   Scale std:  0.0359
   Scale max:  0.6755

 

In [22]:
import torch
import gsplat

# Get a test camera
camera = scene.test_cameras[0]

# Get model params
params = model.get_parameters_as_tensors()
means = params['pos']
opacities = torch.sigmoid(params['opacity_raw']).squeeze(-1)
scales = torch.exp(params['scale_raw'])
quats = params['rotation']
colors = params['color']

# Test 4 different transforms
transforms_to_test = {
    "w2c.T (original with .T)": camera.world_view_transform.T,
    "w2c.T (no .T)": camera.world_view_transform,
    "c2w": camera.c2w,
    "c2w.T": camera.c2w.T,
}

K = torch.tensor([
    [camera.fx, 0, camera.cx],
    [0, camera.fy, camera.cy],
    [0, 0, 1]
], device='cuda', dtype=torch.float32)

print("Testing different transforms:")
print("="*70)

for name, viewmat in transforms_to_test.items():
    viewmat = viewmat.to('cuda')
    
    try:
        renders, alphas, info = gsplat.rasterization(
            means=means,
            quats=quats,
            scales=scales,
            opacities=opacities,
            colors=colors,
            viewmats=viewmat.unsqueeze(0),
            Ks=K.unsqueeze(0),
            width=camera.image_width,
            height=camera.image_height,
            packed=False
        )
        
        render_sum = renders.sum().item()
        render_mean = renders.mean().item()
        render_max = renders.max().item()
        
        # Check how many Gaussians are in front
        means_cam = torch.matmul(means, viewmat[:3, :3].T) + viewmat[3, :3]
        num_front = (means_cam[:, 2] > 0).sum().item()
        percent_front = 100 * num_front / len(means)
        
        print(f"\n{name:30s}:")
        print(f"  Render: sum={render_sum:>10.2f}, mean={render_mean:.6f}, max={render_max:.6f}")
        print(f"  Gaussians in front: {num_front:>6d}/{len(means)} ({percent_front:>5.1f}%)")
        
        if render_sum > 100.0:  # Significant render output
            print(f"  ✓✓✓ THIS ONE WORKS! ✓✓✓")
            
    except Exception as e:
        print(f"\n{name:30s}: ERROR - {str(e)[:50]}")

print("="*70)

Testing different transforms:

w2c.T (original with .T)      :
  Render: sum=      0.00, mean=0.000000, max=0.000000
  Gaussians in front:   8219/81620 ( 10.1%)

w2c.T (no .T)                 :
  Render: sum=1271199.50, mean=0.613040, max=0.782178
  Gaussians in front:  16905/81620 ( 20.7%)
  ✓✓✓ THIS ONE WORKS! ✓✓✓

c2w                           :
  Render: sum=1233289.38, mean=0.594758, max=0.934704
  Gaussians in front:  41338/81620 ( 50.6%)
  ✓✓✓ THIS ONE WORKS! ✓✓✓

c2w.T                         :
  Render: sum=  18039.07, mean=0.008699, max=0.429268
  Gaussians in front:  30723/81620 ( 37.6%)
  ✓✓✓ THIS ONE WORKS! ✓✓✓


In [8]:
import time
import torch
import torch.nn.functional as F

# Check if CUDA is available
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

# Move model to GPU explicitly
model = model.to('cuda')
trainer.model = trainer.model.to('cuda')
trainer.device = torch.device('cuda')

# Verify it worked
print(f"\nModel device: {model.device}")
print(f"First anchor device: {model.anchors[0].position.device}")
print(f"MLP device: {next(model.attribute_mlp.parameters()).device}")

# Time one full iteration
t0 = time.time()

# Get params
t_start = time.time()
params = model.get_parameters_as_tensors()
t_params = time.time() - t_start
print(f"✓ Get parameters: {t_params:.3f}s")
print(f"  - Params on device: {params['pos'].device}")
print(f"  - Num Gaussians: {params['num_gaussians']:,}")

# Get a camera
camera = scene.train_cameras[0]
gt_image = camera.original_image.to(trainer.device)
t_gt = time.time() - t_start - t_params
print(f"✓ Load GT image: {t_gt:.3f}s")

# Render
t_start = time.time()
rendered = trainer.render(camera)
t_render = time.time() - t_start
print(f"✓ Render: {t_render:.3f}s")

# Loss and backward
t_start = time.time()
loss = F.l1_loss(rendered, gt_image)
loss.backward()
t_backward = time.time() - t_start
print(f"✓ Backward: {t_backward:.3f}s")

t_total = time.time() - t0
print(f"\n{'='*40}")
print(f"TOTAL TIME: {t_total:.3f}s")
print(f"{'='*40}")

CUDA available: True
CUDA device: NVIDIA GeForce RTX 4090

Model device: cuda
First anchor device: cuda:0
MLP device: cuda:0
✓ Get parameters: 0.106s
  - Params on device: cuda:0
  - Num Gaussians: 204,240
✓ Load GT image: 0.001s
✓ Render: 0.110s
✓ Backward: 0.512s

TOTAL TIME: 0.730s


In [9]:
import torch
import numpy as np

print("="*70)
print("DIAGNOSTICS")
print("="*70)

# 1. Check loss trend
losses = trainer.losses
print(f"\n[1] Loss Trend:")
print(f"  First 100 avg:  {np.mean(losses[:100]):.4f}")
print(f"  Last 100 avg:   {np.mean(losses[-100:]):.4f}")
print(f"  ✗ Loss is {'INCREASING' if np.mean(losses[-100:]) > np.mean(losses[:100]) else 'DECREASING'}")

# 2. Check for NaN/Inf
params = model.get_parameters_as_tensors()
print(f"\n[2] NaN/Inf Check:")
for name, tensor in params.items():
    if isinstance(tensor, torch.Tensor):
        has_nan = torch.isnan(tensor).any().item()
        has_inf = torch.isinf(tensor).any().item()
        print(f"  {name:20s}: NaN={has_nan}, Inf={has_inf}")

# 3. Check Gaussian health
print(f"\n[3] Gaussian Health:")
opacities = torch.sigmoid(params['opacity_raw']).squeeze()
scales = torch.exp(params['scale_raw'])
print(f"  Opacity mean:  {opacities.mean():.4f} (should be 0.3-0.7)")
print(f"  Opacity std:   {opacities.std():.4f}")
print(f"  Num visible (>0.1): {(opacities > 0.1).sum()}/{len(opacities)}")
print(f"  Scale mean:    {scales.mean():.4f} (should be 0.01-0.1)")
print(f"  Scale max:     {scales.max():.4f}")
print(f"  Scale min:     {scales.min():.4f}")

# 4. Check learning rate
print(f"\n[4] Learning Rate:")
current_lr = trainer.optimizer.param_groups[0]['lr']
print(f"  Current LR: {current_lr:.6f}")
print(f"  Initial LR: 0.0001")
print(f"  Decayed by: {current_lr/0.0001:.2%}")

# 5. Check rendered output
rendered = trainer.render(scene.test_cameras[0])
print(f"\n[5] Rendered Image:")
print(f"  Min:  {rendered.min():.4f}")
print(f"  Max:  {rendered.max():.4f}")
print(f"  Mean: {rendered.mean():.4f}")
print(f"  Std:  {rendered.std():.4f}")
print(f"  All zeros? {(rendered.abs() < 1e-6).all()}")

# 6. Check ground truth
gt = scene.test_cameras[0].original_image
print(f"\n[6] Ground Truth:")
print(f"  Min:  {gt.min():.4f}")
print(f"  Max:  {gt.max():.4f}")
print(f"  Mean: {gt.mean():.4f}")

print("="*70)

DIAGNOSTICS

[1] Loss Trend:
  First 100 avg:  0.2246
  Last 100 avg:   0.2246
  ✗ Loss is DECREASING

[2] NaN/Inf Check:
  pos                 : NaN=False, Inf=False
  opacity_raw         : NaN=False, Inf=False
  scale_raw           : NaN=False, Inf=False
  rotation            : NaN=False, Inf=False
  color               : NaN=False, Inf=False
  object_ids          : NaN=False, Inf=False
  semantics           : NaN=False, Inf=False

[3] Gaussian Health:
  Opacity mean:  0.4966 (should be 0.3-0.7)
  Opacity std:   0.0245
  Num visible (>0.1): 204240/204240
  Scale mean:    0.0498 (should be 0.01-0.1)
  Scale max:     0.0498
  Scale min:     0.0498

[4] Learning Rate:
  Current LR: 0.000100
  Initial LR: 0.0001
  Decayed by: 100.00%

[5] Rendered Image:
  Min:  0.0000
  Max:  0.0000
  Mean: 0.0000
  Std:  0.0000
  All zeros? True

[6] Ground Truth:
  Min:  0.0000
  Max:  1.0000
  Mean: 0.5280
