In [1]:
!nvidia-smi

Fri Apr 21 15:25:33 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   77C    P0    70W /  70W |  12337MiB / 15360MiB |     99%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla T4            Off  | 00000000:00:05.0 Off |                    0 |
| N/A   67C    P8    18W /  70W |      2MiB / 15360MiB |      0%      Default |
|       

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import torch
from pathlib import Path
Path.ls = lambda x: list(x.iterdir())

try:
    import lovely_tensors as lt
except:
    ! pip install --upgrade lovely-tensors
    import lovely_tensors as lt
    
lt.monkey_patch()

In [4]:
# !rm -rf /app/notebooks/siren_sdf/checkpoints/*

In [5]:
# Device selection: prefer the second GPU (index 1) when it exists, fall back to
# the first GPU on single-GPU machines, and to the CPU when CUDA is unavailable.
# Hard-coding "cuda:1" fails on hosts that expose only one CUDA device.
if torch.cuda.is_available():
    _gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    _device = torch.device(f"cuda:{_gpu_index}")
else:
    _device = torch.device("cpu")

# Central experiment configuration read by every cell below.
config = {
    "device": _device,
    "wandb_project": "siren_sdf",
    "experiment_name": "uber_sdf_normal_init_baseline",
    "logging": True,                          # enables all wandb calls
    "point_cloud_path": "data/interior_room.xyz",
    "batch_size": 25_000,                     # passed to the dataset as `on_surface_points`
    "lr": 1e-4,
    "clip_grad": True,                        # True -> max_norm 1.0; a number -> that max_norm
    "checkpoint_dir": Path('checkpoints/'),
    "save_ckpt_freq": 5_000,                  # in training iterations
    "vis_freq": 2_500,                        # in training iterations
    "epochs": 100,
    "net_type": 'sine' #'sine' #'split_act'
    # "load_from_checkpoint_path": Path('/app/notebooks/siren_sdf/top_checkpoints/siren_sdf_siren_sdf_baseline_2040_836809.pth') 
    # "load_from_checkpoint_path": Path('/app/notebooks/siren_sdf/top_checkpoints/siren_sdf_siren_sdf_baseline_99_41000.pth') 
}

# Make sure the checkpoint directory exists before training starts writing to it.
config["checkpoint_dir"].mkdir(exist_ok=True, parents=True)

# Train SDF

In [6]:
import src.dataio as dataio
from torch.utils.data import DataLoader
from src.utils import get_sdf_summary
from src.sdf_meshing import create_mesh

In [7]:
# The dataset receives config['batch_size'] as its `on_surface_points` argument,
# while the DataLoader itself is configured with a batch size of 1.
loader_options = dict(shuffle=True, batch_size=1, pin_memory=True, num_workers=0)
sdf_dataset = dataio.PointCloud(
    config['point_cloud_path'],
    on_surface_points=config['batch_size'],
)
dataloader = DataLoader(sdf_dataset, **loader_options)

Loading point cloud
Finished loading point cloud


In [8]:
from src.nn_modules import SingleBVPNet
    
# Build the network for 3-D input coordinates, then move it to the configured device.
model = SingleBVPNet(in_features=3, type=config['net_type'])
model = model.to(config['device'])
# if config.get('load_from_checkpoint_path') is not None and Path(config['load_from_checkpoint_path']).exists():
#     model.load_state_dict(torch.load(config['load_from_checkpoint_path']))

In [9]:
from src.loss_functions import sdf

# Adam over all model parameters, with the learning rate taken from the config.
optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])

In [10]:
import wandb
# Open a Weights & Biases run only when logging is switched on in the config;
# `run` is left undefined otherwise.
if config['logging']:
    run = wandb.init(
        config=config,
        name=config["experiment_name"],
        project=config["wandb_project"],
    )

[34m[1mwandb[0m: Currently logged in as: [33mnerlfield[0m. Use [1m`wandb login --relogin`[0m to force relogin
2023-04-21 15:26:43.503896: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-04-21 15:26:43.503946: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [11]:
from PIL import Image, ImageFile
# Ask PIL to load truncated image files rather than raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
from tqdm.auto import tqdm

# Global optimisation-step counter. It drives the visualisation and checkpoint
# schedules and is embedded in the checkpoint file names.
iteration = 0
for epoch in (pbar := tqdm(range(int(config['epochs'])))):
    for model_input, gt in dataloader:
        # Move every tensor of the batch to the training device.
        model_input = {key: value.to(config['device']) for key, value in model_input.items()}
        gt = {key: value.to(config['device']) for key, value in gt.items()}

        model_output = model(model_input)
        losses = sdf(model_output, gt)

        # Reduce each loss term to a scalar once; the total loss is their sum.
        loss_terms = {loss_name: loss.mean() for loss_name, loss in losses.items()}
        train_loss = sum(loss_terms.values(), 0.)
        train_loss_value = train_loss.item()

        # Everything logged for this iteration goes into a single wandb.log call
        # (issued after the optimizer step) so that scalars and images share one
        # wandb step. Logging them in separate calls advances the step twice on
        # visualisation iterations and misaligns the panels.
        log_payload = {}
        if config['logging']:
            log_payload.update({loss_name: term.item() for loss_name, term in loss_terms.items()})
            log_payload["train_loss"] = train_loss_value

        optimizer.zero_grad()
        train_loss.backward()

        # clip_grad=True means "clip to norm 1.0"; any other truthy value is
        # used directly as the maximum gradient norm.
        if config['clip_grad']:
            max_norm = 1. if isinstance(config['clip_grad'], bool) else config['clip_grad']
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm)

        optimizer.step()

        if config['logging']:
            if iteration % config['vis_freq'] == 0:
                # Summary images are produced after the parameter update, as before.
                sdf_summary = get_sdf_summary(model, model_input, gt, model_output)
                log_payload.update({
                    "xy_sdf_slice": wandb.Image(sdf_summary['xy_sdf_slice']),
                    "xz_sdf_slice": wandb.Image(sdf_summary['xz_sdf_slice']),
                    "yz_sdf_slice": wandb.Image(sdf_summary['yz_sdf_slice'])
                })
            wandb.log(log_payload)

        # Periodic checkpoint; note that iteration 0 also matches the schedule.
        if iteration % config['save_ckpt_freq'] == 0:
            torch.save(model.state_dict(), config['checkpoint_dir'] / f"{config['wandb_project']}_{config['experiment_name']}_{epoch}_{iteration}.pth")

        pbar.set_description(f' => Loss: {train_loss_value:.3f}')

        iteration += 1

  0%|          | 0/100 [00:00<?, ?it/s]

# Save to mesh

In [None]:
# NOTE(review): a bare `break` outside any loop is a SyntaxError, so this cell
# always fails. Presumably it is a deliberate stopper so that "Run All" halts
# before the meshing cells below — confirm, and run those cells manually.
break

In [None]:
from src.nn_modules import SingleBVPNet

class SDFDecoder(torch.nn.Module):
    """Wrap a `SingleBVPNet` so it can be called directly on a coordinate tensor.

    The wrapped network takes a dict with a ``'coords'`` entry and returns a dict;
    this module builds that dict from a plain tensor and returns the
    ``'model_out'`` entry.

    Args:
        config: Mapping providing ``'net_type'`` (forwarded as the network's
            ``type``) and ``'device'`` (where the network is placed).
        ckpt_path: Optional path to a state-dict file written with
            ``torch.save(model.state_dict(), ...)``. When ``None`` the network
            keeps its freshly initialised weights.
    """

    def __init__(self, config, ckpt_path=None):
        super().__init__()
        # Define the model.
        # NOTE(review): the training cell builds SingleBVPNet without
        # `final_layer_factor`; confirm that 1 matches what the checkpoint was
        # trained with.
        self.model = SingleBVPNet(type=config['net_type'], final_layer_factor=1, in_features=3)
        if ckpt_path is not None:
            # Load onto the CPU first so a checkpoint saved from a GPU that is
            # not present on this machine can still be restored; the model is
            # moved to the requested device right after.
            state_dict = torch.load(ckpt_path, map_location='cpu')
            self.model.load_state_dict(state_dict)
        self.model = self.model.to(config['device'])

    def forward(self, coords):
        """Evaluate the wrapped network on `coords` and return its ``'model_out'`` entry."""
        model_in = {'coords': coords}
        return self.model(model_in)['model_out']

In [None]:
# Restore the most recently written checkpoint. Directory listing order is
# arbitrary, so taking the last listed entry may pick any file; select by
# modification time instead and fail clearly when nothing has been saved yet.
checkpoint_paths = sorted(
    config['checkpoint_dir'].glob('*.pth'),
    key=lambda path: path.stat().st_mtime,
)
if not checkpoint_paths:
    raise FileNotFoundError(f"No .pth checkpoints found in {config['checkpoint_dir']}")
decoder = SDFDecoder(config, checkpoint_paths[-1])

In [None]:
# Run create_mesh on the restored decoder; the experiment name is passed as the
# second positional argument. N=200 is presumably the sampling resolution —
# TODO confirm against src.sdf_meshing.
create_mesh(
    decoder,
    config['experiment_name'],
    N=200,
    device=config['device'],
)