In [1]:
# Load IPython's autoreload extension and select mode 2, which re-imports
# modules before each cell is executed, so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [30]:
import torch
import numpy as np

import sys
sys.path.append('../')
from rkhs_splatting.utils.data_utils import read_all
from rkhs_splatting.utils.camera_utils import parse_camera
from rkhs_splatting.rkhs_model import RKHSModel
from rkhs_splatting.rkhs_render import RKHSRenderer
from rkhs_splatting.rkhs_model_global_scale import RKHSModelGlobalScale
from rkhs_splatting.rkhs_render_global_scale import RKHSRendererGlobalScale
from rkhs_splatting.utils.point_utils import get_point_clouds

import datetime
import pathlib
from icecream import ic
from spatialmath import SE3
import plotly.graph_objects as go

from plotly_utils import *
from test_utils import *
from few_points_trainer import GSSTrainer


# Synthetic scene definition. Per the author's "pos, rgba" note each row is
# read here as one point: x, y, z followed by r, g, b, a
# (presumably colour in 0-255 and alpha in 0-1 -- confirm against create_pc).

# Target point that the reference frame is rendered from.
train_points = np.array([
    [0, 0, 0, 255, 0, 0, 1],
    # [1, 0, 0, 255, 0, 0, 1],
])
train_pc = create_pc(train_points)

# Starting guess for the trainable map: the same point shifted by 0.1 along x.
init_points = np.array([
    [0.1, 0, 0, 255, 0, 0, 1]
])
init_map = create_pc(init_points)

# Camera pose: a pure translation of -10 along z.
# (The author left SE3.Rx(-0.5) as an alternative pose.)
c2w = SE3.Tz(-10)
# NOTE(review): arguments look like (H, W, fx, fy, pose) -- confirm against create_camera.
camera_data = create_camera(256, 256, 355, 355, c2w)

# Both models share every constructor option except `trainable`, and both
# are initialised with the same scaling.
INIT_SCALING = 0.5
shared_model_options = dict(sh_degree=4, debug=False)

# Trainable map model, seeded from the perturbed point cloud.
map_model = RKHSModelGlobalScale(trainable=True, **shared_model_options)
map_model.create_from_pcd(init_map, initial_scaling=INIT_SCALING)

# Frozen model holding the target point cloud, plus the renderer used for it.
input_model = RKHSModelGlobalScale(trainable=False, **shared_model_options)
renderer = RKHSRendererGlobalScale(white_bkgd=True)
input_model.create_from_pcd(train_pc, initial_scaling=INIT_SCALING)

# Render the target model from the first camera entry to obtain the reference frame.
render_args = (
    camera_data[0],
    input_model.get_xyz,
    input_model.get_opacity,
    input_model.get_scaling,
    input_model.get_features,
)
input_frame = renderer(*render_args)

# folder = '../data/B075X65R3X'
# device = 'cuda'
# data = read_all(folder, resize_factor=0.5)
# data = {k: v.to(device) for k, v in data.items()}
# points = get_point_clouds(data['camera'], data['depth'], data['alpha'], data['rgb'])
# raw_points = points.generate_random_noise(2**14)
# map_model.create_from_pcd(raw_points, initial_scaling=0.005)

# 3D overview of the scene: the camera plus both point clouds in one figure.
fig = go.Figure()
plot_camera(fig, c2w.R, c2w.t, 3, 'camera0', True)
# Add the target cloud first, then the initial map, each under its own trace name.
for cloud, trace_name in ((train_pc, 'render_pc'), (init_map, 'init_map')):
    plot_pc(fig, cloud, trace_name)
fig.show()
# Convert the rendered reference frame to NumPy arrays with a leading batch axis.
# Each tensor is detached from autograd and moved to the CPU before conversion.
train_rgb = input_frame['render'].squeeze().unsqueeze(0).detach().cpu().numpy()
# NOTE(review): depth is not squeezed before the batch axis is added, unlike
# rgb/alpha -- confirm this matches the renderer's depth shape.
train_depth = input_frame['depth'].unsqueeze(0).detach().cpu().numpy()
train_alpha = input_frame['alpha'].squeeze().unsqueeze(0).detach().cpu().numpy()
show_depth(train_depth[0])

# train_alpha is already a NumPy array here, and ndarrays have no `.unsqueeze`
# (that is a torch.Tensor method), so the trailing channel axis is added with
# np.newaxis instead.
# Assumes train_rgb has a trailing channel axis and train_alpha does not -- TODO confirm.
rgba = np.concatenate([train_rgb, train_alpha[..., np.newaxis] * 255], axis=-1)
# show_image(rgba[0])

# NOTE(review): `data` is not defined in this cell; it comes from the
# "Original dataset" cell (read_all) further down, so that cell must have been
# executed before this line on a fresh kernel.
show_depth(data['alpha'][0].cpu().numpy())

In [31]:

# Output location for this run. A timestamped name is computed first, but it
# is immediately replaced by the fixed name 'test', so results always land in
# ../result/test.
folder_name = datetime.datetime.now().strftime("%Y-%m-%d__%H-%M-%S")
folder_name = 'test'
results_folder = pathlib.Path('../result/') / folder_name
results_folder.mkdir(parents=True, exist_ok=True)

# Trainer settings collected in one place for easy tweaking.
trainer_options = dict(
    model=map_model,
    input_model=input_model,
    renderer=renderer,
    data=data,
    train_batch_size=1,
    train_num_steps=20,
    i_image=10,
    train_lr=1e-1,  # previously tried: 3e-3
    amp=True,
    fp16=False,
    results_folder=results_folder,
)
trainer = GSSTrainer(**trainer_options)

# Run one evaluation step before training, then the full training loop.
trainer.on_evaluate_step()
trainer.train()

  0%|          | 0/20 [00:00<?, ?it/s]ic| total_loss: tensor(9372139., device='cuda:0', grad_fn=<SubBackward0>)
loss: 9372139.000 total: 9372139.000 l1: 0.369 ssim: -inf depth: 3.289 psnr: -7.190:   5%|▌         | 1/20 [00:00<00:04,  4.03it/s]ic| total_loss: tensor(9372139., device='cuda:0', grad_fn=<SubBackward0>)
loss: 9372139.000 total: 9372139.000 l1: 0.369 ssim: -inf depth: 3.289 psnr: -7.190:  10%|█         | 2/20 [00:00<00:04,  3.96it/s]ic| total_loss: tensor(9372139., device='cuda:0', grad_fn=<SubBackward0>)
loss: 9372139.000 total: 9372139.000 l1: 0.369 ssim: -inf depth: 3.289 psnr: -7.190:  15%|█▌        | 3/20 [00:00<00:04,  3.93it/s]ic| total_loss: tensor(9372139., device='cuda:0', grad_fn=<SubBackward0>)
loss: 9372139.000 total: 9372139.000 l1: 0.369 ssim: -inf depth: 3.289 psnr: -7.190:  20%|██        | 4/20 [00:01<00:04,  3.95it/s]ic| total_loss: tensor(9372139., device='cuda:0', grad_fn=<SubBackward0>)
loss: 9372139.000 total: 9372139.000 l1: 0.369 ssim: -inf depth: 3.2

In [12]:
# Original dataset
# Read the dataset at half resolution and keep it as a plain dict (shallow copy
# of whatever mapping read_all returns).
folder = '../data/B075X65R3X'
data = dict(read_all(folder, resize_factor=0.5).items())

# Quick look at the available entries and the camera tensor's shape.
ic(data.keys())
ic(data['camera'].shape)

# Decode only the first camera row; the slice keeps the leading batch axis.
first_camera = data['camera'][:1]
H_, W_, intrinsics_, c2w_ = parse_camera(first_camera)
ic(H_, W_, intrinsics_, c2w_)

ic| data.keys(): dict_keys(['rgb', 'camera', 'depth', 'alpha'])
ic| data['camera'].shape: torch.Size([20, 34])
ic| H_: tensor([256.])
    W_: tensor([256.])
    intrinsics_: tensor([[[355.5555,   0.0000, 128.0000,   0.0000],
                          [  0.0000, 355.5555, 128.0000,   0.0000],
                          [  0.0000,   0.0000,   1.0000,   0.0000],
                          [  0.0000,   0.0000,   0.0000,   1.0000]]])
    c2w_: tensor([[[-8.6086e-01,  3.7950e-01, -3.3896e-01,  6.7791e-01],
                   [ 5.0884e-01,  6.4205e-01, -5.7346e-01,  1.1469e+00],
                   [ 1.0934e-08, -6.6614e-01, -7.4583e-01,  1.4917e+00],
                   [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00]]])


(tensor([256.]),
 tensor([256.]),
 tensor([[[355.5555,   0.0000, 128.0000,   0.0000],
          [  0.0000, 355.5555, 128.0000,   0.0000],
          [  0.0000,   0.0000,   1.0000,   0.0000],
          [  0.0000,   0.0000,   0.0000,   1.0000]]]),
 tensor([[[-8.6086e-01,  3.7950e-01, -3.3896e-01,  6.7791e-01],
          [ 5.0884e-01,  6.4205e-01, -5.7346e-01,  1.1469e+00],
          [ 1.0934e-08, -6.6614e-01, -7.4583e-01,  1.4917e+00],
          [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00]]]))

In [None]:
import torchviz

# Run a single training step to obtain a loss that still carries its autograd graph.
loss, _ = trainer.on_train_step()
ic(loss)
# loss.backward()

# Draw the autograd graph of the loss, labelling nodes with the model's named
# parameters, and write it as a PNG under ../result/graph.
named_params = dict(trainer.model.named_parameters())
graph = torchviz.make_dot(
    loss,
    params=named_params,
    show_attrs=True,
    show_saved=True,
)
graph.render(directory='../result/graph', format='png')


ic| loss: tensor(1798642.6250, device='cuda:0', grad_fn=<SubBackward0>)


'../result/graph/Digraph.gv.png'

In [19]:

# Build a point cloud from the dataset's camera, depth, alpha and rgb entries
# (passed to get_point_clouds in exactly that order).
render_pc = get_point_clouds(*(data[key] for key in ('camera', 'depth', 'alpha', 'rgb')))
# Shape of the colour + alpha channels selected from the cloud.
render_pc.select_channels(['R', 'G', 'B', 'A']).shape

(65536, 4)

In [21]:
# Inspect the shape of the point cloud's coordinate array.
render_pc.coords.shape

(65536, 3)

In [32]:
# Inspect the map model's current xyz parameter (e.g. to compare against the
# initial point [0.1, 0, 0] after training).
map_model.get_xyz

Parameter containing:
tensor([[ 1.0000e-01, -1.0680e-27,  0.0000e+00]], device='cuda:0',
       requires_grad=True)