In [1]:
import torch.utils.data
from datasets.shapenet_data_pc import ShapeNet15kPointClouds

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
def get_dataset(dataroot, npoints, category):
    """Build the ShapeNet15k train and validation point-cloud datasets.

    Args:
        dataroot: Root directory handed to ``ShapeNet15kPointClouds`` as
            ``root_dir``.
        npoints: Sample size used for both ``tr_sample_size`` and
            ``te_sample_size``.
        category: Comma-separated category names; split on ``','`` before
            being passed as ``categories``.

    Returns:
        ``(train_dataset, val_dataset)``. The validation dataset is given
        the training dataset's ``all_points_mean`` / ``all_points_std``.
    """
    # Arguments that are identical for both splits.
    shared_kwargs = dict(
        root_dir=dataroot,
        tr_sample_size=npoints,
        te_sample_size=npoints,
        scale=1.,
        normalize_per_shape=False,
        normalize_std_per_axis=False,
    )

    train_set = ShapeNet15kPointClouds(
        categories=category.split(','),
        split='train',
        random_subsample=True,
        **shared_kwargs,
    )

    # The 'val' split reuses the statistics stored on the train split.
    val_set = ShapeNet15kPointClouds(
        categories=category.split(','),
        split='val',
        all_points_mean=train_set.all_points_mean,
        all_points_std=train_set.all_points_std,
        **shared_kwargs,
    )

    return train_set, val_set

def get_dataloader(train_dataset, test_dataset=None, batch_size=1, num_workers=1):
    """Wrap the given datasets in single-process ``DataLoader`` objects.

    Args:
        train_dataset: Dataset for the training loader (shuffled, last
            incomplete batch dropped).
        test_dataset: Optional dataset for the test loader (not shuffled,
            incomplete batch kept). When ``None`` no test loader is built.
        batch_size: Batch size for both loaders. Defaults to 1.
        num_workers: Worker processes for both loaders. Defaults to 1.

    Returns:
        ``(train_dataloader, test_dataloader, train_sampler, test_sampler)``.
        ``test_dataloader`` is ``None`` when ``test_dataset`` is ``None``.
        Both samplers are always ``None`` here; they stay in the tuple so
        the return shape is unchanged for existing callers.
    """
    # No distributed sampler is created in this notebook helper, so the
    # training loader falls back to plain shuffling.
    train_sampler = None
    test_sampler = None

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=train_sampler,
        shuffle=train_sampler is None,
        num_workers=num_workers,
        drop_last=True,
    )

    if test_dataset is not None:
        # Fix: the test loader must iterate test_dataset; it previously
        # wrapped train_dataset by mistake.
        test_dataloader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=batch_size,
            sampler=test_sampler,
            shuffle=False,
            num_workers=num_workers,
            drop_last=False,
        )
    else:
        test_dataloader = None

    return train_dataloader, test_dataloader, train_sampler, test_sampler

In [3]:

# NOTE(review): machine-specific absolute path; point this at the local
# ShapeNetCore.v2.PC15k copy before running on another machine.
dataroot = "/home/yifan/studium/3D_Completion/DiT-3D_2024AILab/datasets/data/ShapeNetCore.v2.PC15k"
# Points requested per shape (get_dataset uses it for both sample sizes).
npoints = 2048
# Comma-separated category names; get_dataset splits this string on ','.
category = "chair"

# Only the train split is inspected below; the val dataset is discarded.
train_dataset, _ = get_dataset(dataroot, npoints, category)
# No test dataset is passed, so the returned test loader is None.
dataloader, _, train_sampler, _ = get_dataloader(train_dataset, None)
# Number of batches in the training loader.
print(len(dataloader))

Total number of data:4612
Min number of points: (train)2048 (test)2048
Total number of data:662
Min number of points: (train)2048 (test)2048
4612


In [4]:
# Pull one batch from the training loader and list the fields it carries.
batch = next(iter(dataloader))
print(len(batch))
print(batch.keys())

8
dict_keys(['idx', 'train_points', 'test_points', 'mean', 'std', 'cate_idx', 'sid', 'mid'])


In [5]:
# 'idx' field of the sampled batch (presumably the dataset index of the shape; confirm in the dataset class).
batch['idx']

tensor([4524])

In [6]:
# Shape of the sampled training points for this batch.
print(batch['train_points'].shape) # (batch, num_points, xyz)

torch.Size([1, 2048, 3])


In [7]:
# Swap the last two axes to get (batch, xyz, num_points), the same layout
# as the random (1, 3, 2048) tensor used in the model forward test.
print(batch['train_points'].transpose(1,2).shape)

torch.Size([1, 3, 2048])


In [8]:
# 'cate_idx' field of the batch (presumably the category index; confirm in the dataset class).
print(batch['cate_idx'])

tensor([0])


In [10]:
import numpy as np

# Load one raw point-cloud file straight from disk, bypassing the dataset class.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
point_cloud = np.load('/home/yifan/studium/3D_Completion/DiT-3D_2024AILab/datasets/data/ShapeNetCore.v2.PC15k/02801938/test/2bcc1b8bf5ac9ddc97e30bfe57d923fb.npy')

# Shape of the array as stored on disk, before any subsampling.
print("original pcd shape:", point_cloud.shape)

original pcd shape: (15000, 3)


In [2]:
# Model forward test: build the predefined 'DiT-S/4' configuration from
# the DiT3D_models registry and display its module tree.

from models.dit3d import DiT3D_models
model = DiT3D_models['DiT-S/4']()
model

grid_size: 8


DiT(
  (voxelization): Voxelization(resolution=32, normalized eps = 0)
  (x_embedder): PatchEmbed_Voxel(
    (proj): Conv3d(3, 384, kernel_size=(4, 4, 4), stride=(4, 4, 4))
  )
  (t_embedder): TimestepEmbedder(
    (mlp): Sequential(
      (0): Linear(in_features=256, out_features=384, bias=True)
      (1): SiLU()
      (2): Linear(in_features=384, out_features=384, bias=True)
    )
  )
  (y_embedder): LabelEmbedder(
    (embedding_table): Embedding(2, 384)
  )
  (blocks): ModuleList(
    (0-11): 12 x DiTBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=False)
      (attn): Attention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=False)
      (mlp):

In [1]:
# Baseline for the adaptformer comparison: build DiT directly with the
# settings the original comment labels as DiT-S/4.
# NOTE(review): `adaptformer` is not passed here, so DiT's default applies
# (presumably disabled; confirm in models/dit3d.py).
from models.dit3d import DiT3D_models, DiT

model = DiT(depth=12, hidden_size=384, patch_size=4, num_heads=6)
model

grid_size: 8


DiT(
  (voxelization): Voxelization(resolution=32, normalized eps = 0)
  (x_embedder): PatchEmbed_Voxel(
    (proj): Conv3d(3, 384, kernel_size=(4, 4, 4), stride=(4, 4, 4))
  )
  (t_embedder): TimestepEmbedder(
    (mlp): Sequential(
      (0): Linear(in_features=256, out_features=384, bias=True)
      (1): SiLU()
      (2): Linear(in_features=384, out_features=384, bias=True)
    )
  )
  (y_embedder): LabelEmbedder(
    (embedding_table): Embedding(2, 384)
  )
  (blocks): ModuleList(
    (0-11): 12 x DiTBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=False)
      (attn): Attention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=False)
      (mlp):

In [1]:
# Same DiT settings as the baseline cell, but with adaptformer=True.
from models.dit3d import DiT3D_models, DiT
model_adaptformer = DiT(depth=12, hidden_size=384, patch_size=4, num_heads=6, adaptformer=True)
model_adaptformer

grid_size: 8


DiT(
  (voxelization): Voxelization(resolution=32, normalized eps = 0)
  (x_embedder): PatchEmbed_Voxel(
    (proj): Conv3d(3, 384, kernel_size=(4, 4, 4), stride=(4, 4, 4))
  )
  (t_embedder): TimestepEmbedder(
    (mlp): Sequential(
      (0): Linear(in_features=256, out_features=384, bias=True)
      (1): SiLU()
      (2): Linear(in_features=384, out_features=384, bias=True)
    )
  )
  (y_embedder): LabelEmbedder(
    (embedding_table): Embedding(2, 384)
  )
  (blocks): ModuleList(
    (0-11): 12 x DiTBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=False)
      (attn): Attention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=False)
      (mlp):

In [2]:
import torch
# Feed random tensors to the model to check shapes.
# NOTE(review): `model` must already exist from an earlier cell in this kernel.
# x: random points laid out as (batch, xyz, num_points).
x = torch.randn(1, 3, 2048).to('cuda')
# t: one integer in [1, 1000] (upper bound exclusive), passed to the model as t.
t = torch.randint(1, 1001, (1,)).to('cuda')
# y: torch.randint's upper bound is exclusive, so this is always 0.
y = torch.randint(0, 1, (1,)).to('cuda')
model = model.to('cuda')
print(x)
print(t)
print(y)

tensor([[[ 0.3839,  0.9537,  0.7609,  ...,  0.0210,  0.9074, -0.3622],
         [ 0.4203, -0.1210,  0.3230,  ...,  0.0515, -1.6827,  0.5799],
         [-1.2297,  0.3981,  1.8440,  ..., -0.4590, -0.1731, -0.3693]]],
       device='cuda:0')
tensor([488], device='cuda:0')
tensor([0], device='cuda:0')


In [3]:
# Forward pass with (x, t, y); the recorded output shape matches x.
output = model(x, t, y)
output.shape

x after voxelization:  torch.Size([1, 3, 32, 32, 32])
voxel_coords:  torch.Size([1, 3, 2048])
x after patchfiy:  torch.Size([1, 512, 384])
x after add position embedding:  torch.Size([1, 512, 384])
time embedding:  torch.Size([1, 384])
label embedding:  torch.Size([1, 384])
x after dit block: torch.Size([1, 512, 384])
x after final layer:  torch.Size([1, 512, 192])
x after unpatchify:  torch.Size([1, 3, 32, 32, 32])
x after devoxelization:  torch.Size([1, 3, 2048])


torch.Size([1, 3, 2048])

In [None]:
# Parameter counts for `model` (the variant built without the adapter).
# Trainable: only parameters with requires_grad=True.
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("DiT_S_4 with out adapter trainable parameters: ", trainable_params)
# Total: every parameter, whether or not it requires grad.
total_params = sum(p.numel() for p in model.parameters())
print("DiT_S_4 with out adapter total parameters: ", total_params)

DiT_S_4 with out adapter trainable parameters:  32610624
DiT_S_4 with out adapter total parameters:  32807232


In [None]:
# Parameter counts for `model_adaptformer` (the variant built with
# adaptformer=True). The labels now say "with adapter"; they previously
# repeated the "with out adapter" text of the baseline cell, which made
# the two printed results indistinguishable.
# Trainable: only parameters with requires_grad=True.
trainable_params_adapt = sum(p.numel() for p in model_adaptformer.parameters() if p.requires_grad)
print("DiT_S_4 with adapter trainable parameters: ", trainable_params_adapt)
# Total: every parameter, whether or not it requires grad.
total_params_adapt = sum(p.numel() for p in model_adaptformer.parameters())
print("DiT_S_4 with adapter total parameters: ", total_params_adapt)

DiT_S_4 with out adapter trainable parameters:  33215040
DiT_S_4 with out adapter total parameters:  33411648


In [None]:
# Extra trainable parameters in the adaptformer model compared with the
# baseline. The result gets its own name so that `trainable_params` keeps
# the baseline count and re-running this cell gives the same answer.
adapter_trainable_params = trainable_params_adapt - trainable_params
adapter_trainable_params

604416

In [1]:
# check Partial Pcd embedding layer
from models.dit3d import PartialPcdEmbedder
import torch
y_embedder = PartialPcdEmbedder(0.1,
                                hidden_size=384,
                                patch_size=4,
                                in_channels=3)

grid_size: 8


In [2]:
# input random tensor to check shape
partial_pcd = torch.randn(1, 3, 300).to('cuda')
y_embedder = y_embedder.to('cuda')
print(partial_pcd.shape)

torch.Size([1, 3, 300])


In [3]:
# Embed the partial cloud; the recorded output is one 384-dim vector per batch item.
# NOTE(review): the second positional argument (True) is not explained
# here; confirm its meaning in PartialPcdEmbedder.forward.
output = y_embedder(partial_pcd, True)
output.shape

torch.Size([1, 384])

In [1]:
# check DiT with partial point cloud condtion
from models.dit3d import DiT3D_models, DiT
model_partial_pcd = DiT(depth=12, hidden_size=384, patch_size=4, num_heads=6, adaptformer=False, partial_pcd=True)
model_partial_pcd

DiT(
  (voxelization): Voxelization(resolution=32, normalized eps = 0)
  (x_embedder): PatchEmbed_Voxel(
    (proj): Conv3d(3, 384, kernel_size=(4, 4, 4), stride=(4, 4, 4))
  )
  (t_embedder): TimestepEmbedder(
    (mlp): Sequential(
      (0): Linear(in_features=256, out_features=384, bias=True)
      (1): SiLU()
      (2): Linear(in_features=384, out_features=384, bias=True)
    )
  )
  (partial_pcd_embedder): PartialPcdEmbedder(
    (voxelization): Voxelization(resolution=32, normalized eps = 0)
    (x_embedder): PatchEmbed_Voxel(
      (proj): Conv3d(3, 384, kernel_size=(4, 4, 4), stride=(4, 4, 4))
    )
    (blocks): ModuleList(
      (0-2): 3 x ViTBlock(
        (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=False)
        (attn): Attention(
          (qkv): Linear(in_features=384, out_features=1152, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=384, out_

In [5]:
import torch
# Feed random tensors to the partial-pcd model to check shapes.
# x: random points laid out as (batch, xyz, num_points).
x = torch.randn(1, 3, 2048).to('cuda')
# t: one integer in [1, 1000] (upper bound exclusive), passed to the model as t.
t = torch.randint(1, 1001, (1,)).to('cuda')
# Random 300-point tensor used as the conditioning partial point cloud.
partial_pcd = torch.randn(1, 3, 300).to('cuda')
model_partial_pcd = model_partial_pcd.to('cuda')
print(x.shape)
print(t)
print(partial_pcd.shape)

torch.Size([1, 3, 2048])
tensor([300], device='cuda:0')
torch.Size([1, 3, 300])


In [6]:
# Forward pass with the partial cloud in place of the class label; the
# recorded output shape matches x.
output = model_partial_pcd(x, t, partial_pcd)
output.shape

torch.Size([1, 3, 2048])