In [1]:
import os
import torch
from models import SpecialFuseNet, SpecialFuseNetModel
from data_manager import rgbd_gradients_dataset, rgbd_gradients_dataloader
from torchvision import transforms as T


In [2]:
# Instantiate the bare SpecialFuseNet network with its default constructor arguments.
fusenet = SpecialFuseNet()

In [3]:
# Dump the module tree so the layer layout can be inspected in the cell output.
print(fusenet)

SpecialFuseNet(
  (CBR1_RGB_ENC): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
  )
  (RGB_POOL1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (CBR2_RGB_ENC): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
  )
  (RGB_POOL2): MaxPool2d(kernel_size=2, stride=2, padd

In [4]:
# Locate the dataset relative to wherever the notebook kernel was started.
CWD = os.getcwd()
DATASET_DIR = os.path.join(CWD, 'data/nyuv2')
print(DATASET_DIR)

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

C:\Users\tomav\Documents\GitHub\cs236781-project\data/nyuv2
Using device: cuda


In [5]:
# --- Preprocessing ---
# Target size handed to T.Resize for both the RGB and the depth images.
IMAGE_SIZE = (64, 64)

# --- Data loading ---
# Forwarded to rgbd_gradients_dataloader; presumably the train share of the
# split -- TODO confirm against data_manager.
TRAIN_TEST_RATIO = 0.9
BATCH_SIZE = 4
NUM_WORKERS = 4

In [6]:
def _resize_and_normalize(num_channels):
    """Build the preprocessing pipeline for an image with `num_channels` channels.

    Steps, in order:
      1. Resize to the constant spatial dimensions given by IMAGE_SIZE.
      2. Convert PIL.Image -> torch.Tensor.
      3. Shift the dynamic range [0,1] -> [-1, 1] (mean=0.5, std=0.5 per channel).
    """
    half = (.5,) * num_channels
    return T.Compose([
        T.Resize(IMAGE_SIZE),
        T.ToTensor(),
        T.Normalize(mean=half, std=half),
    ])


# Same pipeline for both modalities; only the channel count differs.
tf_rgb = _resize_and_normalize(3)
tf_depth = _resize_and_normalize(1)

In [7]:
# Dataset rooted at DATASET_DIR, using the RGB and depth transforms defined above.
rgbd_grads_ds = rgbd_gradients_dataset(
    root=DATASET_DIR,
    transforms_rgb=tf_rgb,
    transforms_depth=tf_depth,
)

In [8]:
# Train/test loaders over the same root directory and transforms.
# NOTE(review): the keyword is spelled `train_test_ration` because that is what
# this call passes to data_manager; confirm the signature before renaming it.
dl_train, dl_test = rgbd_gradients_dataloader(
    root=DATASET_DIR,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    train_test_ration=TRAIN_TEST_RATIO,
    transforms_rgb=tf_rgb,
    transforms_depth=tf_depth,
)

In [9]:
# Pull a single batch from the test loader to probe tensor shapes.
sample_batch = next(iter(dl_test))

# Per-sample sizes: drop the leading (batch) axis of each tensor.
rgb_size, depth_size, grads_size = (
    tuple(sample_batch[key].shape[1:]) for key in ('rgb', 'depth', 'x')
)

for size in (rgb_size, depth_size, grads_size):
    print(size)

(3, 64, 64)
(1, 64, 64)
(1, 64, 64)


In [10]:
# Build the model wrapper in 'train' mode; the per-sample sizes come from the probe batch.
fusenetmodel = SpecialFuseNetModel(
    device=device,
    rgb_size=rgb_size,
    depth_size=depth_size,
    grads_size=grads_size,
    mode='train',
)

[debug] - default optimizer set: SGD(lr=0.001,momentum=0.9,weight_decay=0.0005)


In [11]:
# Run one forward pass on the probe batch and report the shape of the result.
ff_output = fusenetmodel(rgb_batch=sample_batch['rgb'], depth_batch=sample_batch['depth'])
print(f"feed-forward size check: {ff_output.shape}")


feed-forward size check: torch.Size([4, 2, 64, 64])


In [12]:
# Report the Python type and dtype of every tensor entering or leaving the model.
print("type check:")
print(f"rgb input:   {type(sample_batch['rgb']),sample_batch['rgb'].dtype}")
print(f"depth input: {type(sample_batch['depth']),sample_batch['depth'].dtype}")
print(f"x input:     {type(sample_batch['x']),sample_batch['x'].dtype}")
print(f"y input:     {type(sample_batch['y']),sample_batch['y'].dtype}")

# Evaluate the forward pass once and reuse it. The previous version ran the
# whole network twice just to read `type` and `dtype`, doubling the compute.
# NOTE(review): each extra pass presumably also nudges the BatchNorm running
# statistics while the network is in training mode -- confirm.
xy_output = fusenetmodel(rgb_batch=sample_batch['rgb'], depth_batch=sample_batch['depth'])
print(f"xy output:   {type(xy_output),xy_output.dtype}")

type check:
rgb input:   (<class 'torch.Tensor'>, torch.float32)
depth input: (<class 'torch.Tensor'>, torch.float32)
x input:     (<class 'torch.Tensor'>, torch.float32)
y input:     (<class 'torch.Tensor'>, torch.float32)
xy output:   (<class 'torch.Tensor'>, torch.float32)


In [13]:
print("loss check:")

# Ground truth: the x and y tensors concatenated along dim 1.
grads_gt = torch.cat((sample_batch['x'], sample_batch['y']), dim=1)
print(f"ground truth grads size: {grads_gt.shape}")

# Prediction for the same probe batch.
grads_pred = fusenetmodel(rgb_batch=sample_batch['rgb'], depth_batch=sample_batch['depth'])

# Left as the trailing expression so the notebook displays the loss tensor.
fusenetmodel.loss(grads_gt.to(device), grads_pred)

loss check:
ground truth grads size: torch.Size([4, 2, 64, 64])
[debug] - shapes: |tags|=torch.Size([4, 2, 64, 64]), |output|=torch.Size([4, 2, 64, 64])


tensor(0.1351, device='cuda:0', grad_fn=<MseLossBackward>)