In [1]:
import os
import re
import time
import torch
import sys
import random
import pickle as pkl
from shutil import copyfile

from pprint import pprint
import torch.optim as optim
from torch.nn import DataParallel
import numpy as np
from torch.autograd import Variable
from PIL import Image
from torchvision import transforms as T
from matplotlib import pyplot as plt
from plot import post_epoch_plot, rgbd_gradients_dataset_first_n
%matplotlib notebook
from models import SpecialFuseNetModel
from data_manager import rgbd_gradients_dataset, rgbd_gradients_dataloader
from train import FuseNetTrainer
from functions import torch2np_u8

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cpu


## Loading Model, FitResult file & Configurations:

In [3]:
CWD          = os.getcwd()
DATASET_DIR  = os.path.join(CWD,'data','nyuv2')
DATASET_SIZE = len([name for name in os.listdir(os.path.join(DATASET_DIR,'rgb')) if 
                            os.path.isfile(os.path.join(DATASET_DIR,'rgb',name))])
CKPT_DIR     = os.path.join(CWD,'checkpoints')

CKPT_BASE_NAME  = 'special_fusenet,img_size=64,64,batch_size=16,betas=0.9,0.99,lr=0.001,momentum=0.99'
RGB2D_BASE_NAME = 'model_rgb_to_depth'

CKPT_NAME       = os.path.join(CKPT_DIR,CKPT_BASE_NAME)
RGB2D_CKPT_NAME = os.path.join(CKPT_DIR,RGB2D_BASE_NAME)

print(f'[I] - CWD={CWD}')
print(f'[I] - DATASET_DIR={DATASET_DIR}')
print(f'[I] - DATASET_SIZE={DATASET_SIZE}')
print(f'[I] - CKPT_DIR={CKPT_DIR}')

if os.path.exists(CKPT_NAME+'.pt'):
    print(f'[I] - CKPT_NAME={CKPT_NAME}.pt')
    try:
        print(f'[I] - Load Model ...', end=' ')
        saved_state = torch.load(CKPT_NAME+'.pt', map_location=device)
        print('Done.')
    except Exception as e:
        print(f'\n[E] - {e}')
else:
    print(f"[E] - CKPT_NAME={CKPT_NAME}.pt Doesn't exist!")
    
if os.path.exists(RGB2D_CKPT_NAME+'.pt'):
    print(f'[I] - RGB2D_CKPT_NAME={RGB2D_CKPT_NAME}.pt')
    try:
        print(f'[I] - Load Model ...', end=' ')
        rgb2d_model = torch.load(RGB2D_CKPT_NAME+'.pt', map_location=device)
        rgb2d_model.set_device(device)
        print('Done.')
    except Exception as e:
        print(f'\n[E] - {e}')
else:
    print(f"[E] - RGB2D_CKPT_NAME={RGB2D_CKPT_NAME}.pt Doesn't exist!")
    
if os.path.exists(CKPT_NAME+'_res.pkl'):
    print(f'[I] - FitResult File={CKPT_NAME}_res.pkl')
    try:
        print(f'[I] - Load Pkl ...', end=' ')
        fp = open(os.path.join(CKPT_NAME+'_res.pkl'), "rb")
        fit_result = pkl.load(fp)
        print('Done.')
    except Exception as e:
        print(f'\n[E] - {e}')
else:
    print(f"[E] - CKPT_NAME={CKPT_NAME}_res.pkl Doesn't exist!")

if os.path.exists(CKPT_NAME+'_hp.py'):
    print(f'[I] - Hyperparameters File={CKPT_NAME}_hp.py')
    try:
        print(f'[I] - Copy Temp File ...', end=' ')
        copyfile(CKPT_NAME+'_hp.py', os.path.join(CWD, "curr_hp.py"))
        print('Done.')
        print(f'[I] - Import Hyperparametrs ...', end=' ')
        from curr_hp import BATCH_SIZE, NUM_WORKERS, TRAIN_TEST_RATIO, IMAGE_SIZE, LR, MOMENTUM, WEIGHT_DECAY
        print(f'BATCH_SIZE={BATCH_SIZE} ...', end=' ')
        print(f'NUM_WORKERS={NUM_WORKERS} ...', end=' ')
        print(f'TRAIN_TEST_RATIO={TRAIN_TEST_RATIO} ...', end=' ')
        print(f'IMAGE_SIZE={IMAGE_SIZE} ...', end=' ')
        print(f'LR={LR} ...', end=' ')
        print(f'MOMENTUM={MOMENTUM} ...', end=' ')
        print(f'WEIGHT_DECAY={WEIGHT_DECAY} ...', end=' ')
        print('Done.')
        print(f'[I] - Remove Temp File ...', end=' ')
        os.remove(os.path.join(CWD, "curr_hp.py"))
        print('Done.')
    except Exception as e:
        print(f'\n[E] - {e}')
else:
    print(f"[E] - CKPT_NAME={CKPT_NAME}_hp.py Doesn't exist!")

[I] - CWD=/home/manor/cs236781-DeepLearning/project/master
[I] - DATASET_DIR=/home/manor/cs236781-DeepLearning/project/master/data/nyuv2
[I] - DATASET_SIZE=1278
[I] - CKPT_DIR=/home/manor/cs236781-DeepLearning/project/master/checkpoints
[I] - CKPT_NAME=/home/manor/cs236781-DeepLearning/project/master/checkpoints/special_fusenet,img_size=64,64,batch_size=16,betas=0.9,0.99,lr=0.001,momentum=0.99.pt
[I] - Load Model ... Done.
[I] - RGB2D_CKPT_NAME=/home/manor/cs236781-DeepLearning/project/master/checkpoints/model_rgb_to_depth.pt
[I] - Load Model ... Done.
[I] - FitResult File=/home/manor/cs236781-DeepLearning/project/master/checkpoints/special_fusenet,img_size=64,64,batch_size=16,betas=0.9,0.99,lr=0.001,momentum=0.99_res.pkl
[I] - Load Pkl ... Done.
[I] - Hyperparameters File=/home/manor/cs236781-DeepLearning/project/master/checkpoints/special_fusenet,img_size=64,64,batch_size=16,betas=0.9,0.99,lr=0.001,momentum=0.99_hp.py
[I] - Copy Temp File ... Done.
[I] - Import Hyperparametrs ... BAT



## Chosen Model's performances during Training:

In [4]:
train_loss = [sum(r)/len(r) for r in fit_result.train_loss]
test_loss = [sum(r)/len(r) for r in fit_result.test_loss]

fig, axes = plt.subplots(1,3, figsize=(8,8), sharex=True, sharey=True)

title_args = re.search( r'^special_fusenet,img_size=(\d+,\d+),batch_size=(\d+),betas=(\d+.\d+,\d+.\d+),lr=(\d+.\d+),momentum=(\d+.\d+)', CKPT_BASE_NAME)
if title_args:
    img_size   = title_args.group(1)
    batch_size = title_args.group(2)
    betas      = title_args.group(3)
    lr         = title_args.group(4)
    momentum   = title_args.group(5)
    fig.suptitle(f'|img|={img_size}, |batch|={batch_size},lr={lr}, momentum={momentum}', fontsize=20)
    
axes[0].plot(train_loss, label='Train Loss')
axes[0].set_title('Train Loss')
axes[0].scatter(x=len(train_loss), y=train_loss[-1], marker='+', s=100)
axes[0].annotate("Final Train Loss\n= {0:.4f}    ".format(train_loss[-1]),
                          xy=(len(train_loss), train_loss[-1]), xycoords='data',
                          xytext=(0.8, 0.3), textcoords='axes fraction',
                          arrowprops=dict(facecolor='black', shrink=0.05),
                          horizontalalignment='right', verticalalignment='top')
axes[0].set_xlabel('Epochs')
axes[0].set_ylabel('MSE Loss')


axes[1].plot(test_loss,  label='Test Loss')
axes[1].set_title('Test Loss')
axes[1].scatter(x=len(test_loss), y=test_loss[-1], marker='+', s=100)
axes[1].annotate("Final Test Loss\n= {0:.4f}    ".format(test_loss[-1]),
                          xy=(len(test_loss), test_loss[-1]), xycoords='data',
                          xytext=(0.8, 0.3), textcoords='axes fraction',
                          arrowprops=dict(facecolor='black', shrink=0.05),
                          horizontalalignment='right', verticalalignment='top')
axes[1].set_xlabel('Epochs')
axes[1].set_ylabel('MSE Loss')


axes[2].plot(train_loss, label='Train Loss')
axes[2].plot(test_loss,  label='Test Loss')
axes[2].set_title('Train Vs Test Losses')
axes[2].set_xlabel('Epochs')
axes[2].set_ylabel('MSE Loss')

axes[2].legend()

plt.show()

<IPython.core.display.Javascript object>

## We shell now review our model's Loss on the Test Set & Validation Set and watch it's output's Visual Quality for diffrent kinds of inference:
* Regular Mode: RGB & Depth
* No Depth: RGB & Zeros instead of Depth
* No RGB: Zeros instead of RGB & Depth
* Noise Depth: RGB & White Gaussian Noise istead of Depth
* Noise RGB: White Gaussian Noise istead of RGB & Depth


In [5]:
fusenetmodel = SpecialFuseNetModel(sgd_lr=LR, sgd_momentum=MOMENTUM, sgd_wd=WEIGHT_DECAY,device=device)
try:
    print(f'[I] - Load State Dict ...', end=' ')
    fusenetmodel.load_state_dict(saved_state['model_state'])
    print('Done.')
except Exception as e:
    print(f'\n[E] - {e}')
try:
    print(f'[I] - Set Model to Evaluation Mode ...', end=' ')
    fusenetmodel.train(False)
    print('Done.')
except Exception as e:
    print(f'\n[E] - {e}')
    
trainer = FuseNetTrainer(model=fusenetmodel, device=device, num_epochs=1)

[I] - device=cpu
    - seed=42
    - dropout_p=0.4
    - optimizer=None
    - scheduler=None

[I] - Init SpecialFuseNet
    - warm start=True
    - BN momentum=0.1
    - dropout_p=0.4

[I] - Check Features Disabled
[I] - Initialize Net.
    - Init type=xavier
    - Init gain=0.02

[I] - default optimizer set: SGD(lr=0.001,momentum=0.99,weight_decay=0.0005)
[I] - default scheduler set: StepSR(step_size=1000,gamma=0.1)
[I] - Load State Dict ... Done.
[I] - Set Model to Evaluation Mode ... Done.
[I (FuseNetTrainer)] - model=<models.SpecialFuseNetModel object at 0x7f59783871d0>
                     - num_epochs=1
                     - device=cpu
                      - seed=42



In [6]:
experiments  = ['rgb2d', 'both', 'zero_depth', 'zero_rgb', 'noise_depth', 'noise_rgb']
# experiments  = ['rgb2d']
# experiments  = ['rgb2d', 'both']

experiments_dict = {
    'both'        : 'Regular Mode: RGB & Depth ',
    'zero_depth'  : 'No Depth: RGB & Zeros instead of Depth ',
    'zero_rgb'    : 'No RGB: Zeros instead of RGB & Depth ',
    'noise_depth' : "Noise Depth: RGB & White Uniformal Noise istead of Depth",
    'noise_rgb'   : "Noise RGB: White Uniformal Noise istead of RGB & Depth",
    'rgb2d'       : "Approximated Depth: RGB & Approximated Depth generated by RGB2D Network"
}

eval_losses = {
    'both'        : 0,
    'zero_depth'  : 0,
    'zero_rgb'    : 0,
    'noise_depth' : 0,
    'noise_rgb'   : 0,
    'rgb2d'       : 0
}

In [10]:
from data_manager import rgbd_gradients_inference_dataset,rgb2depth_dataset
from torch import randperm
from torch._utils import _accumulate
from torch.utils.data.dataset import Subset

rows = 4

for i,exp in enumerate(experiments):
    print(f'[I] - {i}) {exp}\n','-'*100)
    
    rgbd_grads_ds = rgbd_gradients_inference_dataset(device=device, root=DATASET_DIR, image_size=IMAGE_SIZE,
                                                     inference=exp if exp != 'rgb2d' else 'both',
                                                     goto_pixel=False)
    if exp == 'rgb2d':
        rgb2d_dataset = rgb2depth_dataset(root=DATASET_DIR)
    
    split_lengths = [int(np.ceil(len(rgbd_grads_ds)  *    TRAIN_TEST_RATIO)),
                     int(np.floor(len(rgbd_grads_ds) * (1-TRAIN_TEST_RATIO)))]
    
    indices = randperm(sum(split_lengths)).tolist()
    _, ds_test = [Subset(rgbd_grads_ds, indices[offset - length:offset]) for offset, length in
                  zip(_accumulate(split_lengths), split_lengths)]
    print(f'[D (rgbd_gradients_dataloader)] - ds_test       first 10 indices: {ds_test.indices[:10]}')
    
    if exp == 'rgb2d':
        _, rgb2d_ds_test = [Subset(rgb2d_dataset, indices[offset - length:offset]) for offset, length in
                            zip(_accumulate(split_lengths), split_lengths)]
        print(f'[D (rgbd_gradients_dataloader)] - rgb2d_ds_test first 10 indices: {rgb2d_ds_test.indices[:10]}')
    
    if exp != 'rgb2d':
        fig, axes  = plt.subplots(nrows=rows, ncols=4, figsize=(8,8), subplot_kw={'aspect': 1},
                                  gridspec_kw=dict(hspace=0.1, wspace=0.4, left=0.1, right=0.8))
    else:
        fig, axes  = plt.subplots(nrows=rows, ncols=5, figsize=(8,8), subplot_kw={'aspect': 1},
                                  gridspec_kw=dict(hspace=0.1, wspace=0.4, left=0.1, right=0.8))
        
#     fig.suptitle(experiments_dict[exp])
    
    for i in range(rows): 
        sample     = ds_test[i]
        rgb        = sample['rgb'].unsqueeze(0)
        depth      = sample['depth'].unsqueeze(0)
        x_gt       = sample['x'].squeeze(0)
        y_gt       = sample['y'].squeeze(0)
        if exp == 'rgb2d':
            rgb2d_sample = rgb2d_ds_test[i]
            rgb2d_rgb    = rgb2d_sample['rgb'].unsqueeze(0)
            rgb2d_depth  = rgb2d_sample['depth'].unsqueeze(0)

        if exp == 'rgb2d':
            with torch.no_grad():
                app_depth = rgb2d_model(rgb2d_rgb)
#                 app_depth = T.ToPILImage()(app_depth.squeeze(0))
#                 app_depth = T.ToTensor()(app_depth)
#                 app_depth = T.Normalize(mean=(.5,), std=(.5,))(app_depth)
#                 app_depth = app_depth.unsqueeze(0)
                app_depth = app_depth - torch.min(app_depth)
                app_depth = app_depth / torch.max(app_depth)
                app_depth = 1.0 - app_depth
                xy = fusenetmodel(rgb,app_depth)
        else:
            with torch.no_grad():
                xy = fusenetmodel(rgb,depth)

        x = xy[0,0,:,:]
        y = xy[0,1,:,:]
        
        rgb   = torch2np_u8(rgb)
        depth = torch2np_u8(depth)
        if exp == 'rgb2d':
            rgb2d_rgb = torch2np_u8(rgb2d_rgb)
            app_depth = torch2np_u8(app_depth)
        
        axes[i,0].imshow(rgb)
#         axes[i,0].set_title('RGB')
        axes[i,1].imshow(depth)
#         axes[i,1].set_title('DEPTH')
        
        X_gt,Y_gt = np.meshgrid(np.arange(x_gt.shape[1]), np.arange(x_gt.shape[0]))
        axes[i,2].quiver(X_gt, Y_gt, x_gt, y_gt, pivot='tip', units='xy')
#         axes[i,2].set_title('GT\nGradients')
        
        X,Y = np.meshgrid(np.arange(x.shape[1]), np.arange(x.shape[0]))
        axes[i,3].quiver(X, Y, x, y, pivot='tip', units='xy')
#         axes[i,3].set_title('App.\nGradients')
        
        if exp == 'rgb2d':
            axes[i,4].imshow(app_depth)
#             axes[i,4].set_title('App.\nDEPTH')
            
    plt.show()
    print('-'*100)
    
    dl_test = torch.utils.data.DataLoader(ds_test, batch_size=1, num_workers=1, shuffle=True)
    
    if exp != 'rgb2d':    
        eval_result = trainer.eval(dl_test)
        print(f'[I] - Test Set Average Loss: {sum(eval_result.losses)/len(eval_result.losses)}')
        eval_losses[exp] = sum(eval_result.losses)/len(eval_result.losses)
    else:
        loss = []
        for i in range(len(rgb2d_ds_test)):
            if i%10 == 0:
                print(f'Done {i} ... ',end='')
            sample = rgb2d_ds_test[i]
            rgb        = sample['rgb'].unsqueeze(0)
            depth      = sample['depth'].unsqueeze(0)
            rgb4fusenet        = sample['rgb4fusenet'].unsqueeze(0)
            depth4fusenet      = sample['depth4fusenet'].unsqueeze(0)
            x_gt       = sample['x4fusenet']
            y_gt       = sample['y4fusenet']
            fusenetmodel.net.train(False)
            with torch.no_grad():
                app_depth = rgb2d_model(rgb)
                # Depth -> [0, 1]
                app_depth = app_depth - torch.min(app_depth)
                app_depth = app_depth / torch.max(app_depth)
                # Inverse colors, to match the GT depth colors' style.
                app_depth = 1.0 - app_depth
                xy = fusenetmodel(rgb4fusenet,app_depth)
            
            xy_gt = torch.cat((x_gt.unsqueeze(0),y_gt.unsqueeze(0)),dim=1)
            loss.append(fusenetmodel.loss(ground_truth_grads=xy_gt, approximated_grads=xy))
        print(f'\n[I] - Test Set Average Loss (Feed-through RGB2D Network): {sum(loss)/len(loss)}')
        eval_losses[exp] = sum(loss)/len(loss)
            

[I] - 0) rgb2d
 ----------------------------------------------------------------------------------------------------
[I (rgbd_gradients_inference_dataset)] - root=/home/manor/cs236781-DeepLearning/project/master/data/nyuv2
                                       - device=cpu
                                       - image_size=(64, 64)
                                       - inference=both
                                       - goto_pixel=False

[I] - |self|=1278
[I (rgb2depth_dataset)] - root=/home/manor/cs236781-DeepLearning/project/master/data/nyuv2
                        - image_size=(128, 128)
                        - fusenet_image_size=(64, 64)

[I] - |self|=1278
[D (rgbd_gradients_dataloader)] - ds_test       first 10 indices: [312, 2, 1167, 217, 178, 853, 359, 780, 146, 704]
[D (rgbd_gradients_dataloader)] - rgb2d_ds_test first 10 indices: [312, 2, 1167, 217, 178, 853, 359, 780, 146, 704]


<IPython.core.display.Javascript object>

----------------------------------------------------------------------------------------------------
Done 0 ... Done 10 ... Done 20 ... Done 30 ... Done 40 ... Done 50 ... Done 60 ... Done 70 ... Done 80 ... Done 90 ... Done 100 ... Done 110 ... Done 120 ... Done 130 ... Done 140 ... Done 150 ... Done 160 ... Done 170 ... Done 180 ... Done 190 ... Done 200 ... Done 210 ... Done 220 ... Done 230 ... Done 240 ... Done 250 ... 
[I] - Test Set Average Loss (Feed-through RGB2D Network): 0.2017802894115448
[I] - 1) both
 ----------------------------------------------------------------------------------------------------
[I (rgbd_gradients_inference_dataset)] - root=/home/manor/cs236781-DeepLearning/project/master/data/nyuv2
                                       - device=cpu
                                       - image_size=(64, 64)
                                       - inference=both
                                       - goto_pixel=False

[I] - |self|=1278
[D (rgbd_gradients_dataloa

<IPython.core.display.Javascript object>

----------------------------------------------------------------------------------------------------
test_batch (Avg. Loss 0.008: 100%|██████████| 255/255 [00:27<00:00,  9.12it/s]
[I] - Test Set Average Loss: 0.00808335105002876
[I] - 2) zero_depth
 ----------------------------------------------------------------------------------------------------
[I (rgbd_gradients_inference_dataset)] - root=/home/manor/cs236781-DeepLearning/project/master/data/nyuv2
                                       - device=cpu
                                       - image_size=(64, 64)
                                       - inference=zero_depth
                                       - goto_pixel=False

[I] - |self|=1278
[D (rgbd_gradients_dataloader)] - ds_test       first 10 indices: [583, 542, 632, 352, 1131, 995, 746, 329, 1149, 676]


<IPython.core.display.Javascript object>

----------------------------------------------------------------------------------------------------
test_batch (Avg. Loss 0.107: 100%|██████████| 255/255 [00:27<00:00,  9.21it/s]
[I] - Test Set Average Loss: 0.10738996859712928
[I] - 3) zero_rgb
 ----------------------------------------------------------------------------------------------------
[I (rgbd_gradients_inference_dataset)] - root=/home/manor/cs236781-DeepLearning/project/master/data/nyuv2
                                       - device=cpu
                                       - image_size=(64, 64)
                                       - inference=zero_rgb
                                       - goto_pixel=False

[I] - |self|=1278
[D (rgbd_gradients_dataloader)] - ds_test       first 10 indices: [1167, 131, 232, 787, 521, 540, 650, 117, 449, 573]


<IPython.core.display.Javascript object>

----------------------------------------------------------------------------------------------------
test_batch (Avg. Loss 0.009: 100%|██████████| 255/255 [00:27<00:00,  9.43it/s]
[I] - Test Set Average Loss: 0.008677533993442708
[I] - 4) noise_depth
 ----------------------------------------------------------------------------------------------------
[I (rgbd_gradients_inference_dataset)] - root=/home/manor/cs236781-DeepLearning/project/master/data/nyuv2
                                       - device=cpu
                                       - image_size=(64, 64)
                                       - inference=noise_depth
                                       - goto_pixel=False

[I] - |self|=1278
[D (rgbd_gradients_dataloader)] - ds_test       first 10 indices: [514, 372, 103, 868, 107, 458, 1093, 805, 42, 248]


<IPython.core.display.Javascript object>

----------------------------------------------------------------------------------------------------
test_batch (Avg. Loss 0.116: 100%|██████████| 255/255 [00:28<00:00,  9.10it/s]
[I] - Test Set Average Loss: 0.11564670160558878
[I] - 5) noise_rgb
 ----------------------------------------------------------------------------------------------------
[I (rgbd_gradients_inference_dataset)] - root=/home/manor/cs236781-DeepLearning/project/master/data/nyuv2
                                       - device=cpu
                                       - image_size=(64, 64)
                                       - inference=noise_rgb
                                       - goto_pixel=False

[I] - |self|=1278
[D (rgbd_gradients_dataloader)] - ds_test       first 10 indices: [129, 694, 1123, 934, 1199, 109, 1266, 920, 1107, 952]


<IPython.core.display.Javascript object>

----------------------------------------------------------------------------------------------------
test_batch (Avg. Loss 0.009: 100%|██████████| 255/255 [00:27<00:00,  9.44it/s]
[I] - Test Set Average Loss: 0.00905830295455149


## Let's plot & compare each experiment Evaluation Loss:

In [11]:
eval_losses
fig, ax = plt.subplots()
# fig.suptitle(' Each Experiment Loss')
x = np.arange(len(eval_losses))
ax.bar(x, height=list(eval_losses.values()))
plt.xticks(x, list(eval_losses.keys()))
fig.show()

<IPython.core.display.Javascript object>

### As we can see from the bar plot above, the Network has learnt to approximate the Depth Image Gradients, as originally calculated using Classic methods (Convolution with a Sobel kernel).
* Minimal loss achieved with the RGB image zeroed, but it is almost equal to the loss with both the RGB & Depth images and to the loss with the RGB image drawed from a Uniform Distribution. 
* We deduce therefore that the Network has learnt to **ignore** the RGB image, as the loss over the Testset is the smallest when the RGB image zeroed, but almost equal to the case with both the RGB image and the Depth image
* Regards the experiment of feeding forward through the RGB2D network first (and than through our network), very high loss accheived.
** At first look that doesn't make sense since the RGB2D approximate the depth not bad.
** We think the high loss (and accordingly, the low visual quality of the results as you can see above) might caused due to a normalization missmatch between the first network and the second.
** while our network expect its depth input images to be in the range [-1,1], and centered around 0, 