# Basic Setup

In [1]:
# Detect automatically whether this notebook is executed inside Google Colab:
# the check looks for the `google.colab` entry in the table of loaded modules.
import sys

COLAB_MODULE = 'google.colab'
is_on_colab = COLAB_MODULE in sys.modules
print("Is on colab: ", is_on_colab)

Is on colab:  False


## Setup for Colab

In [2]:
if is_on_colab:
    # Google Colab setup

    # Install pytorch3d
    !pip install 'git+https://github.com/facebookresearch/pytorch3d.git'
    
    # Mount drive
    from google.colab import drive
    drive.mount('/content/drive')

    # Retrieve repository and cd into root folder
    from getpass import getpass
    import urllib
    import os
    user = input('Github user name: ')
    password = getpass('Github password: ')
    password = urllib.parse.quote(password) # your password is converted into url format
    branch = "" # "-b " + "branch_name"
    cmd_string = 'git clone {0} https://{1}:{2}@github.com/lukasHoel/novel-view-synthesis.git'.format(branch, user, password)
    os.system(cmd_string)
    os.chdir("novel-view-synthesis")

    # Install PyTorch3D libraries (required for pointcloud computations.)
    !pip install 'git+https://github.com/facebookresearch/pytorch3d.git'
    !pwd

## Setup for Local Execution

In [3]:
# ONLY NECESSARY FOR LOCAL EXECUTION (WORKS WITHOUT THIS CELL IN GOOGLE COLAB)
# Lets the notebook import packages that live in sibling directories by putting the
# parent of the current working directory on the module search path.
# see: https://stackoverflow.com/questions/34478398/import-local-function-from-a-module-housed-in-another-directory-with-relative-im

if not is_on_colab:
    import os
    import sys

    # Absolute path of the parent directory, resolved against the current working directory.
    module_path = os.path.abspath('..')
    already_registered = module_path in sys.path
    if not already_registered:
        sys.path.append(module_path)


## General Settings

In [4]:
# Imports for this notebook

from models.nvs_model import NovelViewSynthesisModel
from models.synthesis.synt_loss_metric import SynthesisLoss
from util.nvs_solver import NVS_Solver
from util.gan_wrapper_solver import GAN_Wrapper_Solver
from data.nuim_dataloader import ICLNUIMDataset

from torch.utils import data
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms
import torch
import torch.nn as nn
import numpy as np

%load_ext autoreload
%autoreload 2

In [5]:
# Check whether CUDA is available and derive the device string from it.

cuda = torch.cuda.is_available()
print("Training is on GPU with CUDA: {}".format(cuda))

# First CUDA device when available, CPU otherwise.
if cuda:
    device = "cuda:0"
else:
    device = "cpu"
print("Device: {}".format(device))

Training is on GPU with CUDA: True
Device: cuda:0


In [6]:
def count_parameters(model):
    """Return the number of trainable scalar parameters of `model`.

    Iterates over ``model.parameters()`` and adds up ``numel()`` of every
    parameter whose ``requires_grad`` flag is set; other parameters are skipped.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Model & Loss Init

Instantiate and initialize NovelViewSynthesisModel and a selected flavor of SynthesisLoss.

In [7]:
# Central configuration of the model and its loss for this run.
# TODO: Define more parameters in the dict according to available ones in the model, as soon as they are needed.
# Right now we just use the default parameters for the rest (see commented-out list or the .py file)
# The same dict is later merged into the solver's `extra_args`, and 'imageSize' is
# reused for the resize transform of the dataset.
    
model_args={
    'imageSize': 64,
    
    'use_gt_depth': True,
    'normalize_images': False,
    'use_rgb_features': True,
    
    'dec_activation_func': nn.Sigmoid(),
    'dec_dims': [3, 3, 3, 3],
    'dec_blk_types': ["id", "id", "id"],
    'dec_noisy_bn': False,
    'dec_spectral_norm': False,
    
    # from here attributes for the loss of the nvs_model
    # (each entry is a string of the form "<weight>_<loss name>")
    'l1_loss': '1.0_l1',
    'content_loss': '0.0_content', # synsin default: 10.0
}

# keep this loss object constant and modify usage of losses by e.g. setting one coefficient to 0
nvs_loss = SynthesisLoss(losses=[
    model_args['l1_loss'],
    model_args['content_loss']
])

# Only the arguments taken from model_args are passed explicitly; the commented-out
# keyword arguments list the remaining options, which are left at the model's defaults.
model = NovelViewSynthesisModel(imageSize=model_args['imageSize'],
                                #max_z=0,
                                #min_z=0,
                                #enc_dims=[3, 8, 16, 32],
                                #enc_blk_types=["id", "id", "id"],
                                #enc_noisy_bn=True,
                                #enc_spectral_norm=True,
                                dec_dims=model_args['dec_dims'],
                                dec_blk_types=model_args['dec_blk_types'],
                                dec_activation_func=model_args['dec_activation_func'],
                                dec_noisy_bn=model_args['dec_noisy_bn'],
                                dec_spectral_norm=model_args['dec_spectral_norm'],
                                #points_per_pixel=8,
                                #learn_feature=True,
                                #radius=1.5,
                                #rad_pow=2,
                                #accumulation='alphacomposite',
                                #accumulation_tau=1,
                                use_rgb_features=model_args['use_rgb_features'],
                                use_gt_depth=model_args['use_gt_depth'],
                                #use_inverse_depth=False,
                                normalize_images=model_args['normalize_images'])
# Record the model class name alongside the other settings so it shows up in the logged configuration.
model_args["model"] = type(model).__name__

print("Model configuration: {}".format(model_args))

print("Architecture:", model)
print("Total number of paramaters:", count_parameters(model))

Loss names: ('l1', 'content')
Weight of each loss: ('1.0', '0.0')
Model configuration: {'imageSize': 64, 'use_gt_depth': True, 'normalize_images': False, 'use_rgb_features': True, 'dec_activation_func': Sigmoid(), 'dec_dims': [3, 3, 3, 3], 'dec_blk_types': ['id', 'id', 'id'], 'dec_noisy_bn': False, 'dec_spectral_norm': False, 'l1_loss': '1.0_l1', 'content_loss': '0.0_content', 'model': 'NovelViewSynthesisModel'}
Architecture: NovelViewSynthesisModel(
  (dec_activation_func): Sigmoid()
  (encoder): FeatureNet(
    (res_blocks): Sequential(
      (0): ResidualBlock(
        (left_branch): Sequential(
          (0): Conv2d(3, 8, kernel_size=(1, 1), stride=(1, 1))
          (1): Identity()
        )
        (right_branch): Sequential(
          (0): LinearNoiseLayer(
            (gain): Linear(in_features=20, out_features=3, bias=False)
            (bias): Linear(in_features=20, out_features=3, bias=False)
            (bn): bn()
          )
          (1): ReLU()
          (2): Conv2d(3, 8,

# Load Data
Load ICL-NUIM dataset.


In [8]:
# Load dataset from drive or local.
# The location can be overridden without editing the notebook by setting the
# NVS_DATASET_PATH environment variable; if it is unset, the original
# Colab / local default locations below are used.
import os

if is_on_colab:
    default_path = "/content/drive/My Drive/Novel_View_Synthesis/ICL-NUIM/living_room_traj2_loop"
else:
    default_path = "/home/lukas/Desktop/datasets/ICL-NUIM/prerendered_data/living_room_traj0_loop"
path = os.environ.get("NVS_DATASET_PATH", default_path)

# Fail early with an actionable message instead of an error from inside the dataset class.
if not os.path.exists(path):
    raise FileNotFoundError(
        "Dataset path does not exist: {} (set the NVS_DATASET_PATH environment variable "
        "to point to the ICL-NUIM data)".format(path))

# Convert each image to PIL, resize it to the square model input size and convert back to a tensor.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToPILImage(),
    torchvision.transforms.Resize((model_args['imageSize'], model_args['imageSize'])),
    torchvision.transforms.ToTensor(), 
])

# Dataset options are collected in a dict so they can be logged together with the run.
data_dict = {
    "path": path,
    "depth_to_image_plane": True,
    "sampleOutput": True,
    "RTrelativeToOutput": True,
    "inverse_depth": False
}

dataset = ICLNUIMDataset(path,
                         transform=transform,
                         depth_to_image_plane=data_dict["depth_to_image_plane"],
                         sampleOutput=data_dict["sampleOutput"],
                         RTrelativeToOutput=data_dict["RTrelativeToOutput"],
                         inverse_depth=data_dict["inverse_depth"])

print("Loaded following data: {} (samples: {}) with configuration: {}".format(data_dict["path"], len(dataset), data_dict))

Loaded following data: /home/lukas/Desktop/datasets/ICL-NUIM/prerendered_data/living_room_traj0_loop (samples: 60) with configuration: {'path': '/home/lukas/Desktop/datasets/ICL-NUIM/prerendered_data/living_room_traj0_loop', 'depth_to_image_plane': True, 'sampleOutput': True, 'RTrelativeToOutput': True, 'inverse_depth': False}


In [9]:
# Split the dataset into 80% train / 20% validation indices and build the data loaders.
# from: https://stackoverflow.com/questions/50544730/how-do-i-split-a-custom-dataset-into-training-and-test-datasets

dataset_args = {"batch_size": 1, "validation_percentage": 0.2, "shuffle_dataset": True}
dataset_args.update(data_dict)

num_workers = 4
random_seed = 4 # seeds the index shuffling, which also fixes the sample picked for overfitting

# Index bookkeeping for the training and validation splits:
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(dataset_args["validation_percentage"] * dataset_size))
if dataset_args["shuffle_dataset"]:
    np.random.seed(random_seed)
    np.random.shuffle(indices)
val_indices = indices[:split]
train_indices = indices[split:]

# OVERFITTING CASE - FIRST IMAGE:
# Train on the first training index only and run without any validation samples.
overfit_index = train_indices[0]
train_indices = [overfit_index]
val_indices = []
overfit_item = dataset[overfit_index]
print("OVERFITTING Input Image: {}, Output Image: {}".format(overfit_index, overfit_item["output"]["idx"]))

# Value range of the target image.
overfit_target = overfit_item["output"]["image"]
print(torch.min(overfit_target))
print(torch.max(overfit_target))

# PyTorch samplers and loaders restricted to the chosen indices:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

loader_kwargs = {"batch_size": dataset_args["batch_size"], "num_workers": num_workers}
train_loader = torch.utils.data.DataLoader(dataset, sampler=train_sampler, **loader_kwargs)
validation_loader = torch.utils.data.DataLoader(dataset, sampler=valid_sampler, **loader_kwargs)

# Number of batches per loader, stored with the other dataset settings.
dataset_args.update(train_len=len(train_loader), val_len=len(validation_loader))

print("Dataset parameters: {}".format(dataset_args))


OVERFITTING Input Image: 11, Output Image: 7
tensor(0.0980)
tensor(0.9490)
Dataset parameters: {'batch_size': 1, 'validation_percentage': 0.2, 'shuffle_dataset': True, 'path': '/home/lukas/Desktop/datasets/ICL-NUIM/prerendered_data/living_room_traj0_loop', 'depth_to_image_plane': True, 'sampleOutput': True, 'RTrelativeToOutput': True, 'inverse_depth': False, 'train_len': 1, 'val_len': 0}


# Training Visualization

Start Tensorboard for visualization of the upcoming training / validation / test steps.

In [10]:
# Start tensorboard inside the notebook.
# `../runs` must be the directory the training logs are written to (the log_dir of
# this notebook is created below `../runs/`); adjust --logdir if the runs live elsewhere.
%load_ext tensorboard
%tensorboard --logdir ../runs

Reusing TensorBoard on port 6006 (pid 4012), started 1:51:39 ago. (Use '!kill 4012' to kill it.)

# Training

Start training process.

In [11]:
# This flag decides which solver gets used (GAN_Wrapper_Solver if True, NVS_Solver otherwise)
# and which sub-directory of the runs folder the logs are written into.
train_with_discriminator = False

In [12]:
# Create unique ID for this training process for saving to disk.
# The suffix combines a human-readable timestamp with a UUID, so two runs that
# start within the same second still get distinct names.

from datetime import datetime
import uuid

now = datetime.now() # current date and time
# Named `run_id` rather than `id` so the builtin id() is not shadowed for the rest of the session.
run_id = str(uuid.uuid1())
id_suffix = now.strftime("%Y-%b-%d_%H-%M-%S") + "_" + run_id

# Runs with and without discriminator are logged into separate sub-directories.
log_dir_name = "Full_GAN" if train_with_discriminator else "Full_No_GAN"

log_dir = "../runs/" + log_dir_name + "/" + id_suffix # Might need to make sure, that the correct runs directory is chosen here.
print("log_dir:", log_dir)

log_dir: ../runs/Full_No_GAN/2020-May-03_11-51-47_b42cb130-8d23-11ea-bd60-e782b53cc122


In [13]:
# Configure solver
# Model and dataset settings are merged into one dict that is handed to the solver.
extra_args = dict(model_args)
extra_args.update(dataset_args)


def _adam_args(lr):
    """Return the Adam keyword arguments used in this notebook for learning rate `lr`."""
    return {"lr": lr,
            "betas": (0.9, 0.999),
            "eps": 1e-8,
            "weight_decay": 0.0}  # is the l2 regularization parameter, see: https://pytorch.org/docs/stable/optim.html


if train_with_discriminator:
    # Separate Adam settings for the discriminator (d) and the generator (g).
    solver = GAN_Wrapper_Solver(optim_d=torch.optim.Adam,
                                optim_d_args=_adam_args(1e-2),
                                optim_g=torch.optim.Adam,
                                optim_g_args=_adam_args(1e-4),
                                g_loss_func=nvs_loss,
                                extra_args=extra_args,
                                log_dir=log_dir,
                                init_discriminator_weights=True)
else:
    solver = NVS_Solver(optim=torch.optim.Adam,
                        optim_args=_adam_args(1e-2),
                        loss_func=nvs_loss,
                        extra_args=extra_args,
                        tensorboard_writer=None, # let solver create a new instance
                        log_dir=log_dir)

Metric names: PSNR SSIM
Hyperparameters of this solver: {'loss_function': 'SynthesisLoss', 'optimizer': 'Adam', 'learning_rate': 0.01, 'weight_decay': 0.0, 'imageSize': '64', 'use_gt_depth': 'True', 'normalize_images': 'False', 'use_rgb_features': 'True', 'dec_activation_func': 'Sigmoid()', 'dec_dims': '[3, 3, 3, 3]', 'dec_blk_types': "['id', 'id', 'id']", 'dec_noisy_bn': 'False', 'dec_spectral_norm': 'False', 'l1_loss': '1.0_l1', 'content_loss': '0.0_content', 'model': 'NovelViewSynthesisModel', 'batch_size': '1', 'validation_percentage': '0.2', 'shuffle_dataset': 'True', 'path': '/home/lukas/Desktop/datasets/ICL-NUIM/prerendered_data/living_room_traj0_loop', 'depth_to_image_plane': 'True', 'sampleOutput': 'True', 'RTrelativeToOutput': 'True', 'inverse_depth': 'False', 'train_len': '1', 'val_len': '0'}


In [None]:
# Start training

num_epochs=100
log_nth=1

# Keyword arguments shared by both solvers; the GAN solver additionally receives `steps`.
# TODO: Add parameters to extra_args dict?
train_kwargs = {"num_epochs": num_epochs, "log_nth": log_nth}
if train_with_discriminator:
    steps = 1 # how many steps of training for discriminator/generator before switching to generator/discriminator
    train_kwargs["steps"] = steps

solver.train(model, train_loader, validation_loader, **train_kwargs)

START TRAIN on device: cuda:0


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.15388423204421997

[EPOCH 1/100] TRAIN mean acc/loss: 14.321157455444336/0.15388423204421997


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 1/100] VAL mean acc/loss: nan/nan


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.11219388246536255

[EPOCH 2/100] TRAIN mean acc/loss: 16.960540771484375/0.11219388246536255


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 2/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.0919230580329895

[EPOCH 3/100] TRAIN mean acc/loss: 18.48149871826172/0.0919230580329895


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 3/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.07018321007490158

[EPOCH 4/100] TRAIN mean acc/loss: 20.1048641204834/0.07018321007490158


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 4/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.0619969516992569

[EPOCH 5/100] TRAIN mean acc/loss: 20.751110076904297/0.0619969516992569


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 5/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.06070352718234062

[EPOCH 6/100] TRAIN mean acc/loss: 21.044212341308594/0.06070352718234062


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 6/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.058793749660253525

[EPOCH 7/100] TRAIN mean acc/loss: 21.4705810546875/0.058793749660253525


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 7/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.05649442598223686

[EPOCH 8/100] TRAIN mean acc/loss: 21.833587646484375/0.05649442598223686


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 8/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.0543195903301239

[EPOCH 9/100] TRAIN mean acc/loss: 21.96115493774414/0.0543195903301239


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 9/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.05335259065032005

[EPOCH 10/100] TRAIN mean acc/loss: 21.887348175048828/0.05335259065032005


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 10/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.052116673439741135

[EPOCH 11/100] TRAIN mean acc/loss: 21.872875213623047/0.052116673439741135


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 11/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.050771456211805344

[EPOCH 12/100] TRAIN mean acc/loss: 21.957456588745117/0.050771456211805344


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 12/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.04978936165571213

[EPOCH 13/100] TRAIN mean acc/loss: 22.04078483581543/0.04978936165571213


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 13/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.049496933817863464

[EPOCH 14/100] TRAIN mean acc/loss: 22.065847396850586/0.049496933817863464


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 14/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.04900938272476196

[EPOCH 15/100] TRAIN mean acc/loss: 22.139984130859375/0.04900938272476196


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 15/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.04786979407072067

[EPOCH 16/100] TRAIN mean acc/loss: 22.322265625/0.04786979407072067


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 16/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.047061413526535034

[EPOCH 17/100] TRAIN mean acc/loss: 22.552448272705078/0.047061413526535034


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 17/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.046453528106212616

[EPOCH 18/100] TRAIN mean acc/loss: 22.747461318969727/0.046453528106212616


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 18/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.045454107224941254

[EPOCH 19/100] TRAIN mean acc/loss: 22.89307403564453/0.045454107224941254


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 19/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.04420233517885208

[EPOCH 20/100] TRAIN mean acc/loss: 22.987407684326172/0.04420233517885208


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 20/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.043525051325559616

[EPOCH 21/100] TRAIN mean acc/loss: 23.018985748291016/0.043525051325559616


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 21/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.04301390051841736

[EPOCH 22/100] TRAIN mean acc/loss: 23.09842300415039/0.04301390051841736


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 22/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.04195946455001831

[EPOCH 23/100] TRAIN mean acc/loss: 23.388750076293945/0.04195946455001831


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 23/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.04089513421058655

[EPOCH 24/100] TRAIN mean acc/loss: 23.794649124145508/0.04089513421058655


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 24/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.039702173322439194

[EPOCH 25/100] TRAIN mean acc/loss: 24.195680618286133/0.039702173322439194


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 25/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.03836699575185776

[EPOCH 26/100] TRAIN mean acc/loss: 24.559640884399414/0.03836699575185776


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 26/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.037210460752248764

[EPOCH 27/100] TRAIN mean acc/loss: 24.92923355102539/0.037210460752248764


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 27/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.03561946004629135

[EPOCH 28/100] TRAIN mean acc/loss: 25.36792755126953/0.03561946004629135


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 28/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.034827545285224915

[EPOCH 29/100] TRAIN mean acc/loss: 25.639780044555664/0.034827545285224915


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 29/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.0342295877635479

[EPOCH 30/100] TRAIN mean acc/loss: 25.798171997070312/0.0342295877635479


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 30/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.033979691565036774

[EPOCH 31/100] TRAIN mean acc/loss: 25.8376522064209/0.033979691565036774


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 31/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.033334288746118546

[EPOCH 32/100] TRAIN mean acc/loss: 26.005699157714844/0.033334288746118546


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 32/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.032870057970285416

[EPOCH 33/100] TRAIN mean acc/loss: 26.174718856811523/0.032870057970285416


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 33/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.03236917778849602

[EPOCH 34/100] TRAIN mean acc/loss: 26.263216018676758/0.03236917778849602


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 34/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.031998470425605774

[EPOCH 35/100] TRAIN mean acc/loss: 26.313716888427734/0.031998470425605774


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 35/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.03155030310153961

[EPOCH 36/100] TRAIN mean acc/loss: 26.373302459716797/0.03155030310153961


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 36/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.03114183619618416

[EPOCH 37/100] TRAIN mean acc/loss: 26.398345947265625/0.03114183619618416


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 37/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.03080654703080654

[EPOCH 38/100] TRAIN mean acc/loss: 26.46486473083496/0.03080654703080654


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 38/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.030328743159770966

[EPOCH 39/100] TRAIN mean acc/loss: 26.606103897094727/0.030328743159770966


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 39/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.030043255537748337

[EPOCH 40/100] TRAIN mean acc/loss: 26.716510772705078/0.030043255537748337


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 40/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.029744621366262436

[EPOCH 41/100] TRAIN mean acc/loss: 26.80536651611328/0.029744621366262436


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 41/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.029488038271665573

[EPOCH 42/100] TRAIN mean acc/loss: 26.87558364868164/0.029488038271665573


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 42/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Iteration 1/1] TRAIN loss: 0.029237855225801468

[EPOCH 43/100] TRAIN mean acc/loss: 26.92365264892578/0.029237855225801468


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


[EPOCH 43/100] VAL mean acc/loss: nan/nan


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

In [None]:
# To download tensorboard runs from Colab: zips the /content/runs directory and
# hands the archive to the browser for download. Does nothing outside of Colab.

# TODO: Make sure that only new ones are copied --> for tensorboard runs on colab, do not use git repository as "runs" directory?
# TODO: Instead of downloading, directly move it to the git repository that is currently checked out and push changes?
if is_on_colab:
  from google.colab import files
  # -r: include the directory recursively
  !zip -r /content/runs.zip /content/runs
  files.download("/content/runs.zip")

# Test

Test with test dataset.
Will load the data and start the testing.

Visualizations can be seen in Tensorboard above.

In [None]:
# Load test data
# TODO: Find real test split, for now we load the SAME dataset as for train/val (just that this notebook is complete...)
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

test_path = path # CHANGE HERE TO REAL PATH TO TEST SET

# Bind ONLY `test_dataset`: the previous chained assignment also rebound `dataset`,
# silently replacing the training dataset for any cell that is (re-)run afterwards.
# The dataset options are the ones used for the train/val data so that the test
# samples are produced consistently with what the model was trained on.
test_dataset = ICLNUIMDataset(test_path,
                              transform=transform,
                              depth_to_image_plane=data_dict["depth_to_image_plane"],
                              sampleOutput=data_dict["sampleOutput"],
                              RTrelativeToOutput=data_dict["RTrelativeToOutput"],
                              inverse_depth=data_dict["inverse_depth"])

# Same batch size and worker count as the train/val loaders.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=dataset_args["batch_size"],
                                          shuffle=True,
                                          num_workers=num_workers)

print("Length of test set: {}".format(len(test_dataset)))
print("Loaded test set: {}".format(test_path))

In [None]:
# Start testing

#solver.test(model, test_loader, test_prefix="DUMMY_TEST_WITH_NO_REAL_TEST_SET", log_nth=1)

# Save the model

Save network with its weights to disk.

See torch.save function: https://pytorch.org/docs/stable/notes/serialization.html#recommend-saving-models 

Load again with `the_model = TheModelClass(*args, **kwargs)` followed by `the_model.load_state_dict(torch.load(PATH))`

In [None]:
def save_model(modelname, model, directory="../saved_models"):
    """Save the weights (state dict) of `model` to ``<directory>/<modelname>.pt``.

    Args:
        modelname: File name without extension; ``.pt`` is appended.
        model: Object providing ``state_dict()`` (e.g. a ``torch.nn.Module``).
        directory: Target directory. Defaults to ``../saved_models``; might need
            to be adjusted so that the correct saved-models directory is chosen.
            It is created if it does not exist yet.

    Returns:
        The path of the written file.
    """
    import os  # local import: keeps this cell independent of earlier, conditional imports

    # torch.save does not create missing parent directories, so make sure the target exists.
    os.makedirs(directory, exist_ok=True)
    filepath = os.path.join(directory, modelname + ".pt")
    torch.save(model.state_dict(), filepath)
    return filepath

In [None]:
# Save the trained model under a name that contains the unique suffix of this run,
# so the weights file can be matched to its log directory.
nvs_modelname = "nvs_" + id_suffix
save_model(nvs_modelname, model)

if train_with_discriminator:
    # Also save the discriminator - currently this can only be accessed through the solver (change it!)
    gan_modelname = "gan_" + id_suffix
    save_model(gan_modelname, solver.netD)

In [None]:
# LOAD MODEL AGAIN for verification purposes
# Should print: <All keys matched successfully> per each model if it works
# The paths are rebuilt from the model names assigned when the models were saved.

nvs_filepath = "../saved_models/" + nvs_modelname + ".pt"
print("NVS_Model loading: ", model.load_state_dict(torch.load(nvs_filepath)))

if train_with_discriminator:
    # The discriminator is only saved (and therefore only reloaded) for GAN training.
    gan_filepath = "../saved_models/" + gan_modelname + ".pt"
    print("Discriminator loading: ", solver.netD.load_state_dict(torch.load(gan_filepath)))