In [None]:
!pip3 install torch==1.2.0+cu92 torchvision==0.4.0+cu92 -f https://download.pytorch.org/whl/torch_stable.html

Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.2.0+cu92
  Downloading https://download.pytorch.org/whl/cu92/torch-1.2.0%2Bcu92-cp37-cp37m-manylinux1_x86_64.whl (663.1 MB)
[K     |████████████████████████████████| 663.1 MB 1.6 kB/s 
[?25hCollecting torchvision==0.4.0+cu92
  Downloading https://download.pytorch.org/whl/cu92/torchvision-0.4.0%2Bcu92-cp37-cp37m-manylinux1_x86_64.whl (8.8 MB)
[K     |████████████████████████████████| 8.8 MB 46.0 MB/s 
Installing collected packages: torch, torchvision
  Attempting uninstall: torch
    Found existing installation: torch 1.10.0+cu111
    Uninstalling torch-1.10.0+cu111:
      Successfully uninstalled torch-1.10.0+cu111
  Attempting uninstall: torchvision
    Found existing installation: torchvision 0.11.1+cu111
    Uninstalling torchvision-0.11.1+cu111:
      Successfully uninstalled torchvision-0.11.1+cu111
[31mERROR: pip's dependency resolver does not currently take into account all the packages 

In [None]:
# pytorch imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init
import torchvision
import numpy as np

In [None]:
# get from this Github: https://github.com/KexianHust/Structure-Guided-Ranking-Loss/blob/master/models/DepthNet.py
from modules import nn as NN

import resnet
from networks import *

In [None]:
# the Depth Prediction module; credit to: https://github.com/KexianHust/Structure-Guided-Ranking-Loss/blob/master/models/DepthNet.py
class DepthPredictionModule(nn.Module):
  """Depth prediction network: a ResNet encoder followed by ``DepthPredictionDecoder``.

  Args:
    backbone: backbone family name. Only stored on the instance; it does not
      influence which encoder is built.
    depth: ResNet depth. The encoder is built by ``resnet.resnet<depth>``
      (``resnet.resnet50`` for the default of 50).
    pretrained: forwarded to the encoder constructor.
    in_channels: per-level channel list forwarded to the decoder.
    mid_channels: per-level channel list forwarded to the decoder.
    up_factors: per-level upsampling factors forwarded to the decoder.
    out_channels: number of output channels forwarded to the decoder.

  Raises:
    ValueError: if the ``resnet`` module exposes no constructor for ``depth``.
  """

  def __init__(self, backbone='resnet', depth=50, pretrained=True, in_channels=[256, 512, 1024, 2048],
               mid_channels=[256, 256, 256, 512], up_factors=[2, 2, 2, 2], out_channels=1):
    super(DepthPredictionModule, self).__init__()
    self.backbone = backbone
    self.depth = depth
    self.pretrained = pretrained
    # Copy the sequences so the shared default lists in the signature can never
    # be mutated through an instance.
    self.in_channels = list(in_channels)
    self.mid_channels = list(mid_channels)
    self.up_factors = list(up_factors)
    self.out_channels = out_channels

    # The original called `resnet.restnet50`, a misspelling of `resnet50`.
    # Look the constructor up by depth so the `depth` argument is honoured
    # (the default, 50, selects resnet.resnet50).
    # NOTE(review): assumes the local resnet.py names its constructors
    # resnet<depth>, as in the referenced upstream repository - confirm.
    encoder_name = 'resnet{}'.format(self.depth)
    encoder_factory = getattr(resnet, encoder_name, None)
    if encoder_factory is None:
      raise ValueError('resnet module has no constructor named {!r}'.format(encoder_name))
    self.encoder = encoder_factory(pretrained=self.pretrained)
    self.decoder = DepthPredictionDecoder(in_channels=self.in_channels, mid_channels=self.mid_channels,
                                          up_factors=self.up_factors, out_channels=self.out_channels)

  def forward(self, x):
    """Run ``x`` through the encoder and feed the encoder output to the decoder."""
    x = self.encoder(x)
    x = self.decoder(x)
    return x

In [None]:
# FFM, AO and FTB in https://github.com/KexianHust/Structure-Guided-Ranking-Loss/blob/master/models/networks.py
# networks.py: https://github.com/KexianHust/Structure-Guided-Ranking-Loss/blob/master/models/networks.py
# resnet.py: https://github.com/KexianHust/Structure-Guided-Ranking-Loss/blob/master/models/resnet.py

# DPM needs a Decoder; also from same Github: https://github.com/KexianHust/Structure-Guided-Ranking-Loss/blob/master/models/DepthNet.py
class DepthPredictionDecoder(nn.Module):
  """Decoder that turns four encoder feature maps into the network output.

  ``forward`` processes ``features[3]`` first and then merges ``features[2]``,
  ``features[1]`` and ``features[0]`` in turn through the ``FFM`` blocks, before
  the final ``AO`` block produces the output.

  Args:
    in_channels: sequence of four channel counts, one per entry of ``features``.
    mid_channels: sequence of four intermediate channel counts.
    up_factors: sequence of four upsampling factors.
    out_channels: number of output channels passed to ``AO`` (an int).
  """

  def __init__(self, in_channels, mid_channels, up_factors, out_channels):
    super(DepthPredictionDecoder, self).__init__()
    self.in_channels = in_channels
    self.mid_channels = mid_channels
    self.up_factors = up_factors
    self.out_channels = out_channels

    self.conv = FTB(inchannels=self.in_channels[3], midchannels=self.mid_channels[3])
    self.conv1 = torch.nn.Conv2d(in_channels=self.mid_channels[3], out_channels=self.mid_channels[2],
                                 kernel_size=3, padding=1, stride=1, bias=True)
    self.up_sample = torch.nn.Upsample(scale_factor=self.up_factors[3], mode='bilinear', align_corners=True)

    # Each fusion block emits mid_channels[i] channels. The original indexed
    # `self.out_channels[i]`, but out_channels is a plain int (the wrapper
    # passes 1), so that raised TypeError.
    # NOTE(review): mid_channels[i] follows the referenced upstream decoder -
    # verify against the FFM definition in networks.py.
    self.ffm0 = FFM(inchannels=self.in_channels[0], midchannels=self.mid_channels[0], outchannels=self.mid_channels[0], upfactor=self.up_factors[0])
    self.ffm1 = FFM(inchannels=self.in_channels[1], midchannels=self.mid_channels[1], outchannels=self.mid_channels[1], upfactor=self.up_factors[1])
    self.ffm2 = FFM(inchannels=self.in_channels[2], midchannels=self.mid_channels[2], outchannels=self.mid_channels[2], upfactor=self.up_factors[2])

    self.out_conv = AO(inchannels=self.in_channels[0], outchannels=self.out_channels, upfactor=2)
    self._init_params()

  def _init_params(self):
    """Initialise conv/linear weights from N(0, 0.01) with zero bias, and batch norm to weight 1, bias 0."""
    # Use the fully qualified torch.nn.init: the notebook only runs
    # `import torch.nn.init`, so a bare `init` name is not guaranteed to exist.
    for module in self.modules():
      if isinstance(module, (torch.nn.Conv2d, torch.nn.ConvTranspose2d, torch.nn.Linear)):
        torch.nn.init.normal_(module.weight, std=0.01)
        if module.bias is not None:
          torch.nn.init.constant_(module.bias, 0)
      elif isinstance(module, NN.BatchNorm2d):
        torch.nn.init.constant_(module.weight, 1)
        torch.nn.init.constant_(module.bias, 0)

  # `forward` must be a method of the class. In the original it was indented
  # inside `_init_params`, so the module had no forward pass at all.
  def forward(self, features):
    """Decode ``features`` (indexable, entries 0..3) into the output tensor."""
    x = self.conv(features[3])
    x = self.conv1(x)
    x = self.up_sample(x)
    x = self.ffm2(features[2], x)
    x = self.ffm1(features[1], x)
    x = self.ffm0(features[0], x)

    x = self.out_conv(x)
    return x

    

In [None]:
# Smoke test: build the network with its defaults and print the output size
# for a dummy batch of four 3x128x128 images.
# The guard has to compare against '__main__'; the original compared against
# 'main', which never matches, so this cell silently did nothing.
if __name__ == '__main__':
  dpm = DepthPredictionModule()
  inputs = torch.ones(4,3,128,128)
  outputs = dpm(inputs)
  print(outputs.size())

Novel loss functions proposed in the paper

In [None]:
class DepthPredictionLoss(nn.Module):
  """Combined depth loss: pair-wise normal (PWN) + ILNR + multi-scale gradient (MSG).

  Work in progress: ``surface_normal`` is not implemented yet and
  ``multi_scale_gradient_loss`` is a zero placeholder. Until the former is
  written, ``forward`` (and constructing the module with tensors) stops with
  ``NotImplementedError`` at the surface-normal step; the individual terms can
  still be called on their own.

  Args:
    pr_d: predicted depth tensor (optional).
    gt_d: ground-truth depth tensor (optional).
      When both are given the loss is evaluated immediately, as the original
      constructor did; otherwise call ``forward(pr_d, gt_d)`` later.

  Attributes set by ``forward``: ``ilnr``, ``pwn``, ``msg`` and ``total_loss``.
  The total is stored as ``total_loss`` rather than ``overall_loss`` so that it
  does not shadow the ``overall_loss`` method.
  """

  NUM_SAMPLES = 100000  # points drawn per sampling round (100K)
  TRIM_FRACTION = 0.1   # share of the nearest and of the farthest depths dropped for ILNR
  LAMBDA_A = 1          # weight of the ILNR term (constant given in the paper)
  LAMBDA_G = 0.5        # weight of the MSG term (constant given in the paper)

  def __init__(self, pr_d=None, gt_d=None):
    super(DepthPredictionLoss, self).__init__()
    self.ilnr = None
    self.pwn = None
    self.msg = None
    self.total_loss = None

    if (pr_d is None) != (gt_d is None):
      raise ValueError('pr_d and gt_d must be given together')
    if pr_d is not None:
      self.forward(pr_d, gt_d)

  def forward(self, pr_d, gt_d):
    """Evaluate every term for ``pr_d`` / ``gt_d``, store them, and return the total."""
    self.ilnr = self.image_level_normalized_regression_loss(pr_d, gt_d)

    sampled_points = self.sample_pair_points(pr_d, self.NUM_SAMPLES)
    sample_gt_d_A = self.get_gt_d_of_sample(sampled_points, pr_d, gt_d)
    surface_normal_A = self.surface_normal(sample_gt_d_A)

    sampled_points = self.sample_pair_points(pr_d, self.NUM_SAMPLES)
    sample_gt_d_B = self.get_gt_d_of_sample(sampled_points, pr_d, gt_d)
    surface_normal_B = self.surface_normal(sample_gt_d_B)

    # NOTE(review): both normals are derived from ground-truth samples and the
    # ground-truth depths are passed as gt_A / gt_B, exactly as in the original
    # draft - confirm this wiring once surface_normal exists.
    self.pwn = self.pairwise_normal_loss(surface_normal_A, surface_normal_B, sample_gt_d_A, sample_gt_d_B)
    self.msg = self.multi_scale_gradient_loss().to(pr_d.device)
    self.total_loss = self.overall_loss()
    return self.total_loss

  # do the ILNR given the predicted depth pr_d and the ground truth depth gt_d
  def image_level_normalized_regression_loss(self, pr_d, gt_d):
    """Return the ILNR loss between ``pr_d`` and the normalised ``gt_d``.

    The ground truth is normalised with the mean and standard deviation of its
    trimmed values: the lowest and highest ``TRIM_FRACTION`` of the sorted
    depths are dropped first so outliers do not skew the statistics. The
    statistics are taken over the whole tensor that is passed in.

    Raises:
      ValueError: if ``gt_d`` has fewer than two elements (no standard deviation).
    """
    gt_sorted, _ = torch.sort(gt_d.reshape(-1))
    count = gt_sorted.numel()
    if count < 2:
      raise ValueError('gt_d needs at least two elements to be normalised')

    # Trim by rank. The original filtered with `0.1 < gt_d < 0.9`, which
    # thresholds raw values and is not even a valid tensor expression.
    cut = int(count * self.TRIM_FRACTION)
    gt_d_trim = gt_sorted[cut:count - cut]

    # Guard against a constant depth map (zero standard deviation).
    sigma = torch.clamp(torch.std(gt_d_trim), min=1e-8)
    d_bar = (gt_d - torch.mean(gt_d_trim)) / sigma

    # proposed ILNR formula
    ilnr = torch.mean(torch.abs(pr_d - d_bar) + torch.abs(torch.tanh(pr_d/100) - torch.tanh(d_bar/100)))
    return ilnr

  # sample that amount of points
  def sample_pair_points(self, pr_d, num):
    """Return up to ``num`` values of ``pr_d`` drawn at random without replacement.

    ``num`` is capped at the number of elements in ``pr_d``; the result is a
    1-D tensor.
    """
    pr_flat = pr_d.reshape(-1)
    take = max(0, min(int(num), pr_flat.numel()))
    chosen = torch.randperm(pr_flat.numel(), device=pr_flat.device)[:take]
    return pr_flat[chosen]

  # get ground truth of the sample
  # smp is an iterable of sampled values, pr_d is a tensor, gt_d is a tensor
  def get_gt_d_of_sample(self, smp, pr_d, gt_d):
    """Return the ``gt_d`` values located where each sampled value occurs in ``pr_d``.

    Both tensors are flattened. When a value occurs more than once in ``pr_d``
    its first position is used (the original ``.item()`` call failed in that
    case).

    Raises:
      ValueError: if the tensors differ in size or a sample is not in ``pr_d``.
    """
    pr_flat = pr_d.reshape(-1)
    gt_flat = gt_d.reshape(-1)
    if pr_flat.numel() != gt_flat.numel():
      raise ValueError('pr_d and gt_d must have the same number of elements')

    # get the pos list of the samples from the pr_d
    pos = []
    for x in smp:
      hits = (pr_flat == x).nonzero()
      if hits.numel() == 0:
        raise ValueError('sampled value is not present in pr_d')
      pos.append(int(hits[0]))

    index = torch.tensor(pos, dtype=torch.long, device=gt_flat.device)
    return gt_flat[index]

  # follows Xian et al's Structure-guided rank loss, which can improve edge sharpness
  # the sampling method is followed but enforced on surface normal space
  # this improves global and local geometric relations
  def surface_normal(self, sample_gt_d):
    """Placeholder for the surface-normal computation (not implemented).

    To-do from the original draft: requires a 3D point cloud and a
    least-squares fit.
    """
    raise NotImplementedError('surface_normal is not implemented yet')

  # do the PWNL given the normals of A and B
  def pairwise_normal_loss(self, n_A, n_B, gt_A, gt_B):
    """Return the mean absolute difference between ``n_A*n_B`` and ``gt_A*gt_B``.

    NOTE(review): the products are element-wise; if the inputs are normal
    vectors a dot product over the component axis may be intended - confirm
    against the paper.
    """
    # proposed PWN formula
    pwn = torch.mean(torch.abs(n_A*n_B - gt_A*gt_B))
    return pwn

  def multi_scale_gradient_loss(self):
    """Placeholder MSG term: always returns a zero tensor of shape ``(1,)``."""
    # not sure; placeholder for now
    # does this also need the 3D point cloud?
    msg = torch.zeros(1)

    return msg

  def overall_loss(self):
    """Return ``pwn + LAMBDA_A * ilnr + LAMBDA_G * msg`` from the stored terms.

    Raises:
      RuntimeError: if the terms have not been computed yet.
    """
    if self.pwn is None or self.ilnr is None or self.msg is None:
      raise RuntimeError('loss terms are not computed yet; call forward(pr_d, gt_d) first')
    lambda_a, lambda_g = self.LAMBDA_A, self.LAMBDA_G  # given constants in the paper
    return self.pwn + lambda_a*self.ilnr + lambda_g*self.msg