Code borrowed from: https://github.com/ialhashim/DenseDepth

In [1]:
import sys
import os

# Colab-specific helper used to attach Google Drive to this runtime.
from google.colab import drive
# Mount Google Drive under /content/drive/ (the cell output confirms the mount point).
drive.mount('/content/drive/')

# Append the DenseDepth folder on Drive to the module search path so that
# Python files stored there can be imported from later cells.
py_file_location = "/content/drive/My Drive/DenseDepth"
sys.path.append(py_file_location)

import torch
from torchvision import transforms, utils
import collections

try:
    import accimage
except ImportError:
    accimage = None

Mounted at /content/drive/


In [2]:
cd '/content/drive/My Drive/DenseDepth/'

/content/drive/My Drive/DenseDepth


In [3]:
# !wget https://s3-eu-west-1.amazonaws.com/densedepth/nyu.h5 -O /content/drive/


In [4]:
import torch
import torch.nn.parallel

# import senet
# import modules
# import resnet
# import net
# import densenet

import matplotlib.image
import matplotlib.pyplot as plt
plt.set_cmap("jet")
from PIL import Image

from collections import OrderedDict
import math
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils import model_zoo
import copy
import numpy as np
from keras.models import load_model


<Figure size 432x288 with 0 Axes>

In [5]:
from keras.engine.topology import Layer, InputSpec
import keras.utils.conv_utils as conv_utils
import tensorflow as tf
import keras.backend as K

def normalize_data_format(value):
    """Validate a Keras data-format string and return it lower-cased.

    Args:
        value: ``'channels_first'`` or ``'channels_last'`` (any letter case),
            or ``None`` to use ``K.image_data_format()``.

    Returns:
        The lower-cased data-format string.

    Raises:
        ValueError: If the lower-cased value is not one of the two
            accepted formats.
    """
    chosen = K.image_data_format() if value is None else value
    lowered = chosen.lower()
    if lowered in {'channels_first', 'channels_last'}:
        return lowered
    raise ValueError('The `data_format` argument must be one of '
                     '"channels_first", "channels_last". Received: ' +
                     str(chosen))
    

class BilinearUpSampling2D(Layer):
    """Keras layer that enlarges a 4-D feature map with bilinear interpolation.

    Args:
        size: Integer or pair of integers giving the (height, width) scale
            factors; normalised with ``conv_utils.normalize_tuple``.
        data_format: ``'channels_first'``, ``'channels_last'`` or ``None``
            (``None`` falls back to ``K.image_data_format()``).
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, size=(2, 2), data_format=None, **kwargs):
        super(BilinearUpSampling2D, self).__init__(**kwargs)
        self.data_format = normalize_data_format(data_format)
        self.size = conv_utils.normalize_tuple(size, 2, 'size')
        self.input_spec = InputSpec(ndim=4)

    def compute_output_shape(self, input_shape):
        """Return the static output shape; unknown spatial sizes stay ``None``."""
        def scaled(dim, factor):
            return factor * dim if dim is not None else None

        if self.data_format == 'channels_first':
            return (input_shape[0],
                    input_shape[1],
                    scaled(input_shape[2], self.size[0]),
                    scaled(input_shape[3], self.size[1]))
        elif self.data_format == 'channels_last':
            return (input_shape[0],
                    scaled(input_shape[1], self.size[0]),
                    scaled(input_shape[2], self.size[1]),
                    input_shape[3])

    def call(self, inputs):
        """Resize ``inputs`` by ``self.size`` using bilinear interpolation."""
        # K.shape yields a tensor, so its elements are never None and can be
        # multiplied directly.
        input_shape = K.shape(inputs)
        if self.data_format == 'channels_first':
            height = self.size[0] * input_shape[2]
            width = self.size[1] * input_shape[3]
            # tf.image.resize resizes axes 1 and 2 (batch, height, width,
            # channels), so a channels-first tensor has to be moved to
            # channels-last for the resize and moved back afterwards;
            # otherwise the channel axis would be resized instead of width.
            channels_last = tf.transpose(inputs, [0, 2, 3, 1])
            resized = tf.image.resize(channels_last, [height, width],
                                      method=tf.image.ResizeMethod.BILINEAR)
            return tf.transpose(resized, [0, 3, 1, 2])

        height = self.size[0] * input_shape[1]
        width = self.size[1] * input_shape[2]
        return tf.image.resize(inputs, [height, width], method=tf.image.ResizeMethod.BILINEAR)

    def get_config(self):
        """Return the layer configuration, including ``size`` and ``data_format``."""
        config = {'size': self.size, 'data_format': self.data_format}
        base_config = super(BilinearUpSampling2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [6]:
import torch
import torch.nn as nn
from torchvision import models
import torch.nn.functional as F


class UpSample(nn.Sequential):
    """Decoder stage: upsample, concatenate a skip tensor, then two 3x3 convs.

    Args:
        skip_input: Number of channels of the concatenated tensor
            (upsampled input plus skip connection).
        output_features: Number of channels produced by both convolutions.
    """

    def __init__(self, skip_input, output_features):
        super(UpSample, self).__init__()
        self.convA = nn.Conv2d(skip_input, output_features, kernel_size=3, stride=1, padding=1)
        self.leakyreluA = nn.LeakyReLU(0.2)
        self.convB = nn.Conv2d(output_features, output_features, kernel_size=3, stride=1, padding=1)
        self.leakyreluB = nn.LeakyReLU(0.2)

    def forward(self, x, concat_with):
        """Upsample ``x`` to the spatial size of ``concat_with`` and fuse both.

        Args:
            x: Tensor to be upsampled (N, C, H, W).
            concat_with: Skip tensor whose height/width define the target size.

        Returns:
            Tensor with ``output_features`` channels at the skip resolution.
        """
        target_size = [concat_with.size(2), concat_with.size(3)]
        up_x = F.interpolate(x, size=target_size, mode='bilinear', align_corners=True)
        fused = torch.cat([up_x, concat_with], dim=1)
        # leakyreluA is created in __init__ but was never applied; without it
        # the two convolutions run back to back with no non-linearity.
        hidden = self.leakyreluA(self.convA(fused))
        return self.leakyreluB(self.convB(hidden))

class Decoder(nn.Module):
    """Decoder that fuses encoder feature maps through four ``UpSample`` stages.

    Args:
        num_features: Channel count of the deepest encoder feature map.
        decoder_width: Multiplier applied to ``num_features`` to obtain the
            decoder's base width.
    """

    def __init__(self, num_features=1664, decoder_width = 1.0):
        super(Decoder, self).__init__()
        width = int(num_features * decoder_width)

        # 1x1 convolution mapping the encoder output to the decoder width.
        self.conv2 = nn.Conv2d(num_features, width, kernel_size=1, stride=1, padding=0)

        # Each stage halves the channel count; the added constants are the
        # channel counts of the concatenated skip tensors
        # (presumably DenseNet-169 feature widths - TODO confirm).
        self.up1 = UpSample(skip_input=width + 256, output_features=width // 2)
        self.up2 = UpSample(skip_input=width // 2 + 128, output_features=width // 4)
        self.up3 = UpSample(skip_input=width // 4 + 64, output_features=width // 8)
        self.up4 = UpSample(skip_input=width // 8 + 64, output_features=width // 16)

        # Final 3x3 convolution producing a single-channel map.
        self.conv3 = nn.Conv2d(width // 16, 1, kernel_size=3, stride=1, padding=1)

    def forward(self, features):
        """Decode the list of encoder features into a one-channel map.

        Args:
            features: Indexable collection of encoder outputs; entries 3, 4,
                6, 8 and 12 are used.

        Returns:
            Output of the final convolution.
        """
        skip0, skip1, skip2, skip3, deepest = [features[i] for i in (3, 4, 6, 8, 12)]
        x = self.conv2(F.relu(deepest))

        stages = (
            (self.up1, skip3),
            (self.up2, skip2),
            (self.up3, skip1),
            (self.up4, skip0),
        )
        for stage, skip in stages:
            x = stage(x, skip)
        return self.conv3(x)

class Encoder(nn.Module):
    """Feature extractor built on torchvision's ``densenet169`` (not pretrained)."""

    def __init__(self):
        super(Encoder, self).__init__()
        self.original_model = models.densenet169( pretrained=False )

    def forward(self, x):
        """Run ``x`` through every module of ``original_model.features``.

        Returns:
            A list whose first entry is ``x`` itself, followed by the output
            of each successive module, each fed with the previous entry.
        """
        outputs = [x]
        for layer in self.original_model.features._modules.values():
            outputs.append(layer(outputs[-1]))
        return outputs

class PTModel(nn.Module):
    """Full network: an ``Encoder`` followed by a ``Decoder``."""

    def __init__(self):
        super(PTModel, self).__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()

    def forward(self, x):
        """Encode ``x`` and return the decoder's output for those features."""
        encoded = self.encoder(x)
        return self.decoder(encoded)



In [7]:


def _is_pil_image(img):
    """Return True if ``img`` is a PIL image (or an accimage image when available)."""
    if accimage is not None:
        return isinstance(img, (Image.Image, accimage.Image))
    return isinstance(img, Image.Image)


def _is_numpy_image(img):
    """Return True if ``img`` is a 2-D or 3-D ``numpy.ndarray``."""
    return isinstance(img, np.ndarray) and img.ndim in (2, 3)


class ToTensor(object):
    """Convert a ``PIL.Image`` or ``numpy.ndarray`` to tensor.
    Converts a PIL.Image or numpy.ndarray (H x W x C) in the range
    [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0].

    Args:
        is_test: Selects the depth scaling applied in ``__call__``
            (divide by 1000 when True, multiply by 10 otherwise).
    """
    def __init__(self,is_test=False):
        self.is_test = is_test

    def __call__(self, sample):
        """Convert both entries of a sample dict to tensors.

        Args:
            sample (dict): Mapping with ``'image'`` and ``'depth'`` entries,
                each a PIL.Image or numpy.ndarray.
        Returns:
            dict: ``{'image': Tensor, 'depth': Tensor}``.
        """
        image, depth = sample['image'], sample['depth']
        # ground truth depth of training samples is stored in 8-bit while test samples are saved in 16 bit
        image = self.to_tensor(image)
        if self.is_test:
            depth = self.to_tensor(depth).float()/1000
        else:
            depth = self.to_tensor(depth).float()*10
        return {'image': image, 'depth': depth}

    def to_tensor(self, pic):
        """Convert a single picture to a (C x H x W) tensor.

        Args:
            pic (PIL.Image or numpy.ndarray): Picture to convert. Arrays may
                be H x W x C or H x W (treated as a single channel).
        Returns:
            Tensor: Float tensor divided by 255 for ndarray and byte-backed
            PIL inputs; integer tensor for PIL modes ``'I'`` and ``'I;16'``.
        Raises:
            TypeError: If ``pic`` is neither a PIL image nor a 2-D/3-D ndarray.
        """
        # The ndarray check runs first so array inputs never touch PIL/accimage.
        if not (_is_numpy_image(pic) or _is_pil_image(pic)):
            raise TypeError(
                'pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

        if isinstance(pic, np.ndarray):
            if pic.ndim == 2:
                # Single-channel input (e.g. a depth map): add the channel axis
                # so the HWC -> CHW transpose below is valid.
                pic = pic[:, :, None]
            img = torch.from_numpy(pic.transpose((2, 0, 1)))

            return img.float().div(255)

        if accimage is not None and isinstance(pic, accimage.Image):
            nppic = np.zeros(
                [pic.channels, pic.height, pic.width], dtype=np.float32)
            pic.copyto(nppic)
            return torch.from_numpy(nppic)

        # handle PIL Image
        # np.array copies by default; the previous ``copy=False`` raises on
        # NumPy 2 whenever a copy is unavoidable (e.g. a dtype conversion).
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)
        # put it from HWC to CHW format
        # yikes, this transpose takes 80% of the loading time/CPU
        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float().div(255)
        else:
            return img

class Normalize(object):
    """Normalize the ``'image'`` entry of a sample dict channel by channel.

    Args:
        mean (sequence): Per-channel means.
        std (sequence): Per-channel standard deviations.
    """

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, sample):
        """
        Args:
            sample (dict): Mapping with ``'image'`` (Tensor of size (C, H, W))
                and ``'depth'`` entries.
        Returns:
            dict: The normalized image together with the untouched depth.
        """
        image = sample['image']
        depth = sample['depth']
        normalized = self.normalize(image, self.mean, self.std)
        return {'image': normalized, 'depth': depth}

    def normalize(self, tensor, mean, std):
        """Normalize a tensor image with mean and standard deviation.

        The tensor is modified in place and also returned.

        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
            mean (sequence): Sequence of means for R, G, B channels respectively.
            std (sequence): Sequence of standard deviations for R, G, B channels
                respectively.
        Returns:
            Tensor: Normalized image.
        """
        # TODO: make efficient
        for channel, channel_mean, channel_std in zip(tensor, mean, std):
            channel.sub_(channel_mean)
            channel.div_(channel_std)
        return tensor

In [8]:
def DepthNorm(x, maxDepth):
    """Return ``maxDepth`` divided by ``x`` (element-wise for array inputs)."""
    inverted = maxDepth / x
    return inverted

def predict(model, images, minDepth=10, maxDepth=1000, batch_size=2):
    """Run ``model.predict`` on one or more images and rescale the result.

    Args:
        model: Object exposing ``predict(images, batch_size=...)``.
        images: Array holding a grayscale image (H, W), a single image
            (H, W, C) or a batch (N, H, W, C).
        minDepth: Lower clipping bound.
        maxDepth: Upper clipping bound, also used as the scaling constant.
        batch_size: Batch size forwarded to ``model.predict``.

    Returns:
        ``DepthNorm`` of the raw predictions, clipped to
        ``[minDepth, maxDepth]`` and divided by ``maxDepth``.
    """
    # Grayscale input: replicate the single plane into three channels.
    if len(images.shape) < 3:
        plane = images
        images = np.stack((plane, plane, plane), axis=2)
    # Single image: prepend a batch axis of length one.
    if len(images.shape) < 4:
        rows, cols, channels = images.shape[0], images.shape[1], images.shape[2]
        images = images.reshape((1, rows, cols, channels))

    raw = model.predict(images, batch_size=batch_size)
    inverted = DepthNorm(raw, maxDepth=maxDepth)
    clipped = np.clip(inverted, minDepth, maxDepth)
    return clipped / maxDepth

# def load_model():
#     # Kerasa / TensorFlow
#     os.environ['TF_CPP_MIN_LOG_LEVEL'] = '5'
#     from keras.models import load_model

#     # Custom object needed for inference and training
#     custom_objects = {'BilinearUpSampling2D': BilinearUpSampling2D, 'depth_loss_function': None}

#     # Load model into GPU / CPU
#     return load_model('nyu.h5', custom_objects=custom_objects, compile=False)


def load_test_data(test_data_zip_file='nyu_test.zip'):
    """Load the test RGB images and depth maps from a zip archive.

    Args:
        test_data_zip_file: Path of the archive handed to
            ``data.extract_zip``; it must contain ``eigen_test_rgb.npy``
            and ``eigen_test_depth.npy``.

    Returns:
        dict: ``{'rgb': ndarray, 'depth': ndarray}`` with the two arrays
        decoded from the archive. (Previously the arrays were loaded and
        then discarded because nothing was returned.)
    """
    print('Loading test data...', end='')
    import numpy as np
    from data import extract_zip
    data = extract_zip(test_data_zip_file)
    from io import BytesIO
    rgb = np.load(BytesIO(data['eigen_test_rgb.npy']))
    depth = np.load(BytesIO(data['eigen_test_depth.npy']))
    # crop = np.load(BytesIO(data['eigen_test_crop.npy']))
    # Terminate the progress line opened by the first print.
    print('Test data loaded.\n')
    return {'rgb': rgb, 'depth': depth}
    # crop = np.load(BytesIO(data['eigen_test_crop.npy']))

def test(model, image, out_path='/content/drive/My Drive/data/test.png'):
    """Run ``model`` on ``image``, print the output shape and save it as a PNG.

    Args:
        model: Callable applied to ``image``.
        image: Input passed to ``model`` unchanged; intended for a single
            image so the squeezed output is a 2-D map.
        out_path: Destination file; defaults to the path that was
            previously hard-coded.
    """
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        out = model(image)
    # predict_test = cmap(predict(model, image, minDepth=minDepth, maxDepth=maxDepth)[0,:,:,0])[:,:,:3]
    print(out.shape)
    # imsave needs a host array, not a torch tensor, and cannot handle the
    # leading batch/channel axes of length one.
    if isinstance(out, torch.Tensor):
        out = out.detach().cpu().numpy()
    matplotlib.image.imsave(out_path, np.squeeze(np.asarray(out)))



def my_predict(model, images, minDepth=10, maxDepth=1000):
    """Run a PyTorch model without gradients and rescale its output.

    Args:
        model: Callable PyTorch model.
        images: Tensor passed to the model after conversion with ``.float()``.
        minDepth: Lower clipping bound.
        maxDepth: Upper clipping bound, also used as the scaling constant.

    Returns:
        numpy.ndarray: ``DepthNorm`` of the predictions, clipped to
        ``[minDepth, maxDepth]`` and divided by ``maxDepth``.
    """
    with torch.no_grad():
        # Compute predictions
        predictions = model(images.float())

    # ``Tensor.numpy()`` only works for CPU tensors that do not require grad,
    # so detach and move to host memory first.
    predictions = predictions.detach().cpu().numpy()

    # Put in expected range
    return np.clip(DepthNorm(predictions, maxDepth=maxDepth), minDepth, maxDepth) / maxDepth


In [9]:
import os 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '5'
from keras.models import load_model
from matplotlib import pyplot as plt

import torch
import torch.nn as nn
from torchvision import models
import torch.nn.functional as F

def load_model_pytorch():
  """Build a ``PTModel`` and fill it with the weights stored in ``nyu.h5``.

  The Keras model is loaded from ``nyu.h5`` (relative to the current working
  directory), its weight list is read with ``get_weights()``, and the entries
  are copied one by one into a state dict keyed by the PyTorch parameter
  names. The mapping is purely positional: a running index ``j`` walks the
  Keras weight list while the PyTorch parameters are iterated in
  ``named_parameters()`` order.
  NOTE(review): this assumes both frameworks enumerate layers in the same
  order - confirm when changing either model definition.

  Returns:
      PTModel: The PyTorch model with the copied weights, switched to
      evaluation mode.
  """
  print('Loading model...')

  # Custom objects needed to deserialize the saved Keras model.
  custom_objects = {'BilinearUpSampling2D': BilinearUpSampling2D, 'depth_loss_function': None}

  # Load model into GPU / CPU
  model = load_model('nyu.h5', custom_objects=custom_objects, compile=False)
  names = [weight.name for layer in model.layers for weight in layer.weights]
  weights = model.get_weights()

  # Copy of the Keras weight names; only referenced by the commented-out
  # debug print further down.
  keras_name = []
  for name, weight in zip(names, weights):
    keras_name.append(name)

  pytorch_model = PTModel().float()

  # load parameter from keras
  keras_state_dict = {} 
  j = 0  # position of the next unused entry in the Keras weight list
  for name, param in pytorch_model.named_parameters():
  
    # Classifier parameters keep their current PyTorch values and do not
    # consume a Keras weight.
    if 'classifier' in name:
      keras_state_dict[name]=param
      continue

    # Convolution kernels: axes are reordered with (3, 2, 0, 1)
    # (presumably Keras H, W, in, out -> PyTorch out, in, H, W - TODO confirm).
    if 'conv' in name and 'weight' in name:
      keras_state_dict[name]=torch.from_numpy(np.transpose(weights[j],(3, 2, 0, 1)))
      # print(name,keras_name[j])
      j = j+1
      continue
  
    # Convolution biases are copied unchanged.
    if 'conv' in name and 'bias' in name:
      keras_state_dict[name]=torch.from_numpy(weights[j])
      # print(param.shape,weights[j].size)
      j = j+1
      continue

    # Normalization scale is copied unchanged.
    if 'norm' in name and 'weight' in name:
      keras_state_dict[name]=torch.from_numpy(weights[j])
      # print(param.shape,weights[j].shape)
      j = j+1
      continue

    # Normalization bias: the next three Keras entries are stored as the
    # bias, the running mean and the running variance of the same layer.
    if 'norm' in name and 'bias' in name:
      keras_state_dict[name]=torch.from_numpy(weights[j])
      # print(param.shape,weights[j].size)
      j = j+1
      keras_state_dict[name.replace("bias", "running_mean")]=torch.from_numpy(weights[j])
      # print(param.shape,weights[j].size)
      j = j+1
      keras_state_dict[name.replace("bias", "running_var")]=torch.from_numpy(weights[j])
      # print(param.shape,weights[j].size)
      j = j+1
      continue


  pytorch_model.load_state_dict(keras_state_dict)
  # Evaluation mode for inference.
  pytorch_model.eval()
  return pytorch_model

In [10]:
# Per-channel mean / std used to normalize input images
# (the variable name indicates ImageNet statistics).
__imagenet_stats = dict(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])

# Preprocessing pipeline: tensor conversion in test mode, then channel-wise
# normalization with the statistics above.
trans = transforms.Compose([
    ToTensor(is_test=True),
    Normalize(__imagenet_stats['mean'], __imagenet_stats['std']),
])

In [11]:
pytorch_model = load_model_pytorch()
cmap = plt.get_cmap('jet')
minDepth, maxDepth = 10, 1000

# Read the test picture as an H x W x 3 float array in [0, 1].
# The context manager closes the file handle, and convert('RGB') guarantees
# three channels even for RGBA or grayscale PNGs.
with Image.open(os.path.join("/content/drive/My Drive/data/", 'test_img.png')) as test_img:
    inputs = np.clip(np.asarray(test_img.convert('RGB')) / 255, 0, 1)

# HWC -> CHW plus a leading batch axis, as expected by the PyTorch model.
pytorch_input = torch.from_numpy(inputs).permute(2,0,1).unsqueeze(0)
print(pytorch_input.shape)

# First (and only) element of the batch.
out = my_predict(pytorch_model, pytorch_input, minDepth=minDepth, maxDepth=maxDepth)[0]
print(out.shape)

# Save the single-channel map as an image.
matplotlib.image.imsave('/content/drive/My Drive/data/synthetic1.png', out[0,:,:])


Loading model...
torch.Size([1, 3, 820, 1078])
(1, 410, 539)
