In [1]:
import os
import torch
import PIL.Image as pil
from torchvision import transforms
from Models.EncoderModel import EncoderModelConvNeXt
from Models.DecoderModel import DepthDecoderModel, PoseDecoderModel

In [5]:
# Inference device used for checkpoint loading (map_location) and for .to() calls below.
# Pinned to CPU here; to use a GPU when one is available, switch to:
#   torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"

In [6]:
# Checkpoint directory, expressed as its path components and joined for the current OS.
# The encoder.pth / decoder.pth files are read from this folder in the cells below.
checkpointParts = ("models-convnext-tuned-espcn", "weights_19")
path = os.path.join(*checkpointParts)

In [7]:
# Build the ConvNeXt encoder and restore its weights from the checkpoint folder.
# NOTE: torch.load unpickles the file, so only point this at checkpoints you trust.
enc = EncoderModelConvNeXt()
encoderDict = torch.load(os.path.join(path, "encoder.pth"), map_location=device)

# The checkpoint carries two non-weight entries, "height" and "width". They are removed
# (in that order) before load_state_dict and reused later as the image resize target.
height, width = (encoderDict.pop(key) for key in ("height", "width"))
enc.load_state_dict(encoderDict)

# Move to the target device and switch to eval mode. Assuming standard nn.Module
# behaviour, both calls return the module, so the cell still ends by displaying it.
enc.to(device).eval()

EncoderModelConvNeXt(
  (encoder): ConvNeXt(
    (features): Sequential(
      (0): ConvNormActivation(
        (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
        (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
      )
      (1): Sequential(
        (0): CNBlock(
          (block): Sequential(
            (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
            (1): Permute()
            (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
            (3): Linear(in_features=96, out_features=384, bias=True)
            (4): GELU()
            (5): Linear(in_features=384, out_features=96, bias=True)
            (6): Permute()
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        )
        (1): CNBlock(
          (block): Sequential(
            (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
            (1): Permute()
            (2): LayerNorm((96,), eps=1e-06

In [8]:
# Build the depth decoder from the encoder's numChannels attribute and restore its weights.
# NOTE: torch.load unpickles the file, so only point this at checkpoints you trust.
depthDecoder = DepthDecoderModel(enc.numChannels)
decoderDict = torch.load(os.path.join(path, "decoder.pth"), map_location=device)
depthDecoder.load_state_dict(decoderDict)

# Move to the target device and switch to eval mode. Assuming standard nn.Module
# behaviour, both calls return the module, so the cell still ends by displaying it.
depthDecoder.to(device).eval()

DepthDecoderModel(
  (decoder): ModuleList(
    (0): ConvBlock(
      (pad): ReflectionPad2d((1, 1, 1, 1))
      (conv): Conv2d(768, 256, kernel_size=(3, 3), stride=(1, 1))
      (act): ELU(alpha=1.0)
    )
    (1): ConvBlock(
      (pad): ReflectionPad2d((1, 1, 1, 1))
      (conv): Conv2d(640, 256, kernel_size=(3, 3), stride=(1, 1))
      (act): ELU(alpha=1.0)
    )
    (2): ConvBlock(
      (pad): ReflectionPad2d((1, 1, 1, 1))
      (conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1))
      (act): ELU(alpha=1.0)
    )
    (3): ConvBlock(
      (pad): ReflectionPad2d((1, 1, 1, 1))
      (conv): Conv2d(320, 128, kernel_size=(3, 3), stride=(1, 1))
      (act): ELU(alpha=1.0)
    )
    (4): ConvBlock(
      (pad): ReflectionPad2d((1, 1, 1, 1))
      (conv): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1))
      (act): ELU(alpha=1.0)
    )
    (5): ConvBlock(
      (pad): ReflectionPad2d((1, 1, 1, 1))
      (conv): Conv2d(160, 64, kernel_size=(3, 3), stride=(1, 1))
      (act): E

In [9]:
# Read the input image as RGB and record its native size so the prediction can be
# scaled back to that resolution later.
imgPath = "/scratch/mp6021/monodepth2/assets/WSP-2UP4.jpeg"
img = pil.open(imgPath).convert('RGB')
original_width, original_height = img.size  # PIL reports size as (width, height)

# Resize to the (width, height) stored in the encoder checkpoint, then convert to a
# tensor with a leading batch dimension and place it on the target device.
img = img.resize(size=(width, height), resample=pil.LANCZOS)
toTensor = transforms.ToTensor()
imgTensor = toTensor(img).unsqueeze(0).to(device)

In [10]:
import matplotlib as mpl
import matplotlib.cm as cm
import numpy as np

In [11]:
# Forward pass only; gradient tracking is disabled because this is pure inference.
with torch.no_grad():
    features = enc(imgTensor)
    outputs = depthDecoder(features)
    disp = outputs[("disp", 0)]
    # Bring the prediction back to the source image resolution.
    disp_resized = torch.nn.functional.interpolate(
        disp,
        size=(original_height, original_width),
        mode="bilinear",
        align_corners=False,
    )

# Everything below is numpy / matplotlib post-processing on a tensor that carries no
# gradient history, so it does not need to run inside the no_grad context.
disp_resized_np = disp_resized.squeeze().cpu().numpy()

# Colour scale runs from the minimum value up to the 95th percentile of the map.
vmax = np.percentile(disp_resized_np, 95)
normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(), vmax=vmax)
mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')

# to_rgba yields RGBA floats in [0, 1]; drop alpha and convert to 8-bit RGB for PIL.
rgba = mapper.to_rgba(disp_resized_np)
colormapped_im = (rgba[..., :3] * 255).astype(np.uint8)
im = pil.fromarray(colormapped_im)

In [13]:
# Save the colour-mapped prediction next to the input image: same directory and stem,
# with a suffix naming the model variant. Deriving the name from imgPath keeps the
# output in step with whichever input image was loaded, instead of repeating its
# absolute path in a second place.
outputPath = os.path.splitext(imgPath)[0] + "_ConvNext_ESPCN.jpeg"
im.save(outputPath)