# Modifying MiDaS to include sparse depth information

In [None]:
pip install timm

### Example Usage

Download an example image from the PyTorch Hub GitHub repository

In [None]:
import cv2
import torch
import urllib.request

import matplotlib.pyplot as plt

# Remote location of the sample picture and the local file name it is saved under.
url = "https://github.com/pytorch/hub/raw/master/images/dog.jpg"
filename = "dog.jpg"
urllib.request.urlretrieve(url, filename)

Load a model (see [https://github.com/intel-isl/MiDaS/#Accuracy](https://github.com/intel-isl/MiDaS/#Accuracy) for an overview)

In [None]:
# Available MiDaS variants (assign one of these names to model_type):
#   "DPT_Large"    MiDaS v3 - Large     (highest accuracy, slowest inference speed)
#   "DPT_Hybrid"   MiDaS v3 - Hybrid    (medium accuracy, medium inference speed)
#   "MiDaS_small"  MiDaS v2.1 - Small   (lowest accuracy, highest inference speed)
model_type = "DPT_Large"

# Fetch the selected network from the intel-isl/MiDaS repository through torch.hub.
midas = torch.hub.load("intel-isl/MiDaS", model_type)

Move model to GPU if available

In [None]:
# Prefer CUDA when PyTorch reports it as available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
midas.to(device)
midas.eval()
# Empty tuple as the final expression: the cell displays `()` rather than the
# value of the previous call.
()

In [None]:
# Print the name of every top-level submodule registered on the MiDaS network.
for name in midas._modules.keys():
    print(name)

In [None]:
import os
import time
import csv
import numpy as np

import torch
import torch.backends.cudnn as cudnn
import torch.optim
cudnn.benchmark = True

from models import ResNet
from metrics import AverageMeter, Result
from dataloaders.dense_to_sparse import UniformSampling, SimulatedStereo,ORBSampling
import criteria
import utils
from dataloaders.nyu_dataloader import NYUDataset

In [None]:
# Locations of the NYU Depth v2 training and validation splits.
train_dir = os.path.join('data', 'nyudepthv2', 'train')
test_dir = os.path.join('data', 'nyudepthv2', 'val')

# Uniform sparsifier shared by both datasets.
sparsifier = UniformSampling(100, np.inf)

train_dataset = NYUDataset(
    train_dir,
    type='train',
    sparsifier=sparsifier,
    modality='rgbd',
)
val_dataset = NYUDataset(
    test_dir,
    type='val',
    sparsifier=sparsifier,
    modality='rgbd',
)

# Training batches of 10 shuffled samples; each worker seeds NumPy with its own id.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=10,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
    sampler=None,
    worker_init_fn=lambda work_id: np.random.seed(work_id),
)

# Validation runs one sample at a time, in dataset order.
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=10,
    pin_memory=True,
)

Load the transforms that resize and normalize the image for the large or the small model

In [None]:
# Number of batches the training loader yields per epoch.
len(train_loader)

In [None]:
# Pull only the first batch from the training loader and report both tensor shapes;
# `input` and `target` stay bound afterwards for the following cells.
for i, (input, target) in enumerate(train_loader):
    print(input.shape, target.shape, sep="\n")
    break

In [None]:
# First sample of the batch, channels 0-2, with the leading axis moved to the end
# so matplotlib can draw it.
d = input[0][:3].permute(1, 2, 0)
plt.imshow(d)
#print(torch.nonzero(d).shape)
# Values stored at the top-left position.
print(d[0][0])

In [None]:
# The MiDaS hub entry "transforms" bundles the preprocessing pipelines.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

# Both DPT variants share dpt_transform; any other model type uses small_transform.
transform = (
    midas_transforms.dpt_transform
    if model_type in ("DPT_Large", "DPT_Hybrid")
    else midas_transforms.small_transform
)

Load image and apply transforms

In [None]:
# Shape of the first sample's channels 0-2 in channels-last layout, scaled by 255.
print(np.shape(255 * input[:1, :3].permute(0, 2, 3, 1)))
# Feed the same sample (as a NumPy array scaled by 255) through the MiDaS transform.
input_batch = transform(255 * np.array(input[0, :3].permute(1, 2, 0)))


In [None]:
# Shape of the tensor produced by the MiDaS transform.
input_batch.shape

Predict and resize to original resolution

In [None]:
# Run MiDaS without gradient tracking, then resample the result to the spatial
# size of the preview image `d`.
with torch.no_grad():
    # Send the input to the device chosen for the model instead of assuming CUDA,
    # so the cell also works when the notebook fell back to the CPU.
    prediction = midas(input_batch.to(device))

    prediction = torch.nn.functional.interpolate(
        prediction.unsqueeze(1),  # interpolate needs an explicit channel axis
        size=d.shape[:2],
        mode="bicubic",
        align_corners=False,
    ).squeeze()

# NumPy copy on the host for plotting.
output = prediction.cpu().numpy()

Show result

In [None]:
# Draw the predicted map; squeeze() drops any size-1 axes before plotting.
plt.imshow(output.squeeze())
# plt.show()

Let's try to modify the model so that it also takes partial (sparse) depth information as input

In [None]:
# Freeze the pretrained MiDaS weights: no parameter of the backbone receives gradients.
for weight in midas.parameters():
    weight.requires_grad_(False)

In [None]:
import torch.nn as nn
class MyModel(nn.Module):
    """Backbone depth prediction fused with a second depth map by one convolution.

    Args:
        backbone: module whose output for ``x`` gains a channel axis via
            ``unsqueeze(1)`` before fusion. When omitted, the notebook-level
            ``midas`` network is used, as before.
    """

    def __init__(self, backbone=None):
        super(MyModel, self).__init__()
        # Fall back to the notebook's global network to stay backward compatible.
        self.md = midas if backbone is None else backbone
        # 21x21 convolution over the 2-channel stack (prediction, extra depth map);
        # reflect padding of 10 keeps the spatial size unchanged.
        self.fcomb = nn.Conv2d(2,1,(21,21),padding=(10,10),padding_mode='reflect')
        # Kept as an attribute for compatibility; it is not applied in forward().
        self.ReLU=nn.ReLU()

    def forward(self, x, y):
        """Return the fused map for image batch ``x`` and depth map ``y``.

        ``y`` is concatenated with the backbone output along dim 1, so it must
        already carry a channel axis.
        """
        x1 = self.md(x)
        x2 = self.fcomb(torch.cat((x1.unsqueeze(1),y),1))
        # The original computed ReLU(x2) here and discarded it; the raw convolution
        # output is what has always been returned.
        # NOTE(review): confirm whether the rectified output was intended.
        return x2


In [None]:
import random
# Build a synthetic sparse map `y` by copying randomly chosen values of the MiDaS
# prediction into an all-zero tensor of the same shape.
x = input_batch
with torch.no_grad():
    # Use the device the model lives on rather than assuming CUDA.
    res = midas(input_batch.to(device))
y = torch.zeros_like(res)
lx, ly = res.shape[1], res.shape[2]
# 180 draws with replacement, so at most 180 distinct positions are filled.
for _ in range(180):
    i, j = random.randint(0, lx - 1), random.randint(0, ly - 1)
    y[0, i, j] = res[0, i, j]
# Add the channel axis expected by the fusion model.
y = y.unsqueeze(1)

In [None]:
# Shapes of the image batch and of the synthetic sparse map built above.
print(x.shape,y.shape)

In [None]:
# Build the fusion model and place it on the selected device.
model = MyModel().to(device)
model.eval()
#res=model(x.cuda(),y.cuda())
# Empty tuple as the final expression so the cell displays `()`.
()

In [None]:
from torchvision import transforms
def train(model, dataloader, loss, optimizer, n_epochs=1, log_every=1):
    """Train ``model`` on ``dataloader`` and return the per-batch losses.

    Relies on the notebook-level ``transform`` (MiDaS preprocessing) and on
    ``transforms`` from torchvision.

    Args:
        model: network called as ``model(images, depth_hint)``.
        dataloader: iterable yielding ``(input, ref)`` batches.
        loss: criterion called as ``loss(prediction, ref)``.
        optimizer: optimizer stepped once per batch.
        n_epochs: number of passes over ``dataloader``.
        log_every: print a running average every this many batches
            (``0``/``None`` disables printing).

    Returns:
        List of detached CPU loss tensors, one per batch of the last epoch
        (empty when ``n_epochs`` is 0).
    """
    model.train(True)
    # Follow the model's device instead of assuming CUDA.
    run_device = next(model.parameters()).device
    resize = transforms.Resize((288, 384), antialias=True)
    track_loss = []
    for epoch in range(n_epochs):
        track_loss = []
        for batch, ref in dataloader:
            x2 = torch.zeros((batch.shape[0], 3, 288, 384))
            # Transform each sample of the batch; the original filled every slot
            # from sample 0 and bounded the loop by an unrelated global `x`.
            for k in range(x2.shape[0]):
                x2[k] = transform(np.array(batch[k, :3].permute(1, 2, 0)) * 255)
            # NOTE(review): channel 2 lies inside the `:3` slice treated as the image
            # above; if the sparse depth is stored in a later channel this slice
            # should point there instead — confirm against the dataset.
            y = resize(batch[:, 2:3])
            ref = resize(ref)
            res = model(x2.to(run_device), y.to(run_device))
            l = loss(res, ref.to(run_device))
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            # Detach so the stored history does not keep autograd graphs alive.
            track_loss.append(l.detach().cpu())

            if log_every and len(track_loss) % log_every == 0:
                window = track_loss[-10:]  # running average over the last <=10 batches
                print(f"[{len(track_loss) // 10}] avg_loss : {sum(window) / len(window)}")
    # The original ended with `return mean`, an undefined name.
    return track_loss


In [None]:
from torch.optim import SGD
from torch.nn import MSELoss
# Mean-squared-error criterion and plain SGD over the model's parameters.
loss = MSELoss()
optimizer = SGD(params=model.parameters(), lr=10 ** -6)

In [None]:
# Run training with the default number of epochs.
train(model,train_loader,loss,optimizer)

In [None]:
# NOTE(review): in the cells shown here `x2` is only assigned inside train(), so this
# line needs an `x2` left in the kernel from elsewhere — confirm, or print `x` instead.
print(x2.shape,y.shape)

In [None]:
# Predict with the fusion model and resample to the spatial size of `d`.
with torch.no_grad():
    # NOTE(review): channel 2 lies inside the `:3` slice used as the image elsewhere;
    # confirm that it really is the sparse-depth channel.
    prediction2 = model(
        input_batch.to(device),  # follow the selected device instead of assuming CUDA
        transforms.Resize((288, 384), antialias=True)(input[:1, 2:3]).to(device),
    )

    prediction2 = torch.nn.functional.interpolate(
        prediction2,
        size=d.shape[:2],
        mode="bicubic",
        align_corners=False,
    ).squeeze()

# Take the NumPy copy from this cell's result; the original read the older
# `prediction` tensor here.
output = prediction2.cpu().numpy()


In [None]:
# Shape of the fused prediction after interpolation and squeeze.
prediction2.shape

In [None]:
# Copy the fused prediction to the host and draw it.
plt.imshow(prediction2.cpu())

In [None]:
# Recompute the plain MiDaS prediction, resampled to the spatial size of `d`.
with torch.no_grad():
    # Follow the selected device instead of assuming CUDA.
    prediction = midas(input_batch.to(device))

    prediction = torch.nn.functional.interpolate(
        prediction.unsqueeze(0),  # add the extra leading axis interpolate needs
        size=d.shape[:2],
        mode="bicubic",
        align_corners=False,
    ).squeeze()

# NumPy copy on the host for plotting.
output = prediction.cpu().numpy()

In [None]:
# Draw the NumPy copy of the MiDaS prediction.
plt.imshow(output)

In [None]:
# Loss of the fused prediction against the resized target, followed by the loss of
# the plain MiDaS prediction against the negated resized target.
print(
    loss(
        prediction2.cpu(),
        transforms.Resize((228, 304), antialias=True)(target)[0, 0],
    ),
    loss(
        prediction.cpu(),
        -transforms.Resize((228, 304), antialias=True)(target)[0, 0],
    ),
)

In [None]:
# Draw the negated first channel of the first target sample.
plt.imshow(-target[0,0])

In [None]:
# Offset both predictions and the resized target, then divide each by its mean.
print(prediction2.sum())
# NOTE(review): both predictions are offset by +min(prediction); confirm that adding
# the minimum, and reusing `prediction`'s minimum for `pred2`, is intended.
pred = prediction + torch.min(prediction)
pred2 = prediction2 + torch.min(prediction)
p1 = pred / pred.mean()
# The original divided by `p2.shape[...]` before `p2` existed (NameError on a fresh
# kernel); the mean of `pred2` is the intended normaliser.
p2 = pred2 / pred2.mean()
rf = transforms.Resize((228, 304), antialias=True)(target)[0, 0]
rf = rf + torch.min(rf)
# NOTE(review): the normalised reference is stored in `pr`, while the next cell
# compares against `rf` — confirm which one was meant.
pr = rf / rf.mean()

In [None]:
# Loss of each mean-normalised prediction against `rf`.
# NOTE(review): the mean-normalised reference was stored in `pr`, not `rf` — confirm
# which one should be compared here.
loss(p1.cpu(),rf.cpu()),loss(p2.cpu(),rf.cpu())

In [None]:
# Write the negated normalised fused prediction to an image file.
plt.imsave("p2opp.png", -p2.cpu())

In [None]:
# Now we try again with ORB-sampled depth points:

In [None]:
# Import the sparsifier module here: the notebook only imports `ds` in a later cell,
# so on a fresh kernel `ds.ORBSampling` would otherwise raise a NameError.
import dataloaders.dense_to_sparse as ds

# Locations of the NYU Depth v2 training and validation splits.
train_dir=os.path.join('data', 'nyudepthv2', 'train')
test_dir=os.path.join('data', 'nyudepthv2', 'val')

# Rebuild datasets and loaders, this time with the ORB-based sparsifier.
sparsifier=ds.ORBSampling(100,np.inf)
train_dataset = NYUDataset(train_dir, type='train', sparsifier=sparsifier,modality='rgbd')
val_dataset = NYUDataset(test_dir, type='val', sparsifier=sparsifier,modality='rgbd')
train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=10, shuffle=True,
            num_workers=1, pin_memory=True, sampler=None,
            worker_init_fn=lambda work_id:np.random.seed(work_id))
val_loader = torch.utils.data.DataLoader(val_dataset,
        batch_size=1, shuffle=False, num_workers=10, pin_memory=True)

# MiDaS preprocessing pipelines; both DPT variants share dpt_transform.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

# Keep the pretrained backbone frozen.
for parameter in midas.parameters(): 
    parameter.requires_grad=False

In [None]:
import torch.nn as nn
class MyModel(nn.Module):
    """Backbone depth prediction fused with a second depth map by two convolutions.

    Args:
        backbone: module whose output for ``x`` gains a channel axis via
            ``unsqueeze(1)`` before fusion. When omitted, the notebook-level
            ``midas`` network is used, as before.
    """

    def __init__(self, backbone=None):
        super(MyModel, self).__init__()
        # Fall back to the notebook's global network to stay backward compatible.
        self.md = midas if backbone is None else backbone
        # Two 21x21 convolutions (2 -> 2 -> 1 channels); reflect padding of 10 keeps
        # the spatial size unchanged.
        self.fcomb = nn.Conv2d(2,2,(21,21),padding=(10,10),padding_mode='reflect')
        self.f2 = nn.Conv2d(2,1,(21,21),padding=(10,10),padding_mode='reflect')
        self.ReLU=nn.ReLU()

    def forward(self, x, y):
        """Return the fused map for image batch ``x`` and depth map ``y``.

        ``y`` is concatenated with the backbone output along dim 1, so it must
        already carry a channel axis.
        """
        x1 = self.md(x)
        stacked = torch.cat((x1.unsqueeze(1),y),1)
        hidden = self.ReLU(self.fcomb(stacked))
        return self.f2(hidden)
# Build the two-convolution fusion model and place it on the selected device.
model = MyModel().to(device)
model.eval()
# Empty tuple as the final expression so the cell displays `()`.
()

In [None]:
from torchvision import transforms
def train(model, dataloader, loss, optimizer, n_epochs=1, log_every=10):
    """Train ``model`` on ``dataloader`` and return the per-batch losses.

    Relies on the notebook-level ``transform`` (MiDaS preprocessing) and on
    ``transforms`` from torchvision.

    Args:
        model: network called as ``model(images, depth_hint)``.
        dataloader: iterable yielding ``(input, ref)`` batches.
        loss: criterion called as ``loss(prediction, ref)``.
        optimizer: optimizer stepped once per batch.
        n_epochs: number of passes over ``dataloader``.
        log_every: print a running average every this many batches
            (``0``/``None`` disables printing).

    Returns:
        List of detached CPU loss tensors, one per batch of the last epoch
        (empty when ``n_epochs`` is 0).
    """
    model.train(True)
    # Follow the model's device instead of assuming CUDA.
    run_device = next(model.parameters()).device
    resize = transforms.Resize((288, 384), antialias=True)
    # Bound before the loop so that n_epochs=0 returns [] instead of raising.
    track_loss = []
    for epoch in range(n_epochs):
        track_loss = []
        for batch, ref in dataloader:
            x2 = torch.zeros((batch.shape[0], 3, 288, 384))
            # Transform each sample of the batch; the original filled every slot
            # from sample 0.
            for k in range(x2.shape[0]):
                x2[k] = transform(np.array(batch[k, :3].permute(1, 2, 0)) * 255)
            # NOTE(review): channel 2 lies inside the `:3` slice treated as the image
            # above; if the sparse depth is stored in a later channel this slice
            # should point there instead — confirm against the dataset.
            y = resize(batch[:, 2:3])
            ref = resize(ref)
            res = model(x2.to(run_device), y.to(run_device))
            l = loss(res, ref.to(run_device))
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            # Detach so the stored history does not keep autograd graphs alive.
            track_loss.append(l.detach().cpu())

            if log_every and len(track_loss) % log_every == 0:
                window = track_loss[-10:]  # running average over the last <=10 batches
                print(f"[{len(track_loss) // 10}] avg_loss : {sum(window) / len(window)}")
    return track_loss


In [None]:
from torch.optim import SGD
from torch.nn import MSELoss
# Mean-squared-error criterion and plain SGD over the model's parameters.
loss = MSELoss()
optimizer = SGD(params=model.parameters(), lr=10 ** -5)

In [None]:
%%time
# Train with the default number of epochs and keep the returned loss history.
losses=train(model,train_loader,loss,optimizer)

In [None]:
def evaluate(model, test_loader, loss_fn=None):
    """Compute the loss of ``model`` on every batch of ``test_loader``.

    Relies on the notebook-level ``transform`` (MiDaS preprocessing) and on
    ``transforms`` from torchvision.

    Args:
        model: network called as ``model(images, depth_hint)``.
        test_loader: iterable yielding ``(input, ref)`` batches.
        loss_fn: criterion called as ``loss_fn(prediction, ref)``; defaults to
            the notebook-level ``loss``, as before.

    Returns:
        List of CPU loss tensors, one per batch.
    """
    criterion = loss if loss_fn is None else loss_fn
    model.eval()
    # Follow the model's device instead of assuming CUDA.
    run_device = next(model.parameters()).device
    resize = transforms.Resize((288, 384), antialias=True)
    eval_loss = []
    # Evaluation needs no gradients; skipping them avoids building autograd graphs.
    with torch.no_grad():
        for batch, ref in test_loader:
            x2 = torch.zeros((batch.shape[0], 3, 288, 384))
            # Transform each sample of the batch (the original always used sample 0).
            for k in range(x2.shape[0]):
                x2[k] = transform(np.array(batch[k, :3].permute(1, 2, 0)) * 255)
            # NOTE(review): channel 2 lies inside the `:3` slice treated as the image
            # above; confirm that it really is the sparse-depth channel.
            y = resize(batch[:, 2:3])
            ref = resize(ref)
            res = model(x2.to(run_device), y.to(run_device))
            eval_loss.append(criterion(res, ref.to(run_device)).cpu())
    return eval_loss

In [None]:
# Per-batch losses of the trained model on the validation loader.
L=evaluate(model,val_loader)

In [None]:
import dataloaders.dense_to_sparse as ds

In [None]:
import importlib
# Re-execute the dense_to_sparse module so edits to its source reach the running kernel.
importlib.reload(ds)