In [None]:
import torch
import math
import matplotlib.pyplot as plt
import numpy as np

# Device configuration: use the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Show which device was selected.
print(device)


## Reading Dataset

In [None]:
import pandas as pd
# Dataset root directory; every path stored in the CSV files is joined onto it.
DATA_ROOT = "../input/image-depth-estimation"


def _load_pairs(csv_path):
    """Read a headerless two-column CSV of (image, label) paths.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        DataFrame with columns 'image' and 'label', both prefixed with
        DATA_ROOT so they can be opened from the notebook's working directory.
    """
    pairs = pd.read_csv(csv_path, names=['image', 'label'])
    return pairs.assign(
        image=pairs['image'].map(lambda rel: f"{DATA_ROOT}/{rel}"),
        label=pairs['label'].map(lambda rel: f"{DATA_ROOT}/{rel}"),
    )


# NOTE(review): `train` is read from nyu2_test.csv, the same file as `test`.
# Kept as-is because the rest of the notebook splits `train` 80/20 itself;
# confirm whether a separate training CSV was intended.
train = _load_pairs(f"{DATA_ROOT}/data/nyu2_test.csv")
# `test` now receives the same path prefix as `train` (it previously kept the
# raw relative paths from the CSV).
test = _load_pairs(f"{DATA_ROOT}/data/nyu2_test.csv")

print(len(train))
train.head()




## Data Loader

In [None]:
import torch
import os
from PIL import Image
import torchvision.transforms as transforms




class DepthDataset(torch.utils.data.Dataset):
    """Paired RGB image / depth-map dataset backed by two columns of file paths.

    Args:
        df: DataFrame with an 'image' column (image file paths) and a 'label'
            column (depth-map file paths). Row ``i`` pairs image ``i`` with
            depth map ``i``.

    Each item is an ``(image, depth)`` tuple:
        * image: tensor of the 228x304 centre crop, normalised with mean 0.5
          and std 0.5 on three channels.
        * depth: tensor of the same crop resized to 64x80, cast to float and
          divided by 65535.

    Constructing the dataset also displays one randomly chosen image/depth
    pair as a visual sanity check.
    """

    def __init__(self, df):
        # Local import: the notebook only imports `random` in a later cell,
        # so the class must not rely on that cell having been executed.
        import random

        self.images = list(df['image'].values)
        self.labels = list(df['label'].values)

        # The pipelines are stateless, so build them once instead of on every
        # __getitem__ call.
        # Geometry shared by image and depth. The random horizontal flip is
        # NOT part of this pipeline: it has to be drawn once per pair (see
        # __getitem__), otherwise image and depth get mirrored independently.
        self.comm_trans = transforms.Compose([
            transforms.Resize((240, 320)),
            transforms.CenterCrop((228, 304)),
        ])
        self.image_trans = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        self.depth_trans = transforms.Compose([
            transforms.Resize((64, 80)),
            transforms.ToTensor(),
            transforms.Lambda(lambda x: torch.div(x.float(), 65535.0)),
        ])

        # Preview one sample. randrange covers every index (the previous
        # choice excluded the last row and failed on a single-row table).
        if self.images:
            self._show_sample(random.randrange(len(self.images)))

    def _show_sample(self, index):
        """Plot the image and the depth map stored at `index`."""
        image = Image.open(self.images[index])
        depth = Image.open(self.labels[index])

        plt.imshow(image)
        plt.title("Image")
        plt.show()

        # Raw depth values are multiplied by 256 for display only.
        plt.imshow(np.asarray(depth) * 256)
        plt.title("Depth")
        plt.show()

    def __getitem__(self, index):
        """Return the transformed ``(image, depth)`` pair at `index`."""
        image = self.comm_trans(Image.open(self.images[index]))
        depth = self.comm_trans(Image.open(self.labels[index]))

        # One flip decision for the pair keeps pixels and depth aligned.
        if torch.rand(1).item() < 0.5:
            image = transforms.functional.hflip(image)
            depth = transforms.functional.hflip(depth)

        return self.image_trans(image), self.depth_trans(depth)

    def __len__(self):
        """Number of image/depth pairs."""
        return len(self.images)





## Hyperparameters

In [None]:

# Number of samples per batch; passed to both DataLoaders.
batch_size = 32
# Initial learning rate handed to the Adam optimizer.
learning_rate = 0.001
# Number of passes over the training loader.
total_epoch = 50
# The training loop prints its running losses every `report_rate` batches.
report_rate = 20

## Datasets and loader

In [None]:
import random
# Wrap the path table in a Dataset (constructing it also previews one sample).
dataset_train = DepthDataset(train)

# 80/20 split sizes: floor for the first part, ceil for the second.
n_rows = len(train)
lengths = [int(math.floor(n_rows * 0.8)), int(math.ceil(n_rows * 0.2))]
train_dataset, test_dataset = torch.utils.data.random_split(dataset_train, lengths)

In [None]:


# Training batches are reshuffled every epoch so the optimizer does not see
# the samples in the same fixed order each time.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                     batch_size=batch_size,
                                     shuffle=True)
# Evaluation keeps a fixed order so validation runs and the prediction
# preview iterate the held-out samples identically every time.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                     batch_size=batch_size,
                                     shuffle=False)


## Loss Functions (Candidates We Can Use for the Model)

In [None]:
#Cite from: https://github.com/simonmeister/pytorch-mono-depth

import numpy as np
import torch
import torch.nn as nn
from math import log


def _mask_input(input, mask=None):
    if mask is not None:
        input = input * mask
        count = torch.sum(mask).data[0]
    else:
        count = np.prod(input.size(), dtype=np.float32).item()
    return input, count


class BerHuLoss(nn.Module):
    """Reverse Huber (berHu) loss.

    Residuals with magnitude up to the threshold ``c`` are penalised linearly
    (``|x|``); larger residuals use ``(x**2 + c**2) / (2 * c)``. ``c`` is one
    fifth of the largest absolute residual in the call.
    """

    def forward(self, input, target, mask=None):
        """Return the berHu loss averaged over the (optionally masked) elements.

        Args:
            input: Predicted tensor.
            target: Ground-truth tensor broadcastable with `input`.
            mask: Optional mask forwarded to `_mask_input`.
        """
        x = input - target
        abs_x = torch.abs(x)
        c = torch.max(abs_x).item() / 5
        if c > 0:
            leq = (abs_x <= c).float()
            l2_losses = (x ** 2 + c ** 2) / (2 * c)
            losses = leq * abs_x + (1 - leq) * l2_losses
        else:
            # Every residual is zero, so the quadratic branch would be 0/0 and
            # turn the whole loss into NaN; the linear branch is exact here.
            losses = abs_x
        losses, count = _mask_input(losses, mask)
        return torch.sum(losses) / count


class HuberLoss(nn.Module):
    """Smooth-L1 (Huber) loss averaged over the valid elements."""

    def __init__(self):
        super().__init__()
        # `reduction='sum'` is the current spelling of the deprecated
        # `size_average=False`; the division by the count happens in forward().
        self.loss = nn.SmoothL1Loss(reduction='sum')

    def forward(self, input, target, mask=None):
        """Return the summed smooth-L1 loss divided by the element count.

        Args:
            input: Predicted tensor.
            target: Ground-truth tensor with the same shape as `input`.
            mask: Optional tensor multiplied into both `input` and `target`;
                the sum of its entries is then used as the count.
        """
        if mask is not None:
            loss = self.loss(input * mask, target * mask)
            # 0-dim tensor -> Python scalar ([0] indexing raises on
            # PyTorch >= 0.4).
            count = torch.sum(mask).item()
            return loss / count

        return self.loss(input, target) / float(input.numel())


class DistributionLogLoss(nn.Module):
    """Average log-loss of a distribution built from the network outputs.

    Args:
        distribution: Callable invoked as ``distribution(*input)``; the
            returned object must provide ``log_loss(target)``.
    """

    def __init__(self, distribution):
        super().__init__()
        self.distribution = distribution

    def forward(self, input, target, mask=None):
        """Unpack `input` into the distribution and average its log-loss."""
        dist = self.distribution(*input)
        per_element, n_valid = _mask_input(dist.log_loss(target), mask)
        return torch.sum(per_element) / n_valid


class RMSLoss(nn.Module):
    """Root of the mean squared difference between `input` and `target`."""

    def forward(self, input, target, mask=None):
        """Square the residuals, average them via `_mask_input`, take the root."""
        squared, n_valid = _mask_input(torch.pow(input - target, 2), mask)
        mean_squared = torch.sum(squared) / n_valid
        return torch.sqrt(mean_squared)


class RelLoss(nn.Module):
    """Mean absolute error relative to the target value."""

    def forward(self, input, target, mask=None):
        """Average ``|input - target| / target`` over the counted elements."""
        abs_err = torch.abs(input - target)
        relative, n_valid = _mask_input(abs_err / target, mask)
        return torch.sum(relative) / n_valid

class MseLoss(nn.Module):
    """Mean squared error over the (optionally masked) elements."""

    def forward(self, input, target, mask=None):
        """Return the squared residuals summed and divided by the count.

        Args:
            input: Predicted tensor.
            target: Ground-truth tensor broadcastable with `input`.
            mask: Optional mask forwarded to `_mask_input`.
        """
        # Keep the squared errors element-wise until after `_mask_input`.
        # Summing first collapsed them to a scalar, so the count was 1 (the
        # result was a sum, not a mean) and a mask was applied to the scalar.
        loss = (input - target) ** 2
        loss, count = _mask_input(loss, mask)
        return torch.sum(loss) / count

class Log10Loss(nn.Module):
    """Mean absolute difference of the base-10 logarithms of target and input."""

    def forward(self, input, target, mask=None):
        """Average ``|ln(target) - ln(input)| / ln(10)`` over the counted elements."""
        log_gap = torch.log(target) - torch.log(input)
        per_element = torch.abs(log_gap / log(10))
        per_element, n_valid = _mask_input(per_element, mask)
        return torch.sum(per_element) / n_valid


class TestingLosses(nn.Module):
    """Evaluate several scalar losses at once and return them as one vector.

    Args:
        scalar_losses: Iterable of modules, each called as
            ``module(input, target)`` and returning a single-value tensor.
    """

    def __init__(self, scalar_losses):
        super().__init__()
        self.scalar_losses = nn.ModuleList(scalar_losses)

    def forward(self, input, target):
        """Return a 1-D tensor with one entry per wrapped loss, in order."""
        scalars = [m(input, target) for m in self.scalar_losses]
        # Current PyTorch losses return 0-dim tensors, which torch.cat
        # rejects. Flattening each result first accepts both 0-dim and
        # one-element 1-D outputs.
        return torch.cat([s.reshape(-1) for s in scalars])

# Resnet 50

In [None]:
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo


# Public names of the ResNet section. `__all__` is only consulted by
# `from <module> import *`, i.e. when this code is used as a module.
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']


# Download URLs of the pre-trained weights, keyed by architecture name.
# The resnetNN(pretrained=True) constructors below look their URL up here.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Args:
        inplanes: Channels of the incoming feature map.
        planes: Channels produced by both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input before it is added
            back as the shortcut.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Only the first convolution carries the stride.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run conv-bn-relu-conv-bn, add the shortcut, apply the final ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Residual block of three convolutions: 1x1, 3x3 (strided), 1x1.

    Args:
        inplanes: Channels of the incoming feature map.
        planes: Channels of the two inner convolutions; the block outputs
            ``planes * 4`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input before it is added
            back as the shortcut.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        widened = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv-bn stages, add the shortcut, apply the final ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet classifier: a convolutional stem, four residual stages, pooling and a linear head.

    Args:
        block: Residual block class. It must expose an ``expansion`` class
            attribute and accept ``(inplanes, planes, stride, downsample)``.
        layers: Sequence of four ints, the number of blocks in each stage.
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, block, layers, num_classes=1000):
        super().__init__()
        # Running channel count; _make_layer updates it after each stage.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        self._init_weights()

    def _init_weights(self):
        """Re-initialise every convolution and batch-norm layer of the network."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                # Zero-mean normal with std sqrt(2 / (kh * kw * out_channels)).
                kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
                fan = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of `blocks` residual blocks and advance `self.inplanes`."""
        out_planes = planes * block.expansion

        # The first block needs a projection shortcut whenever it changes the
        # resolution or the channel count.
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the ``(batch, num_classes)`` output of the linear head."""
        feats = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)

        pooled = self.avgpool(feats)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)


def resnet18(pretrained=False, **kwargs):
    """Build a ResNet-18 (BasicBlock, stages of 2/2/2/2 blocks).

    Args:
        pretrained (bool): When True, load the weights published under
            ``model_urls['resnet18']`` into the model.
        **kwargs: Forwarded to the ``ResNet`` constructor.
    """
    net = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet18'])
    net.load_state_dict(weights)
    return net



def resnet34(pretrained=False, **kwargs):
    """Build a ResNet-34 (BasicBlock, stages of 3/4/6/3 blocks).

    Args:
        pretrained (bool): When True, load the weights published under
            ``model_urls['resnet34']`` into the model.
        **kwargs: Forwarded to the ``ResNet`` constructor.
    """
    net = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet34'])
    net.load_state_dict(weights)
    return net



def resnet50(pretrained=False, **kwargs):
    """Build a ResNet-50 (Bottleneck, stages of 3/4/6/3 blocks).

    Args:
        pretrained (bool): When True, load the weights published under
            ``model_urls['resnet50']`` into the model.
        **kwargs: Forwarded to the ``ResNet`` constructor.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet50'])
    net.load_state_dict(weights)
    return net



def resnet101(pretrained=False, **kwargs):
    """Build a ResNet-101 (Bottleneck, stages of 3/4/23/3 blocks).

    Args:
        pretrained (bool): When True, load the weights published under
            ``model_urls['resnet101']`` into the model.
        **kwargs: Forwarded to the ``ResNet`` constructor.
    """
    net = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet101'])
    net.load_state_dict(weights)
    return net



def resnet152(pretrained=False, **kwargs):
    """Build a ResNet-152 (Bottleneck, stages of 3/8/36/3 blocks).

    Args:
        pretrained (bool): When True, load the weights published under
            ``model_urls['resnet152']`` into the model.
        **kwargs: Forwarded to the ``ResNet`` constructor.
    """
    net = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet152'])
    net.load_state_dict(weights)
    return net

## Resnet-50 Model with Up-sampling

In [None]:
import torch
import torch.nn as nn
import torchvision

# Device configuration: same CUDA-if-available selection as at the top of
# the notebook.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_channels, out_channels, **conv_kwargs)
# 5x5 convolution
def conv5x5(in_channels, out_channels, stride=1):
    """Build a bias-free 5x5 ``nn.Conv2d`` with padding 2 and the given stride."""
    conv_kwargs = dict(kernel_size=5, stride=stride, padding=2, bias=False)
    return nn.Conv2d(in_channels, out_channels, **conv_kwargs)

# UpSampling Block
class UpSamplingBlock(nn.Module):
    """Residual block that doubles the spatial resolution.

    The input is unpooled 2x by writing each value into the top-left cell of
    a 2x2 window of zeros. A 5x5 conv + batch-norm of the unpooled map forms
    the residual; ReLU, a 3x3 conv and a second batch-norm are applied on top
    of it, and the residual is added back.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels of the produced feature map.
    """

    def __init__(self, in_channels, out_channels):
        super(UpSamplingBlock, self).__init__()
        # No longer used by forward(); kept so the module's attributes are
        # unchanged (neither layer has parameters).
        self.unpool = nn.MaxUnpool2d(2, stride=2)
        self.pool = nn.MaxPool2d(2, stride=2, return_indices=True)
        # Same layers as conv5x5(in, out) / conv3x3(out, out), spelled out so
        # the block does not depend on which helper definition is live.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=5,
                               stride=1, padding=2, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Return a tensor of shape ``(N, out_channels, 2 * H, 2 * W)``."""
        n, c, h, w = x.size()
        # Deterministic unpooling on x's own device and dtype. The previous
        # code took unpool indices from max-pooling an uninitialised CPU
        # tensor, so value placement depended on whatever was in memory.
        unpooled = x.new_zeros(n, c, h * 2, w * 2)
        unpooled[:, :, ::2, ::2] = x

        # Projection branch, computed once and reused as the block's first
        # conv-bn stage.
        residual = self.bn1(self.conv1(unpooled))

        # Out-of-place ReLU: an in-place one would overwrite `residual`.
        out = torch.relu(residual)
        # bn2 runs on the conv2 output. It used to run on `residual`, which
        # discarded conv2's result entirely.
        out = self.bn2(self.conv2(out))
        return out + residual


# DepthNet
class DepthNet(nn.Module):
    """Depth-estimation network: ResNet-50 features followed by three up-sampling blocks.

    The backbone is a pre-trained ResNet-50 with its last two children
    removed. A 1x1 convolution reduces 2048 channels to 1024, three
    ``UpSamplingBlock`` + ReLU stages take it to 128 channels, and a final
    3x3 convolution with ReLU produces a one-channel map.
    """

    def __init__(self):
        super().__init__()
        # Keep every child of the ResNet-50 except the last two.
        backbone = resnet50(pretrained=True)
        self.resnet = nn.Sequential(*list(backbone.children())[:-2])
        self.conv1 = nn.Conv2d(2048, 1024, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn = nn.BatchNorm2d(1024)
        # Decoder: each stage halves the channel count, followed by a ReLU.
        decoder_stages = []
        for c_in, c_out in ((1024, 512), (512, 256), (256, 128)):
            decoder_stages += [UpSamplingBlock(c_in, c_out), nn.ReLU()]
        self.up1 = nn.Sequential(*decoder_stages)
        self.conv2 = conv3x3(128, 1)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, inputs):
        """Return the one-channel prediction for a batch of images."""
        features = self.bn(self.conv1(self.resnet(inputs)))
        depth = self.conv2(self.up1(features))
        return self.relu(depth)


In [None]:
# load model: build DepthNet and move its parameters to the selected device
model = DepthNet().to(device)

# Loss and optimizer
# `criterion` is the loss that is back-propagated in the training loop;
# `criterion2` is only evaluated and reported next to it.
criterion  = BerHuLoss()
criterion2 = MseLoss()
# Adam over all model parameters, starting at `learning_rate`.
optimizer  = torch.optim.Adam(model.parameters(), lr=learning_rate)

# learning rate decay
def update_lr(opt, lr):
    """Set the learning rate of every parameter group of `opt` to `lr`."""
    for group in opt.param_groups:
        group.update(lr=lr)

# validation
def validate(model, test_loader):
    """Evaluate `model` on `test_loader` and print the average of both criteria.

    The model is switched to evaluation mode and run without gradient
    tracking. `criterion` and `criterion2` are evaluated per batch and their
    values are averaged over the number of batches. The model's previous
    training/evaluation mode is restored before returning.

    Args:
        model: Network to evaluate; called as ``model(images)``.
        test_loader: Iterable yielding ``(images, depths)`` batches.
    """
    was_training = model.training
    model.eval()
    try:
        loss, loss2, n_batches = 0.0, 0.0, 0
        with torch.no_grad():
            for t_image, t_depth in test_loader:
                t_image = t_image.to(device)
                t_depth = t_depth.to(device)
                t_outputs = model(t_image)

                # Same (prediction, target) argument order as the training loop.
                loss += criterion(t_outputs, t_depth).item()
                loss2 += criterion2(t_outputs, t_depth).item()
                n_batches += 1

        # Each criterion value is already a per-batch figure, so average over
        # batches. The previous extra division by `batch_size` shrank the
        # printed numbers by that factor. max() guards an empty loader.
        denom = max(n_batches, 1)
        print("Validation BerHuLoss: {:.4f}"
              .format(loss / denom))
        print("Validation MSE LOSS: {:.4f}"
              .format(loss2 / denom))
    finally:
        model.train(was_training)





In [None]:
# train
# Steps are counted in batches, so the total shown next to the step index is
# the number of batches per epoch (it used to be the number of samples).
total_step = len(train_loader)
curr_lr = learning_rate
model.train()
for epoch in range(total_epoch):
    running_loss, running_loss2 = 0.0, 0.0
    epoch_loss = 0.0
    for i, (image, depth) in enumerate(train_loader):

        image = image.to(device)
        depth = depth.to(device)

        # forward pass
        outputs = model(image)
        loss = criterion(outputs, depth)
        # The second criterion is only reported, so it needs no autograd graph.
        with torch.no_grad():
            loss2 = criterion2(outputs, depth)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch values. The epoch total adds this batch's
        # value; it used to add the running window sum, which counted earlier
        # batches of the window again on every step.
        batch_loss2 = loss2.item()
        running_loss += loss.item()
        running_loss2 += batch_loss2
        epoch_loss += batch_loss2

        if (i + 1) % report_rate == 0:
            # The running sums cover `report_rate` batches, so that is the
            # divisor for their average.
            print("Epoch: [{}/{}] Step [{}/{}] Loss: {:.4f} MSE LOSS {:.4f}"
                  .format((epoch+1), total_epoch, (i+1), total_step, (running_loss/report_rate), (running_loss2/report_rate)))
            running_loss, running_loss2 = 0.0, 0.0

    #Decay learning rate: divide by 3 every 5 epochs
    if (epoch + 1) % 5 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)

    # Report epoch loss: mean of the second criterion over the epoch's batches
    print("Epoch: [{}/{}] Epoch Loss: {:.4f}\n"
          .format((epoch+1), total_epoch, (epoch_loss / max(total_step, 1))))

    validate(model, test_loader)

# Save the model checkpoint
torch.save(model.state_dict(), 'depthnet.ckpt')

# Validate Model By Seeing the Predictions Manually

In [None]:


device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Inference only: evaluation mode makes batch-norm use its running
# statistics, and no_grad skips building an autograd graph per forward pass.
was_training = model.training
model.eval()
with torch.no_grad():
    for i, (image, depth) in enumerate(test_loader):
        print(i, end='\r')
        y_pred = model(image.to(device))

        # Show channel 0 of the batch's second sample, as before; fall back
        # to the first sample when the batch holds only one.
        sample = 1 if y_pred.size(0) > 1 else 0
        plt.imshow(y_pred[sample, 0].cpu().numpy() * 256)
        plt.title("Predicted depth (batch {})".format(i))
        plt.show()

        # Three batches are enough for a visual check.
        if i == 2:
            break
model.train(was_training)
        
