# Setting up the TPU

In [None]:
# Make sure to use TPU
import os

# Use ``.get`` rather than indexing: when the variable is missing, indexing
# raises a bare KeyError and the hint below would never be displayed.
assert os.environ.get('COLAB_TPU_ADDR'), 'Make sure to select TPU from Edit > Notebook settings > Hardware accelerator'

In [None]:
# Installing PyTorch/XLA
# Pins cloud-tpu-client to 0.10 and installs the torch_xla 1.8.1 wheel; the
# wheel file name (cp37-cp37m-linux_x86_64) targets CPython 3.7 on 64-bit Linux.
!pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.8.1-cp37-cp37m-linux_x86_64.whl

Collecting cloud-tpu-client==0.10
  Downloading https://files.pythonhosted.org/packages/56/9f/7b1958c2886db06feb5de5b2c191096f9e619914b6c31fdf93999fdbbd8b/cloud_tpu_client-0.10-py3-none-any.whl
Collecting torch-xla==1.8.1
[?25l  Downloading https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.8.1-cp37-cp37m-linux_x86_64.whl (145.0MB)
[K     |████████████████████████████████| 145.0MB 47kB/s 
Collecting google-api-python-client==1.8.0
[?25l  Downloading https://files.pythonhosted.org/packages/9a/b4/a955f393b838bc47cbb6ae4643b9d0f90333d3b4db4dc1e819f36aad18cc/google_api_python_client-1.8.0-py3-none-any.whl (57kB)
[K     |████████████████████████████████| 61kB 3.1MB/s 
[31mERROR: earthengine-api 0.1.264 has requirement google-api-python-client<2,>=1.12.1, but you'll have google-api-python-client 1.8.0 which is incompatible.[0m
Installing collected packages: google-api-python-client, cloud-tpu-client, torch-xla
  Found existing installation: google-api-python-client 1.12.8
  

In [None]:
# Import pytorch/xla
import torch

# imports the torch_xla package
import torch_xla
import torch_xla.core.xla_model as xm



In [None]:
# make device
# Handle to the XLA device provided by torch_xla (presumably the TPU selected above).
# NOTE(review): the model / training / prediction classes later in this notebook
# pick their own device through torch.cuda.is_available() and never read this
# variable — confirm whether training is really meant to run on the TPU.
device = xm.xla_device()

# Train the distance model on Colab
## Import modules

In [None]:
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.nn.functional as F
import h5py
import scipy.io
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

import torch.optim as optim
from tqdm.auto import tqdm
import torch
import json
import os

## Load data

In [None]:
from google.colab import drive

# Expose Google Drive under /content/gdrive so the dataset and checkpoints
# stored there can be addressed with ordinary file paths.
drive.mount('/content/gdrive')

# Folder holding the notebook assets, and the labelled depth dataset inside it.
root_path = 'gdrive/MyDrive/Colab Notebooks/'
data_path = 'gdrive/MyDrive/Colab Notebooks/nyu_depth_data_labeled.mat'


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## Define Dataset Loader

In [None]:
class NyDataset(Dataset):
    """Per-point depth samples read from a ``.mat`` (HDF5) depth dataset.

    A regular ``x_point`` x ``y_point`` grid is laid over every depth map and
    each (image, grid point) pair is one sample, so the dataset length is
    ``number_of_images * x_point * y_point``. A flat sample index ``i`` maps to
    image ``i // point`` and grid point ``i % point``.

    Decoded images are memoised in ``self.X`` (one slot per image) and depth
    samples in ``self.depth`` (one slot per flat sample index). Every slot is
    checked individually, so a lookup can never return a value that belongs to
    another image and an entry that was never read is loaded on demand.
    """

    def __init__(self, root_dir, transform=None, x_point=10, y_point=10):
        """
        Args:
            root_dir (string):
                Path of the HDF5 ``.mat`` file holding the ``images`` and
                ``depths`` datasets.
            transform (callable, optional):
                Optional transform applied to the sample dict.
            x_point (int):
                Number of grid points along the first spatial axis.
            y_point (int):
                Number of grid points along the second spatial axis.

        Raises:
            ValueError: if ``x_point`` or ``y_point`` is not positive.
        """
        if x_point <= 0 or y_point <= 0:
            raise ValueError('x_point and y_point must be positive integers')

        self.root_dir = root_dir
        # Open the file once, read-only; every accessor reuses this handle.
        self.img_data_file = h5py.File(root_dir, 'r')
        self.transform = transform
        self.x_point = x_point
        self.y_point = y_point
        self.point = x_point * y_point

        self.len = self.img_data_file['images'].shape[0]

        # Kept only so that callers which flip these flags keep working; the
        # caches below are validated per entry and do not depend on them.
        self.read_img = False
        self.read_depth = False
        self.X = [None] * self.len          # image cache, keyed by image index
        self.depth = [None] * len(self)     # depth cache, keyed by flat sample index

    def __len__(self):
        """Total number of (image, grid point) samples."""
        return self.len * self.point

    def __getitem__(self, idx):
        """Return ``(sample, depth)`` for one flat index or a sequence of them.

        For a single index, ``sample['image']`` is one image array,
        ``sample['target_coordinate']`` is ``[[row, col]]`` and ``depth`` is a
        1-element array. For a sequence of indexes the images are stacked
        along a new leading axis and coordinates / depths hold one entry per
        index, in the given order.

        Raises:
            IndexError: if an index is outside ``[-len(self), len(self))``.
            TypeError: if an index is not an integer.
        """
        if torch.is_tensor(idx) or isinstance(idx, np.ndarray):
            idx = idx.tolist()

        batched = isinstance(idx, (list, tuple))
        flat_idxes = [self._normalize_index(i) for i in (idx if batched else [idx])]

        images = [self.__get_image(self.root_dir, i // self.point) for i in flat_idxes]
        # Flat sample indexes (not bare point indexes) are passed on so the
        # depth cache is unique per (image, point) pair.
        depth_list, target_coordinate = self.get_depth_point(flat_idxes)

        sample = {
            'image': np.stack(images) if batched else images[0],
            'target_coordinate': target_coordinate
        }

        if self.transform is not None:
            sample = self.transform(sample)

        return sample, depth_list

    def _normalize_index(self, idx):
        """Validate one flat sample index and resolve negative values."""
        if not isinstance(idx, (int, np.integer)):
            raise TypeError('dataset indices must be integers, got %s' % type(idx).__name__)
        idx = int(idx)
        total = len(self)
        if idx < 0:
            idx += total
        if not 0 <= idx < total:
            raise IndexError('sample index out of range')
        return idx

    def _sample_depth(self, idx, depth_image):
        """Return ``(depth_value, [row, col])`` of sample ``idx`` in a 2-D depth map."""
        point_idx = idx % self.point
        row_step = depth_image.shape[0] // self.x_point
        col_step = depth_image.shape[1] // self.y_point
        # Row index spans x_point values and column index spans y_point values,
        # so both stay inside the map even when x_point != y_point.
        row = (point_idx // self.y_point) * row_step
        col = (point_idx % self.y_point) * col_step
        return depth_image[row][col], [row, col]

    def get_depth_point(self, idxes, depth_image=None):
        """Look up the depth value and pixel coordinate of each requested sample.

        Args:
            idxes: iterable of sample indexes. Only ``idx % self.point`` picks
                the grid position; when ``depth_image`` is omitted the full
                flat index also selects (and caches under) the right image.
            depth_image: optional depth map to sample from, 2-D or with a
                leading axis of which the first entry is used. Results computed
                from an explicitly supplied map are not cached, because the
                image they belong to is unknown here.

        Returns:
            ``(depth, target_coordinate)``: an array with one depth per index
            and a list with one ``[row, col]`` pair per index.
        """
        if depth_image is not None and np.ndim(depth_image) == 3:
            depth_image = depth_image[0]

        depth, target_coordinate = [], []
        for idx in idxes:
            idx = int(idx)
            if depth_image is not None:
                entry = self._sample_depth(idx, depth_image)
            else:
                entry = self.depth[idx]
                if entry is None:
                    loaded = self.__get_depth(self.root_dir, idx // self.point)
                    entry = self._sample_depth(idx, loaded)
                    self.depth[idx] = entry
            depth.append(entry[0])
            target_coordinate.append(entry[1])

        return np.array(depth), target_coordinate

    def __get_raw_depth(self, root_dir, idx):
        """Read entry ``idx`` of the ``rawDepths`` dataset, divided by 4.0."""
        rawDepth = self.img_data_file['rawDepths'][idx] / 4.0
        return rawDepth

    def __get_depth(self, root_dir, idx):
        """Read depth map ``idx`` as float32, divided by 4.0 (label scaling)."""
        depth = self.img_data_file['depths'][idx]
        transform_depth = depth.astype('float32') / 4.0
        return transform_depth

    def __get_image(self, root_dir, idx):
        """Return image ``idx`` as float32 scaled by 1/255, reading it at most once."""
        cached = self.X[idx]
        if cached is None:
            img = self.img_data_file['images'][idx]
            cached = img.astype('float32') / 255.0
            self.X[idx] = cached
        return cached

## Define Dataset

In [None]:
class Data:
    """Wraps an ``NyDataset`` and hands out shuffled train / test loaders."""

    def __init__(self, path, test=False):
        """Open the dataset at ``path``; ``test=True`` selects the tiny smoke-test split."""
        self.test = test
        self.ny_dataset = NyDataset(path)

    def get_dataset(self, train_ratio=0.8):
        """Randomly split the dataset and return ``(train_loader, test_loader)``.

        In normal mode ``train_ratio`` of the samples go to training and the
        rest to testing, with batches of 3. In test mode a fixed 100 / 10
        sample split is drawn (the remainder is discarded) with batches of 64
        and ``train_ratio`` is not used.
        """
        source = self.ny_dataset

        if not self.test:
            # Set split length
            n_train = int(len(source) * train_ratio)
            n_test = len(source) - n_train
            train_dataset, test_dataset = torch.utils.data.random_split(source, [n_train, n_test])
            batch = 3
        else:
            n_train, n_test = 100, 10
            leftover = len(source) - n_train - n_test
            train_dataset, test_dataset, _ = torch.utils.data.random_split(
                source, [n_train, n_test, leftover])
            batch = 64

        def make_loader(subset):
            return torch.utils.data.DataLoader(dataset=subset, batch_size=batch, shuffle=True)

        return make_loader(train_dataset), make_loader(test_dataset)




## Define MIS model

In [None]:
class MIS(nn.Module):
    """Measuring Image Distance Model.

    A truncated pretrained CNN produces a feature map, which is max-pooled to
    7x7, flattened, concatenated with the 2-value target coordinate and passed
    through three linear layers to regress a single value per sample.
    """

    def __init__(self, sub_sampling_ratio=16, width=480, height=640, model_selection='mobile',
                 freeze_features=True):
        """
        Args:
            sub_sampling_ratio (int): backbone layers are kept until the
                feature map drops below ``width // ratio`` and ``height // ratio``.
            width (int), height (int): spatial size of the probe image used to
                decide where the backbone is cut.
            model_selection (str): ``'vgg'`` (VGG-11) or ``'mobile'``
                (MobileNetV3-small).
            freeze_features (bool): when True (default) no gradient flows into
                the backbone. This matches the earlier implementation, whose
                round trip through Python lists cut the autograd graph at the
                same place. Pass False to fine-tune the backbone.

        Raises:
            ValueError: if ``model_selection`` is not a supported name.
        """
        super(MIS, self).__init__()
        self.sub_sampling_ratio = sub_sampling_ratio
        self.width = width
        self.height = height
        self.model_selection = model_selection
        self.freeze_features = freeze_features

        size = (7, 7)
        fc1_out = 64
        fc2_out = int(fc1_out / 4)

        self.feature_extractor = self.get_feature_extraction()
        self.adaptive_max_pool = nn.AdaptiveMaxPool2d(size)
        # "+ 2" makes room for the target coordinate appended in forward().
        self.fc1 = nn.Linear(size[0] * size[1] * self.extraction_size + 2, fc1_out)
        self.fc2 = nn.Linear(fc1_out, fc2_out)
        self.fc3 = nn.Linear(fc2_out, 1)
        self.dropout = nn.Dropout(p=0.7)

        print('make MIS model')

    def rol_pooling(self, output_map):
        """Apply the adaptive max pool to each map and keep the first entry of each result."""
        output = [self.adaptive_max_pool(out)[0] for out in output_map]

        return output

    def forward(self, sample):
        """Predict one value per image.

        Args:
            sample: ``(images, target)``. ``target`` is transposed on its first
                two dims before use, i.e. it is expected as ``(2, batch)``.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        x = sample[0]  # image
        target = sample[1]  # target coordinate

        # Gradients reach the backbone only when fine-tuning was requested
        # and the caller has not disabled autograd.
        track_backbone = torch.is_grad_enabled() and not self.freeze_features
        with torch.set_grad_enabled(track_backbone):
            x = self.feature_extractor(x)
            x = self.adaptive_max_pool(x)
        x = x.view(x.size(0), -1)

        # Concatenate on-device: no host round trip and no hard-coded
        # accelerator, so the model also runs on CPU.
        coords = torch.transpose(target, 0, 1).to(device=x.device, dtype=x.dtype)
        x = torch.cat((x, coords), dim=1)

        x = self.fc1(x)
        x = self.dropout(x)

        x = self.fc2(x)
        x = self.dropout(x)

        x = self.fc3(x)

        return x

    def get_feature_extraction(self):
        """Return network which produces feature map.

        Loads the selected pretrained backbone, sets ``self.extraction_size``
        to the channel count used to size ``fc1``, and keeps the leading
        layers of ``model.features`` up to (not including) the first one whose
        output is smaller than the sub-sampling threshold on both spatial dims.

        Raises:
            ValueError: if ``self.model_selection`` is not supported.
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        if self.model_selection == 'vgg':
            model = torchvision.models.vgg11(pretrained=True).to(device)
            self.extraction_size = 512
        elif self.model_selection == 'mobile':
            model = torchvision.models.mobilenet_v3_small(pretrained=True).to(device)
            self.extraction_size = 48
        else:
            raise ValueError("model_selection must be 'vgg' or 'mobile', got %r"
                             % (self.model_selection,))

        min_width = self.width // self.sub_sampling_ratio
        min_height = self.height // self.sub_sampling_ratio

        # Probe in eval mode without autograd: the dummy image must neither
        # build a graph nor update BatchNorm running statistics.
        was_training = model.training
        model.eval()
        req_features = []
        with torch.no_grad():
            output = torch.zeros((1, 3, self.width, self.height), device=device)
            for feature in model.features:
                output = feature(output)
                # output.size() => [batch_size, channel, width, height]
                if output.size(2) < min_width and output.size(3) < min_height:
                    break
                req_features.append(feature)
        model.train(was_training)

        faster_rcnn_feature_extractor = nn.Sequential(*req_features)
        return faster_rcnn_feature_extractor

## Define the training loop

In [None]:
class RMSELoss(nn.Module):
    """Root-mean-square error criterion used to score validation data."""

    def __init__(self, eps=1e-6):
        super(RMSELoss, self).__init__()
        self.eps = eps
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return ``sqrt(MSE(yhat, y) + eps)``.

        ``eps`` is added under the root so the result stays well defined
        when the two inputs are identical.
        """
        mean_squared_error = self.mse(yhat, y)
        return (mean_squared_error + self.eps).sqrt()

class Train:
    """Trains an ``MIS`` model and saves its weights under ``self.save_path``."""

    def __init__(self):
        self.save_path = 'gdrive/MyDrive/Colab Notebooks/'

    @staticmethod
    def _move_inputs(inputs, device):
        """Return ``(images, coordinates)`` on ``device`` from a collated sample dict.

        The collated ``target_coordinate`` entry holds one tensor per
        coordinate axis; stacking them on dim 0 gives a ``(2, batch)`` tensor.
        """
        images = inputs['image'].to(device)
        coordinates = torch.stack(list(inputs['target_coordinate'][0]), dim=0).to(device)
        return images, coordinates

    def _validate(self, model, test_loader, device):
        """Return the RMSE of ``model`` over every batch of ``test_loader``.

        The model is switched to eval mode (dropout off) for the pass and put
        back into its previous mode afterwards. Returns 0.0 for an empty loader.
        """
        predictions, targets = [], []
        was_training = model.training
        model.eval()
        with torch.no_grad():
            for inputs, labels in test_loader:
                images, coordinates = self._move_inputs(inputs, device)
                # The raw regression output is the prediction; no arg-max.
                predictions.append(model((images, coordinates)).cpu())
                targets.append(labels.cpu())
        model.train(was_training)

        if not predictions:
            return 0.0
        return RMSELoss()(torch.cat(predictions), torch.cat(targets)).item()

    def train(self, data, epochs=20, model_selection='vgg'):
        """Train on ``data`` and save the resulting state dict.

        Args:
            data: ``(train_loader, test_loader)`` pair.
            epochs (int): number of passes over the training loader.
            model_selection (str): backbone name forwarded to ``MIS``; also
                used as the prefix of the saved file name.

        Returns:
            dict with per-epoch ``'loss'`` (mean training loss) and
            ``'val_acc'`` (validation RMSE) lists.
        """
        import time

        train_loader, test_loader = data
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        model = MIS(model_selection=model_selection)

        if torch.cuda.device_count() > 1:
            # NOTE(review): CUDA has already been queried at this point, so
            # this assignment likely has no effect on device visibility.
            os.environ["CUDA_VISIBLE_DEVICES"] = '0, 1, 2, 3'
            # NOTE(review): the coordinate tensor is (2, batch); confirm that
            # DataParallel's dim-0 scatter splits it as intended.
            model = nn.DataParallel(model, output_device=1)
            print('Multi GPU!!!!!!!!!!!!!!')

        model = model.to(device)

        # Optimize
        criterion = nn.SmoothL1Loss().to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.001)

        start_time = time.time()
        history = {'loss': [], 'val_acc': []}
        for epoch in range(epochs):  # loop over the dataset multiple times
            model.train()
            epoch_loss = 0.0
            tk0 = tqdm(train_loader, total=len(train_loader), leave=False)
            for inputs, labels in tk0:
                image_inputs, coordinate_inputs = self._move_inputs(inputs, device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()
                outputs = model((image_inputs, coordinate_inputs))
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()

            # Tell the underlying dataset (behind the random_split Subset)
            # that its caches have been filled by the first pass.
            train_loader.dataset.dataset.read_img = True
            train_loader.dataset.dataset.read_depth = True

            rmse_loss = self._validate(model, test_loader, device)
            mean_loss = epoch_loss / max(len(train_loader), 1)

            # print statistics
            tqdm.write('[Epoch : %d] train_loss: %.5f va rmse loss: %.5f Total_elapsed_time: %d minute' %
                       (epoch + 1, mean_loss, rmse_loss, (time.time() - start_time) / 60))
            history['loss'].append(mean_loss)
            history['val_acc'].append(rmse_loss)

        print((time.time() - start_time) / 60)
        print('Finished Training')

        # Name the file after the backbone actually trained and the number of
        # completed epochs.
        model_name = f'{model_selection}-epoch-{epochs}-model_state_dict.pth'
        # Unwrap DataParallel so the keys carry no 'module.' prefix and load
        # into a plain MIS instance.
        to_save = model.module if isinstance(model, nn.DataParallel) else model
        torch.save(to_save.state_dict(), os.path.join(self.save_path, model_name))
        print('save model as', model_name)

        return history

# Load Data and Run trainer


In [None]:
# Open the dataset (normal, non-test mode) and build the loaders;
# get_dataset() returns the (train_loader, test_loader) pair, split 80/20 by default.
dataset = Data(data_path)
train_data = dataset.get_dataset()

# Train on that pair; the trainer writes the weights below its save_path.
trainer = Train()
trainer.train(train_data)

make MIS model


HBox(children=(FloatProgress(value=0.0, max=60907.0), HTML(value='')))

KeyboardInterrupt: ignored

# Load the model and predict

## Define depth predictor class


In [None]:
import torch
import numpy as np


class DistancePredictor:
    """Loads trained ``MIS`` weights and predicts the value at one image point."""

    def __init__(self, model_path, model_selection):
        """
        Args:
            model_path: path of a saved ``MIS`` state dict.
            model_selection: backbone name the weights were trained with.
        """
        # NOTE: torch.load unpickles the file — only load checkpoints you trust.
        state_dict = torch.load(model_path, map_location=torch.device('cpu'))
        self.model = MIS(model_selection=model_selection)
        self.model.load_state_dict(state_dict)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Move once here instead of on every predict() call.
        self.model = self.model.to(self.device)
        self.model.eval()

    def predict(self, image, point):
        """Run the model on one image and one target point.

        Args:
            image: array shaped ``(3, H, W)`` or ``(1, 3, H, W)``; any other
                rank is reshaped to ``(1, 3, 640, 480)`` as before.
            point: pair of coordinates of the target pixel.

        Returns:
            The model output tensor (no autograd graph attached).
        """
        self.model.eval()

        image = np.asarray(image, dtype=np.float32)
        if image.ndim == 3:
            image = image[np.newaxis]
        elif image.ndim != 4:
            image = np.reshape(image, (1, 3, 640, 480))
        image = torch.as_tensor(image).to(self.device)

        # The model expects the coordinates as a (2, batch) tensor.
        point = torch.tensor([[float(point[0])], [float(point[1])]],
                             dtype=torch.float32, device=self.device)

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            output = self.model((image, point))

        return output

## Load model

In [None]:
# NOTE(review): Train.train saves its weights under a name that carries a
# prefix before 'model_state_dict.pth' — confirm this exact file exists.
model_name = 'model_state_dict.pth'
model_path = os.path.join(root_path, model_name)
predictor = DistancePredictor(model_path, 'vgg')

path_to_depth_v1 = 'gdrive/MyDrive/Colab Notebooks/nyu_depth_data_labeled.mat'
# Read the first image / depth map and close the file right away instead of
# leaving the handle open for the rest of the session.
with h5py.File(path_to_depth_v1, 'r') as f:
    img = f['images'][0]
    depth = f['depths'][0]
transform_img = img.astype('float32') / 255.0

# Compare ground truth and prediction at five points along the image diagonal.
for point in range(5):
    target = [640 // 10 * point, 480 // 10 * point]
    output = predictor.predict(transform_img, target)
    # The x4 undoes the /4.0 scaling the dataset applies to the depth labels.
    print('real depth =', depth[target[0]][target[1]], 'predict depth =', output.item() * 4)

    print()

Downloading: "https://download.pytorch.org/models/vgg11-bbd30ac9.pth" to /root/.cache/torch/hub/checkpoints/vgg11-bbd30ac9.pth


HBox(children=(FloatProgress(value=0.0, max=531456000.0), HTML(value='')))


make MIS model
real depth = 2.7144513 predict depth = 3.0197579860687256
real depth = 2.7144513 predict depth = 3.0197579860687256

real depth = 2.7544267 predict depth = 3.0509114265441895
real depth = 2.692155 predict depth = 3.064953565597534

real depth = 3.039136 predict depth = 3.082064628601074
real depth = 3.0115318 predict depth = 3.1101491451263428

real depth = 3.1535077 predict depth = 3.113218069076538
real depth = 3.0720863 predict depth = 3.1553447246551514

real depth = 3.2476234 predict depth = 3.144371271133423
real depth = 3.1464646 predict depth = 3.20054030418396

