<a href="https://colab.research.google.com/github/naem1023/Measuring-Image-Distance/blob/feat%2F1-nydata-prototype/train_for_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setting TPU

In [3]:
# Make sure to use TPU
import os
# Use .get() so that a missing variable triggers the assertion (and its
# instructions) instead of a bare KeyError that hides the message.
assert os.environ.get('COLAB_TPU_ADDR'), 'Make sure to select TPU from Edit > Notebook settings > Hardware accelerator'

In [4]:
# Installing PyTorch/XLA
# Installs the pinned cloud-tpu-client (0.10) and the torch_xla 1.8.1 wheel.
# The wheel name carries the cp37 / linux_x86_64 tags, so it presumably needs a
# CPython 3.7 runtime and a matching torch 1.8.x — TODO confirm against the
# current Colab runtime.
!pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.8.1-cp37-cp37m-linux_x86_64.whl



In [5]:
# Import pytorch/xla
import torch

# imports the torch_xla package
import torch_xla
import torch_xla.core.xla_model as xm



In [6]:
# make device
# Acquire the XLA device through torch_xla's xla_model helper.
# NOTE(review): the model and training code later in this notebook choose their
# own device with torch.cuda.is_available() and never read this variable —
# confirm whether training is meant to run on the TPU.
device = xm.xla_device()

# Train the distance model on Colab
## Import Module

In [2]:
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.nn.functional as F
import h5py
import scipy.io
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

import torch.optim as optim
from tqdm import tqdm
import torch
import json
import os

## Load data

In [3]:
from google.colab import drive

# Mount Google Drive at an explicit, absolute mount point.
MOUNT_POINT = '/content/gdrive'
drive.mount(MOUNT_POINT)

# Build the dataset path from the mount point so it resolves no matter what the
# notebook's current working directory is (the previous relative path only
# worked while the working directory was /content).
data_path = os.path.join(MOUNT_POINT, 'MyDrive/Colab Notebooks/nyu_depth_data_labeled.mat')


Mounted at /content/gdrive


## Define the NYU Depth Dataset class

In [4]:
class NyDataset(Dataset):
    """NYU Depth data exposed as (image, grid point) samples.

    Every image contributes ``x_point * y_point`` samples: one per point of a
    regular grid laid over its depth map. A flat dataset index ``i`` is decoded
    as image ``i // point`` and grid point ``i % point``.
    """

    def __init__(self, root_dir, transform=None, x_point=10, y_point=10):
        """
        Args:
            root_dir (string):
                Path of the HDF5 file that holds the 'images', 'depths' and
                'rawDepths' datasets.
            transform (callable, optional):
                Optional transform for a sample. It is stored but not applied
                by ``__getitem__``.
            x_point (int):
                Number of grid points along axis 1 of the depth map.
            y_point (int):
                Number of grid points along axis 2 of the depth map.
        """
        self.root_dir = root_dir
        # Open the file once, read-only, and reuse the handle everywhere. The
        # previous code opened a second handle just to read the image count
        # and never closed it.
        self.img_data_file = h5py.File(root_dir, 'r')
        self.transform = transform
        self.x_point = x_point
        self.y_point = y_point
        self.point = x_point * y_point

        self.len = self.img_data_file['images'].shape[0]

    def __len__(self):
        """Return the number of (image, grid point) samples."""
        return self.len * self.point

    def __getitem__(self, idx):
        """Return ``(sample, depth_list)`` for a flat index.

        Args:
            idx: An int, a numpy integer, a tensor, or a list/tuple of ints.

        Returns:
            tuple: ``sample`` is a dict with the float32 'image' scaled by
            1/255 and the 'target_coordinate' list of ``[axis1, axis2]`` pixel
            positions; ``depth_list`` is a numpy array with the depth value
            read at each of those positions.

        Raises:
            TypeError: If ``idx`` is of an unsupported type.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        if isinstance(idx, (list, tuple)):
            # NOTE(review): for multi-element indices only the first selected
            # image (and its depth map) is used below — confirm the intended
            # batch semantics before relying on list indexing.
            converted_idx = np.array([(i // self.point, i % self.point) for i in idx])
        elif isinstance(idx, (int, np.integer)):
            # numpy integers are accepted too; the previous exact-type check
            # let them fall through to an unbound local variable.
            converted_idx = np.array([[idx // self.point, idx % self.point]])
        else:
            raise TypeError('Unsupported index type: %s' % type(idx).__name__)

        image = self.__get_image(self.root_dir, converted_idx[:, 0])
        depth_image = self.__get_depth(self.root_dir, converted_idx[:, 0])

        depth_list, target_coordinate = self.get_depth_point(depth_image, converted_idx[:, 1])

        sample = {
            'image': image,
            'target_coordinate': target_coordinate
        }

        return sample, depth_list

    def get_depth_point(self, depth_image, idxes):
        """Look up depth values at the grid points numbered by ``idxes``.

        Args:
            depth_image: Array indexed as ``[image, axis1, axis2]``; only
                image 0 is read.
            idxes: Iterable of grid point numbers (taken modulo ``self.point``).

        Returns:
            tuple: ``(depth, target_coordinate)`` where ``depth`` is a numpy
            array of the sampled values and ``target_coordinate`` is a list of
            ``[axis1, axis2]`` pixel positions.
        """
        x_interval = depth_image.shape[1] // self.x_point
        y_interval = depth_image.shape[2] // self.y_point

        # Grid position, not pixel position. The first component steps along
        # axis 1 (x_point steps) and the second along axis 2 (y_point steps),
        # so the flat point number is split by y_point. Splitting by x_point,
        # as before, gives the same result for square grids but produced
        # out-of-range pixels whenever x_point != y_point.
        positions = [divmod(idx % self.point, self.y_point) for idx in idxes]
        target_coordinate = [[gx * x_interval, gy * y_interval] for gx, gy in positions]

        depth = np.array([depth_image[0][px][py] for px, py in target_coordinate])

        return depth, target_coordinate

    def __get_raw_depth(self, root_dir, idx):
        """Return the 'rawDepths' entries at ``idx`` divided by 4.0 (``root_dir`` is unused)."""
        rawDepth = self.img_data_file['rawDepths'][idx] / 4.0
        return rawDepth

    def __get_depth(self, root_dir, idx):
        """Return the 'depths' entries at ``idx`` as float32 divided by 4.0 (``root_dir`` is unused)."""
        depth = self.img_data_file['depths'][idx]  # (1, 640, 480)

        transform_depth = depth.astype('float32') / 4.0
        return transform_depth

    def __get_image(self, root_dir, idx):
        """Return the first 'images' entry selected by ``idx`` as float32 scaled by 1/255 (``root_dir`` is unused)."""
        img = self.img_data_file['images'][idx][0]  # (3, 640, 480)

        transform_img = img.astype('float32') / 255.0
        return transform_img

## Define the train/test split and DataLoaders

In [5]:
class Data:
    """Builds train/test DataLoaders over an NyDataset."""

    def __init__(self, path):
        """
        Args:
            path (string): Path of the data file handed to NyDataset.
        """
        self.ny_dataset = NyDataset(path)

    def get_dataset(self, train_ratio=0.8, batch_size=64):
        """Randomly split the dataset and wrap both parts in DataLoaders.

        Args:
            train_ratio (float): Fraction of samples assigned to the training
                split; must lie in [0, 1].
            batch_size (int): Batch size used by both loaders.

        Returns:
            tuple: ``(train_loader, test_loader)``.

        Raises:
            ValueError: If ``train_ratio`` is outside [0, 1].
        """
        if not 0.0 <= train_ratio <= 1.0:
            raise ValueError('train_ratio must be in [0, 1], got %r' % (train_ratio,))

        # Set split length
        total_len = len(self.ny_dataset)
        train_len = int(total_len * train_ratio)
        test_len = total_len - train_len

        train_dataset, test_dataset = torch.utils.data.random_split(self.ny_dataset, [train_len, test_len])

        train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

        return train_loader, test_loader




## Define MIS model

In [None]:
class MIS(nn.Module):
    """Measuring Image Distance Model.

    A truncated pretrained torchvision backbone produces a feature map, which
    is max-pooled to 7x7, flattened, concatenated with a two-value target
    coordinate, and passed through three fully connected layers. The final
    softplus keeps the output positive.
    """

    def __init__(self, sub_sampling_ratio=16, width=480, height=640, model_selection='mobile'):
        """
        Args:
            sub_sampling_ratio (int): The backbone is cut before the first
                layer whose output is smaller than ``width // ratio`` and
                ``height // ratio``.
            width (int): Size of axis 2 of the probe image used to cut the backbone.
            height (int): Size of axis 3 of the probe image used to cut the backbone.
            model_selection (str): 'mobile' (mobilenet_v3_small) or 'vgg' (vgg11).
        """
        super(MIS, self).__init__()
        self.sub_sampling_ratio = sub_sampling_ratio
        self.width = width
        self.height = height
        self.model_selection = model_selection

        size = (7, 7)
        fc1_out = 128
        fc2_out = int(fc1_out / 4)

        # Also sets self.extraction_size (channel count of the feature map).
        self.feature_extractor = self.get_feature_extraction()
        self.adaptive_max_pool = nn.AdaptiveMaxPool2d(size)
        # "+ 2" is for the two target-coordinate values appended in forward().
        self.fc1 = nn.Linear(size[0] * size[1] * self.extraction_size + 2, fc1_out)
        self.fc2 = nn.Linear(fc1_out, fc2_out)
        self.fc3 = nn.Linear(fc2_out, 1)

        print('make MIS model')

    def rol_pooling(self, output_map):
        """Apply the adaptive max pool to each element of ``output_map`` and keep entry 0 of each result."""
        output = [self.adaptive_max_pool(out)[0] for out in output_map]

        return output

    def forward(self, sample):
        """Predict one positive value per batch element.

        Args:
            sample: Pair ``(image, target)``. ``image`` is the image batch fed
                to the backbone; ``target`` holds the target coordinates with
                the batch on axis 1 (it is transposed before concatenation).

        Returns:
            Tensor: softplus-activated output of the last linear layer.
        """
        x = sample[0]  # image
        target = sample[1]  # target coordinate

        x = self.feature_extractor(x)
        x = self.adaptive_max_pool(x)
        x = x.view(x.size(0), -1)

        # Append the coordinates as extra features with a tensor op. The
        # previous implementation went through Python lists and
        # torch.tensor(...).cuda(), which detached the features from the
        # autograd graph (no gradient reached the backbone), forced a
        # device-to-host copy per batch, and failed without CUDA.
        coordinates = torch.transpose(target, 0, 1).to(device=x.device, dtype=x.dtype)
        x = torch.cat([x, coordinates], dim=1)

        x = self.fc1(x)
        x = F.relu(x)

        x = self.fc2(x)
        x = F.relu(x)

        x = self.fc3(x)
        output = F.softplus(x)

        return output

    def get_feature_extraction(self):
        """Return network which produces feature map.

        Loads the selected pretrained torchvision model and keeps its leading
        ``features`` layers up to (not including) the first one whose output
        for a ``(1, 3, width, height)`` probe is smaller than
        ``width // sub_sampling_ratio`` and ``height // sub_sampling_ratio``.
        Sets ``self.extraction_size`` to the channel count of the last kept
        layer's output.

        Raises:
            ValueError: If ``model_selection`` is not 'vgg' or 'mobile'.
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        if self.model_selection == 'vgg':
            model = torchvision.models.vgg11(pretrained=True).to(device)
        elif self.model_selection == 'mobile':
            model = torchvision.models.mobilenet_v3_small(pretrained=True).to(device)
        else:
            raise ValueError("model_selection must be 'vgg' or 'mobile', got %r" % (self.model_selection,))

        min_width = self.width // self.sub_sampling_ratio
        min_height = self.height // self.sub_sampling_ratio

        # Probe with an all-zero image to find where the feature map gets too small.
        output = torch.zeros((1, 3, self.width, self.height), device=device).float()
        channels = output.size(1)
        req_features = []

        # Probe in eval mode and without gradients so the zero image does not
        # overwrite the pretrained BatchNorm running statistics.
        model.eval()
        with torch.no_grad():
            for feature in model.features:
                output = feature(output)
                # output.size() => torch.Size([batch_size, channel, width, height])

                # Stop before the layer that shrinks the map below the threshold.
                if output.size(2) < min_width and output.size(3) < min_height:
                    break
                req_features.append(feature)
                channels = output.size(1)
        # Back to training mode, the state freshly built modules start in.
        model.train()

        # Derived from the kept layers rather than hard-coded, so it stays
        # correct for other width/height/sub_sampling_ratio settings.
        self.extraction_size = channels

        faster_rcnn_feature_extractor = nn.Sequential(*req_features)
        return faster_rcnn_feature_extractor

## Define train

In [12]:
class Train:
    """Trains an MIS model on the given loaders and saves its state dict."""

    def __init__(self, save_path='.'):
        """
        Args:
            save_path (str): Directory in which 'model_state_dict.pt' is
                written. The previous hard-coded '.\\' is a Windows-style path
                and does not name an existing directory on Linux/Colab, so
                saving failed after training had finished.
        """
        self.save_path = save_path

    @staticmethod
    def _prepare_batch(inputs, labels, device):
        """Move one collated batch to ``device`` and return ``((image, coordinates), labels)``."""
        image_inputs = inputs['image'].to(device)
        # After collation 'target_coordinate'[0] holds one tensor per
        # coordinate component; stacking puts the components on axis 0.
        coordinate_inputs = torch.stack(list(inputs['target_coordinate'][0]), dim=0).to(device)
        return (image_inputs, coordinate_inputs), labels.to(device)

    def _evaluate(self, model, criterion, loader, device):
        """Return the mean per-batch loss over ``loader`` (NaN if it is empty)."""
        total_loss = 0.0
        batches = 0
        model.eval()
        with torch.no_grad():
            for inputs, labels in loader:
                model_inputs, labels = self._prepare_batch(inputs, labels, device)
                total_loss += criterion(model(model_inputs), labels).item()
                batches += 1
        model.train()
        return total_loss / batches if batches else float('nan')

    def train(self, data, epochs=1):
        """Train the model, report progress per epoch and save the weights.

        Args:
            data: Pair ``(train_loader, test_loader)``.
            epochs (int): Number of passes over the training loader.

        Returns:
            dict: ``{'loss': [...], 'val_loss': [...]}`` with one entry per
            epoch; ``val_loss`` is NaN for epochs without validation.
        """
        train_loader, test_loader = data
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        model = MIS()

        if torch.cuda.device_count() > 1:
            # NOTE(review): this multi-GPU branch is kept unchanged. The
            # environment variable is assigned after CUDA has already been
            # queried and output_device=1 differs from the device the labels
            # are moved to — confirm before using more than one GPU.
            os.environ["CUDA_VISIBLE_DEVICES"] = '0, 1, 2, 3'
            model = nn.DataParallel(model, output_device=1)

        model = model.to(device)

        # Optimize
        criterion = nn.SmoothL1Loss().to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.01)

        import time
        start_time = time.time()
        history = {'loss': [], 'val_loss': []}
        # Guard against division by zero for an empty loader.
        num_batches = max(len(train_loader), 1)

        for epoch in range(epochs):  # loop over the dataset multiple times
            epoch_loss = 0.0
            tk0 = tqdm(train_loader, total=len(train_loader), leave=False)
            for inputs, labels in tk0:
                model_inputs, labels = self._prepare_batch(inputs, labels, device)
                # zero the parameter gradients
                optimizer.zero_grad()
                outputs = model(model_inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()

            # validation
            # The model is a regressor, so report the mean validation loss.
            # The previous block was classification code (argmax accuracy,
            # calling .cuda() on the input dict) and raised at epoch 0.
            if epoch % 10 == 0:
                val_loss = self._evaluate(model, criterion, test_loader, device)
            else:
                val_loss = float('nan')

            # print statistics
            # Average over the real number of batches, not a fixed constant.
            train_loss = epoch_loss / num_batches
            tqdm.write('[Epoch : %d] train_loss: %.5f val_loss: %.5f Total_elapsed_time: %d 분' %
                       (epoch + 1, train_loss, val_loss, (time.time() - start_time) / 60))
            history['loss'].append(train_loss)
            history['val_loss'].append(val_loss)

            # Step-wise learning-rate decay.
            if epoch in [36, 64, 92]:
                for g in optimizer.param_groups:
                    g['lr'] /= 10
                print('Loss 1/10')

        print(time.time() - start_time)
        print('Finished Training')

        torch.save(model.state_dict(), os.path.join(self.save_path, 'model_state_dict.pt'))

        return history

# Load Data and Run trainer


In [None]:
# Build the dataset wrapper from the data file path. get_dataset() returns a
# (train_loader, test_loader) tuple, which Train.train() unpacks.
dataset = Data(data_path)
train_data = dataset.get_dataset()

# Train the model on those loaders; train() also saves the model's state dict.
trainer = Train()
trainer.train(train_data)

  from ipykernel import kernelapp as app
Downloading: "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_small-047dcff4.pth


HBox(children=(FloatProgress(value=0.0, max=10306551.0), HTML(value='')))




  0%|          | 0/2855 [00:00<?, ?it/s]

make MIS model


 23%|██▎       | 660/2855 [14:52<47:52,  1.31s/it]