<a href="https://colab.research.google.com/github/siva-sankar-a/eva_final_project/blob/master/EVA_S15_Part1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Start from a clean checkout: delete any previous clone, then fetch the
# DenseDepth repository whose `layers` and `utils` modules are imported below.
!rm -rf DenseDepth
!git clone https://github.com/ialhashim/DenseDepth.git

Cloning into 'DenseDepth'...
remote: Enumerating objects: 235, done.[K
remote: Total 235 (delta 0), reused 0 (delta 0), pack-reused 235[K
Receiving objects: 100% (235/235), 11.80 MiB | 13.74 MiB/s, done.
Resolving deltas: 100% (115/115), done.


In [2]:
# Download the nyu.h5 file into the cloned repo; it is loaded further down
# with keras `load_model`.
!wget https://s3-eu-west-1.amazonaws.com/densedepth/nyu.h5 -O ./DenseDepth/nyu.h5

--2020-05-16 14:04:08--  https://s3-eu-west-1.amazonaws.com/densedepth/nyu.h5
Resolving s3-eu-west-1.amazonaws.com (s3-eu-west-1.amazonaws.com)... 52.218.88.147
Connecting to s3-eu-west-1.amazonaws.com (s3-eu-west-1.amazonaws.com)|52.218.88.147|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 172897376 (165M) [application/h5]
Saving to: ‘./DenseDepth/nyu.h5’


2020-05-16 14:04:10 (71.4 MB/s) - ‘./DenseDepth/nyu.h5’ saved [172897376/172897376]



In [0]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell runs, so changes to imported .py files are picked up.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import glob
import argparse
import matplotlib

import numpy as np
import pandas as pd

# Keras / TensorFlow
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '5'
from keras.models import load_model
from DenseDepth.layers import BilinearUpSampling2D
from DenseDepth.utils import predict, load_images, display_images
from matplotlib import pyplot as plt

Using TensorFlow backend.


In [3]:
# Path of the weights file downloaded above. Kept in its own name so this
# cell can be re-run: previously `model` held the path and was then
# overwritten by the loaded model, which broke a second execution.
MODEL_PATH = 'DenseDepth/nyu.h5'

# Custom object needed for inference and training
custom_objects = {'BilinearUpSampling2D': BilinearUpSampling2D, 'depth_loss_function': None}

print('Loading model...')

# Load model into GPU / CPU (compile=False: the model is only used for inference here)
model = load_model(MODEL_PATH, custom_objects=custom_objects, compile=False)

print('\nModel loaded ({0}).'.format(model))

Loading model...

Model loaded (<keras.engine.training.Model object at 0x7feccb611dd8>).


In [0]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets.utils import download_url, download_and_extract_archive
from multiprocessing import Process
from zipfile import ZipFile, ZIP_DEFLATED
import os
import pandas as pd
import subprocess
import urllib
import tqdm
import cv2

In [0]:
class FgBgDataset(Dataset):
  """
  Dataset over the foreground/background composite images and their masks.

  The index of samples is the CSV `dataset_info.csv` (columns used here:
  `fg_bg_paths` and `mask_paths`, both relative to `DATASET_DIR`). If the
  index is not present locally, the index and `no_of_parts` zip archives are
  downloaded from `ROOT_URL`.
  """
  def __init__(self, no_of_parts=10, train_transform=None, test_transform=None, target_transform=None, **kwargs):
    """
    Constructor for foreground background image dataset

    Args:
        no_of_parts (int): Number of `dataset_<i>.zip` archives to download.
        train_transform (callable, optional): Called as
            `train_transform(image=img)` in train mode; must return a mapping
            with an `'image'` entry.
        test_transform (callable, optional): Same contract, used in eval mode.
        target_transform (callable, optional): Called as
            `target_transform(mask_img)` on the mask image.
        **kwargs: Forwarded to the parent class constructor.
    """
    super().__init__(**kwargs)

    self.ROOT_URL = 'https://eva-final-project-dataset.s3-ap-southeast-2.amazonaws.com/'
    self.COMPRESSED_DIR = './compressed'
    self.DATASET_DIR = './dataset'
    self.DATASET_INFO_FILE = 'dataset_info.csv'
    self.DATASET_FILE_PREFIX = 'dataset_'

    self.no_of_parts = no_of_parts
    self.dataset_info_file_path = os.path.join(self.DATASET_DIR, self.DATASET_INFO_FILE)

    self.download_dataset()

    self.df = pd.read_csv(self.dataset_info_file_path)

    self.train = True
    self.train_transform = train_transform
    self.test_transform = test_transform
    self.target_transform = target_transform

  def download_dataset(self):
    """
    Download and extract the dataset unless the local index file exists.

    The index CSV is fetched last so that its presence marks a completed
    download; an interrupted run is retried on the next call instead of
    being reported as 'Dataset found!'.

    Raises:
        RuntimeError: If any of the download worker processes exits with a
            non-zero exit code.
    """
    if os.path.exists(self.dataset_info_file_path):
      print('Dataset found!')
      return

    print('Downloading dataset..')
    os.makedirs(self.DATASET_DIR, exist_ok=True)
    os.makedirs(self.COMPRESSED_DIR, exist_ok=True)

    # One worker process per archive so the parts download in parallel.
    processes = [Process(target=self.download_and_extract_part, args=(part_idx,)) for part_idx in range(self.no_of_parts)]
    for process in processes:
      process.start()
    for process in processes:
      process.join()

    failed_parts = [part_idx for part_idx, process in enumerate(processes) if process.exitcode != 0]
    if failed_parts:
      raise RuntimeError(f'Failed to download dataset parts: {failed_parts}')

    dataset_info_url = urllib.parse.urljoin(self.ROOT_URL, self.DATASET_INFO_FILE)
    download_url(dataset_info_url, self.DATASET_DIR, self.DATASET_INFO_FILE)

  def download_and_extract_part(self, part):
    """
    Download `dataset_<part>.zip` into COMPRESSED_DIR and extract it into DATASET_DIR.
    """
    dataset_part_path = f'{self.DATASET_FILE_PREFIX}{part}.zip'
    dataset_part_url = urllib.parse.urljoin(self.ROOT_URL, dataset_part_path)
    download_and_extract_archive(dataset_part_url, self.COMPRESSED_DIR, self.DATASET_DIR)

  def __len__(self):
    """Number of rows in the dataset index."""
    return len(self.df)

  def set_train(self):
    """Switch to train mode: `train_transform` is used in `__getitem__`."""
    self.train = True

  def set_eval(self):
    """Switch to eval mode: `test_transform` is used in `__getitem__`."""
    self.train = False

  def _read_image(self, path):
    """
    Read an image with OpenCV.

    Raises:
        FileNotFoundError: If `cv2.imread` returns None for `path`.
    """
    image = cv2.imread(path)
    if image is None:
      raise FileNotFoundError(f'Could not read image: {path}')
    return image

  def __getitem__(self, index):
    """
    Args:
        index (int): Index

    Returns:
        dict: Always contains `'index'`, `'file_name'` (basename of the
        composite image) and `'fg_bg_path'`. Images are only read from disk
        when a transform needs them: `'fg_bg'` is added when the transform
        for the current mode is set, `'mask'` when `target_transform` is set.
    """
    if torch.is_tensor(index):
      index = index.tolist()

    fg_bg_path = os.path.join(self.DATASET_DIR, self.df['fg_bg_paths'].iloc[index])
    mask_path = os.path.join(self.DATASET_DIR, self.df['mask_paths'].iloc[index])

    sample = {
      'index': index,
      'file_name': os.path.basename(fg_bg_path),
      'fg_bg_path': fg_bg_path,
    }

    # Previously the transforms were applied to images that were never
    # loaded, which raised UnboundLocalError as soon as one was configured.
    image_transform = self.train_transform if self.train else self.test_transform
    if image_transform:
      augmented = image_transform(image=self._read_image(fg_bg_path))
      sample['fg_bg'] = augmented['image']

    if self.target_transform is not None:
      sample['mask'] = self.target_transform(self._read_image(mask_path))

    return sample

In [6]:
dataset = FgBgDataset()
batch_size = 8
# Detect the GPU instead of assuming one is present.
use_cuda = torch.cuda.is_available()
# The loader feeds a one-off inference pass, so the order is kept
# deterministic on both CPU and GPU.
dataloader_args = dict(shuffle=False, batch_size=batch_size)
if use_cuda:
    dataloader_args.update(num_workers=4, pin_memory=True)
# drop_last=False: every sample must get a depth map, including a final
# batch smaller than batch_size.
data_loader = DataLoader(dataset, drop_last=False, **dataloader_args)

Dataset found!


In [0]:
# Output folder for the generated depth maps. makedirs(exist_ok=True) avoids
# the check-then-create race and also works when the parent is missing.
os.makedirs('dataset/depth', exist_ok=True)

In [0]:
def get_depth(model, device, data_loader):
    """
    Predict a depth map for every sample and save it as a JPEG.

    For each batch the images named by `fg_bg_path` are loaded, run through
    `predict`, min-max normalised per image, inverted, scaled to 0-255 and
    written to `dataset/depth/<file_name>`. The written paths are stored in
    the `depth_map_paths` column of `data_loader.dataset.df`.

    Args:
        model: Model object passed through to `predict`.
        device: Unused; kept so existing calls keep working.
        data_loader: Iterable of batches with `'index'`, `'file_name'` and
            `'fg_bg_path'` entries; must expose `.dataset.df`.

    Raises:
        OSError: If `cv2.imwrite` reports that a depth map was not written.
    """
    compression_level = 40  # passed as the JPEG quality setting
    frame = data_loader.dataset.df
    frame['depth_map_paths'] = ''
    # Use the loader's own batch size rather than a notebook-global variable.
    loader_batch_size = getattr(data_loader, 'batch_size', None)

    for sample_batch in tqdm.tqdm(data_loader):
        inputs = load_images(sample_batch['fg_bg_path'])
        outputs = predict(model, inputs, minDepth=1, maxDepth=300,
                          batch_size=loader_batch_size or len(inputs))

        depth_map_paths = []

        for idx, depth_map in enumerate(outputs):
            lowest = depth_map.min()
            span = depth_map.max() - lowest
            if span > 0:
                normalized = (depth_map - lowest) / span
            else:
                # Constant map: avoid 0/0 and treat every pixel as the minimum.
                normalized = np.zeros_like(depth_map, dtype=np.float64)

            # Invert and convert to 8-bit. The original discarded the result
            # of astype(), so float data was handed to imwrite.
            depth_image = np.rint((1 - normalized) * 255).astype(np.uint8)

            depth_map_path = os.path.join('dataset', 'depth', sample_batch['file_name'][idx])
            written = cv2.imwrite(depth_map_path, depth_image,
                                  [int(cv2.IMWRITE_JPEG_QUALITY), compression_level])
            if not written:
                raise OSError(f'Could not write depth map: {depth_map_path}')
            depth_map_paths.append(depth_map_path)

        # Batched indices arrive as a tensor from the default collate; use a
        # plain list as the row indexer.
        indices = sample_batch['index']
        if torch.is_tensor(indices):
            indices = indices.tolist()
        frame.loc[indices, 'depth_map_paths'] = depth_map_paths

In [0]:
# Clears tqdm's internal set of bar instances (a private attribute);
# presumably a workaround for leftover bars after an interrupted run - confirm
# it is still needed before keeping this cell.
tqdm.tqdm._instances.clear()

In [0]:
# Destructive: deletes any previously generated depth maps and recreates an
# empty output folder so the next get_depth run starts clean.
!rm -rf dataset/depth/
!mkdir dataset/depth/

In [9]:
# Generate depth maps for the whole dataset (about 2 hours in the recorded run).
# The 'cuda' argument is accepted by get_depth but not used inside it.
get_depth(model, 'cuda', data_loader)

100%|██████████| 5000/5000 [2:08:36<00:00,  1.54s/it]


In [12]:
# Spot-check the index after depth generation: each row should now carry a depth_map_paths value.
data_loader.dataset.df.head()

Unnamed: 0,fg_bg_paths,mask_paths,part,depth_map_paths
0,./fg_bg/img_00112256.jpg,./mask/img_00112256.jpg,0.0,./depth/img_00112256.jpg
1,./fg_bg/img_00081760.jpg,./mask/img_00081760.jpg,0.0,./depth/img_00081760.jpg
2,./fg_bg/img_00164049.jpg,./mask/img_00164049.jpg,0.0,./depth/img_00164049.jpg
3,./fg_bg/img_00081730.jpg,./mask/img_00081730.jpg,0.0,./depth/img_00081730.jpg
4,./fg_bg/img_00067725.jpg,./mask/img_00067725.jpg,0.0,./depth/img_00067725.jpg


In [0]:
# Make the depth paths relative to the dataset folder, like fg_bg_paths and
# mask_paths ('dataset/depth/x.jpg' -> './depth/x.jpg').
# n=1 rewrites only the leading folder name, and regex=False makes the literal
# match explicit instead of relying on the pandas-version-dependent default.
data_loader.dataset.df['depth_map_paths'] = data_loader.dataset.df['depth_map_paths'].str.replace('dataset', '.', n=1, regex=False)

In [0]:
# Number of archives the dataset is split into. FgBgDataset downloads
# `no_of_parts` archives (default 10), so the two values must stay in sync.
n_parts = 10
# Split the index into n_parts consecutive row chunks; each chunk keeps the
# original row labels, which the next cells use to tag and zip the rows.
_sample_dfs = np.array_split(data_loader.dataset.df, n_parts)

In [0]:
# Record, for every row, the number of the chunk (and therefore the archive)
# it belongs to, using the row labels each chunk kept from the full index.
for part_number, part_frame in enumerate(_sample_dfs):
    row_labels = part_frame.index
    data_loader.dataset.df.loc[row_labels, 'part'] = part_number

In [0]:
# Write the updated index to the working directory for upload. Note this is
# ./dataset_info.csv, not the copy inside the dataset folder that FgBgDataset reads.
data_loader.dataset.df.to_csv('dataset_info.csv', index=False)

In [16]:
# Sanity check on the number of generated depth maps. `ls -al` also prints a
# `total` line plus the `.` and `..` entries, so subtract 3 from the count.
!ls -al dataset/depth/ | wc -l

40003


In [27]:
# Build one archive per chunk containing the mask, composite and depth image
# of every row, stored under their dataset-relative paths.
# The archive handle is named `archive` (not `dataset`) so the FgBgDataset
# instance is not overwritten by a closed ZipFile, and the inner loop no
# longer reuses the outer loop variable.
for part_idx, _df in enumerate(_sample_dfs):
    with ZipFile(f'dataset_{part_idx}.zip', 'w', ZIP_DEFLATED) as archive:
        for _, row in tqdm.tqdm(_df.iterrows(), total=len(_df)):
            for column in ('mask_paths', 'fg_bg_paths', 'depth_map_paths'):
                archive.write(os.path.join('dataset', row[column]), row[column])

4000it [00:25, 157.19it/s]
4000it [00:35, 112.38it/s]
4000it [00:35, 113.08it/s]
4000it [00:35, 111.59it/s]
4000it [00:36, 111.01it/s]
4000it [00:34, 114.64it/s]
4000it [00:35, 114.20it/s]
4000it [00:35, 114.24it/s]
4000it [00:34, 114.91it/s]
4000it [00:34, 117.57it/s]


In [21]:
# Spot-check the size and timestamps of the first archive before uploading.
!stat dataset_0.zip

  File: dataset_0.zip
  Size: 516232134 	Blocks: 1008280    IO Block: 4096   regular file
Device: 32h/50d	Inode: 2764023     Links: 1
Access: (0644/-rw-r--r--)  Uid: (    0/    root)   Gid: (    0/    root)
Access: 2020-05-16 19:08:24.001851764 +0000
Modify: 2020-05-16 19:13:43.692157556 +0000
Change: 2020-05-16 19:13:43.692157556 +0000
 Birth: -


In [0]:
import logging
import boto3
from botocore.exceptions import ClientError

In [0]:
# SECURITY: credentials must never be written into the notebook. boto3's
# default credential chain is used instead: set AWS_ACCESS_KEY_ID and
# AWS_SECRET_ACCESS_KEY in the environment or configure ~/.aws/credentials.
# The key pair that was previously committed in this cell is exposed and has
# to be revoked in IAM.
session = boto3.Session()
s3 = session.resource('s3')
bucket = s3.Bucket('eva-final-project-dataset')

In [0]:
def upload_file(bucket, file_path, object_name=None):
  """
  Upload a local file through `bucket.upload_file`.

  Args:
      bucket: Object exposing `upload_file(file_path, object_name)`.
      file_path (str): Path of the local file to upload.
      object_name (str, optional): Destination key. Defaults to `file_path`.

  Returns:
      bool: True on success, False if a ClientError was raised (it is logged).
  """
  if object_name is None:
    object_name = file_path

  try:
    bucket.upload_file(file_path, object_name)
  except ClientError as e:
    logging.error(e)
    return False
  return True

In [25]:
# Upload the dataset index under the same object name that FgBgDataset downloads (DATASET_INFO_FILE).
upload_file(bucket, 'dataset_info.csv', 'dataset_info.csv')

True

In [28]:
# Upload every archive produced by the zip step. The count comes from
# n_parts so it cannot drift from the number of archives actually written.
for idx in range(n_parts):
  file_path = f'dataset_{idx}.zip'
  if upload_file(bucket, file_path, file_path):
    print(f'{file_path} success!!!')
  else:
    print(f'{file_path} failed!!!')

dataset_0.zip success!!!
dataset_1.zip success!!!
dataset_2.zip success!!!
dataset_3.zip success!!!
dataset_4.zip success!!!
dataset_5.zip success!!!
dataset_6.zip success!!!
dataset_7.zip success!!!
dataset_8.zip success!!!
dataset_9.zip success!!!
