[View in Colaboratory](https://colab.research.google.com/github/nicolasmetallo/road-defect-tf-implementation/blob/master/labelbox_to_tfrecord.ipynb)

In [1]:
#=============== Git Clone & Install libraries ===============#

# Clone the project repository. Later cells chdir into
# "road-defect-tf-implementation/" and, from there, read
# labelbox_images/labelbox_output.csv and run build_dataset.py.
!git clone https://github.com/nicolasmetallo/road-defect-tf-implementation.git
# tqdm supplies the `trange` progress bar used by the image download loop.
!pip install tqdm

Cloning into 'road-defect-tf-implementation'...
remote: Counting objects: 815, done.[K
remote: Compressing objects: 100% (797/797), done.[K
remote: Total 815 (delta 29), reused 800 (delta 17), pack-reused 0[K
Receiving objects: 100% (815/815), 73.82 MiB | 46.41 MiB/s, done.
Resolving deltas: 100% (29/29), done.
Collecting tqdm
[?25l  Downloading https://files.pythonhosted.org/packages/7d/e6/19dfaff08fcbee7f3453e5b537e65a8364f1945f921a36d08be1e2ff3475/tqdm-4.24.0-py2.py3-none-any.whl (43kB)
[K    100% |████████████████████████████████| 51kB 2.1MB/s 
[?25hInstalling collected packages: tqdm
Successfully installed tqdm-4.24.0


In [0]:
#=============== Import Libraries ===============#

import pandas as pd
import json
import numpy as np
import requests
import os
from tqdm import trange
import glob

#=============== Set WORKING DIR ===============#

try:
  os.chdir("road-defect-tf-implementation/")
except OSError:
  # Best-effort on purpose: when this cell is re-run the kernel is already
  # inside the repository, so the relative directory no longer resolves.
  # Only filesystem errors are ignored; anything else (for example a
  # KeyboardInterrupt) still propagates instead of being silently dropped.
  pass

#=============== Import Labelbox CSV ===============#

# CSV export from Labelbox. The cells below read its "Labeled Data",
# "External ID" and "Label" columns.
data = pd.read_csv("labelbox_images/labelbox_output.csv")

In [3]:
#=============== Download Images ===============#

# Download every image listed in the Labelbox export into labelbox_images/,
# unless at least as many .jpg files as CSV rows are already present there.
print("Downloading images from Labelbox..\n")
imageList = glob.glob(os.path.join('labelbox_images',"*.jpg"))
if len(imageList) < len(data):
  # Failures are collected and reported after the loop so that the tqdm
  # progress bar is not interleaved with error messages.
  failed_downloads = []
  for index in trange(len(data)):
    # Positional access, so a non-default DataFrame index cannot break lookup.
    url = data["Labeled Data"].iloc[index]
    name = data["External ID"].iloc[index]
    try:
      # The timeout keeps one stalled connection from hanging the notebook.
      r = requests.get(url, allow_redirects=True, timeout=60)
      # Refuse to save an HTTP error page under an image file name.
      r.raise_for_status()
      # The context manager guarantees the file is flushed and closed.
      with open(os.path.join("labelbox_images", name), 'wb') as image_file:
        image_file.write(r.content)
    except (requests.RequestException, OSError) as err:
      # Stay best-effort (one bad image must not abort the whole run), but
      # record the failure instead of hiding it.
      failed_downloads.append((name, err))
  for name, err in failed_downloads:
    print("Could not download {}: {}".format(name, err))
else:
  print("{} images already downloaded".format(len(data)))

#=============== Split 'images' into train, val, test ===============#

# Run the repository's build_dataset.py with labelbox_images/ as both input
# and output directory. The labelling cell below then reads *.jpg files from
# the labelbox_images/train and labelbox_images/val sub-directories.
!python3 build_dataset.py --data_dir='labelbox_images/' --output_dir='labelbox_images/'

  0%|          | 0/197 [00:00<?, ?it/s]

Downloading images from Labelbox..


100%|██████████| 197/197 [00:37<00:00,  5.22it/s]


Processing train data, saving preprocessed data to labelbox_images/train
100%|████████████████████████████████████████| 157/157 [00:01<00:00, 108.34it/s]
Processing val data, saving preprocessed data to labelbox_images/val
100%|██████████████████████████████████████████| 20/20 [00:00<00:00, 110.52it/s]
Processing test data, saving preprocessed data to labelbox_images/test
100%|██████████████████████████████████████████| 20/20 [00:00<00:00, 109.10it/s]
Done building dataset


In [0]:
#=============== Read JSON ===============#

def get_xy(d,xy='x'):
    """Yield every value stored under key ``xy`` in a nested label structure.

    The value of ``d[xy]`` (if present) is yielded first; then every list
    found among the values of ``d`` is walked and each of its items is
    searched recursively in the same way.

    Args:
        d: A dict describing one labelled shape. Anything that is not a
            dict yields nothing, so lists mixing dicts with scalars are safe.
        xy: The key to collect, ``'x'`` (default) or ``'y'``.

    Yields:
        The values found under ``xy``, in traversal order.
    """
    # Scalars or strings inside a list carry no coordinates; skipping them
    # avoids a TypeError from the membership test and indexing below.
    if not isinstance(d, dict):
        return
    if xy in d:
        yield d[xy]
    for value in d.values():
        if isinstance(value, list):
            for item in value:
                # Recurse into this function itself and forward ``xy``; the
                # previous code called an undefined name and lost the key.
                yield from get_xy(item, xy)

split = ["train","val"]

#=============== Write DataFrame & Save to CSV ===============#

# Column order of every labels.csv written below.
DEFAULT_COLUMNS = ['image_id', 'xmin', 'ymin', 'xmax', 'ymax', 'label']

# For each split, collect one bounding-box row per labelled shape of every
# image present in that split's directory and save them as labels.csv there.
for each in split:
  images_dir = os.path.join(os.getcwd(), "labelbox_images", each)
  imageList = glob.glob(os.path.join(images_dir, "*.jpg"))

  # Take the file name with basename() rather than a fixed '/'-split index,
  # which only worked when the working directory had one specific depth.
  images = [os.path.basename(path) for path in imageList]
  image_names = set(images)  # constant-time membership test per CSV row

  append_list = []

  for imageName, label_json in zip(data["External ID"], data["Label"]):
    if imageName not in image_names:
      continue

    # Parse the label only for rows that belong to this split.
    rowDict = json.loads(label_json)
    # Strip only the file extension, not every ".jpg" inside the name.
    image_id = os.path.splitext(str(imageName))[0]

    for bbox_label, shapes in rowDict.items():
      for shape in shapes:
        xs = list(get_xy(shape, xy='x'))
        ys = list(get_xy(shape, xy='y'))
        if not xs or not ys:
          # A shape without coordinates has no bounding box; min()/max()
          # would raise ValueError on the empty list.
          continue
        append_list.append({
            "image_id": image_id,
            "label": bbox_label,
            "xmin": min(xs),
            "xmax": max(xs),
            "ymin": min(ys),
            "ymax": max(ys),
        })

  # Passing `columns` fixes the column order and also yields a header-only
  # CSV when the split has no labelled boxes, instead of a KeyError.
  bbox_data = pd.DataFrame(append_list, columns=DEFAULT_COLUMNS)
  bbox_data.to_csv(os.path.join(images_dir,"labels.csv"), index = False)