In [4]:
import os
import json
import shutil

import sys
sys.path.append("..")

# Every path below is anchored at the directory the notebook is launched from.
DIR_ROOT = os.getcwd()
DIR_DEEP = os.path.join(DIR_ROOT, "../dataset/deepfashion")
DIR_COCO = os.path.join(DIR_ROOT, "../dataset/coco")
DIR_ALL = os.path.join(DIR_ROOT, "../dataset/all")

# Deepfashion dataset (inputs).
# DIR_DEEP / DIR_COCO already start with DIR_ROOT, so they are joined directly
# instead of being prefixed with DIR_ROOT a second time.
FILE_DEEPFASHION_TRAIN_JSON = os.path.join(DIR_DEEP, "annotations", "instances_train2017.json")
FILE_DEEPFASHION_VAL_JSON = os.path.join(DIR_DEEP, "annotations", "instances_val2017.json")
DIR_ROOT_DEEPFASHION_IMAGES = os.path.join(DIR_DEEP, "images")

# COCO dataset (inputs).
FILE_COCO_TRAIN_JSON = os.path.join(DIR_COCO, "annotations", "instances_train2017.json")
FILE_COCO_VAL_JSON = os.path.join(DIR_COCO, "annotations", "instances_val2017.json")
DIR_ROOT_COCO_IMAGES_TRAIN2017 = os.path.join(DIR_COCO, "images/train2017")
DIR_ROOT_COCO_IMAGES_VAL2017 = os.path.join(DIR_COCO, "images/val2017")

# New (merged) dataset (outputs).
DIR_ALL_ANNOTATIONS = os.path.join(DIR_ALL, "annotations")
DIR_ALL_IMAGES_TRAIN2017 = os.path.join(DIR_ALL, "images/train2017")
DIR_ALL_IMAGES_VAL2017 = os.path.join(DIR_ALL, "images/val2017")
FILE_ALL_TRAIN_JSON = os.path.join(DIR_ALL_ANNOTATIONS, "instances_train2017.json")
FILE_ALL_VAL_JSON = os.path.join(DIR_ALL_ANNOTATIONS, "instances_val2017.json")
# The DeepFashion-only exports get their own file names. They previously
# resolved to the same paths as FILE_ALL_*_JSON, so writing the DeepFashion-only
# annotations replaced the merged annotation files.
FILE_DEEP_TRAIN_JSON = os.path.join(DIR_ALL_ANNOTATIONS, "deepfashion_train2017.json")
FILE_DEEP_VAL_JSON = os.path.join(DIR_ALL_ANNOTATIONS, "deepfashion_val2017.json")

# Create the output tree up front. exist_ok=True makes this cell safe to re-run
# and avoids the check-then-create race of a separate os.path.exists() test.
for output_dir in (DIR_ALL_ANNOTATIONS, DIR_ALL_IMAGES_TRAIN2017, DIR_ALL_IMAGES_VAL2017):
    os.makedirs(output_dir, exist_ok=True)

## Deepfashion dataset과 COCO 2017 dataset 합치기

In [5]:
def _load_json(path):
    """Parse the JSON file at ``path`` and return the decoded object.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes, rather than being left open for the garbage collector.
    It is read as UTF-8 so the result does not depend on the platform's
    default text encoding.
    """
    with open(path, encoding="utf-8") as infile:
        return json.load(infile)


# Train/val annotation files of both source datasets.
coco_train = _load_json(FILE_COCO_TRAIN_JSON)
coco_val = _load_json(FILE_COCO_VAL_JSON)
deep_train = _load_json(FILE_DEEPFASHION_TRAIN_JSON)
deep_val = _load_json(FILE_DEEPFASHION_VAL_JSON)

## Load된 data 확인

In [6]:
# COCO train: show the first record of each list-valued section, then the
# dataset-level info and the first license entry.
for section in ('annotations', 'categories', 'images'):
    print(coco_train[section][0])
print(coco_train['info'])
print(coco_train['licenses'][0])

# DeepFashion train: show the second annotation and the second category.
for section in ('annotations', 'categories'):
    print(deep_train[section][1])
# Number of images in the DeepFashion train and val splits.
for split in (deep_train, deep_val):
    print(len(split['images']))

{'segmentation': [[239.97, 260.24, 222.04, 270.49, 199.84, 253.41, 213.5, 227.79, 259.62, 200.46, 274.13, 202.17, 277.55, 210.71, 249.37, 253.41, 237.41, 264.51, 242.54, 261.95, 228.87, 271.34]], 'area': 2765.1486500000005, 'iscrowd': 0, 'image_id': 558840, 'bbox': [199.84, 200.46, 77.71, 70.88], 'category_id': 58, 'id': 156}
{'supercategory': 'person', 'id': 1, 'name': 'person'}
{'license': 3, 'file_name': '000000391895.jpg', 'coco_url': 'http://images.cocodataset.org/train2017/000000391895.jpg', 'height': 360, 'width': 640, 'date_captured': '2013-11-14 11:18:45', 'flickr_url': 'http://farm9.staticflickr.com/8186/8119368305_4e622c8349_z.jpg', 'id': 391895}
{'description': 'COCO 2017 Dataset', 'url': 'http://cocodataset.org', 'version': '1.0', 'year': 2017, 'contributor': 'COCO Consortium', 'date_created': '2017/09/01'}
{'url': 'http://creativecommons.org/licenses/by-nc-sa/2.0/', 'id': 1, 'name': 'Attribution-NonCommercial-ShareAlike License'}
{'area': 29854, 'bbox': [43, 1, 118, 253],

## COCO dataset과 Deepfashion dataset을 합치고 JSON파일로 저장

1. 두 dataset들을 하나의 dataset으로 만듭니다.
2. images data는 merge한 후, shuffle 시킵니다. (coco data 다음에 deepfashion data가 위치하므로) 단, 현재 shuffle 코드는 주석 처리되어 있어 실행되지 않습니다.
3. json 파일로 저장합니다.

In [18]:
import random

# Annotation lists per split; the merged list is COCO followed by DeepFashion.
coco_train_anno, deep_train_anno = coco_train['annotations'], deep_train['annotations']
all_train_anno = coco_train_anno + deep_train_anno

coco_val_anno, deep_val_anno = coco_val['annotations'], deep_val['annotations']
all_val_anno = coco_val_anno + deep_val_anno

# Image records per split, concatenated in the same COCO-then-DeepFashion order.
# Shuffling of the merged image lists is currently disabled:
# random.shuffle(all_train_img)
# random.shuffle(all_val_img)
coco_train_img, deep_train_img = coco_train['images'], deep_train['images']
all_train_img = coco_train_img + deep_train_img

coco_val_img, deep_val_img = coco_val['images'], deep_val['images']
all_val_img = coco_val_img + deep_val_img

# Categories are taken from the train files only and concatenated as-is.
# NOTE(review): nothing here checks that category ids or image ids are
# disjoint between the two datasets - confirm against the input files.
coco_cate, deep_cate = coco_train['categories'], deep_train['categories']
all_cate = coco_cate + deep_cate

# Dataset-level metadata is taken from the COCO train file.
all_info, all_licenses = coco_train['info'], coco_train['licenses']


In [None]:
# Write the merged (COCO + DeepFashion) annotation files.
# 'info' and 'licenses' are now written as well: all_info / all_licenses were
# collected from the COCO train file but previously never made it into the
# output, so the merged files lacked those two top-level COCO sections.
print('start - make json file')
train_data = {
    'annotations': all_train_anno,
    'categories': all_cate,
    'images': all_train_img,
    'info': all_info,
    'licenses': all_licenses,
}
with open(FILE_ALL_TRAIN_JSON, 'w') as outfile:
    json.dump(train_data, outfile)

val_data = {
    'annotations': all_val_anno,
    'categories': all_cate,
    'images': all_val_img,
    'info': all_info,
    'licenses': all_licenses,
}
with open(FILE_ALL_VAL_JSON, 'w') as outfile:
    json.dump(val_data, outfile)
print('end - make json file')

In [19]:
# COCO-only export that carries the merged category list.
#
# The output goes to dedicated files under DIR_ALL_ANNOTATIONS instead of
# FILE_COCO_TRAIN_JSON / FILE_COCO_VAL_JSON. Those two paths are the files the
# notebook loads COCO from, so writing to them overwrote the source dataset:
# the rewritten files had no 'info' / 'licenses' sections (which the notebook
# reads from coco_train after loading) and their 'categories' already
# contained the DeepFashion entries, which a re-run would append again.
FILE_ALL_COCO_TRAIN_JSON = os.path.join(DIR_ALL_ANNOTATIONS, "coco_train2017.json")
FILE_ALL_COCO_VAL_JSON = os.path.join(DIR_ALL_ANNOTATIONS, "coco_val2017.json")

coco_train_data = {
    'annotations': coco_train_anno,
    'categories': all_cate,
    'images': coco_train_img,
    # Keep the dataset-level metadata that the source file carried.
    'info': all_info,
    'licenses': all_licenses,
}
with open(FILE_ALL_COCO_TRAIN_JSON, 'w') as outfile:
    json.dump(coco_train_data, outfile)

coco_val_data = {
    'annotations': coco_val_anno,
    'categories': all_cate,
    'images': coco_val_img,
    'info': all_info,
    'licenses': all_licenses,
}
with open(FILE_ALL_COCO_VAL_JSON, 'w') as outfile:
    json.dump(coco_val_data, outfile)

In [None]:
# DeepFashion-only export: DeepFashion annotations and images paired with the
# merged category list, written once per split.
deep_train_data = dict(
    annotations=deep_train_anno,
    categories=all_cate,
    images=deep_train_img,
)
with open(FILE_DEEP_TRAIN_JSON, 'w') as outfile:
    json.dump(deep_train_data, outfile)

deep_val_data = dict(
    annotations=deep_val_anno,
    categories=all_cate,
    images=deep_val_img,
)
with open(FILE_DEEP_VAL_JSON, 'w') as outfile:
    json.dump(deep_val_data, outfile)

## 모든 Image 파일 한 곳으로 모으기

coco dataset의 이미지와 deepfashion 이미지들을 한 폴더로 모두 복사합니다.

In [None]:
import shutil
from shutil import copyfile


def copy_images(img_records, src_dir, dst_dir):
    """Copy the image file of every record from ``src_dir`` into ``dst_dir``.

    Files are copied, not moved, so the source tree is left as it was.

    Args:
        img_records: iterable of dicts, each with a 'file_name' entry.
        src_dir: directory the image files are read from.
        dst_dir: directory the image files are copied into.

    Returns:
        A ``(copied, missing)`` tuple: how many files were copied and how many
        were absent from ``src_dir``. Files already present in ``dst_dir`` are
        skipped and counted in neither number.
    """
    copied = 0
    missing = 0
    for img_data in img_records:
        img_name = img_data['file_name']
        new_img_path = os.path.join(dst_dir, img_name)
        # Already copied on an earlier run: leave the existing file alone.
        if os.path.exists(new_img_path):
            continue
        img_path = os.path.join(src_dir, img_name)
        if not os.path.exists(img_path):
            # Report and carry on, so one missing file does not stop the batch.
            print("not found", img_path)
            missing += 1
            continue
        copyfile(img_path, new_img_path)
        copied += 1
    return copied, missing


# One entry per (label, image records, source dir, destination dir). This
# replaces four copies of the same loop that differed only in these values.
for label, img_records, src_dir, dst_dir in (
    ("COCO_IMAGES_TRAIN2017", coco_train_img, DIR_ROOT_COCO_IMAGES_TRAIN2017, DIR_ALL_IMAGES_TRAIN2017),
    ("COCO_IMAGES_VAL2017", coco_val_img, DIR_ROOT_COCO_IMAGES_VAL2017, DIR_ALL_IMAGES_VAL2017),
    ("DEEPFASHION_IMAGES_TRAIN2017", deep_train_img, DIR_ROOT_DEEPFASHION_IMAGES, DIR_ALL_IMAGES_TRAIN2017),
    ("DEEPFASHION_IMAGES_VAL2017", deep_val_img, DIR_ROOT_DEEPFASHION_IMAGES, DIR_ALL_IMAGES_VAL2017),
):
    copy_images(img_records, src_dir, dst_dir)
    print("Completed - " + label)

Completed - COCO_IMAGES_TRAIN2017
Completed - COCO_IMAGES_VAL2017
