In [11]:
import random
import shutil
from pathlib import Path

import cv2
import numpy as np
from natsort import natsorted

In [22]:
# Folder that is scanned for the source .jpg / .png files.
input_dir = 'save_path/cmnd/data_cmnd'

# Output folders, one per dataset split.
train_dir = 'save_path/cmnd/train/'
valid_dir = 'save_path/cmnd/valid/'
test_dir = 'save_path/cmnd/test/'

In [23]:
# Turn the configured split folders into Path objects and make sure they exist.
train_dir, test_dir, valid_dir = Path(train_dir), Path(test_dir), Path(valid_dir)

for split_dir in (train_dir, test_dir, valid_dir):
    # exist_ok=True makes re-running this cell a no-op and avoids the
    # check-then-create race; it still raises if the path is a regular file.
    split_dir.mkdir(parents=True, exist_ok=True)

In [37]:
def load_data(dataset_folder):
    """Collect image/mask path pairs from a folder and shuffle them reproducibly.

    Every ``*.jpg`` file directly inside ``dataset_folder`` is treated as an
    image and is paired with the ``*.png`` file that has the same stem. The
    pairs are shuffled as a unit with a fixed seed, so ``image_paths[i]`` and
    ``mask_paths[i]`` always belong to the same sample.

    Args:
        dataset_folder: Directory (str or Path) containing the ``.jpg`` and
            ``.png`` files.

    Returns:
        A tuple ``(image_paths, mask_paths)`` of two equally long lists of
        ``Path`` objects in matching order.

    Raises:
        ValueError: If some image has no mask with the same stem, or some
            mask has no image with the same stem.
    """
    dataset_folder = Path(dataset_folder)
    image_paths = natsorted(dataset_folder.glob('*.jpg'), key=lambda x: str(x))
    masks_by_stem = {mask_path.stem: mask_path for mask_path in dataset_folder.glob('*.png')}

    # Pair by stem instead of by list position: positional pairing breaks as
    # soon as one file is missing or the two extensions sort differently.
    image_stems = {image_path.stem for image_path in image_paths}
    unpaired = sorted(image_stems.symmetric_difference(masks_by_stem))
    if unpaired:
        raise ValueError(
            'image/mask files without a counterpart in {}: {}'.format(dataset_folder, unpaired)
        )

    pairs = [(image_path, masks_by_stem[image_path.stem]) for image_path in image_paths]

    # A private generator seeded with 2020 yields the same permutation as
    # seeding the module-level generator, without resetting global random state.
    random.Random(2020).shuffle(pairs)

    image_paths = [image_path for image_path, _ in pairs]
    mask_paths = [mask_path for _, mask_path in pairs]
    return image_paths, mask_paths

In [38]:
# Gather the shuffled image and mask path lists from the input folder.
image_paths, mask_paths = load_data(input_dir)

In [39]:
# Spot-check: the first image and the first mask should share the same file stem.
print(image_paths[:1])
print(mask_paths[:1])

[WindowsPath('save_path/cmnd/data_cmnd/20141013_ce2450fc26d42e4e68a12HZ7fI9TC8bT34_00.jpg')]
[WindowsPath('save_path/cmnd/data_cmnd/20141013_ce2450fc26d42e4e68a12HZ7fI9TC8bT34_00.png')]


In [42]:
# Fractions of the shuffled data per split. test_ratio is not read below:
# the test split simply receives whatever is left after train and valid.
train_ratio, test_ratio, valid_ratio = 0.64, 0.2, 0.16

# Slice boundaries, computed once from the number of images.
n_samples = len(image_paths)
train_end = int(train_ratio * n_samples)
valid_end = int((train_ratio + valid_ratio) * n_samples)

# Images and masks are cut at the same indices so the pairs stay aligned.
train_image_paths, train_mask_paths = image_paths[:train_end], mask_paths[:train_end]
valid_image_paths, valid_mask_paths = image_paths[train_end:valid_end], mask_paths[train_end:valid_end]
test_image_paths, test_mask_paths = image_paths[valid_end:], mask_paths[valid_end:]

In [47]:
def save_data(image_paths, mask_paths, save_dir):
    """Copy image/mask pairs into ``save_dir``.

    For every pair, the image is stored as ``<image stem>.jpg`` and the mask
    as ``<image stem>.png`` inside ``save_dir``. Files are copied byte-for-byte
    rather than decoded and re-encoded, so JPEG images are not recompressed
    and masks keep their original channel layout and bit depth.

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths, aligned with ``image_paths``.
        save_dir: Destination directory (str or Path); created if missing.

    Raises:
        ValueError: If the two sequences differ in length.
        OSError: If a source file cannot be read or a destination cannot be
            written.
    """
    save_dir = Path(save_dir)
    image_paths, mask_paths = list(image_paths), list(mask_paths)

    # zip() would silently drop the unpaired tail, so reject a mismatch up front.
    if len(image_paths) != len(mask_paths):
        raise ValueError(
            'got {} images but {} masks'.format(len(image_paths), len(mask_paths))
        )

    save_dir.mkdir(parents=True, exist_ok=True)

    for image_path, mask_path in zip(image_paths, mask_paths):
        # Both outputs are named after the image so the pair stays matched.
        name_file = Path(image_path).stem
        shutil.copyfile(str(image_path), str(save_dir.joinpath(name_file + '.jpg')))
        shutil.copyfile(str(mask_path), str(save_dir.joinpath(name_file + '.png')))

In [48]:
# Save the training pairs into train_dir.
save_data(train_image_paths, train_mask_paths, train_dir)

In [49]:
# Save the test pairs (the remainder after the train and valid slices) into test_dir.
save_data(test_image_paths, test_mask_paths, test_dir)

In [50]:
# Save the validation pairs into valid_dir.
save_data(valid_image_paths, valid_mask_paths, valid_dir)