In [3]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

# Input and output roots are derived from the current working directory by
# swapping every "pipelines" substring of its path for "data" /
# "processed_data".
_cwd = os.getcwd()
origin_folder = _cwd.replace("pipelines", "data")
proceeded_folder = _cwd.replace("pipelines", "processed_data")

# Size tuple passed as `dsize` to cv2.resize for both images and masks.
dim = (512, 512)

In [4]:
# Directories holding the raw inputs and their label masks.
image_dir_path = f"{origin_folder}/images/"
mask_dir_path = f"{origin_folder}/labels/"

# Both listings are sorted so that entries at the same index are expected to
# belong together (this relies on an image and its mask sharing a file name).
image_path_list = sorted(os.listdir(image_dir_path))
mask_path_list = sorted(os.listdir(mask_dir_path))

summary_template = "number of images: {}, number of masks: {}"
print(summary_template.format(len(image_path_list), len(mask_path_list)))

number of images: 1169, number of masks: 1169


In [5]:
# store numpy standard dataset
# Every image/mask pair is resized to `dim` and written as
# <proceeded_folder>/images/<id>.npy and <proceeded_folder>/labels/<id>.npy.

# Create the output directories once, up front, rather than on every iteration.
image_save_dir = proceeded_folder + "/images"
mask_save_dir = proceeded_folder + "/labels"
os.makedirs(image_save_dir, exist_ok=True)
os.makedirs(mask_save_dir, exist_ok=True)

# zip() has no length, so tell tqdm how many pairs to expect to get a real
# progress bar and ETA.
n_pairs = min(len(image_path_list), len(mask_path_list))
for image_name, mask_name in tqdm(zip(image_path_list, mask_path_list), total=n_pairs):
    # The id is the file name minus its last 4 characters (assumes a
    # 3-character extension such as ".png"); image and mask must share it.
    _id = os.path.basename(image_name)[:-4]
    mask_id = os.path.basename(mask_name)[:-4]
    assert _id == mask_id, "image/mask mismatch: {} vs {}".format(image_name, mask_name)

    # load image and mask
    image = plt.imread(os.path.join(image_dir_path, image_name))
    mask = plt.imread(os.path.join(mask_dir_path, mask_name))

    if mask.ndim == 3:
        # Multi-channel mask: a pixel is foreground only when all of its first
        # three channels equal 1. Stored as uint8 because cv2.resize does not
        # accept int64 input (OpenCV 4.x; see the cv2.resize documentation).
        mask = np.all(mask[:, :, :3] == 1, axis=2).astype(np.uint8)

    # resize image and mask to 512x512 pixels
    image_new = cv2.resize(image, dim, interpolation=cv2.INTER_CUBIC)
    mask_new = cv2.resize(mask, dim, interpolation=cv2.INTER_NEAREST)
    # Binarise the *resized* mask. Thresholding the original `mask` here would
    # throw the resize away and save labels at their native resolution.
    mask_new = np.where(mask_new > 0, 1, 0)

    # save image and mask
    np.save(os.path.join(image_save_dir, _id + ".npy"), image_new)
    np.save(os.path.join(mask_save_dir, _id + ".npy"), mask_new)

1169it [02:00,  9.70it/s]


In [14]:
# Collect the id of every image/mask pair: the file name without its last
# four characters.
ls_image_id = []
for image_name, mask_name in tqdm(zip(image_path_list, mask_path_list)):
    image_stem = os.path.basename(image_name)[:-4]
    mask_stem = os.path.basename(mask_name)[:-4]

    # an image and its mask must share the same id
    assert image_stem == mask_stem

    ls_image_id.append(image_stem)

print(len(ls_image_id))

1169it [00:00, 796223.02it/s]

1169





In [14]:
# split ls_image_id into 5 folds
n_folds = 5

# Shuffle a copy so ls_image_id keeps its original order: re-running this cell
# on its own then starts from the same input and reproduces the same folds
# instead of reshuffling an already shuffled list.
np.random.seed(0)
shuffled_ids = list(ls_image_id)
np.random.shuffle(shuffled_ids)

fold_size = len(shuffled_ids) // n_folds
ls_folds = [
    shuffled_ids[i * fold_size : (i + 1) * fold_size] for i in range(n_folds)
]

# len(shuffled_ids) need not be a multiple of n_folds. The leftover ids (fewer
# than n_folds of them) are handed out one per fold so that no image is
# silently left out of the split.
leftover_ids = shuffled_ids[n_folds * fold_size :]
for fold, item in zip(ls_folds, leftover_ids):
    fold.append(item)
assert sum(len(fold) for fold in ls_folds) == len(ls_image_id)

# save folds as txt: one "<id>.npy" file name per line, in fold1.txt .. fold5.txt
save_dir_path = proceeded_folder
os.makedirs(save_dir_path, exist_ok=True)
for fold_number, fold in enumerate(ls_folds, start=1):
    fold_file = os.path.join(save_dir_path, "fold{}.txt".format(fold_number))
    with open(fold_file, "w") as f:
        f.writelines(f"{item}.npy\n" for item in fold)