In [1]:
import cv2
import numpy as np
import glob
from tqdm import tqdm

In [2]:
height = 300
width = 130

In [3]:
path = "../dataset/augmented"
imagepath = path + "/images/*.png"
maskpath = path + "/masks/*.png"

In [4]:
images = glob.glob(imagepath)
masks = glob.glob(maskpath)

print("Number of images: ", len(images))
print("Number of masks: ", len(masks))

Number of images:  2324
Number of masks:  2324


In [5]:
allImages = []
maskImages = []

In [6]:
for image, mask, in tqdm(zip(images, masks), total = len(images)):

    img = cv2.imread(image, cv2.IMREAD_COLOR)
    img = cv2.resize(img, (width, height))
    img = img/255.0
    img = img.astype(np.float32)
    allImages.append(img)

    msk = cv2.imread(mask, cv2.IMREAD_GRAYSCALE)
    msk = cv2.resize(msk, (width, height))
    msk[msk>0] = 1
    maskImages.append(msk)

100%|██████████| 2324/2324 [02:05<00:00, 18.54it/s]


In [7]:
allImagesNP = np.array(allImages)
maskImagesNP = np.array(maskImages)
maskImagesNP = maskImagesNP.astype(int)

In [8]:
print("Images shape: ", allImagesNP.shape)
print("Masks shape: ", maskImagesNP.shape)

Images shape:  (2324, 300, 130, 3)
Masks shape:  (2324, 300, 130)


In [9]:
from sklearn.model_selection import train_test_split

train_size = 0.7
valid_size = 0.2
test_size = 0.1

train_imgs, vt_imgs, train_masks, vt_masks = train_test_split(
    allImagesNP, maskImagesNP, 
    test_size=(1 - train_size), 
    random_state=42
)

vt_valid_size = valid_size / (valid_size + test_size)
vt_test_size = test_size / (valid_size + test_size)

valid_imgs, test_imgs, valid_masks, test_masks = train_test_split(
    vt_imgs, vt_masks, 
    test_size=vt_test_size, 
    random_state=42
)

print("Train Set:")
print(train_imgs.shape)
print(train_masks.shape)

print("Validation Set:")
print(valid_imgs.shape)
print(valid_masks.shape)

print("Test Set:")
print(test_imgs.shape)
print(test_masks.shape)


Train Set:
(1626, 300, 130, 3)
(1626, 300, 130)
Validation Set:
(465, 300, 130, 3)
(465, 300, 130)
Test Set:
(233, 300, 130, 3)
(233, 300, 130)


In [10]:
import os

def save_npy(path, filename, data):
    directory = os.path.dirname(os.path.join(path, filename))
    if not os.path.exists(directory):
        os.makedirs(directory)
    np.save(os.path.join(directory, filename), data)

In [11]:
# Save training data
save_npy(path + "/feed", "train_img.npy", train_imgs)
save_npy(path + "/feed", "train_mask.npy", train_masks)

# Save validation data
save_npy(path + "/feed", "valid_img.npy", valid_imgs)
save_npy(path + "/feed", "valid_mask.npy", valid_masks)

# Save validation data
save_npy(path + "/feed", "test_img.npy", test_imgs)
save_npy(path + "/feed", "test_mask.npy", test_masks)