In [1]:
# Root folder of the dataset; later cells build the images/labels and
# train/val sub-folder paths by appending to this string.
DATASET_PATH = "datasets/sweet-pepper-seg"
# Passed as `test_size` to train_test_split: the fraction of paired samples
# that is selected for the validation split.
VAL_SIZE = 0.1  # 1.0 - VAL_SIZE is the fraction that stays in the train split

In [2]:
import glob
import os
import numpy as np
from sklearn.model_selection import train_test_split

# Dataset layout: <DATASET_PATH>/{images,labels}/{train,val}.
images_train_path = DATASET_PATH + "/images/train"
images_val_path = DATASET_PATH + "/images/val"
labels_train_path = DATASET_PATH + "/labels/train"
labels_val_path = DATASET_PATH + "/labels/val"

# glob returns entries in arbitrary (filesystem-dependent) order. Sort the
# listings so the sample order, and therefore the seeded train/val split
# computed from it, is the same on every machine and every run.
all_images = sorted(glob.glob(images_train_path+"/*"))
all_labels = sorted(glob.glob(labels_train_path+"/*"))

# Maps a file's base name (name without directory and last extension) to a
# dict that holds its "image" path and/or its "label" path.
all_samples = {}

for image_path in all_images:
  base_name = os.path.splitext(os.path.basename(image_path))[0]
  all_samples[base_name] = {"image": image_path}

for label_path in all_labels:
  base_name = os.path.splitext(os.path.basename(label_path))[0]
  # Attach the label to the matching image entry, or start a label-only
  # entry when no image shares this base name.
  all_samples.setdefault(base_name, {})["label"] = label_path

# This count is taken before incomplete samples are removed below.
print("Number of samples:", len(all_samples))

# A usable sample needs both an image and a label. Collect the keys first:
# deleting from a dict while iterating over it raises RuntimeError.
print("Samples with missing information:")
to_be_deleted = []
for k,v in all_samples.items():
  if len(v) != 2:
    print(v)
    to_be_deleted.append(k)

for k in to_be_deleted:
  del all_samples[k]

Number of samples: 900
Samples with missing information:
{'image': 'datasets/sweet-pepper-seg/images/train/png_Color_1607625826403.48242187500000.png'}


In [3]:
# Parallel lists: X[i] is the image path and y[i] the label path of the
# same sample (both iterate all_samples in the same order).
X = [sample["image"] for sample in all_samples.values()]
y = [sample["label"] for sample in all_samples.values()]

# Fixed random_state so the same input order yields the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=VAL_SIZE,
    random_state=1,
)

# Report the size of every split.
for caption, paths in (
    ("len(X_train):", X_train),
    ("len(X_test):", X_test),
    ("len(y_train):", y_train),
    ("len(y_test):", y_test),
):
    print(caption, len(paths))

len(X_train): 809
len(X_test): 90
len(y_train): 809
len(y_test): 90


In [4]:
import shutil

# MOVE SELECTED SAMPLES TO THE VALIDATION SET
#
# NOTE: this cell modifies the dataset on disk. Running it again without
# re-running the cells above fails, because the source files listed in
# X_test / y_test have already been moved.

images_val_dir = DATASET_PATH+"/images/val/"
labels_val_dir = DATASET_PATH+"/labels/val/"

# shutil.move does not create the destination folder; make sure both exist
# so the cell also works on a dataset that has no val folders yet.
os.makedirs(images_val_dir, exist_ok=True)
os.makedirs(labels_val_dir, exist_ok=True)

# The trailing slash keeps the destination a directory: each file keeps its
# own name inside it.
for image_src in X_test:
    shutil.move(image_src, images_val_dir)

for label_src in y_test:
    shutil.move(label_src, labels_val_dir)