In [1]:
# import the necessary packages
import os
from imutils import paths
import random
import shutil
 
# location of the *original* (unsplit) image dataset on disk
ORIG_INPUT_DATASET = "breast-histopathology-images/IDC_regular_ps50_idx5"

# root of the *new* directory tree that receives the images once the
# training/testing split has been computed
BASE_PATH = "datasets"

# fraction of the images assigned to the training split; the remainder
# is used for testing
TRAIN_SPLIT = 0.8

# output directories for the training and testing splits
TRAIN_PATH = os.path.join(BASE_PATH, "training")
TEST_PATH = os.path.join(BASE_PATH, "testing")

# grab the paths to all input images in the original input directory;
# sort them before shuffling because the order in which a directory tree
# is listed depends on the OS/filesystem, so seeding the RNG alone would
# not make the shuffle (and therefore the split) reproducible across
# machines
imagePaths = sorted(paths.list_images(ORIG_INPUT_DATASET))
random.seed(40)
random.shuffle(imagePaths)

# compute the training and testing split: the first TRAIN_SPLIT fraction
# of the shuffled paths is used for training, the rest for testing
i = int(len(imagePaths) * TRAIN_SPLIT)
trainPaths = imagePaths[:i]
testPaths = imagePaths[i:]

# define the datasets that we'll be building, as
# (split name, image paths, output directory) tuples
datasets = [
    ("training", trainPaths, TRAIN_PATH),
    ("testing", testPaths, TEST_PATH),
]

# loop over the datasets; each split's list is bound to its own name so
# the module-level `imagePaths` (the full shuffled list) is not
# overwritten by the loop
for (dType, splitPaths, baseOutput) in datasets:
    # show which data split we are creating
    print("[INFO] building '{}' split".format(dType))

    # if the base output directory does not exist, create it
    if not os.path.exists(baseOutput):
        print("[INFO] 'creating {}' directory".format(baseOutput))
        os.makedirs(baseOutput)

    # loop over the input image paths of this split
    for inputPath in splitPaths:
        # extract the filename of the input image, then take the class
        # label ("0" for "negative" and "1" for "positive") as the last
        # character of the name *without* its extension, so the label
        # is still correct for extensions that are not three letters
        # long (e.g. ".jpeg", ".tiff")
        filename = os.path.basename(inputPath)
        label = os.path.splitext(filename)[0][-1:]

        # build the path to the label directory
        labelPath = os.path.join(baseOutput, label)

        # if the label output directory does not exist, create it
        if not os.path.exists(labelPath):
            print("[INFO] 'creating {}' directory".format(labelPath))
            os.makedirs(labelPath)

        # construct the path to the destination image and then copy
        # the image itself (copy2 also attempts to preserve metadata)
        p = os.path.join(labelPath, filename)
        shutil.copy2(inputPath, p)

[INFO] building 'training' split
[INFO] 'creating datasets\training' directory
[INFO] 'creating datasets\training\0' directory
[INFO] 'creating datasets\training\1' directory
[INFO] building 'testing' split
[INFO] 'creating datasets\testing' directory
[INFO] 'creating datasets\testing\0' directory
[INFO] 'creating datasets\testing\1' directory
