# Data Handling

In [2]:
import os
import cv2 as cv
import tensorflow.keras.datasets.mnist as mnist

from sklearn.model_selection import train_test_split

## Gather images

We will initially be using a small set of non-medical images as a proof of concept. The obvious choice is the MNIST dataset, so we shall load it using the loader that is built into Keras.

In [3]:
# Load MNIST through the Keras built-in loader; the result holds the train set
# and the test set, which are unpacked in the next cell.
mnist_data = mnist.load_data()

The data is split into two arrays, the train and test sets. Each of those is then split into arrays for image data and class labels. We shall split these up, and also hold out part of the training set as a validation set.

In [4]:
# Fixed seed so the train/validation split is identical on every
# Restart Kernel -> Run All; without it the split is reshuffled on each run.
SEED = 42

# mnist_data holds two entries: the train set and the test set.
data_train, data_test = mnist_data

# Each set is an (images, labels) pair.
X_train, y_train = data_train
X_test, y_test = data_test

# Hold out part of the training data for validation (scikit-learn's default
# hold-out size is used, as before; only the shuffling is now seeded).
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, random_state=SEED
)

Now we can organise the data directory for the data generator.

First we will need to make a directory for each class within each of the train, validation and test sets.

In [5]:
def getdirs(classes):
    """Build the absolute directory path of every class for every data split.

    Args:
        classes: Iterable of class labels; each label is formatted into the
            path as-is.

    Returns:
        A list of three lists, in the order train, val, test. Each inner list
        holds one path per class of the form
        ``<current working directory>/data/<split>/<class>/``. The trailing
        slash is kept so file names can be appended directly.
    """
    root = os.getcwd()
    return [
        [
            os.path.join(root, "data/{0}/{1}/".format(split, label))
            for label in classes
        ]
        for split in ["train", "val", "test"]
    ]

def mkdirs(classes):
    """Create an empty directory for every class in every data split.

    Directories that do not exist yet are created (including parents).
    Directories that already exist have their files removed so that a re-run
    starts from a clean state.

    Args:
        classes: Iterable of class labels, passed on to ``getdirs``.
    """
    for split_dirs in getdirs(classes):
        for class_dir in split_dirs:
            # exist_ok avoids the check-then-create race of isdir() + makedirs().
            os.makedirs(class_dir, exist_ok=True)
            for entry in os.listdir(class_dir):
                entry_path = os.path.join(class_dir, entry)
                # os.remove cannot delete directories; skip them instead of
                # aborting the clean-up half-way through with an exception.
                if os.path.isfile(entry_path) or os.path.islink(entry_path):
                    os.remove(entry_path)

# Class labels 0-9; each label doubles as the name of its class directory.
classes = range(10)
# Create the class directories, or empty them if they already exist.
mkdirs(classes)

Now we can organise the images into the directories.

In [6]:
def write_imgs(classes, X_train, y_train, X_val, y_val, X_test, y_test):
    """Write every image into its split/class directory as a JPEG file.

    Files are named ``class_<label>_img_<k>.jpg``, where ``k`` counts the
    images already written for that label within the current split.

    Args:
        classes: Sized collection of class labels, passed on to ``getdirs``.
        X_train, X_val, X_test: Iterables of images to hand to ``cv.imwrite``.
        y_train, y_val, y_test: Labels aligned with the images. Each label is
            used as an index into the per-class directory list and counters,
            so labels are assumed to be integers in ``range(len(classes))``.

    Raises:
        OSError: If OpenCV reports that an image could not be written.
    """
    dir_sets = getdirs(classes)
    sets = [(X_train, y_train), (X_val, y_val), (X_test, y_test)]
    for dirs, (X, y) in zip(dir_sets, sets):
        # Per-class running index, restarted for each split.
        counter = [0] * len(classes)
        for img, label in zip(X, y):
            # dirs[label] already ends with a path separator (see getdirs).
            filename = "{0}class_{1}_img_{2}.jpg".format(dirs[label], label, counter[label])
            # cv.imwrite reports failure through its boolean return value;
            # surface it instead of silently producing an incomplete dataset.
            if not cv.imwrite(filename, img):
                raise OSError("Failed to write image: {0}".format(filename))
            counter[label] += 1

# Write the train, validation and test images into the class directories.
write_imgs(classes, X_train, y_train, X_val, y_val, X_test, y_test)