In [1]:
pip install numpy --upgrade --user

Note: you may need to restart the kernel to use updated packages.


In [3]:
#Import Libraries

import numpy as np #Numerical Computing.
import os #I/O.
import tensorflow as tf #Machine Learning.
from tensorflow.keras.preprocessing import image_dataset_from_directory #Dataset Generator.

In [4]:
#Configuration

#--- Input locations ---
main_dir = './original_images/' #Root folder containing every image subset.
#Training data is stored in four sibling folders: TRAIN.1 ... TRAIN.4.
train_dir = [
    os.path.join(main_dir, 'TRAIN.{}'.format(part))
    for part in (1, 2, 3, 4)
]
test_dir = os.path.join(main_dir, 'TEST') #Folder holding the evaluation images.

#--- Labels ---
#Passed to the dataset generator as class_names together with label_mode='binary'. Do not change.
classes = ['B', 'N']

#--- Dataset generator settings ---
im_size = (64, 64) #Size every image is resized to by the dataset generator.
batch_size = 32 #Number of images per batch.
val_split = 0.1 #Fraction of each training folder held out for validation (0.1 = 10%).
#Shuffle/split seed. Drawn once per kernel run, so every loader call in this run shares it,
#but a different run will produce a different train/validation split.
seed = np.random.randint(0, 123456789)

In [5]:
#Set Up Dataset

def _load_image_dataset(directory, subset=None):
    """Build a batched, labelled tf.data.Dataset from one image directory.

    directory: Folder handed to image_dataset_from_directory.
    subset: 'training' or 'validation' to take that side of the val_split split
        (using the shared seed); None reads the whole folder without splitting.
    Returns the dataset produced by image_dataset_from_directory.
    """
    split_options = {}
    if subset is not None:
        #Both sides of the split are requested with the same seed and fraction,
        #so the training and validation files of a folder stay disjoint.
        split_options = {'seed': seed, 'validation_split': val_split, 'subset': subset}
    return image_dataset_from_directory(
        directory=directory, #Source directory.
        label_mode='binary', #Labeling mode. Leave it as is.
        class_names=classes,
        color_mode='rgb', #Color channels.
        batch_size=batch_size,
        image_size=im_size,
        **split_options
    )


def _concatenate_datasets(parts):
    """Chain the given datasets end to end, in order, into a single dataset.

    Raises ValueError when `parts` is empty.
    """
    combined = None
    for part in parts:
        combined = part if combined is None else combined.concatenate(part)
    if combined is None:
        raise ValueError('No datasets to concatenate.')
    return combined


#Training dataset: the 'training' side of every TRAIN.x folder, concatenated.
train_dataset = _concatenate_datasets(
    _load_image_dataset(directory, subset='training') for directory in train_dir
)

#Validation dataset: the 'validation' side of every TRAIN.x folder, concatenated.
#Built only from validation parts. The previous version appended each part to
#train_dataset, which leaked the training data into validation and kept only the
#last folder's validation split.
validation_dataset = _concatenate_datasets(
    _load_image_dataset(directory, subset='validation') for directory in train_dir
)

#Evaluation dataset: the whole TEST folder, no split.
test_dataset = _load_image_dataset(test_dir)

#Data Pipeline Optimization

#Prefetch: overlaps preprocessing with model execution by preparing the next batch
#while the current one is being consumed, which reduces input latency.

#Cache: stores the dataset after its first full pass.
#Use .cache() to cache in memory. Faster, but memory consuming.
#Use .cache(path_to_file) to cache to a specific file on disk instead.
train_dataset = train_dataset.cache().prefetch(tf.data.AUTOTUNE)
validation_dataset = validation_dataset.cache().prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)

Found 59922 files belonging to 2 classes.
Using 53930 files for training.
Found 59922 files belonging to 2 classes.
Using 53930 files for training.
Found 59922 files belonging to 2 classes.
Using 53930 files for training.
Found 59924 files belonging to 2 classes.
Using 53932 files for training.
Found 59922 files belonging to 2 classes.
Using 5992 files for validation.
Found 59922 files belonging to 2 classes.
Using 5992 files for validation.
Found 59922 files belonging to 2 classes.
Using 5992 files for validation.
Found 59924 files belonging to 2 classes.
Using 5992 files for validation.
Found 16726 files belonging to 2 classes.


In [9]:
#NOTE(review): this cell fails with the InvalidArgumentError recorded below.
#list(train_dataset) pulls every batch into memory, and from_tensor_slices then tries to
#pack the pieces into one tensor; the error reports mismatched shapes [32,64,64,3] vs [32,1].
#The intent is not clear from this notebook. If the goal is a dataset of individual samples,
#train_dataset.unbatch() is presumably what is wanted -- TODO confirm, then fix or delete this cell.
tf.data.Dataset.from_tensor_slices(list(train_dataset))

InvalidArgumentError: {{function_node __wrapped__Pack_N_2_device_/job:localhost/replica:0/task:0/device:CPU:0}} Shapes of all inputs must match: values[0].shape = [32,64,64,3] != values[1].shape = [32,1] [Op:Pack] name: 0