<a href="https://colab.research.google.com/github/soohyunme/TensorFlow_Tutorial/blob/main/Code/17_Custom_Dataset/1_Images_in_subfolders.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


In [None]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Image geometry and mini-batch size shared by the model and both loaders.
img_height, img_width = 28, 28
batch_size = 2

In [None]:
# Small CNN classifier for single-channel images.
# The input shape is taken from the config cell so the model stays in sync
# with the image size requested from the data loaders.
model = keras.Sequential([
    layers.Input((img_height, img_width, 1)),
    # ReLU after each convolution: without a non-linearity in between, two
    # stacked convolutions are equivalent to a single linear operation.
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    # 10 output units with no activation, i.e. raw logits; the losses in this
    # notebook are constructed with from_logits=True accordingly.
    layers.Dense(10),
])

# METHOD 1
Using `image_dataset_from_directory`

In [None]:
# Training split (90 % of the files): labels are inferred from the
# sub-folder names and returned as integer class indices.
ds_train = tf.keras.preprocessing.image_dataset_from_directory(
    directory='data/mnist_subfolders/',
    # --- labels ---
    labels='inferred',
    label_mode='int',  # other options: 'categorical', 'binary'
    # class_names=['0', '1', '2', ...]
    # --- images ---
    color_mode='grayscale',
    image_size=(img_height, img_width),  # images are resized to this size
    batch_size=batch_size,
    # --- split / ordering ---
    validation_split=0.1,
    subset='training',
    shuffle=True,
    seed=123,
)

Found 50 files belonging to 10 classes.
Using 45 files for training.


In [None]:
# Validation split (the remaining 10 % of the files). The seed and
# validation_split values match the training call above.
ds_validation = tf.keras.preprocessing.image_dataset_from_directory(
    directory='data/mnist_subfolders/',
    # --- labels ---
    labels='inferred',
    label_mode='int',  # other options: 'categorical', 'binary'
    # class_names=['0', '1', '2', ...]
    # --- images ---
    color_mode='grayscale',
    image_size=(img_height, img_width),  # images are resized to this size
    batch_size=batch_size,
    # --- split / ordering ---
    validation_split=0.1,
    subset='validation',
    shuffle=True,
    seed=123,
)

Found 50 files belonging to 10 classes.
Using 5 files for validation.


In [None]:
def augment(x, y):
    """Randomly shift the brightness of the images; labels pass through.

    Args:
        x: Image tensor supplied by ``Dataset.map``.
        y: Label tensor, returned unchanged.

    Returns:
        Tuple ``(image, y)`` where ``image`` is the result of
        ``tf.image.random_brightness(x, max_delta=0.05)``.
    """
    # NOTE(review): if the pixels arrive in the 0-255 range (presumably the
    # case for image_dataset_from_directory output), a delta of at most 0.05
    # is barely noticeable - confirm the intended scale.
    return tf.image.random_brightness(x, max_delta=0.05), y

In [None]:
# Attach the brightness augmentation to the training pipeline.
# NOTE: ds_train is rebound here, so running this cell again stacks one more
# augmentation pass on top of the previous one.
ds_train = ds_train.map(map_func=augment)


# Custom Loops

In [None]:
# Skeleton of a hand-written training loop over the tf.data pipeline:
# the outer loop makes 10 passes over the data, the inner loop receives one
# (x, y) batch per iteration.
for epochs in range(10):
    for x, y in ds_train:
        # placeholder: a real training step on (x, y) would go here
        pass

In [None]:
# Configure training: Adam optimizer and sparse categorical cross-entropy
# computed on raw logits (from_logits=True), with accuracy reported per epoch.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train on the augmented pipeline and evaluate on the held-out ds_validation
# split at the end of every epoch.
model.fit(ds_train, validation_data=ds_validation, epochs=10, verbose=2)

Epoch 1/10
23/23 - 10s - loss: 48.8264 - accuracy: 0.2000 - 10s/epoch - 452ms/step
Epoch 2/10
23/23 - 0s - loss: 4.5173 - accuracy: 0.8222 - 100ms/epoch - 4ms/step
Epoch 3/10
23/23 - 0s - loss: 0.9852 - accuracy: 0.9333 - 92ms/epoch - 4ms/step
Epoch 4/10
23/23 - 0s - loss: 0.0763 - accuracy: 0.9778 - 90ms/epoch - 4ms/step
Epoch 5/10
23/23 - 0s - loss: 3.3084e-05 - accuracy: 1.0000 - 93ms/epoch - 4ms/step
Epoch 6/10
23/23 - 0s - loss: 3.2811e-05 - accuracy: 1.0000 - 89ms/epoch - 4ms/step
Epoch 7/10
23/23 - 0s - loss: 1.8920e-05 - accuracy: 1.0000 - 95ms/epoch - 4ms/step
Epoch 8/10
23/23 - 0s - loss: 1.3625e-05 - accuracy: 1.0000 - 95ms/epoch - 4ms/step
Epoch 9/10
23/23 - 0s - loss: 9.8710e-06 - accuracy: 1.0000 - 91ms/epoch - 4ms/step
Epoch 10/10
23/23 - 0s - loss: 8.0544e-06 - accuracy: 1.0000 - 98ms/epoch - 4ms/step


<keras.callbacks.History at 0x7f38801c7c90>

# METHOD 2
ImageDataGenerator and flow_from_directory

In [None]:
# Image generator that multiplies pixel values by 1/255 and applies light
# geometric augmentation.
datagen = ImageDataGenerator(
    # --- tensor layout / type ---
    data_format='channels_last',
    dtype=tf.float32,
    # --- pixel scaling ---
    rescale=1.0 / 255,
    # --- geometric augmentation ---
    rotation_range=5,
    # NOTE(review): lower and upper bound are identical, which looks like a
    # fixed zoom factor rather than a range - confirm this is intended.
    zoom_range=(0.95, 0.95),
    horizontal_flip=False,
    vertical_flip=False,
    # --- splitting ---
    validation_split=0,
)

In [None]:
# Directory iterator producing (images, integer labels) batches; the class
# of each image is taken from its sub-folder.
train_generator = datagen.flow_from_directory(
    directory='data/mnist_subfolders/',
    # --- images ---
    color_mode='grayscale',
    target_size=(img_height, img_width),
    # --- labels / batching ---
    class_mode='sparse',
    batch_size=batch_size,
    # --- split / ordering ---
    subset='training',
    shuffle=True,
    seed=123,
)

Found 50 images belonging to 10 classes.


In [None]:
def training():
    """Placeholder for a single optimisation step on one batch."""
    pass


# Custom Loops
# The directory iterator keeps yielding batches without ever stopping, so
# each epoch has to be ended by hand once every batch has been seen once.
# len(train_generator) is the number of batches that make up one epoch.
steps_per_epoch = len(train_generator)

for epochs in range(10):
    num_batches = 0

    # Iterate the generator built for this method (not the tf.data pipeline
    # from METHOD 1).
    for x, y in train_generator:
        num_batches += 1

        # do training
        training()

        # Leave the endless generator after one full pass over the data.
        if num_batches >= steps_per_epoch:
            break

In [None]:
# Redo model.compile to reset the optimizer states.
# NOTE: only the optimizer is recreated here; the layer weights are kept, so
# this run continues from whatever the model has already learned.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# using model.fit (note steps_per_epoch)
# steps_per_epoch is derived from the generator so it stays correct when
# batch_size or the number of images changes.
model.fit(
    train_generator,
    epochs=10,
    steps_per_epoch=len(train_generator),
    verbose=2,
    # if we had a validation generator:
    # validation_data=validation_generator,
    # validation_steps=len(validation_generator),
)


Epoch 1/10
25/25 - 2s - loss: 1.8380 - accuracy: 0.4800 - 2s/epoch - 79ms/step
Epoch 2/10
25/25 - 0s - loss: 0.5727 - accuracy: 0.9600 - 124ms/epoch - 5ms/step
Epoch 3/10
25/25 - 0s - loss: 0.0859 - accuracy: 1.0000 - 129ms/epoch - 5ms/step
Epoch 4/10
25/25 - 0s - loss: 0.0247 - accuracy: 1.0000 - 126ms/epoch - 5ms/step
Epoch 5/10
25/25 - 0s - loss: 0.0153 - accuracy: 1.0000 - 138ms/epoch - 6ms/step
Epoch 6/10
25/25 - 0s - loss: 0.0055 - accuracy: 1.0000 - 135ms/epoch - 5ms/step
Epoch 7/10
25/25 - 0s - loss: 0.0033 - accuracy: 1.0000 - 123ms/epoch - 5ms/step
Epoch 8/10
25/25 - 0s - loss: 0.0024 - accuracy: 1.0000 - 133ms/epoch - 5ms/step
Epoch 9/10
25/25 - 0s - loss: 0.0021 - accuracy: 1.0000 - 134ms/epoch - 5ms/step
Epoch 10/10
25/25 - 0s - loss: 0.0028 - accuracy: 1.0000 - 135ms/epoch - 5ms/step


<keras.callbacks.History at 0x7f38800ce490>