In [178]:
# All module imports
import tensorflow as tf
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np

In [179]:
# Notebook configuration: root folder of the data set, and the file extensions
# (lower-case, without the leading dot) that count as images when scanning it.
# NOTE(review): Keras' image_dataset_from_directory documents jpeg/jpg/png/bmp/gif
# support only, so webp files may be skipped by the training pipeline - confirm.
dataSetDir = "Data Set"
imageExt = [
    "jpg",
    "png",
    "jpeg",
    "webp",
]

In [180]:
# Turn on memory growth for every visible GPU so TensorFlow allocates GPU memory
# as it is needed instead of claiming all of it up front.
# When no GPU is present the list is empty and the loop does nothing.
gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, enable=True)

In [None]:
# Test Cell
# Walks every class sub-directory of the dataset directory and shows each file whose
# extension is listed in imageExt, one matplotlib figure per image.
for imageDir in os.listdir(dataSetDir):
    classDirPath = os.path.join(dataSetDir, imageDir)
    # Stray files in the dataset root (e.g. .DS_Store) are not class folders;
    # calling os.listdir on them would raise NotADirectoryError.
    if not os.path.isdir(classDirPath):
        continue
    for image in os.listdir(classDirPath):
        # splitext + lower() so "photo.JPG" is accepted and a dot-less file
        # that is literally named "jpg" is not.
        extension = os.path.splitext(image)[1].lstrip(".").lower()
        if extension not in imageExt:
            continue
        imagePath = os.path.join(classDirPath, image)
        pixels = cv2.imread(imagePath)
        # cv2.imread signals a file it cannot decode by returning None rather than
        # raising, which would otherwise crash cvtColor below.
        if pixels is None:
            print(f"Skipping unreadable image: {imagePath}")
            continue
        # OpenCV loads images as BGR; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(pixels, cv2.COLOR_BGR2RGB))
        plt.title(imagePath)
        plt.show()

In [181]:
# Build the image input pipeline with Keras' built-in directory loader, pointed at
# the nested dataset folder.
# Class labels are inferred from the inner directories: each sub-directory is one class.
# No other arguments are passed, so the loader's defaults apply (documented as batches
# of 32 images resized to 256x256 - confirm for the installed TensorFlow version).
dataSet = tf.keras.utils.image_dataset_from_directory(directory=dataSetDir)

Found 117 files belonging to 6 classes.


In [None]:
# Creates an iterator over the dataset that yields each element as NumPy arrays.
# The iterator does not batch anything itself: it hands back whatever batches the
# dataset already produces (the original note gives a batch size of 32).
# Calling next() on it retrieves the next batch of images and labels.
iterator = dataSet.as_numpy_iterator()

In [None]:
# Test Cell
# Pulls the next batch from the iterator; element 0 holds the images and element 1 the labels.
# Shows up to the first 16 images of that batch, each titled with its class label.
nextBatch = next(iterator)
batchImages, batchLabels = nextBatch
for index, (picture, label) in enumerate(zip(batchImages[:16], batchLabels[:16])):
    # Intended for the unscaled (0-255) data: cast to int so imshow reads the values as 0-255.
    plt.imshow(picture.astype(int))
    plt.title(label)
    plt.show()

In [182]:
# Rescale the pixel values from the 0-255 range down to 0-1.
# Dataset.map() applies the lambda to every (images, labels) element the dataset yields;
# only the images are divided by 255, the class labels pass through unchanged.
# Note: dataSet is reassigned to its own mapped version, so re-running this cell
# divides by 255 a second time.
dataSet = dataSet.map(lambda images, labels: (images / 255, labels))

In [183]:
# Re-creates the NumPy iterator, this time over the dataset as it stands after the
# scaling cell, so the batches it yields carry the rescaled pixel values.
# The iterator does not batch anything itself: it hands back whatever batches the
# dataset already produces (the original note gives a batch size of 32).
# Calling next() on it retrieves the next batch of images and labels.
iterator = dataSet.as_numpy_iterator()

In [None]:
# Test Cell
# Run this cell only if the dataset has been scaled to the 0-1 range.
# Pulls the next batch from the iterator and shows up to its first 16 images,
# each titled with its class label.
nextBatch = next(iterator)
batchImages, batchLabels = nextBatch
for index, (picture, label) in enumerate(zip(batchImages[:16], batchLabels[:16])):
    # Values are passed to imshow as-is (no integer cast), hence the 0-1 requirement above.
    plt.imshow(picture)
    plt.title(label)
    plt.show()

In [None]:
# Pull one more batch (this advances the iterator) and display the shape of its image array.
next(iterator)[0].shape