In [9]:
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
import pathlib  # pathlib is in standard library

- **pathlib**:  Imports the pathlib module from the standard library, which is used for working with file paths.

In [10]:

batch_size = 2
img_height = 28
img_width = 28

In [11]:
directory = "data/mnist_images_only/"
ds_train = tf.data.Dataset.list_files(str(pathlib.Path(directory + "*.jpg")))

- here I created a tensorflow dataset called ds_train using tf.data.Dataset.list_files. This function reads the file paths of images in the specified directory ("data/mnist_images_only/") with the "*.jpg" extension.

In [12]:
def process_path(file_path):
    image = tf.io.read_file(file_path)
    image = tf.image.decode_jpeg(image, channels=1)
    label = tf.strings.split(file_path, "\\")
    label = tf.strings.substr(label, pos=0, len=1)[2]
    label = tf.strings.to_number(label, out_type=tf.int64)
    return image, label

- Reads the file's content using tf.io.read_file.
- Decodes the image as a grayscale image using tf.image.decode_jpeg with channels=1.
- Extracts the label from the file path by splitting the path using backslashes and converting the first character to an integer label.

In [13]:
ds_train = ds_train.map(process_path).batch(batch_size)

- applied the process_path function to each element of ds_train using the map method. This function processes the image file paths and returns a dataset of image-label pairs.
- After mapping, you batch the dataset using .batch(batch_size) to create batches of two elements each.

In [14]:

model = keras.Sequential(
    [
        layers.Input((28, 28, 1)),
        layers.Conv2D(16, 3, padding="same"),
        layers.Conv2D(32, 3, padding="same"),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(10),
    ]
)

In [15]:
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=[keras.losses.SparseCategoricalCrossentropy(from_logits=True),],
    metrics=["accuracy"],
)

In [16]:
model.fit(ds_train, epochs=10, verbose=2)

Epoch 1/10


25/25 - 1s - loss: 41.1424 - accuracy: 0.2400 - 894ms/epoch - 36ms/step
Epoch 2/10
25/25 - 0s - loss: 3.9967 - accuracy: 0.8800 - 159ms/epoch - 6ms/step
Epoch 3/10
25/25 - 0s - loss: 0.7118 - accuracy: 0.9400 - 122ms/epoch - 5ms/step
Epoch 4/10
25/25 - 0s - loss: 0.0461 - accuracy: 0.9800 - 98ms/epoch - 4ms/step
Epoch 5/10
25/25 - 0s - loss: 0.0313 - accuracy: 0.9800 - 95ms/epoch - 4ms/step
Epoch 6/10
25/25 - 0s - loss: 8.7876e-05 - accuracy: 1.0000 - 96ms/epoch - 4ms/step
Epoch 7/10
25/25 - 0s - loss: 0.0070 - accuracy: 1.0000 - 110ms/epoch - 4ms/step
Epoch 8/10
25/25 - 0s - loss: 1.6451e-07 - accuracy: 1.0000 - 110ms/epoch - 4ms/step
Epoch 9/10
25/25 - 0s - loss: 1.5259e-07 - accuracy: 1.0000 - 99ms/epoch - 4ms/step
Epoch 10/10
25/25 - 0s - loss: 1.4782e-07 - accuracy: 1.0000 - 78ms/epoch - 3ms/step


<keras.src.callbacks.History at 0x1e4c59fadd0>