In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os

# Show the installed TensorFlow version in the cell output.
print("TensorFlow Version:", tf.__version__)

In [None]:
# Location of the dataset, expressed relative to the /notebooks folder this
# notebook lives in: the leading "../" climbs one level up to the project root.
DATA_DIR = '../data/kvasir-dataset-v2'

# Loader settings shared by every dataset built below:
#   BATCH_SIZE - number of images per batch
#   IMG_SIZE   - standard image size used for the models
BATCH_SIZE = 32
IMG_SIZE = (224, 224)

In [None]:
# Images are loaded and labelled automatically from the folder structure under
# DATA_DIR. The training and validation subsets are requested with the very same
# split fraction, seed, image size and batch size, so those settings are written
# once and passed to both calls.
split_settings = dict(
    validation_split=0.2,  # 80% of the data for training, 20% for validation
    seed=123,              # fixed seed: the split is the same every time
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

train_dataset = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR, subset="training", **split_settings
)

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR, subset="validation", **split_settings
)

In [None]:
# Let's verify that the classes were loaded correctly.
# The class names are read from the training dataset object and kept in their
# own variable; the plotting cell further down uses it to title each image.
class_names = train_dataset.class_names
print("Classes in the dataset:", class_names)

In [None]:
# Now, let's take a look at a few sample images and their labels.
# A 3x3 grid of axes is created up front so each image is drawn on an explicit
# axis rather than through pyplot's implicit "current axes" state.
fig, axes = plt.subplots(3, 3, figsize=(12, 12))
grid_axes = list(axes.flat)

# Take one batch from the training dataset
for images, labels in train_dataset.take(1):
    # The batch may hold fewer images than the grid has cells (e.g. BATCH_SIZE < 9
    # or a very small dataset), so never index past the end of the batch.
    n_shown = min(len(grid_axes), int(images.shape[0]))
    for i in range(n_shown):
        ax = grid_axes[i]
        # Display the image after converting it to a uint8 numpy array
        ax.imshow(images[i].numpy().astype("uint8"))
        # Convert the scalar label tensor to a plain int before indexing the list
        ax.set_title(class_names[int(labels[i])])

# Hide ticks and frames on every cell, including any left empty
for ax in grid_axes:
    ax.axis("off")

plt.show()