# 📚 Data Preprocessing for Food-101 Dataset

In [None]:
# -- Imports --
import tensorflow as tf
from datasets import load_dataset
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# -- Load Food-101 Dataset --
# Fetch every split of the Hugging Face 'food101' dataset.
print("Loading Hugging Face 'food101' dataset...")
food_dataset = load_dataset("food101")

# -- Explore Dataset --
# Report how many examples each split holds.
for split_caption, split_key in (
    ("Training samples:", 'train'),
    ("Validation samples:", 'validation'),
):
    print(split_caption, len(food_dataset[split_key]))

# -- Preprocessing Functions --
# Target (height, width) that every image is resized to.
IMG_SIZE = (224, 224)

In [None]:
def preprocess(example):
    """Resize one example's image to ``IMG_SIZE`` and scale it to [0, 1].

    Args:
        example: Mapping with an ``'image'`` entry and a ``'label'`` entry.
            The image is presumably a PIL image as decoded by ``datasets``
            (TODO confirm); array-likes of shape (H, W, C) are passed to
            ``tf.image.resize`` unchanged.

    Returns:
        Tuple ``(image, label)``. ``image`` is a float32 tensor resized to
        ``IMG_SIZE`` with values divided by 255; ``label`` is returned
        as-is.
    """
    image = example['image']
    # tf.image.resize needs a rank-3 (H, W, C) input, and the tf.data
    # output signature built elsewhere in this file expects exactly three
    # channels. PIL images can be grayscale, palette, RGBA or CMYK, so
    # force them to RGB before handing them to TensorFlow.
    if hasattr(image, 'convert'):
        image = np.asarray(image.convert('RGB'))
    image = tf.image.resize(image, IMG_SIZE)
    image = tf.cast(image, tf.float32) / 255.0  # Normalize to [0,1]
    label = example['label']
    return image, label

# -- Prepare TensorFlow Datasets --
# Keep the raw splits here. `preprocess` is applied once per example by the
# generator inside `to_tf_dataset`; also attaching it via `with_transform`
# would run it twice, and `with_transform` expects a function that receives
# a batch dict and returns a dict, not an (image, label) tuple.
train_ds = food_dataset['train']
val_ds = food_dataset['validation']

# -- Batch and Prefetch --
BATCH_SIZE = 32

def to_tf_dataset(ds):
    """Turn an iterable of examples into a batched, prefetched tf.data pipeline.

    Args:
        ds: Iterable yielding examples accepted by ``preprocess``.

    Returns:
        A ``tf.data.Dataset`` of ``(images, labels)`` batches of up to
        ``BATCH_SIZE`` elements, with prefetching tuned by
        ``tf.data.AUTOTUNE``.
    """
    def example_stream():
        # Preprocess lazily so only the examples being consumed are resized.
        for example in ds:
            yield preprocess(example)

    # Derive the image shape from IMG_SIZE so the signature cannot drift
    # from the size preprocess() actually resizes to.
    element_spec = (
        tf.TensorSpec(shape=(*IMG_SIZE, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.int64),
    )
    pipeline = tf.data.Dataset.from_generator(
        example_stream,
        output_signature=element_spec,
    )
    return pipeline.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)

train_tfds = to_tf_dataset(train_ds)
val_tfds = to_tf_dataset(val_ds)

# -- Visualize Few Samples --
# Show the first nine images of the first training batch on a 3x3 grid,
# each titled with its integer class id.
plt.figure(figsize=(10, 8))
for images, labels in train_tfds.take(1):
    for cell in range(1, 10):
        sample = cell - 1  # subplot positions are 1-based, batch indices 0-based
        plt.subplot(3, 3, cell)
        plt.imshow(images[sample])
        class_id = labels[sample].numpy()
        plt.title(f"Class ID: {class_id}")
        plt.axis("off")
plt.show()

print("✅ Data Preprocessing Complete.")