In [1]:
# ─── Step 1: Organize raw images into train/val/test folders ───
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split

# Define source and target directories
SOURCE_DIR = Path("../data/raw")      # original data, one sub-folder per class
TARGET_DIR = Path("../data/split")    # new location for train/val/test folders

# Create split folders
for split in ["train", "val", "test"]:
    for class_name in ["rock", "paper", "scissors"]:
        (TARGET_DIR / split / class_name).mkdir(parents=True, exist_ok=True)

# Get all images and labels (the label is the name of the image's parent folder).
# glob() yields entries in filesystem order, which differs between machines and
# runs; sorting makes the seeded split below reproducible.
all_images = sorted(SOURCE_DIR.glob("*/*.png"))
if not all_images:
    # Fail here with a clear message instead of deep inside train_test_split.
    raise FileNotFoundError(f"No PNG images found under {SOURCE_DIR}/<class>/")
labels = [img.parent.name for img in all_images]

# Stratified split: 80% train, 10% val, 10% test
# NOTE: files copied by an earlier run stay in TARGET_DIR; if the raw data has
# changed since then, clear TARGET_DIR first so that no image ends up in two splits.
train_val_imgs, test_imgs, train_val_labels, test_labels = train_test_split(
    all_images, labels, stratify=labels, test_size=0.1, random_state=42
)
# 1/9 of the remaining 90% is 10% of the full dataset.
train_imgs, val_imgs, train_labels, val_labels = train_test_split(
    train_val_imgs, train_val_labels, stratify=train_val_labels, test_size=1 / 9, random_state=42
)

# Helper function to copy images into new folders
def copy_images(image_paths, labels, split_name, target_dir=None):
    """Copy each image into ``<target_dir>/<split_name>/<label>/``.

    Args:
        image_paths: Paths of the image files to copy.
        labels: Class-folder name for each entry of ``image_paths``, in the
            same order.
        split_name: Name of the split sub-folder to copy into (e.g. "train").
        target_dir: Root folder of the split tree. Defaults to the
            module-level ``TARGET_DIR``.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """
    if target_dir is None:
        target_dir = TARGET_DIR

    image_paths = list(image_paths)
    labels = list(labels)
    if len(image_paths) != len(labels):
        # zip() would silently drop the surplus entries.
        raise ValueError(
            f"Got {len(image_paths)} image paths but {len(labels)} labels"
        )

    split_root = Path(target_dir) / split_name
    prepared = set()
    for img_path, label in zip(image_paths, labels):
        dst_dir = split_root / label
        if label not in prepared:
            # Create the class folder on demand so the copy cannot fail on a
            # missing directory.
            dst_dir.mkdir(parents=True, exist_ok=True)
            prepared.add(label)
        shutil.copy(img_path, dst_dir / Path(img_path).name)

# Copy every split's files into its own folder, in train → val → test order
for split_name, split_imgs, split_labels in (
    ("train", train_imgs, train_labels),
    ("val", val_imgs, val_labels),
    ("test", test_imgs, test_labels),
):
    copy_images(split_imgs, split_labels, split_name)

print("Image splitting and copying complete.")

# ─── Step 2: Setup Image Parameters and Load with TensorFlow ───
import tensorflow as tf

# Constants
IMG_SIZE = (150, 150)
BATCH_SIZE = 32
classes = ["rock", "paper", "scissors"]

# Base directory for the new dataset
DATA_DIR = TARGET_DIR


def _load_split(split_name, shuffle):
    """Load one split folder under DATA_DIR as a batched dataset with integer labels.

    `class_names=classes` pins the label indices to the order of `classes`.
    Without it the indices follow the alphanumeric order of the folder names
    (paper, rock, scissors), which does not match `classes`, so
    `classes[label]` would name the wrong class.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        DATA_DIR / split_name,
        labels='inferred',
        label_mode='int',
        class_names=classes,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        seed=123 if shuffle else None,  # the seed only matters when shuffling
    )


# Load datasets using image_dataset_from_directory
train_ds = _load_split("train", shuffle=True)
# Validation and test data keep a fixed order
val_ds = _load_split("val", shuffle=False)
test_ds = _load_split("test", shuffle=False)

# ─── Step 3: Normalize Pixel Values ───
from tensorflow.keras import layers

# Normalization layer: rescales [0,255] → [0,1]
normalization_layer = layers.Rescaling(1./255)


def _normalize(images, targets):
    """Rescale a batch of images to [0, 1]; the labels pass through unchanged."""
    return normalization_layer(images), targets


# Apply normalization via map (this keeps the datasets lazy/efficient).
# A named function is used instead of a lambda: the captured notebook output
# shows TensorFlow emitting a deprecation notice about lambda functions here.
# num_parallel_calls lets tf.data process several batches concurrently; the
# element order is still preserved by default.
train_ds = train_ds.map(_normalize, num_parallel_calls=tf.data.AUTOTUNE)
val_ds   = val_ds.map(_normalize, num_parallel_calls=tf.data.AUTOTUNE)
test_ds  = test_ds.map(_normalize, num_parallel_calls=tf.data.AUTOTUNE)

# ─── Step 4: Dataset Performance Optimizations ───
AUTOTUNE = tf.data.AUTOTUNE

# cache() stores the elements exactly as first produced, including their order,
# so a shuffle applied before it only takes effect once. Shuffling again after
# the cache gives the training set a new order every epoch. The dataset is
# already batched, so this reorders whole batches; a buffer of 1000 is enough
# to cover every batch of a dataset this size.
train_ds = train_ds.cache().shuffle(1000, seed=123).prefetch(buffer_size=AUTOTUNE)
# Validation and test data keep a fixed order.
val_ds   = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds  = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

print("Datasets prepared and optimized for training.")


Image splitting and copying complete.
Found 1750 files belonging to 3 classes.
Found 219 files belonging to 3 classes.
Found 219 files belonging to 3 classes.
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
Datasets prepared and optimized for training.
