In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np

print("Loading 'svhn_cropped' dataset from TFDS...")

# Request the 'train' and 'test' splits in one call, along with the dataset
# metadata. A validation split is carved out of 'train' in a later cell.
splits, metadata = tfds.load(
    'svhn_cropped',
    split=['train', 'test'],
    as_supervised=True,  # each element is an (image, label) tuple
    with_info=True,      # also return the metadata object
)
raw_train, raw_test = splits

print("Dataset loaded successfully.")

In [None]:
# Label names as recorded in the dataset's feature metadata
label_feature = metadata.features['label']
class_names = label_feature.names
print("Class names:", class_names)

# Example counts for each split, read from the metadata
split_info = metadata.splits
num_train = split_info['train'].num_examples
num_test = split_info['test'].num_examples

print(f"Number of training examples: {num_train}")
print(f"Number of testing examples: {num_test}")

# Pull a single training example to see what an image tensor looks like
one_example = raw_train.take(1)
for image, label in one_example:
    print(f"\nImage shape: {image.shape}")

In [None]:
plt.figure(figsize=(10, 10))
# Preview the first 9 training examples in a 3x3 grid
preview_examples = raw_train.take(9)
for i, (image, label) in enumerate(preview_examples):
    ax = plt.subplot(3, 3, i + 1)
    # Colour images: no cmap is passed to imshow
    ax.imshow(image)
    ax.set_title(f"Label: {class_names[label]}")
    ax.axis('off')
plt.show()

In [None]:
def preprocess(image, label):
    """Cast an image to float32 and divide it by 255; the label is unchanged.

    Args:
        image: Image tensor. Presumably uint8 pixel values in 0-255, in which
            case the result lies in [0, 1] -- confirm against the dataset.
        label: Label paired with the image; returned as-is.

    Returns:
        A ``(scaled_image, label)`` tuple.
    """
    scaled_image = tf.cast(image, tf.float32) / 255.0
    return scaled_image, label

In [None]:
# This dictionary will hold our final datasets, keyed by split name
datasets = {}

# Get the total number of training examples
num_train = metadata.splits['train'].num_examples

# Hold out 10% of the training data for validation
num_validation = int(0.1 * num_train)

# Sanity check: the validation split must be a non-empty, strict subset of train
assert 0 < num_validation < num_train, (
    f"Invalid validation size {num_validation} for {num_train} training examples"
)

# take()/skip() split by position, so the two subsets only stay disjoint if
# raw_train yields examples in the same order on every pass.
# NOTE(review): this relies on the dataset being loaded without file
# shuffling -- confirm if the load call is ever changed.

# Validation set: the first 10% of the train data
val_set = raw_train.take(num_validation)

# Training set: the remaining 90%
train_set = raw_train.skip(num_validation)

# Apply preprocessing to all three splits. The map runs in parallel
# (AUTOTUNE picks the parallelism); element order is preserved because
# tf.data maps are deterministic by default.
datasets['train'] = train_set.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
datasets['val'] = val_set.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
datasets['test'] = raw_test.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)

# Let's check the new counts
print(f"Original training examples: {num_train}")
print(f"New validation examples: {num_validation}")
print(f"New training examples: {num_train - num_validation}")

In [None]:
BATCH_SIZE = 32
# Number of examples held in the shuffle buffer for the training stream
SHUFFLE_BUFFER_SIZE = 1000

for split in ['train', 'val', 'test']:
    pipeline = datasets[split]

    # This cell rewrites datasets[split] in place, so running it a second time
    # would batch an already-batched dataset and yield rank-5 image tensors.
    # A single image is rank 3 (height, width, channels); once batched, the
    # image spec is rank 4, so skip any split that has already been processed.
    image_spec = pipeline.element_spec[0]
    if image_spec.shape.rank == 4:
        continue

    # Shuffle the training data only; val/test keep their original order
    if split == 'train':
        pipeline = pipeline.shuffle(SHUFFLE_BUFFER_SIZE)

    # Batch, then prefetch so input preparation overlaps with model execution
    datasets[split] = pipeline.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

print("All datasets are preprocessed, batched, and ready.")
print("Example of a batch (shape):")
for images, labels in datasets['train'].take(1):
    print(f" - Images batch shape: {images.shape}")
    print(f" - Labels batch shape: {labels.shape}")

In [None]:
model = tf.keras.models.Sequential()

# Input: 32x32 images with 3 colour channels
model.add(tf.keras.layers.Input(shape=(32, 32, 3)))

# Three convolution blocks (32 -> 64 -> 128 filters). Each block is a
# 3x3 same-padded ReLU convolution, a 2x2 max-pool, then 30% dropout.
for filters in (32, 64, 128):
    model.add(tf.keras.layers.Conv2D(filters, (3, 3), activation='relu', padding='same'))
    model.add(tf.keras.layers.MaxPooling2D(2, 2))
    model.add(tf.keras.layers.Dropout(0.3))

# Collapse the 3D feature maps into a 1D vector for the dense head
model.add(tf.keras.layers.Flatten())

# Fully connected hidden layer with heavier dropout
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))

# Output: 10 units (one per digit class 0-9), softmax for class probabilities
model.add(tf.keras.layers.Dense(10, activation='softmax'))

model.summary()

In [None]:
# Training configuration: Adam optimizer, sparse categorical cross-entropy
# loss, and accuracy as the reported metric.
compile_options = dict(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.compile(**compile_options)

print("Model compiled.")

In [None]:
EPOCHS = 15

print("Starting training...")

# Fit on the training batches and report metrics on the validation batches;
# the returned History object is kept in `history`.
train_batches = datasets['train']
val_batches = datasets['val']
history = model.fit(train_batches, validation_data=val_batches, epochs=EPOCHS)

print("Training finished.")

In [None]:
print("Evaluating on test data...")
# evaluate() returns the loss followed by the compiled metrics (accuracy here)
test_results = model.evaluate(datasets['test'])
loss, accuracy = test_results

print(f"\nTest Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy * 100:.2f}%")