Import NumPy, SciPy (to read the `.mat` files), scikit-learn (shuffling and splitting), and TensorFlow/Keras (the model).

In [1]:
import numpy as np
import scipy.io as sio
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras import layers

ModuleNotFoundError: No module named 'scipy'

Load the SVHN training and test sets from the 32x32 `.mat` files in the `datasets/` directory.

In [None]:
# Load the SVHN train and test sets from MATLAB .mat files.
# Paths are relative to the notebook's working directory; each call returns a
# dict-like object that is indexed by 'X' (images) and 'y' (labels) below.
train_data = sio.loadmat("datasets/train_32x32.mat")
test_data = sio.loadmat("datasets/test_32x32.mat")

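Preprocess the data: move the sample axis of the image arrays to the front, remap label 10 to 0, one-hot encode the labels, pool the two sets, scale pixel values to [0, 1], and re-split into training and test sets.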

In [None]:
# Width of the one-hot label vectors (one slot per digit class).
num_classes = 10


def _split_images_and_labels(mat):
    """Return ``(images, labels)`` extracted from a loaded ``.mat`` dictionary.

    The array under ``'X'`` is transposed so its last axis becomes the first
    (presumably giving one image per leading index -- the model below expects
    per-sample inputs of shape (32, 32, 3)). The first column of ``'y'`` is
    used as the label vector, and every label equal to 10 is rewritten to 0.
    The rewrite happens in place on a view, so ``mat['y']`` is modified too.
    """
    images = mat['X'].transpose(3, 0, 1, 2)
    labels = mat['y'][:, 0]
    labels[labels == 10] = 0
    return images, labels


x_train, y_train = _split_images_and_labels(train_data)
x_test, y_test = _split_images_and_labels(test_data)

# One-hot encode both label vectors.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

# Pool the two sets into a single image array and a single label array.
x = np.concatenate([x_train, x_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)

# Number of samples for each class present in `y` (classes in ascending order),
# kept for inspecting class balance.
digit_counts = np.unique(y.argmax(axis=1), return_counts=True)[1]

# Keep every sample. Each row of `y` is a single one-hot label, so there is no
# per-image "number of digits" to filter on. Comparing the per-class totals in
# `digit_counts` against 3 and indexing `x`/`y` with the result would treat
# class indices as row indices and discard (nearly) the whole dataset.
valid_indices = np.arange(x.shape[0])

# Sanity check: images and labels must stay aligned row for row.
assert x.shape[0] == y.shape[0], "x and y must contain the same number of samples"

# Convert to float32, then rescale by 1/255 so pixel values lie in [0, 1]
# (assuming the raw images are 8-bit, i.e. in 0..255).
x = x.astype('float32')
x = x / 255.0

# Shuffle images and labels together with a fixed seed so the order is
# reproducible. (train_test_split shuffles as well by default.)
x, y = shuffle(x, y, random_state=42)

# Hold out 20% of the pooled samples as the test set, again with a fixed seed.
split = train_test_split(x, y, test_size=0.2, random_state=42)
x_train, x_test, y_train, y_test = split

In [None]:
# Define the CNN architecture: two Conv -> MaxPool -> Dropout stages followed
# by a dense classifier head that ends in a softmax over `num_classes` outputs.
model = keras.Sequential([
    # Declare the input shape with an explicit Input object instead of passing
    # `input_shape=` to the first layer, which current Keras versions warn about.
    keras.Input(shape=(32, 32, 3)),
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

# Configure training: Adam optimizer, categorical cross-entropy loss (labels
# are one-hot vectors), and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 10 epochs in mini-batches of 32; the held-out split is passed as
# validation data so its loss/accuracy are reported after every epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [None]:
# Score the trained model on the held-out split without progress output.
# With one compiled metric, evaluate() yields the loss followed by the accuracy.
evaluation = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_acc = evaluation
print('Test accuracy:', test_acc)