In [1]:
import tensorflow as tf

# Load and preprocess the MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Normalize to [0, 1]
x_train = x_train.reshape(-1, 28 * 28)  # Flatten images
x_test = x_test.reshape(-1, 28 * 28)



In [2]:
import numpy as np

def exclude_digits(x_data, y_data, excluded_digits):
    mask = ~np.isin(y_data, excluded_digits)  # Create a mask for non-excluded digits
    x_filtered = x_data[mask]  # Filter input data
    y_filtered = y_data[mask]  # Filter labels
    return x_filtered, y_filtered

In [3]:
# Example usage:
excluded_digits = [0, 1,2]  # Exclude digits 0 and 1
x_train_filtered, y_train_filtered = exclude_digits(x_train, y_train, excluded_digits)

In [4]:
set(y_train_filtered)

{3, 4, 5, 6, 7, 8, 9}

In [5]:
class SimpleNN(tf.keras.Model):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.dense1 = tf.keras.layers.Dense(128, activation='relu')
        self.dense2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, inputs):
        x = self.dense1(inputs)
        return self.dense2(x)

In [6]:
def train_model(model,pth_x,pth_y):
    batch_size = 64
    epochs = 5
    num_batches = len(pth_x) // batch_size
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    loss_fn = tf.keras.losses.CategoricalCrossentropy()

    # Convert labels to one-hot encoding
    pth_y_onehot = tf.keras.utils.to_categorical(pth_y, num_classes=10)

    
    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")
        for i in range(num_batches):
            # Get a batch of data
            start = i * batch_size
            end = start + batch_size
            x_batch = pth_x[start:end]
            y_batch = pth_y_onehot[start:end]
            
            with tf.GradientTape() as tape:
                predictions = model(x_batch, training=True)  # Forward pass
                loss = loss_fn(y_batch, predictions)        # Compute loss
            

            gradients = tape.gradient(loss, model.trainable_variables) 
        
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))  # Update weights

            if i % 200 == 0:  # Print progress every 200 batches
                print(f"Batch {i}/{num_batches}, Loss: {loss.numpy():.4f}")

In [7]:
x_train_1, y_train_1  = exclude_digits(x_train, y_train, excluded_digits=[1, 3, 7])
print("y_train_1 : ",set(y_train_1))
print(len(y_train_1))

y_train_1 :  {0, 2, 4, 5, 6, 8, 9}
40862


In [8]:
x_train_2, y_train_2 = exclude_digits(x_train, y_train, excluded_digits=[2, 5, 8])
print("y_train_2 : ",set(y_train_2))
print(len(y_train_2))

y_train_2 :  {0, 1, 3, 4, 6, 7, 9}
42770


In [9]:
x_train_3, y_train_3 = exclude_digits(x_train, y_train, excluded_digits=[4, 6, 9])
print("y_train_3 : ",set(y_train_3))
print(len(y_train_3))

y_train_3 :  {0, 1, 2, 3, 5, 7, 8}
42291


In [10]:
model1 = SimpleNN()
train_model(model1, x_train_1, y_train_1)


Epoch 1/5


2025-06-06 23:25:11.247042: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-06-06 23:25:11.247076: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-06-06 23:25:11.247079: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
I0000 00:00:1749227111.247090 9904212 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1749227111.247108 9904212 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Batch 0/638, Loss: 2.6194
Batch 200/638, Loss: 0.0854
Batch 400/638, Loss: 0.2455
Batch 600/638, Loss: 0.1421
Epoch 2/5
Batch 0/638, Loss: 0.1355
Batch 200/638, Loss: 0.0215
Batch 400/638, Loss: 0.1406
Batch 600/638, Loss: 0.0890
Epoch 3/5
Batch 0/638, Loss: 0.1148
Batch 200/638, Loss: 0.0142
Batch 400/638, Loss: 0.0880
Batch 600/638, Loss: 0.0725
Epoch 4/5
Batch 0/638, Loss: 0.0962
Batch 200/638, Loss: 0.0124
Batch 400/638, Loss: 0.0612
Batch 600/638, Loss: 0.0613
Epoch 5/5
Batch 0/638, Loss: 0.0786
Batch 200/638, Loss: 0.0098
Batch 400/638, Loss: 0.0391
Batch 600/638, Loss: 0.0521


In [11]:
model2 = SimpleNN()
train_model(model1, x_train_2, y_train_2)

Epoch 1/5
Batch 0/668, Loss: 6.2816
Batch 200/668, Loss: 0.1218
Batch 400/668, Loss: 0.1146
Batch 600/668, Loss: 0.0341
Epoch 2/5
Batch 0/668, Loss: 0.0377
Batch 200/668, Loss: 0.1029
Batch 400/668, Loss: 0.0500
Batch 600/668, Loss: 0.0220
Epoch 3/5
Batch 0/668, Loss: 0.0258
Batch 200/668, Loss: 0.0806
Batch 400/668, Loss: 0.0289
Batch 600/668, Loss: 0.0158
Epoch 4/5
Batch 0/668, Loss: 0.0190
Batch 200/668, Loss: 0.0623
Batch 400/668, Loss: 0.0193
Batch 600/668, Loss: 0.0123
Epoch 5/5
Batch 0/668, Loss: 0.0124
Batch 200/668, Loss: 0.0374
Batch 400/668, Loss: 0.0129
Batch 600/668, Loss: 0.0093


In [12]:
model3 = SimpleNN()
train_model(model3, x_train_3, y_train_3)

Epoch 1/5
Batch 0/660, Loss: 2.3093
Batch 200/660, Loss: 0.1198
Batch 400/660, Loss: 0.1958
Batch 600/660, Loss: 0.2084
Epoch 2/5
Batch 0/660, Loss: 0.0529
Batch 200/660, Loss: 0.0457
Batch 400/660, Loss: 0.0988
Batch 600/660, Loss: 0.1090
Epoch 3/5
Batch 0/660, Loss: 0.0222
Batch 200/660, Loss: 0.0238
Batch 400/660, Loss: 0.0551
Batch 600/660, Loss: 0.0645
Epoch 4/5
Batch 0/660, Loss: 0.0140
Batch 200/660, Loss: 0.0168
Batch 400/660, Loss: 0.0347
Batch 600/660, Loss: 0.0420
Epoch 5/5
Batch 0/660, Loss: 0.0105
Batch 200/660, Loss: 0.0135
Batch 400/660, Loss: 0.0270
Batch 600/660, Loss: 0.0252


In [13]:
def evaluate_model(model,x_test,y_test):
    loss_fn = tf.keras.losses.CategoricalCrossentropy()

    y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes=10)

    # Evaluate the model
    test_loss = loss_fn(y_test_onehot, model(x_test))
    test_accuracy = tf.keras.metrics.categorical_accuracy(y_test_onehot, model(x_test))
    return test_loss, test_accuracy

In [14]:
x_test_1, y_test_1  = exclude_digits(x_test, y_test, excluded_digits=[1, 3, 7])
test_loss_1 , test_accuracy_1 = evaluate_model(model1,x_test_1,y_test_1)

print(f"Test Loss: {test_loss_1.numpy():.4f}, Test Accuracy: {tf.reduce_mean(test_accuracy_1).numpy():.4f}")

Test Loss: 4.7512, Test Accuracy: 0.5730


In [15]:
x_test_2, y_test_2  = exclude_digits(x_test, y_test, excluded_digits=[2, 5, 8])
test_loss_2 , test_accuracy_2 = evaluate_model(model1,x_test_2,y_test_2)

print(f"Test Loss: {test_loss_1.numpy():.4f}, Test Accuracy: {tf.reduce_mean(test_accuracy_2).numpy():.4f}")

Test Loss: 4.7512, Test Accuracy: 0.9842


In [16]:
x_test_3, y_test_3  = exclude_digits(x_test, y_test, excluded_digits=[4,6,9])
test_loss_3 , test_accuracy_3 = evaluate_model(model1,x_test_3,y_test_3)

print(f"Test Loss: {test_loss_3.numpy():.4f}, Test Accuracy: {tf.reduce_mean(test_accuracy_3).numpy():.4f}")

Test Loss: 4.5830, Test Accuracy: 0.5901


In [17]:
x_test_2, y_test_2  = exclude_digits(x_test, y_test, excluded_digits=[2, 5, 8])
test_loss_2 , test_accuracy_2 = evaluate_model(model2,x_test_2,y_test_2)

print(f"Test Loss: {test_loss_2.numpy():.4f}, Test Accuracy: {tf.reduce_mean(test_accuracy_2).numpy():.4f}")

Test Loss: 2.4873, Test Accuracy: 0.1122


In [18]:
x_test_3, y_test_3  = exclude_digits(x_test, y_test, excluded_digits=[4,6,9])
test_loss_3 , test_accuracy_3 = evaluate_model(model3,x_test_3,y_test_3)

print(f"Test Loss: {test_loss_3.numpy():.4f}, Test Accuracy: {tf.reduce_mean(test_accuracy_3).numpy():.4f}")

Test Loss: 0.0655, Test Accuracy: 0.9793
