# In [None]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPool2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

# Fetch the MNIST handwritten-digit dataset as (images, labels) pairs for the
# training split and the test split.
(train_image, train_labels), (test_image, test_lables) = mnist.load_data()

# Scale pixel intensities from the 0-255 range down to 0-1 so the network
# trains on small, uniformly scaled inputs.
train_image, test_image = train_image / 255.0, test_image / 255.0

print(train_image.shape)

# Conv2D expects every sample to be 3-D: (height, width, channels).
# The MNIST training array is (60000, 28, 28) -- 60000 images of 28x28
# pixels -- so it has no channel axis. The images are grayscale, so a single
# trailing channel axis of size 1 is appended: (60000, 28, 28, 1).
train_image = train_image.reshape(*train_image.shape, 1)
test_image = test_image.reshape(*test_image.shape, 1)
print(train_image.shape)

# One-hot encode the integer digit labels so they match the 10-way softmax
# output and the categorical cross-entropy loss.
train_labels, test_lables = to_categorical(train_labels), to_categorical(test_lables)

# The number of filters per convolution layer is a tunable hyperparameter;
# more filters let the layer learn more distinct patterns at a higher cost.
# Rules of thumb for choosing the number of filters:
#   - Small networks (simple problems): 8, 16 or 32 filters
#   - Medium complexity: 32 or 64 filters (common for MNIST)
#   - Deep networks (complex problems like ImageNet): 128, 256 or 512 filters

model = Sequential([
    # Declare the input shape with an explicit Input object instead of the
    # `input_shape=` layer argument, which newer Keras versions warn about.
    Input(shape=(28, 28, 1)),
    # 28x28x1 -> 26x26x32 (3x3 kernel, default 'valid' padding, stride 1)
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    # 26x26x32 -> 13x13x32
    MaxPool2D((2, 2)),
    # ReLU is max(0, x): positive activations pass through unchanged, so,
    # unlike sigmoid/tanh, it does not squash large values into a narrow
    # range. This helps the convolution layers learn meaningful patterns
    # before the features are fed into the dense layers.
    # 13x13x32 -> 10x10x16 (4x4 kernel)
    Conv2D(filters=16, kernel_size=(4, 4), activation='relu'),
    # 10x10x16 -> 5x5x16
    MaxPool2D((2, 2)),
    # 5x5x16 -> flat vector of 400 features
    Flatten(),
    Dense(16, activation='relu'),
    # The output layer predicts probabilities for 10 classes (digits 0-9).
    # softmax converts raw scores into probabilities that sum to 1, which
    # makes them easy to interpret.
    # Example: softmax([2.0, 1.0, 0.1]) -> approximately [0.66, 0.24, 0.10]
    Dense(10, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot encoded labels and the
# softmax output layer; accuracy is tracked as a human-readable metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit() returns a History object holding the per-epoch loss/metric values,
# not predicted labels, so it is bound to a name that says what it is.
history = model.fit(train_image, train_labels, batch_size=32, epochs=2)
predicted_labels = history  # original (misleading) name kept as an alias

# Evaluate on the held-out test set; with metrics=['accuracy'] evaluate()
# returns [loss, accuracy]. Without this step the one-hot test labels
# would never be used and the model's quality would go unmeasured.
test_loss, test_accuracy = model.evaluate(test_image, test_lables, verbose=0)
print(f"test loss: {test_loss:.4f} - test accuracy: {test_accuracy:.4f}")

# One row per test image, each row holding the 10 softmax class probabilities.
prediction = model.predict(test_image)

print(prediction)  # gives prediction result for all test images

