# MNIST Digit Dataset with a Multi-Layer Perceptron

**Here I am using TensorFlow**

Import the necessary libraries as shown below.


In [10]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten,Input
from tensorflow.keras.optimizers import Adam

**Loading the data**

In [3]:
# Load the MNIST train and test splits; each split is an (images, labels) pair.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


**Normalizing the data to the range 0 to 1. Each pixel value originally ranges from 0 to 255, so we divide by 255.**

In [4]:
# Rescale pixel intensities by 1/255 for both splits in a single statement.
# NOTE: run this cell only once per load -- re-running it divides the
# already-scaled arrays by 255 again.
x_train, x_test = x_train / 255.0, x_test / 255.0

# Model Architecture Explanation

    •We are using 1 input layer, 3 hidden layers, and 1 output layer.
    
	•The input image has a shape of 28×28 pixels, which is flattened into 784 features using the Flatten() layer.
    
	•The first hidden layer has 392 neurons — exactly half of the flattened input (784).
    
   **Each subsequent hidden layer reduces the number of neurons by roughly half:**
        
	•2nd layer: 192 neurons (just under half of 392)
    
	•3rd layer: 98 neurons
    
	•This progressive halving strategy helps the model learn abstract patterns efficiently, reducing complexity as it goes deeper.
    
	•The output layer has 10 neurons, representing the digits 0 through 9, with a softmax activation to produce probability scores.
    
	•We use:
    
	    •ReLU (Rectified Linear Unit) as the activation function for all hidden layers to introduce non-linearity.
    
	    •Softmax in the output layer to get class probabilities for multi-class classification.

In [25]:
# Seed Python, NumPy and TensorFlow in one call so that weight initialisation
# (and the batch shuffling done later by `fit`) is repeatable when this cell
# is re-run or the notebook is restarted. Some GPU kernels may still add
# small run-to-run differences.
tf.keras.utils.set_random_seed(42)

# Multi-layer perceptron: the 28x28 input is flattened and passed through
# three ReLU hidden layers (392 -> 192 -> 98 units), followed by a 10-unit
# softmax output layer.
model = Sequential([
    Input(shape=(28, 28)),
    Flatten(),
    Dense(392, activation='relu'),
    Dense(192, activation='relu'),
    Dense(98, activation='relu'),
    Dense(10, activation='softmax'),
])

In [26]:
# Configure training: Adam optimizer with its default settings, sparse
# categorical cross-entropy as the loss (the labels are passed to `fit`
# without one-hot encoding), and accuracy as the reported metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

**Decreasing the batch size means the weights and biases are updated more often (here, after every 32 samples seen by the model), which can improve accuracy, but each epoch takes longer to run. Here we use a batch size of 32, which works well for this dataset.**



In [27]:
# Train for 15 epochs with mini-batches of 32 samples, holding out 10% of the
# training data for validation.
# The returned History object is kept in `history` so the per-epoch loss and
# accuracy can be inspected or plotted later; assigning it also stops its
# repr from being echoed as the cell's output.
history = model.fit(
    x_train, y_train,
    epochs=15,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)

Epoch 1/15
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.8850 - loss: 0.3768 - val_accuracy: 0.9727 - val_loss: 0.0892
Epoch 2/15
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9712 - loss: 0.0938 - val_accuracy: 0.9767 - val_loss: 0.0817
Epoch 3/15
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9802 - loss: 0.0617 - val_accuracy: 0.9755 - val_loss: 0.0824
Epoch 4/15
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9845 - loss: 0.0471 - val_accuracy: 0.9775 - val_loss: 0.0841
Epoch 5/15
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9874 - loss: 0.0405 - val_accuracy: 0.9795 - val_loss: 0.0730
Epoch 6/15
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9904 - loss: 0.0291 - val_accuracy: 0.9788 - val_loss: 0.0822
Epoch 7/15
[1

<keras.src.callbacks.history.History at 0x7937acb93150>

In [28]:
# `summary()` prints the layer table itself and returns None, so wrapping it
# in print() only adds a stray "None" line to the output.
model.summary()

None


In [29]:
# Score the trained model on the held-out test split (progress bar off).
# `evaluate` returns the loss followed by the metrics given to `compile`.
test_loss, test_accuracy = model.evaluate(
    x=x_test,
    y=y_test,
    verbose=0,
)
print(f"\nTest Accuracy: {test_accuracy:.4f}")


Test Accuracy: 0.9770
