In [1]:
import tensorflow as tf
from tensorflow.keras import layers,models
import numpy as np
import matplotlib.pyplot as plt

## Load and Prepare data 

In [2]:
mnist = tf.keras.datasets.mnist

(x_train,y_train) , (x_test,y_test) = mnist.load_data()

print("y_train are : ",y_train)
print("X_train are : ",x_train[:2])
print("\n\n")
print("Shape of X_train : ",x_train.shape)
print("Shape of y_test  : ",y_train.shape)
print("Which means 60000 images \nHeight of  each image : 28px  \nWidth each image : 28px")

y_train are :  [5 0 4 ... 5 6 8]
X_train are :  [[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]]



Shape of X_train :  (60000, 28, 28)
Shape of y_test  :  (60000,)
Which means 60000 images 
Height of  each image : 28px  
Width each image : 28px


now we need normalize pixel values to 0-1 range 

In [3]:
x_train = x_train.astype('float32') / 255.0
x_test  = x_test.astype('float32') / 255.0 

x_train[:2]

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]], dtype=float32)

we know that Ann only support 1d array        minist is 2d data so need chnage the shape 

In [4]:
x_train = x_train.reshape(-1,784)      #-1 fill automaticly no of rows(60k) numpy feature
x_test  = x_test.reshape(-1,784) # new width of each data

print("After change of shape : ",x_train.shape)
print("X_train look like \n ",x_train[:2])

After change of shape :  (60000, 784)
X_train look like 
  [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


### Bulding a ANN Model

In [5]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(784,)), #input layer
    tf.keras.layers.Dense(300,activation='relu'), # hidden layer
    tf.keras.layers.Dense(10,activation='softmax') # Output layer
])

In [6]:
a = """Input Layer:

Shape: (784,)
It accepts 784 features (flattened 28x28 image).
----------------------------------------------------------------------
Hidden Layer (Dense Layer with 128 neurons):

Neurons: 128
Activation: ReLU
This layer transforms the input data into a higher-dimensional space, allowing the model to learn complex features.
apply weight and bias 
----------------------------------------------------------------------------------------------------------------------

Output Layer (Dense Layer with 10 neurons):

Neurons: 10
Activation: Softmax
This layer outputs the final probabilities for each of the 10 classes (digits 0-9).
"""

In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 300)               235500    
                                                                 
 dense_1 (Dense)             (None, 10)                3010      
                                                                 
Total params: 238,510
Trainable params: 238,510
Non-trainable params: 0
_________________________________________________________________


In [8]:
input_size = 784
hidden_size = 128
output_size = 10

w_hidden = input_size * hidden_size
b_hidden = hidden_size
total_hidden = w_hidden + b_hidden

w_output = hidden_size * output_size
b_output = output_size
total_output = w_output + b_output

total_params = total_hidden + total_output

print(f"Total Parameters = {total_params}")
print(f"Weights in Hidden Layer = {w_hidden}")
print(f"Biases in Hidden Layer = {b_hidden}")
print(f"Weights in Output Layer = {w_output}")
print(f"Biases in Output Layer = {b_output}")

Total Parameters = 101770
Weights in Hidden Layer = 100352
Biases in Hidden Layer = 128
Weights in Output Layer = 1280
Biases in Output Layer = 10


#### Lets apply  loss function,optimizer and evalution metric 

In [9]:
model.compile(
loss = 'sparse_categorical_crossentropy',
optimizer = 'sgd',
metrics = ['accuracy']
)
print("Model compiled........")

Model compiled........


#### Lets train our model 

In [10]:
history = model.fit(x_train,y_train,epochs=10,batch_size=32,validation_data=(x_test,y_test),verbose=1)
print("Model Training is completed...")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model Training is completed...


#### Model evalution and make prediction

In [11]:
test_loss,test_accuracy = model.evaluate(x_test,y_test)



In [12]:
print("Test Loss : ",test_loss)
print("Test accuracy : {:.4f}".format(test_accuracy))

Test Loss :  0.14981237053871155
Test accuracy : 0.9560


Test loss = It store loss of the model on test data after evalution

In [13]:
## Make prediction

t_sample = x_test[:5]
pred = model.predict(t_sample)
pred_label = np.argmax(pred,axis=1)
true_label = y_test[:5]

print("Prediction vs True Label")
for i in range(5):
    print(f"Sample{i+1} : Predicted = {pred_label[i]}, True_label = {true_label[i]}")

Prediction vs True Label
Sample1 : Predicted = 7, True_label = 7
Sample2 : Predicted = 2, True_label = 2
Sample3 : Predicted = 1, True_label = 1
Sample4 : Predicted = 0, True_label = 0
Sample5 : Predicted = 4, True_label = 4
