In [1]:
import tensorflow as tf
import keras
import numpy as np

In [2]:
from keras.datasets import mnist
# Load MNIST as two (images, labels) pairs: one for training, one for testing.
(
    (train_data, train_labels),
    (test_data, test_labels),
) = mnist.load_data()

In [3]:
from keras.layers import Dense,Input, Dropout

In [4]:
# Inspect the raw training images: (number of samples, image height, image width).
train_data.shape

(60000, 28, 28)

In [5]:
# Flatten every 28x28 image into a single 784-element row, cast to float32 and
# divide by 255 (presumably 8-bit pixel intensities, so values land in [0, 1] — confirm).
X_train = train_data.reshape(len(train_data), 28 * 28).astype(np.float32) / 255.0
X_test = test_data.reshape(len(test_data), 28 * 28).astype(np.float32) / 255.0

# Labels are used as-is.
y_train = train_labels
y_test = test_labels

In [6]:
# Check the training labels: a 1-D array with one label per training image.
y_train.shape

(60000,)

In [7]:
# NOTE(review): this repeats the previous cell's check verbatim; it may have been
# meant to inspect y_test or X_train instead — confirm.
y_train.shape

(60000,)

In [8]:
# Functional-API classifier:
#   784 inputs -> Dense(512, relu) -> Dropout(0.5) -> Dense(64, relu) -> Dense(10, softmax)
# `shape` describes ONE sample (a flattened 28*28 image); the batch dimension is
# left implicit and is not part of this tuple.
inputs = Input(shape=(28 * 28,))

Hidden_layer1 = Dense(512, activation="relu")(inputs)
# The name is rebound to the dropout output so the next layer consumes the regularised activations.
Hidden_layer1 = Dropout(rate=0.5)(Hidden_layer1)
Hidden_layer2 = Dense(64, activation="relu")(Hidden_layer1)

# One softmax unit per class.
outputs = Dense(10, activation="softmax")(Hidden_layer2)

model = keras.Model(inputs=inputs, outputs=outputs)

In [9]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 784)]             0         
                                                                 
 dense (Dense)               (None, 512)               401920    
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                32832     
                                                                 
 dense_2 (Dense)             (None, 10)                650       
                                                                 
Total params: 435,402
Trainable params: 435,402
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Configure training. The labels are class indices (a 1-D array, not one-hot
# vectors), so the sparse variant of categorical cross-entropy is used.
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    # Pass metrics as a list: newer Keras releases reject a bare string here,
    # while a list is accepted by both old and new versions.
    metrics=["accuracy"],
)
# If your targets are one-hot encoded, use categorical_crossentropy.
# If your targets are integers representing class indices, use sparse_categorical_crossentropy.

In [11]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

In [12]:
# Early stopping and model checkpointing are commonly used together:
#   - EarlyStopping halts training once the monitored validation metric stops improving.
#   - ModelCheckpoint keeps the best model seen so far on disk.
# Both callbacks watch the SAME metric, so the checkpoint that is saved is "best"
# by the same criterion that decides when training stops.
monitored_metric = "val_accuracy"

callbacks_list = [
    EarlyStopping(monitor=monitored_metric, patience=2),
    ModelCheckpoint(
        filepath="checkpoint_path.keras",
        monitor=monitored_metric,
        save_best_only=True,
    ),
]
# EarlyStopping
#   monitor  --> the quantity watched for improvement (here validation accuracy, so higher is better)
#   patience --> number of consecutive epochs without improvement after which training stops
# -----------------------------------------------------------------------------------------
# ModelCheckpoint
#   filepath       --> destination path the model is saved to
#   monitor        --> the model file is only overwritten when this quantity has improved
#   save_best_only --> keep only the best model, as judged by the monitor argument

In [13]:
# Train for at most 100 epochs, holding out 40% of the training data for
# validation; the callbacks may end the run earlier.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_split=0.4,
    callbacks=callbacks_list,
)
history

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100


<keras.callbacks.History at 0x237a3e2fa90>

In [14]:
# Training stopped after epoch 10: validation accuracy last improved at epoch 8, and EarlyStopping waits `patience` = 2 further epochs without improvement before halting.