In [1]:
import numpy as np #matrix multi
from keras.datasets import mnist #dataset
from keras.models import Sequential #type of model
from keras.layers import Dense #a type of layer which is used in normal perceptron (a set of neurons)
from keras.layers import Dropout #type of layer
from keras.utils import np_utils #keras helper library

Using TensorFlow backend.


In [2]:
# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs.
# NOTE(review): whether this alone makes training fully repeatable depends on the
# Keras backend in use — confirm before relying on bit-identical results.
seed = 2
np.random.seed(seed)

In [3]:
# Load MNIST through Keras: load_data() returns a (train, test) pair,
# each of which unpacks into (images, labels).
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [4]:
# Flatten each 28*28 image into a single 784-long vector.
# The pixel count is the product of every axis after the sample axis, so the
# cell can be re-run on already-flattened data without an IndexError on
# shape[2] (the second run simply reshapes (n, 784) to (n, 784)).
numpix = int(np.prod(X_train.shape[1:]))
# After the reshape X_train.shape[0] is the number of images and
# X_train.shape[1] is numpix; float32 prepares the data for scaling.
X_train = X_train.reshape(X_train.shape[0], numpix).astype('float32')
X_test = X_test.reshape(X_test.shape[0], numpix).astype('float32')

In [5]:
# Scale pixel intensities by the largest 8-bit value so inputs are small floats
# (assumes the raw images are 8-bit, i.e. in [0, 255] — TODO confirm).
PIXEL_MAX = 255
X_train, X_test = X_train / PIXEL_MAX, X_test / PIXEL_MAX
#scaling

In [6]:
# y_train before one-hot encoding: a 1-D array holding one integer class id per image
y_train

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [7]:
# One-hot encode the labels.
# The class count is computed once from both splits and passed explicitly, so the
# train and test encodings always have the same width; without num_classes,
# to_categorical infers the width from the largest label present in each array
# separately.
num_classes = int(max(y_train.max(), y_test.max())) + 1
y_train = np_utils.to_categorical(y_train, num_classes=num_classes)
y_test = np_utils.to_categorical(y_test, num_classes=num_classes)

In [8]:
# y_train after one-hot encoding
y_train       # every label is now a row of 10 values in which exactly one is 1 and the rest are 0;
                # this representation is called one-hot encoding

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.]])

In [9]:
#now we define the whole model
def dnnmodel(input_dim=None, hidden_units=None, n_classes=None):
    """Build and compile a fully connected classifier with one hidden layer.

    Architecture: Dense(hidden_units, relu) -> Dense(n_classes, softmax). Both
    layers use bias terms and the 'normal' kernel initializer.

    Called with no arguments, the sizes come from the notebook-level
    ``numpix`` and ``num_classes`` (784 inputs, 784 hidden neurons and 10
    outputs for the MNIST data prepared above).

    Args:
        input_dim: Length of each flattened input vector. Defaults to ``numpix``.
        hidden_units: Number of neurons in the hidden layer. Defaults to
            ``input_dim``.
        n_classes: Number of output classes. Defaults to ``num_classes``.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy loss,
        the Adam optimizer and accuracy as the reported metric.
    """
    # Resolve defaults at call time so the function picks up the current
    # notebook globals instead of values frozen at definition time.
    if input_dim is None:
        input_dim = numpix
    if hidden_units is None:
        hidden_units = input_dim
    if n_classes is None:
        n_classes = num_classes

    model = Sequential()
    # Hidden layer: input_dim inputs feeding hidden_units relu neurons.
    model.add(Dense(hidden_units, input_dim=input_dim, kernel_initializer='normal',
                    use_bias=True, activation='relu'))
    # Output layer: softmax turns the n_classes outputs into class probabilities.
    model.add(Dense(n_classes, kernel_initializer='normal', activation='softmax',
                    use_bias=True))

    # Loss: categorical cross-entropy measures the gap between the predicted
    # distribution and the one-hot target during backpropagation.
    # Optimizer: Adam is used to minimise that loss.
    # Metric: accuracy is reported alongside the loss.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
              

In [11]:
# Build a fresh network from the definition above.
model = dnnmodel()

# Train for EPOCHS passes over the training set, BATCH_SIZE images per update.
# The test split is passed as validation data, so each epoch also reports
# val_loss / val_acc; verbose=2 prints one summary line per epoch.
EPOCHS = 10
BATCH_SIZE = 200
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=2,
)

# Final evaluation on the test split: scores[0] is the loss and scores[1] is the
# accuracy metric requested at compile time.
scores = model.evaluate(X_test, y_test, verbose=0)
error_pct = 100 - scores[1] * 100
print("Baseline Error: %.2f%%" % error_pct)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 8s - loss: 0.2776 - acc: 0.9207 - val_loss: 0.1370 - val_acc: 0.9616
Epoch 2/10
 - 7s - loss: 0.1096 - acc: 0.9680 - val_loss: 0.0963 - val_acc: 0.9699
Epoch 3/10
 - 8s - loss: 0.0712 - acc: 0.9800 - val_loss: 0.0719 - val_acc: 0.9773
Epoch 4/10
 - 7s - loss: 0.0483 - acc: 0.9862 - val_loss: 0.0741 - val_acc: 0.9775
Epoch 5/10
 - 8s - loss: 0.0359 - acc: 0.9902 - val_loss: 0.0652 - val_acc: 0.9795
Epoch 6/10
 - 7s - loss: 0.0261 - acc: 0.9933 - val_loss: 0.0688 - val_acc: 0.9784
Epoch 7/10
 - 7s - loss: 0.0193 - acc: 0.9952 - val_loss: 0.0629 - val_acc: 0.9798
Epoch 8/10
 - 7s - loss: 0.0138 - acc: 0.9969 - val_loss: 0.0633 - val_acc: 0.9812
Epoch 9/10
 - 7s - loss: 0.0107 - acc: 0.9977 - val_loss: 0.0585 - val_acc: 0.9817
Epoch 10/10
 - 7s - loss: 0.0083 - acc: 0.9985 - val_loss: 0.0622 - val_acc: 0.9816
Baseline Error: 1.84%


In [15]:
# scores[1] is the accuracy returned by model.evaluate; report it as a percentage.
accuracy_pct = scores[1] * 100
print("Accuracy: %.2f%%" % accuracy_pct)

Accuracy: 98.16%
