In [96]:
# for Python2 compatibility
from __future__ import print_function  
 

In [97]:
import numpy 

# random numbers are reproducable, i.e we will get same random numbers whenever we run this notebook
numpy.random.seed(1337)

In [98]:
import keras
from keras.datasets import mnist

In [152]:
# number of classes will be 10, because we have digits from 0 to 9
num_classes = 10

# data will be fed to model in batch size of 128 each
batch_size = 128 

# showing the data to machine to learn, one round of it is called 1 epoch, we can have multiple epochs
epochs = 10

In [153]:
# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [154]:
#we have 60,000 data points in training data and 10,000 in test
#the size of the image here is 28 by 28

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)


(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)


In [155]:
# and we want to change it to a flat vector, which will be (28 * 28 = 784 pixels)
x_train = x_train.reshape(60000, 784)  
x_test = x_test.reshape(10000, 784)

# changing the type to float32
x_train = x_train.astype('float32') 
x_test = x_test.astype('float32') 

# current range of data is between 0-255
# normalizing to 0-1
x_train /= 255  # 0 - 255 ...  0-1 
x_test /= 255   # normalizing your data 


print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

60000 train samples
10000 test samples


In [156]:
print(x_train.shape)
print(y_train.shape)

(60000, 784)
(60000,)


In [157]:
x_train[0]


array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [158]:
y_train[0]

5

In [159]:
# convert class vectors to binary class matrices
# example 5 -> [0,0,0,0,1,0,0,0,0]
# this is easy for machine to understand

y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes) 

In [160]:
# here we captured the multi classs - classification
y_train[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

That's called a one-hot vector. 

In [161]:
# importing sequential model
from keras.models import Sequential

# importing dense layer (hidden layer 1 can be considered dense layer, bcz everything is connected to everything)
from keras.layers import Dense

# SGD optimizer will change model parameters to predict better results
from keras.optimizers import SGD
model = Sequential()

In [175]:
# we will be adding layers in the sequential model

# Dense layer, 512 nodes, we have to put a comma after 784, so that it is considered as a tuple
model.add( Dense(512, activation='sigmoid', input_shape=(784,)  ) )

# input shape is not required, bcz keras can automatically decide from previous layer, what this layer is gonna get
model.add( Dense(512, activation='sigmoid'  ) )

# final layer, nodes will be equal to number of classes i.e 10
model.add( Dense(num_classes, activation='softmax'))

In [176]:
# What does the model look like? 
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_25 (Dense)             (None, 512)               401920    
_________________________________________________________________
dense_26 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_27 (Dense)             (None, 10)                5130      
_________________________________________________________________
dense_28 (Dense)             (None, 512)               5632      
_________________________________________________________________
dense_29 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_30 (Dense)             (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________


In [177]:
# Compile the model 
model.compile(loss='categorical_crossentropy', optimizer=SGD(), 
              metrics=['accuracy'])

In [178]:
# let's perform the learning 
history = model.fit( x_train, y_train, 
           batch_size=batch_size,
           epochs=epochs, 
           verbose=1, 
           validation_data=(x_test, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [179]:
# Let's evaluate the model 
score = model.evaluate(x_test, y_test)



In [180]:
# loss
score[0]

2.2034350791931154

In [174]:
# accuracy
score[1]

0.9026