In [1]:
import tensorflow as tf
import keras
import numpy as np

In [2]:
from keras.datasets import mnist
# Load the MNIST train and test splits; each split is an (images, labels) pair.
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

In [3]:
def _as_scaled_image_batch(images):
    """Reshape `images` to (n, 28, 28, 1) and rescale to float32 by 1/255.

    The trailing axis of size 1 is the single channel expected by the
    Conv2D input. Dividing by 255.0 maps the values into [0, 1], assuming
    the raw pixels are 8-bit intensities (0-255) -- TODO confirm against
    the dataset documentation.
    """
    batch = images.reshape(images.shape[0], 28, 28, 1)
    return batch.astype('float32') / 255.0


# Apply the identical preprocessing to both splits.
X_train = _as_scaled_image_batch(train_images)
X_test = _as_scaled_image_batch(test_images)

In [4]:
from keras.layers import Input,Dense,Conv2D,MaxPool2D,Flatten
from keras.models import Model

In [5]:
# CNN example (also called a "convnet"), built with the Keras functional API.
# Shapes in the comments are the per-sample output shapes (batch axis omitted).
input_layer = Input(shape=(28, 28, 1))                      # 28 x 28 x 1

# Stage 1: 3x3 convolution with 32 filters, then 3x3 max pooling.
conv_layer1 = Conv2D(
    filters=32, kernel_size=(3, 3), activation="relu"
)(input_layer)                                              # 26 x 26 x 32
max_pool1 = MaxPool2D(pool_size=(3, 3))(conv_layer1)        # 8 x 8 x 32

# Stage 2: 3x3 convolution with 64 filters, then 2x2 max pooling.
conv_layer2 = Conv2D(
    filters=64, kernel_size=(3, 3), activation="relu"
)(max_pool1)                                                # 6 x 6 x 64
max_pool2 = MaxPool2D(pool_size=(2, 2))(conv_layer2)        # 3 x 3 x 64

# Stage 3: final 3x3 convolution with 128 filters (no pooling afterwards).
conv_layer3 = Conv2D(
    filters=128, kernel_size=(3, 3), activation="relu"
)(max_pool2)                                                # 1 x 1 x 128

# Classifier head: flatten to a vector and map it to 10 softmax outputs.
flatten_layer = Flatten()(conv_layer3)                      # 128
output_layer = Dense(units=10, activation="softmax")(flatten_layer)

model = Model(inputs=input_layer, outputs=output_layer)

In [9]:
# Print the layer-by-layer output shapes and parameter counts of the model.
model.summary()

# How the spatial size changes from layer to layer (no padding argument is passed to the layers):
# Kernel size = (3,3) ---> output size decreases from 28,28 to 26,26 (size - 3 + 1)
# Kernel size = (4,4) ---> output size would decrease from 28,28 to 25,25 (size - 4 + 1)
# Pool_size = 2 --> output dimensions get halved (6,6 -> 3,3)
# Pool_size = 3 --> output dimensions get divided by 3, taking the floor when the size is
#                   not divisible by 3 (26,26 -> 8,8) --> the same rule applies to other pool sizes


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 8, 8, 32)         0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 6, 6, 64)          18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 3, 3, 64)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 1, 1, 128)         73856 

In [7]:
# Configure training: integer class labels -> sparse categorical cross-entropy,
# RMSprop optimizer, and accuracy reported for each epoch.
# `metrics` is passed as a list, which is the documented form; newer Keras
# releases (Keras 3) reject a bare string such as metrics="accuracy".
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

In [8]:
# Train for 5 epochs in mini-batches of 64, with 20% of the training data
# set aside for validation. The returned History object is displayed as the
# cell's output.
model.fit(
    x=X_train,
    y=train_labels,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1166c6b84f0>

In [12]:
# Pooling performs downsampling, i.e. it converts the image (feature map) from a large size to a small size.
# Max pooling retrieves the most important (maximum) pixel value from each part of the image.
# Min pooling retrieves the least important (minimum) pixel value from each part of the image (not generally used).
# Average pooling retrieves the average of the pixel values from each part of the image (sometimes used).
# Strides can also be used for downsampling, but they are not generally used in classification tasks.
# Flatten converts each sample's 3D tensor into a 1D tensor so that it can be fed to the Dense layer.