<a href="https://colab.research.google.com/github/nrjcs/swym/blob/master/swym_mnist_conv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#....................................................................#
#  keras mnist digit classification with convolution neural network  #
#....................................................................#


# Load the dataset (same procedure as in the fully connected network example).
#
# Keras ships loaders for several standard datasets, including MNIST
# (Modified National Institute of Standards and Technology database),
# see http://yann.lecun.com/exdb/mnist/ :
#   - database of handwritten digits
#   - used extensively in optical character recognition and machine learning research
#   - training set of 60,000 examples and test set of 10,000 examples
#   - digits are size-normalized and centered in a fixed-size image
#   - single-channel images of 28 x 28 pixels

from keras.datasets import mnist

# mnist.load_data() is the Keras helper; it is unpacked here into
# (training images, training labels) and (test images, test labels).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

print("mnist data downloaded...")


In [0]:
# Print the shape of the training images: a single tuple of three numbers,
# (number of images, height, width), i.e. (60000, 28, 28).
print(X_train.shape)

In [0]:
import matplotlib.pyplot as plt

# Preview four digits in a 2 x 2 grid.
# plt.subplot(nrows, ncols, index) selects the panel to draw into; index counts
# from 1, left to right and top to bottom. The three-digit shorthand (e.g. 221)
# means the same as (2, 2, 1) and is only usable while every value is below 10.
# Feel free to experiment with the indices below.
# Comment this cell out if you do not want the preview.
preview_samples = [
    (X_train, 50),    # training image at index 50
    (X_train, 1304),  # training image at index 1304
    (X_test, 244),    # test image at index 244
    (X_test, 39),     # test image at index 39
]
for panel, (images, sample_index) in enumerate(preview_samples, start=1):
    plt.subplot(2, 2, panel)
    plt.imshow(images[sample_index], cmap=plt.get_cmap('gray'))

# show the plot
plt.show()


In [0]:
# Data preprocessing: reshape the images for Conv2D, scale the pixels and
# one-hot encode the labels.
import keras.utils

# Image geometry, network input shape and number of digit classes.
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)
num_classes = 10

# The raw images arrive as (samples, rows, cols). Add the trailing channel axis
# and scale the pixel values by 1/255 as float32.
# The sample count is taken from the array itself instead of being hard-coded.
# The ndim guard keeps the cell safe to re-run: without it a second execution
# would divide the already-scaled pixels by 255 again.
if X_train.ndim == 3:
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1).astype('float32') / 255
if X_test.ndim == 3:
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1).astype('float32') / 255

print('X_train shape:', X_train.shape)
print('x_test shape:', X_test.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices (one-hot encoding).
# Guarded for the same reason: encoding labels that are already one-hot
# would silently produce an array of the wrong shape.
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes)
if y_test.ndim == 1:
    y_test = keras.utils.to_categorical(y_test, num_classes)


In [0]:
# for reference reproducing from https://keras.io/layers/convolutional/

# keras.layers.Conv2D(filters, kernel_size, strides=(1, 1), padding='valid', 
    # activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', ..... )
  
# creates a convolution kernel that is convolved with the layer input to produce a tensor of outputs. 
# If use_bias is True, a bias vector is created and added to the outputs. 
# Finally, if activation is not None, it is applied to the outputs as well.

# if first layer, provide the keyword argument input_shape e.g. input_shape=(128, 128, 3) for 128x128 RGB

# filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution).
# kernel_size: An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window. 
    # Can be a single integer to specify the same value for all spatial dimensions.
# strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the height and width. 
    # Can be a single integer to specify the same value for all spatial dimensions.
# padding: one of "valid" or "same" (case-insensitive).
# activation: Activation function to use. If you don't specify anything, no activation is applied (ie. "linear" activation: a(x) = x).
# use_bias: Boolean, whether the layer uses a bias vector.
# kernel_initializer: Initializer for the kernel weights matrix.
# bias_initializer: Initializer for the bias vector.

## filters and kernel_size parameters are compulsory



# MaxPooling2D reproduced from https://keras.io/layers/pooling/
# keras.layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None)

# pool_size: integer or tuple of 2 integers, factors by which to downscale (vertical, horizontal). 
      # If only one integer is specified, the same window length will be used for both dimensions.
# strides: Integer, tuple of 2 integers, or None. Strides values. If None, it will default to pool_size.
# padding: One of "valid" or "same"
# data_format: A string, one of channels_last (default) or channels_first. The ordering of the dimensions in the inputs.  channels_last
        # corresponds to inputs with shape  (batch, height, width, channels) while channels_first corresponds to inputs with shape  
        # (batch, channels, height, width). It defaults to the image_data_format value found in your Keras config file at 
        # ~/.keras/keras.json. If you never set it, then it will be "channels_last".



In [0]:
# Define model architecture

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

# arch 1: two convolution layers, one max-pooling layer, then a softmax
# classifier on the flattened feature maps.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),  # flattens the input
    Dense(num_classes, activation='softmax'),
])

# arch 2: same convolution stack with dropout and an extra dense layer.
# To try it, comment out arch 1 above and uncomment the lines below.
#model = Sequential([
#    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
#    Conv2D(64, (3, 3), activation='relu'),
#    MaxPooling2D(pool_size=(2, 2)),
#    Dropout(0.25),
#    Flatten(),  # flattens the input
#    Dense(128, activation='relu'),
#    Dropout(0.5),
#    Dense(num_classes, activation='softmax'),
#])

print("keep going...")

In [0]:
# Compile the model: categorical cross-entropy loss (the labels are one-hot
# encoded), the Adadelta optimizer with its default settings, and accuracy
# reported as an additional metric.
# NOTE(review): Adadelta's default learning rate appears to differ between
# Keras releases; confirm the value for the installed version if training
# progresses unexpectedly slowly.
model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)

print("compile successful...")

In [0]:
# Training hyper-parameters.
batch_size = 128
epochs = 2

# Train the network. validation_split=0.2 sets aside 20% of the training data
# for validation; the returned History object is kept for the plots further down.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_split=0.2,
)

print("training done...")


In [0]:
# Evaluate the trained model on the test set. The first two entries of the
# result are reported as loss and accuracy respectively.
score = model.evaluate(X_test, y_test, verbose=1)
for score_label, score_value in zip(('loss:', 'accuracy:'), score):
    print(score_label, score_value)

In [0]:
# model.summary() prints the layer table itself and returns None, so wrapping
# it in print() only appends a stray "None" line to the output.
model.summary()

In [0]:
import matplotlib.pyplot as plt

# list all data in history
print(history.history.keys())

# The accuracy metric is stored under 'accuracy'/'val_accuracy' in newer Keras
# releases and under 'acc'/'val_acc' in older ones. Pick whichever key is
# present so the cell does not raise KeyError.
accuracy_key = 'accuracy' if 'accuracy' in history.history else 'acc'

# One figure per metric: training curve and validation curve.
# The second curve comes from the validation split held out during fit(),
# not from the test set, so it is labelled 'validation'.
for metric_key, metric_label in ((accuracy_key, 'accuracy'), ('loss', 'loss')):
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title('model ' + metric_label)
    plt.ylabel(metric_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

 Improving Performance
 
 > Different architecture provided in code
 
>  Number of layers

> Different layers: dropout etc.

> Different hyperparameters: number of filters, stride, and padding

> Different learning rate for optimizer

> batch size

> with different optimizers

> Training for a larger number of epochs

> Controlling the optimizer learning rate

> Increasing the size of batch computation




